From 8b40f475a31109cc6ecbdc0d14a0cee9e0303291 Mon Sep 17 00:00:00 2001
From: Nuullll
Date: Fri, 10 Nov 2023 11:06:26 +0800
Subject: [PATCH 1/4] Initial IPEX support

---
 modules/devices.py      | 11 +++++++++--
 modules/xpu_specific.py | 42 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 2 deletions(-)
 create mode 100644 modules/xpu_specific.py

diff --git a/modules/devices.py b/modules/devices.py
index 1d4eb5635..be599736c 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -3,7 +3,7 @@ import contextlib
 from functools import lru_cache

 import torch
-from modules import errors, shared
+from modules import errors, shared, xpu_specific

 if sys.platform == "darwin":
     from modules import mac_specific
@@ -30,6 +30,9 @@ def get_optimal_device_name():
     if has_mps():
         return "mps"

+    if xpu_specific.has_ipex:
+        return xpu_specific.get_xpu_device_string()
+
     return "cpu"


@@ -100,11 +103,15 @@ def autocast(disable=False):
     if dtype == torch.float32 or shared.cmd_opts.precision == "full":
         return contextlib.nullcontext()

+    if xpu_specific.has_xpu:
+        return torch.autocast("xpu")
+
     return torch.autocast("cuda")


 def without_autocast(disable=False):
-    return torch.autocast("cuda", enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext()
+    device_type = "xpu" if xpu_specific.has_xpu else "cuda"
+    return torch.autocast(device_type, enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext()


 class NansException(Exception):
diff --git a/modules/xpu_specific.py b/modules/xpu_specific.py
new file mode 100644
index 000000000..6417dd2d6
--- /dev/null
+++ b/modules/xpu_specific.py
@@ -0,0 +1,42 @@
+import contextlib
+from modules import shared
+from modules.sd_hijack_utils import CondFunc
+
+has_ipex = False
+try:
+    import torch
+    import intel_extension_for_pytorch as ipex
+    has_ipex = True
+except Exception:
+    pass
+
+def check_for_xpu():
+    if not has_ipex:
+        return False
+
+    return hasattr(torch, 'xpu') and torch.xpu.is_available()
+
+has_xpu = check_for_xpu()
+
+def get_xpu_device_string():
+    if shared.cmd_opts.device_id is not None:
+        return f"xpu:{shared.cmd_opts.device_id}"
+    return "xpu"
+
+def return_null_context(*args, **kwargs): # pylint: disable=unused-argument
+    return contextlib.nullcontext()
+
+if has_xpu:
+    CondFunc('torch.Generator',
+        lambda orig_func, device=None: torch.xpu.Generator(device),
+        lambda orig_func, device=None: device is not None and device != torch.device("cpu") and device != "cpu")
+
+    CondFunc('torch.nn.functional.layer_norm',
+        lambda orig_func, input, normalized_shape=None, weight=None, *args, **kwargs:
+        orig_func(input.to(weight.data.dtype), normalized_shape, weight, *args, **kwargs),
+        lambda orig_func, input, normalized_shape=None, weight=None, *args, **kwargs:
+        weight is not None and input.dtype != weight.data.dtype)
+
+    CondFunc('torch.nn.modules.GroupNorm.forward',
+        lambda orig_func, self, input: orig_func(self, input.to(self.weight.data.dtype)),
+        lambda orig_func, self, input: input.dtype != self.weight.data.dtype)

From 7499148ad4dbd3444215c843d02453f68c459707 Mon Sep 17 00:00:00 2001
From: Nuullll
Date: Sat, 2 Dec 2023 14:00:46 +0800
Subject: [PATCH 2/4] Disable ipex autocast due to its bad perf

---
 modules/cmd_args.py     |  1 +
 modules/devices.py      | 20 +++++++++++++-------
 modules/xpu_specific.py | 28 ++++++++++++++++++----------
 webui-ipex-user.bat     | 19 +++++++++++++++++++
 4 files changed, 51 insertions(+), 17 deletions(-)
 create mode 100644 webui-ipex-user.bat

diff --git a/modules/cmd_args.py b/modules/cmd_args.py
index a9fb9bfa3..da93eb266 100644
--- a/modules/cmd_args.py
+++ b/modules/cmd_args.py
@@ -70,6 +70,7 @@ parser.add_argument("--opt-sdp-no-mem-attention", action='store_true', help="pre
 parser.add_argument("--disable-opt-split-attention", action='store_true', help="prefer no cross-attention layer optimization for automatic choice of optimization")
 parser.add_argument("--disable-nan-check", action='store_true', help="do not check if produced images/latent spaces have nans; useful for running without a checkpoint in CI")
 parser.add_argument("--use-cpu", nargs='+', help="use CPU as torch device for specified modules", default=[], type=str.lower)
+parser.add_argument("--use-ipex", action="store_true", help="use Intel XPU as torch device")
 parser.add_argument("--disable-model-loading-ram-optimization", action='store_true', help="disable an optimization that reduces RAM use when loading a model")
 parser.add_argument("--listen", action='store_true', help="launch gradio with 0.0.0.0 as server name, allowing to respond to network requests")
 parser.add_argument("--port", type=int, help="launch gradio with given server port, you need root/admin rights for ports < 1024, defaults to 7860 if available", default=None)
diff --git a/modules/devices.py b/modules/devices.py
index be599736c..37ecca784 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -3,11 +3,18 @@ import contextlib
 from functools import lru_cache

 import torch
-from modules import errors, shared, xpu_specific
+from modules import errors, shared

 if sys.platform == "darwin":
     from modules import mac_specific

+if shared.cmd_opts.use_ipex:
+    from modules import xpu_specific
+
+
+def has_xpu() -> bool:
+    return shared.cmd_opts.use_ipex and xpu_specific.has_xpu
+

 def has_mps() -> bool:
     if sys.platform != "darwin":
@@ -30,7 +37,7 @@ def get_optimal_device_name():
     if has_mps():
         return "mps"

-    if xpu_specific.has_ipex:
+    if has_xpu():
         return xpu_specific.get_xpu_device_string()

     return "cpu"
@@ -57,6 +64,9 @@ def torch_gc():
     if has_mps():
         mac_specific.torch_mps_gc()

+    if has_xpu():
+        xpu_specific.torch_xpu_gc()
+

 def enable_tf32():
     if torch.cuda.is_available():
@@ -103,15 +113,11 @@ def autocast(disable=False):
     if dtype == torch.float32 or shared.cmd_opts.precision == "full":
         return contextlib.nullcontext()

-    if xpu_specific.has_xpu:
-        return torch.autocast("xpu")
-
     return torch.autocast("cuda")


 def without_autocast(disable=False):
-    device_type = "xpu" if xpu_specific.has_xpu else "cuda"
-    return torch.autocast(device_type, enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext()
+    return torch.autocast("cuda", enabled=False) if torch.is_autocast_enabled() and not disable else contextlib.nullcontext()


 class NansException(Exception):
diff --git a/modules/xpu_specific.py b/modules/xpu_specific.py
index 6417dd2d6..2df68665a 100644
--- a/modules/xpu_specific.py
+++ b/modules/xpu_specific.py
@@ -1,4 +1,3 @@
-import contextlib
 from modules import shared
 from modules.sd_hijack_utils import CondFunc

@@ -10,33 +9,42 @@ try:
 except Exception:
     pass

+
 def check_for_xpu():
-    if not has_ipex:
-        return False
+    return has_ipex and hasattr(torch, 'xpu') and torch.xpu.is_available()

-    return hasattr(torch, 'xpu') and torch.xpu.is_available()
-
-has_xpu = check_for_xpu()

 def get_xpu_device_string():
     if shared.cmd_opts.device_id is not None:
         return f"xpu:{shared.cmd_opts.device_id}"
     return "xpu"

-def return_null_context(*args, **kwargs): # pylint: disable=unused-argument
-    return contextlib.nullcontext()
+
+def torch_xpu_gc():
+    with torch.xpu.device(get_xpu_device_string()):
+        torch.xpu.empty_cache()
+
+
+has_xpu = check_for_xpu()

 if has_xpu:
+    # W/A for https://github.com/intel/intel-extension-for-pytorch/issues/452: torch.Generator API doesn't support XPU device
     CondFunc('torch.Generator',
         lambda orig_func, device=None: torch.xpu.Generator(device),
-        lambda orig_func, device=None: device is not None and device != torch.device("cpu") and device != "cpu")
+        lambda orig_func, device=None: device is not None and device.type == "xpu")

+    # W/A for some OPs that could not handle different input dtypes
     CondFunc('torch.nn.functional.layer_norm',
         lambda orig_func, input, normalized_shape=None, weight=None, *args, **kwargs:
         orig_func(input.to(weight.data.dtype), normalized_shape, weight, *args, **kwargs),
         lambda orig_func, input, normalized_shape=None, weight=None, *args, **kwargs:
         weight is not None and input.dtype != weight.data.dtype)
-
     CondFunc('torch.nn.modules.GroupNorm.forward',
         lambda orig_func, self, input: orig_func(self, input.to(self.weight.data.dtype)),
         lambda orig_func, self, input: input.dtype != self.weight.data.dtype)
+    CondFunc('torch.nn.modules.linear.Linear.forward',
+        lambda orig_func, self, input: orig_func(self, input.to(self.weight.data.dtype)),
+        lambda orig_func, self, input: input.dtype != self.weight.data.dtype)
+    CondFunc('torch.nn.modules.conv.Conv2d.forward',
+        lambda orig_func, self, input: orig_func(self, input.to(self.weight.data.dtype)),
+        lambda orig_func, self, input: input.dtype != self.weight.data.dtype)
diff --git a/webui-ipex-user.bat b/webui-ipex-user.bat
new file mode 100644
index 000000000..ab25a0400
--- /dev/null
+++ b/webui-ipex-user.bat
@@ -0,0 +1,19 @@
+@echo off
+
+set PYTHON=
+@REM The "Nuullll/intel-extension-for-pytorch" wheels were built from IPEX source for Intel Arc GPU: https://github.com/intel/intel-extension-for-pytorch/tree/xpu-main
+@REM This is NOT an Intel official release so please use it at your own risk!!
+@REM See https://github.com/Nuullll/intel-extension-for-pytorch/releases/tag/v2.0.110%2Bxpu-master%2Bdll-bundle for details.
+@REM
+@REM Strengths (over official IPEX 2.0.110 windows release):
+@REM - AOT build (for Arc GPU only) to eliminate JIT compilation overhead: https://github.com/intel/intel-extension-for-pytorch/issues/399
+@REM - Bundles minimal oneAPI 2023.2 dependencies into the python wheels, so users don't need to install oneAPI for the whole system.
+@REM - Provides a compatible torchvision wheel: https://github.com/intel/intel-extension-for-pytorch/issues/465
+@REM Limitation:
+@REM - Only works for python 3.10
+set "TORCH_COMMAND=pip install https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.0.110%%2Bxpu-master%%2Bdll-bundle/torch-2.0.0a0+gite9ebda2-cp310-cp310-win_amd64.whl https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.0.110%%2Bxpu-master%%2Bdll-bundle/torchvision-0.15.2a0+fa99a53-cp310-cp310-win_amd64.whl https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.0.110%%2Bxpu-master%%2Bdll-bundle/intel_extension_for_pytorch-2.0.110+gitc6ea20b-cp310-cp310-win_amd64.whl"
+set GIT=
+set VENV_DIR=
+set "COMMANDLINE_ARGS=--use-ipex --skip-torch-cuda-test --skip-version-check --opt-sdp-attention"
+
+call webui.bat

From 87cd07b3af74c447b02570bf3963ba83ade2e203 Mon Sep 17 00:00:00 2001
From: Nuullll
Date: Sat, 2 Dec 2023 15:54:25 +0800
Subject: [PATCH 3/4] Fix fp64

---
 modules/sd_samplers_timesteps_impl.py | 4 ++--
 modules/xpu_specific.py               | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/sd_samplers_timesteps_impl.py b/modules/sd_samplers_timesteps_impl.py
index a72daafd4..930a64af5 100644
--- a/modules/sd_samplers_timesteps_impl.py
+++ b/modules/sd_samplers_timesteps_impl.py
@@ -11,7 +11,7 @@ from modules.models.diffusion.uni_pc import uni_pc
 def ddim(model, x, timesteps, extra_args=None, callback=None, disable=None, eta=0.0):
     alphas_cumprod = model.inner_model.inner_model.alphas_cumprod
     alphas = alphas_cumprod[timesteps]
-    alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(torch.float64 if x.device.type != 'mps' else torch.float32)
+    alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(torch.float64 if x.device.type != 'mps' and x.device.type != 'xpu' else torch.float32)
     sqrt_one_minus_alphas = torch.sqrt(1 - alphas)
     sigmas = eta * np.sqrt((1 - alphas_prev.cpu().numpy()) / (1 - alphas.cpu()) * (1 - alphas.cpu() / alphas_prev.cpu().numpy()))

@@ -43,7 +43,7 @@ def ddim(model, x, timesteps, extra_args=None, callback=None, disable=None, eta=
 def plms(model, x, timesteps, extra_args=None, callback=None, disable=None):
     alphas_cumprod = model.inner_model.inner_model.alphas_cumprod
     alphas = alphas_cumprod[timesteps]
-    alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(torch.float64 if x.device.type != 'mps' else torch.float32)
+    alphas_prev = alphas_cumprod[torch.nn.functional.pad(timesteps[:-1], pad=(1, 0))].to(torch.float64 if x.device.type != 'mps' and x.device.type != 'xpu' else torch.float32)
     sqrt_one_minus_alphas = torch.sqrt(1 - alphas)
     extra_args = {} if extra_args is None else extra_args

diff --git a/modules/xpu_specific.py b/modules/xpu_specific.py
index 2df68665a..d933c7903 100644
--- a/modules/xpu_specific.py
+++ b/modules/xpu_specific.py
@@ -4,7 +4,7 @@ from modules.sd_hijack_utils import CondFunc
 has_ipex = False
 try:
     import torch
-    import intel_extension_for_pytorch as ipex
+    import intel_extension_for_pytorch as ipex # noqa: F401
     has_ipex = True
 except Exception:
     pass

From 96871e4f744471177d97e01c49f8587d7f67c125 Mon Sep 17 00:00:00 2001
From: Nuullll
Date: Sat, 2 Dec 2023 17:11:11 +0800
Subject: [PATCH 4/4] Remove webui-ipex-user.bat

---
 modules/launch_utils.py | 22 ++++++++++++++++++++++
 webui-ipex-user.bat     | 19 -------------------
 2 files changed, 22 insertions(+), 19 deletions(-)
 delete mode 100644 webui-ipex-user.bat

diff --git a/modules/launch_utils.py b/modules/launch_utils.py
index 264ec9ca6..586cdc7eb 100644
--- a/modules/launch_utils.py
+++ b/modules/launch_utils.py
@@ -310,6 +310,26 @@ def requirements_met(requirements_file):
 def prepare_environment():
     torch_index_url = os.environ.get('TORCH_INDEX_URL', "https://download.pytorch.org/whl/cu118")
     torch_command = os.environ.get('TORCH_COMMAND', f"pip install torch==2.0.1 torchvision==0.15.2 --extra-index-url {torch_index_url}")
+    if args.use_ipex:
+        if platform.system() == "Windows":
+            # The "Nuullll/intel-extension-for-pytorch" wheels were built from IPEX source for Intel Arc GPU: https://github.com/intel/intel-extension-for-pytorch/tree/xpu-main
+            # This is NOT an Intel official release so please use it at your own risk!!
+            # See https://github.com/Nuullll/intel-extension-for-pytorch/releases/tag/v2.0.110%2Bxpu-master%2Bdll-bundle for details.
+            #
+            # Strengths (over official IPEX 2.0.110 windows release):
+            # - AOT build (for Arc GPU only) to eliminate JIT compilation overhead: https://github.com/intel/intel-extension-for-pytorch/issues/399
+            # - Bundles minimal oneAPI 2023.2 dependencies into the python wheels, so users don't need to install oneAPI for the whole system.
+            # - Provides a compatible torchvision wheel: https://github.com/intel/intel-extension-for-pytorch/issues/465
+            # Limitation:
+            # - Only works for python 3.10
+            url_prefix = "https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.0.110%2Bxpu-master%2Bdll-bundle"
+            torch_command = os.environ.get('TORCH_COMMAND', f"pip install {url_prefix}/torch-2.0.0a0+gite9ebda2-cp310-cp310-win_amd64.whl {url_prefix}/torchvision-0.15.2a0+fa99a53-cp310-cp310-win_amd64.whl {url_prefix}/intel_extension_for_pytorch-2.0.110+gitc6ea20b-cp310-cp310-win_amd64.whl")
+        else:
+            # Using official IPEX release for linux since it's already an AOT build.
+            # However, users still have to install oneAPI toolkit and activate oneAPI environment manually.
+            # See https://intel.github.io/intel-extension-for-pytorch/index.html#installation for details.
+            torch_index_url = os.environ.get('TORCH_INDEX_URL', "https://pytorch-extension.intel.com/release-whl/stable/xpu/us/")
+            torch_command = os.environ.get('TORCH_COMMAND', f"pip install torch==2.0.0a0 intel-extension-for-pytorch==2.0.110+gitba7f6c1 --extra-index-url {torch_index_url}")
     requirements_file = os.environ.get('REQS_FILE', "requirements_versions.txt")

     xformers_package = os.environ.get('XFORMERS_PACKAGE', 'xformers==0.0.20')
@@ -352,6 +372,8 @@ def prepare_environment():
     run(f'"{python}" -m {torch_command}', "Installing torch and torchvision", "Couldn't install torch", live=True)
     startup_timer.record("install torch")

+    if args.use_ipex:
+        args.skip_torch_cuda_test = True
     if not args.skip_torch_cuda_test and not check_run_python("import torch; assert torch.cuda.is_available()"):
         raise RuntimeError(
             'Torch is not able to use GPU; '
diff --git a/webui-ipex-user.bat b/webui-ipex-user.bat
deleted file mode 100644
index ab25a0400..000000000
--- a/webui-ipex-user.bat
+++ /dev/null
@@ -1,19 +0,0 @@
-@echo off
-
-set PYTHON=
-@REM The "Nuullll/intel-extension-for-pytorch" wheels were built from IPEX source for Intel Arc GPU: https://github.com/intel/intel-extension-for-pytorch/tree/xpu-main
-@REM This is NOT an Intel official release so please use it at your own risk!!
-@REM See https://github.com/Nuullll/intel-extension-for-pytorch/releases/tag/v2.0.110%2Bxpu-master%2Bdll-bundle for details.
-@REM
-@REM Strengths (over official IPEX 2.0.110 windows release):
-@REM - AOT build (for Arc GPU only) to eliminate JIT compilation overhead: https://github.com/intel/intel-extension-for-pytorch/issues/399
-@REM - Bundles minimal oneAPI 2023.2 dependencies into the python wheels, so users don't need to install oneAPI for the whole system.
-@REM - Provides a compatible torchvision wheel: https://github.com/intel/intel-extension-for-pytorch/issues/465
-@REM Limitation:
-@REM - Only works for python 3.10
-set "TORCH_COMMAND=pip install https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.0.110%%2Bxpu-master%%2Bdll-bundle/torch-2.0.0a0+gite9ebda2-cp310-cp310-win_amd64.whl https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.0.110%%2Bxpu-master%%2Bdll-bundle/torchvision-0.15.2a0+fa99a53-cp310-cp310-win_amd64.whl https://github.com/Nuullll/intel-extension-for-pytorch/releases/download/v2.0.110%%2Bxpu-master%%2Bdll-bundle/intel_extension_for_pytorch-2.0.110+gitc6ea20b-cp310-cp310-win_amd64.whl"
-set GIT=
-set VENV_DIR=
-set "COMMANDLINE_ARGS=--use-ipex --skip-torch-cuda-test --skip-version-check --opt-sdp-attention"
-
-call webui.bat
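
Note on the mechanism: every workaround in this series is registered through modules.sd_hijack_utils.CondFunc, whose implementation is not part of these patches. The sketch below is a minimal, self-contained illustration of the semantics the patches rely on: CondFunc(path, sub_func, cond_func) patches the callable at `path` so that sub_func(orig_func, ...) handles a call whenever cond_func(orig_func, ...) is truthy, and the original callable handles it otherwise. The helper name cond_func_patch and the math.sqrt example are hypothetical, not code from the repository.

import math

def cond_func_patch(owner, attr_name, sub_func, cond_func):
    # Replace owner.attr_name with a wrapper that routes each call to
    # sub_func(orig_func, ...) when cond_func(orig_func, ...) is truthy
    # and falls through to the original callable otherwise.
    orig_func = getattr(owner, attr_name)

    def wrapper(*args, **kwargs):
        if cond_func(orig_func, *args, **kwargs):
            return sub_func(orig_func, *args, **kwargs)
        return orig_func(*args, **kwargs)

    setattr(owner, attr_name, wrapper)

# Mirrors the dtype workarounds above: fix up the argument only when it
# would otherwise mismatch, then delegate to the original function.
cond_func_patch(
    math, 'sqrt',
    lambda orig, x: orig(float(x)),      # coerce the input, then delegate
    lambda orig, x: isinstance(x, str),  # condition: patch only str inputs
)

print(math.sqrt("9"))  # 3.0, via the patched path
print(math.sqrt(16))   # 4.0, via the original function

This is the same shape as the layer_norm, GroupNorm, Linear, and Conv2d hooks in patch 2: the condition checks for a dtype mismatch against the module's weights, and the substitute casts the input before calling through.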