From bdaa36c84470adbdce3e98c01a69af5e95adfb02 Mon Sep 17 00:00:00 2001 From: brkirch Date: Fri, 30 Sep 2022 23:53:25 -0400 Subject: [PATCH 1/4] When device is MPS, use CPU for GFPGAN instead GFPGAN will not work if the device is MPS, so default to CPU instead. --- modules/devices.py | 2 +- modules/gfpgan_model.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/devices.py b/modules/devices.py index 07bb23397..08bb26d6f 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -34,7 +34,7 @@ errors.run(enable_tf32, "Enabling TF32") device = get_optimal_device() -device_codeformer = cpu if has_mps else device +device_gfpgan = device_codeformer = cpu if device.type == 'mps' else device def randn(seed, shape): diff --git a/modules/gfpgan_model.py b/modules/gfpgan_model.py index bb30d7330..fcd8544a5 100644 --- a/modules/gfpgan_model.py +++ b/modules/gfpgan_model.py @@ -21,7 +21,7 @@ def gfpgann(): global loaded_gfpgan_model global model_path if loaded_gfpgan_model is not None: - loaded_gfpgan_model.gfpgan.to(shared.device) + loaded_gfpgan_model.gfpgan.to(devices.device_gfpgan) return loaded_gfpgan_model if gfpgan_constructor is None: @@ -36,8 +36,8 @@ def gfpgann(): else: print("Unable to load gfpgan model!") return None - model = gfpgan_constructor(model_path=model_file, upscale=1, arch='clean', channel_multiplier=2, bg_upsampler=None) - model.gfpgan.to(shared.device) + model = gfpgan_constructor(model_path=model_file, upscale=1, arch='clean', channel_multiplier=2, bg_upsampler=None, device=devices.device_gfpgan) + model.gfpgan.to(devices.device_gfpgan) loaded_gfpgan_model = model return model From eeab7aedf532680a6ae9058ee272450bb07e41eb Mon Sep 17 00:00:00 2001 From: brkirch Date: Tue, 4 Oct 2022 04:24:35 -0400 Subject: [PATCH 2/4] Add --use-cpu command line option Remove MPS detection to use CPU for GFPGAN / CodeFormer and add a --use-cpu command line option. 
--- modules/devices.py | 5 ++--- modules/esrgan_model.py | 9 ++++----- modules/scunet_model.py | 8 ++++---- modules/shared.py | 9 +++++++-- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/modules/devices.py b/modules/devices.py index 5d9c7a076..b5a0cd29e 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -1,8 +1,8 @@ import torch -# has_mps is only available in nightly pytorch (for now), `getattr` for compatibility from modules import errors +# has_mps is only available in nightly pytorch (for now), `getattr` for compatibility has_mps = getattr(torch, 'has_mps', False) cpu = torch.device("cpu") @@ -32,8 +32,7 @@ def enable_tf32(): errors.run(enable_tf32, "Enabling TF32") -device = get_optimal_device() -device_gfpgan = device_codeformer = cpu if device.type == 'mps' else device +device = device_gfpgan = device_esrgan = device_scunet = device_codeformer = get_optimal_device() dtype = torch.float16 def randn(seed, shape): diff --git a/modules/esrgan_model.py b/modules/esrgan_model.py index 4aed9283c..d17e730f9 100644 --- a/modules/esrgan_model.py +++ b/modules/esrgan_model.py @@ -6,8 +6,7 @@ from PIL import Image from basicsr.utils.download_util import load_file_from_url import modules.esrgam_model_arch as arch -from modules import shared, modelloader, images -from modules.devices import has_mps +from modules import shared, modelloader, images, devices from modules.paths import models_path from modules.upscaler import Upscaler, UpscalerData from modules.shared import opts @@ -97,7 +96,7 @@ class UpscalerESRGAN(Upscaler): model = self.load_model(selected_model) if model is None: return img - model.to(shared.device) + model.to(devices.device_esrgan) img = esrgan_upscale(model, img) return img @@ -112,7 +111,7 @@ class UpscalerESRGAN(Upscaler): print("Unable to load %s from %s" % (self.model_path, filename)) return None - pretrained_net = torch.load(filename, map_location='cpu' if has_mps else None) + pretrained_net = torch.load(filename, 
map_location='cpu' if shared.device.type == 'mps' else None) crt_model = arch.RRDBNet(3, 3, 64, 23, gc=32) pretrained_net = fix_model_layers(crt_model, pretrained_net) @@ -127,7 +126,7 @@ def upscale_without_tiling(model, img): img = img[:, :, ::-1] img = np.moveaxis(img, 2, 0) / 255 img = torch.from_numpy(img).float() - img = img.unsqueeze(0).to(shared.device) + img = img.unsqueeze(0).to(devices.device_esrgan) with torch.no_grad(): output = model(img) output = output.squeeze().float().cpu().clamp_(0, 1).numpy() diff --git a/modules/scunet_model.py b/modules/scunet_model.py index 7987ac145..fb64b7409 100644 --- a/modules/scunet_model.py +++ b/modules/scunet_model.py @@ -8,7 +8,7 @@ import torch from basicsr.utils.download_util import load_file_from_url import modules.upscaler -from modules import shared, modelloader +from modules import devices, modelloader from modules.paths import models_path from modules.scunet_model_arch import SCUNet as net @@ -51,12 +51,12 @@ class UpscalerScuNET(modules.upscaler.Upscaler): if model is None: return img - device = shared.device + device = devices.device_scunet img = np.array(img) img = img[:, :, ::-1] img = np.moveaxis(img, 2, 0) / 255 img = torch.from_numpy(img).float() - img = img.unsqueeze(0).to(shared.device) + img = img.unsqueeze(0).to(device) img = img.to(device) with torch.no_grad(): @@ -69,7 +69,7 @@ class UpscalerScuNET(modules.upscaler.Upscaler): return PIL.Image.fromarray(output, 'RGB') def load_model(self, path: str): - device = shared.device + device = devices.device_scunet if "http" in path: filename = load_file_from_url(url=self.model_url, model_dir=self.model_path, file_name="%s.pth" % self.name, progress=True) diff --git a/modules/shared.py b/modules/shared.py index 2a599e9cf..7899ab8d1 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -12,7 +12,7 @@ import modules.interrogate import modules.memmon import modules.sd_models import modules.styles -from modules.devices import get_optimal_device +import 
modules.devices as devices from modules.paths import script_path, sd_path sd_model_file = os.path.join(script_path, 'model.ckpt') @@ -46,6 +46,7 @@ parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with parser.add_argument("--opt-split-attention", action='store_true', help="force-enables cross-attention layer optimization. By default, it's on for torch.cuda and off for other torch devices.") parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization") parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find") +parser.add_argument("--use-cpu", nargs='+',choices=['SD', 'GFPGAN', 'ESRGAN', 'SCUNet', 'CodeFormer'], help="use CPU for specified modules", default=[]) parser.add_argument("--listen", action='store_true', help="launch gradio with 0.0.0.0 as server name, allowing to respond to network requests") parser.add_argument("--port", type=int, help="launch gradio with given server port, you need root/admin rights for ports < 1024, defaults to 7860 if available", default=None) parser.add_argument("--show-negative-prompt", action='store_true', help="does not do anything", default=False) @@ -63,7 +64,11 @@ parser.add_argument("--enable-console-prompts", action='store_true', help="print cmd_opts = parser.parse_args() -device = get_optimal_device() + +devices.device, devices.device_gfpgan, devices.device_esrgan, devices.device_scunet, devices.device_codeformer = \ +(devices.cpu if x in cmd_opts.use_cpu else devices.get_optimal_device() for x in ['SD', 'GFPGAN', 'ESRGAN', 'SCUNet', 'CodeFormer']) + +device = devices.device batch_cond_uncond = cmd_opts.always_batch_cond_uncond or not (cmd_opts.lowvram or cmd_opts.medvram) parallel_processing_allowed = not cmd_opts.lowvram and not cmd_opts.medvram From 27ddc24fdee1fbe709054a43235ab7f9c51b3e9f Mon Sep 17 00:00:00 
2001 From: brkirch Date: Tue, 4 Oct 2022 05:18:17 -0400 Subject: [PATCH 3/4] Add BSRGAN to --use-cpu --- modules/bsrgan_model.py | 6 +++--- modules/devices.py | 2 +- modules/shared.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/bsrgan_model.py b/modules/bsrgan_model.py index e62c66577..3bd80791a 100644 --- a/modules/bsrgan_model.py +++ b/modules/bsrgan_model.py @@ -8,7 +8,7 @@ import torch from basicsr.utils.download_util import load_file_from_url import modules.upscaler -from modules import shared, modelloader +from modules import devices, modelloader from modules.bsrgan_model_arch import RRDBNet from modules.paths import models_path @@ -44,13 +44,13 @@ class UpscalerBSRGAN(modules.upscaler.Upscaler): model = self.load_model(selected_file) if model is None: return img - model.to(shared.device) + model.to(devices.device_bsrgan) torch.cuda.empty_cache() img = np.array(img) img = img[:, :, ::-1] img = np.moveaxis(img, 2, 0) / 255 img = torch.from_numpy(img).float() - img = img.unsqueeze(0).to(shared.device) + img = img.unsqueeze(0).to(devices.device_bsrgan) with torch.no_grad(): output = model(img) output = output.squeeze().float().cpu().clamp_(0, 1).numpy() diff --git a/modules/devices.py b/modules/devices.py index b5a0cd29e..b78996322 100644 --- a/modules/devices.py +++ b/modules/devices.py @@ -32,7 +32,7 @@ def enable_tf32(): errors.run(enable_tf32, "Enabling TF32") -device = device_gfpgan = device_esrgan = device_scunet = device_codeformer = get_optimal_device() +device = device_gfpgan = device_bsrgan = device_esrgan = device_scunet = device_codeformer = get_optimal_device() dtype = torch.float16 def randn(seed, shape): diff --git a/modules/shared.py b/modules/shared.py index 7899ab8d1..95b98a06e 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -46,7 +46,7 @@ parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with parser.add_argument("--opt-split-attention", action='store_true', 
help="force-enables cross-attention layer optimization. By default, it's on for torch.cuda and off for other torch devices.") parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization") parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find") -parser.add_argument("--use-cpu", nargs='+',choices=['SD', 'GFPGAN', 'ESRGAN', 'SCUNet', 'CodeFormer'], help="use CPU for specified modules", default=[]) +parser.add_argument("--use-cpu", nargs='+',choices=['SD', 'GFPGAN', 'BSRGAN', 'ESRGAN', 'SCUNet', 'CodeFormer'], help="use CPU for specified modules", default=[]) parser.add_argument("--listen", action='store_true', help="launch gradio with 0.0.0.0 as server name, allowing to respond to network requests") parser.add_argument("--port", type=int, help="launch gradio with given server port, you need root/admin rights for ports < 1024, defaults to 7860 if available", default=None) parser.add_argument("--show-negative-prompt", action='store_true', help="does not do anything", default=False) @@ -65,8 +65,8 @@ parser.add_argument("--enable-console-prompts", action='store_true', help="print cmd_opts = parser.parse_args() -devices.device, devices.device_gfpgan, devices.device_esrgan, devices.device_scunet, devices.device_codeformer = \ -(devices.cpu if x in cmd_opts.use_cpu else devices.get_optimal_device() for x in ['SD', 'GFPGAN', 'ESRGAN', 'SCUNet', 'CodeFormer']) +devices.device, devices.device_gfpgan, devices.device_bsrgan, devices.device_esrgan, devices.device_scunet, devices.device_codeformer = \ +(devices.cpu if x in cmd_opts.use_cpu else devices.get_optimal_device() for x in ['SD', 'GFPGAN', 'BSRGAN', 'ESRGAN', 'SCUNet', 'CodeFormer']) device = devices.device From dc9c5a97742e3a34d37da7108642d8adc0dc5858 Mon Sep 17 00:00:00 2001 From: brkirch Date: Tue, 4 Oct 2022 05:22:50 -0400 
Subject: [PATCH 4/4] Modify --use-cpu description --- modules/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/shared.py b/modules/shared.py index 95b98a06e..25aff5b0e 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -46,7 +46,7 @@ parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with parser.add_argument("--opt-split-attention", action='store_true', help="force-enables cross-attention layer optimization. By default, it's on for torch.cuda and off for other torch devices.") parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization") parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find") -parser.add_argument("--use-cpu", nargs='+',choices=['SD', 'GFPGAN', 'BSRGAN', 'ESRGAN', 'SCUNet', 'CodeFormer'], help="use CPU for specified modules", default=[]) +parser.add_argument("--use-cpu", nargs='+',choices=['SD', 'GFPGAN', 'BSRGAN', 'ESRGAN', 'SCUNet', 'CodeFormer'], help="use CPU as torch device for specified modules", default=[]) parser.add_argument("--listen", action='store_true', help="launch gradio with 0.0.0.0 as server name, allowing to respond to network requests") parser.add_argument("--port", type=int, help="launch gradio with given server port, you need root/admin rights for ports < 1024, defaults to 7860 if available", default=None) parser.add_argument("--show-negative-prompt", action='store_true', help="does not do anything", default=False)