From 2920ca789294292640ec048dde8f2dd8d467e6b0 Mon Sep 17 00:00:00 2001
From: Elias Oenal
Date: Sun, 11 Sep 2022 21:10:21 +0200
Subject: [PATCH 1/3] CodeFormer does not support mps/metal backend,
 implemented fallback to cpu backend.

---
 modules/codeformer_model.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/modules/codeformer_model.py b/modules/codeformer_model.py
index 946b4a305..a75b84d78 100644
--- a/modules/codeformer_model.py
+++ b/modules/codeformer_model.py
@@ -1,3 +1,6 @@
+# Metal backend fixes written and placed
+# into the public domain by Elias Oenal
+
 import os
 import sys
 import traceback
@@ -53,13 +56,19 @@ def setup_codeformer():
             if self.net is not None and self.face_helper is not None:
                 return self.net, self.face_helper
 
-            net = net_class(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9, connect_list=['32', '64', '128', '256']).to(shared.device)
+            if shared.device.type == 'mps': # CodeFormer currently does not support mps backend
+                net = net_class(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9, connect_list=['32', '64', '128', '256']).to(torch.device('cpu'))
+            else:
+                net = net_class(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9, connect_list=['32', '64', '128', '256']).to(shared.device)
             ckpt_path = load_file_from_url(url=pretrain_model_url, model_dir=os.path.join(path, 'weights/CodeFormer'), progress=True)
             checkpoint = torch.load(ckpt_path)['params_ema']
             net.load_state_dict(checkpoint)
             net.eval()
 
-            face_helper = FaceRestoreHelper(1, face_size=512, crop_ratio=(1, 1), det_model='retinaface_resnet50', save_ext='png', use_parse=True, device=shared.device)
+            if shared.device.type == 'mps': # CodeFormer currently does not support mps backend
+                face_helper = FaceRestoreHelper(1, face_size=512, crop_ratio=(1, 1), det_model='retinaface_resnet50', save_ext='png', use_parse=True, device=torch.device('cpu'))
+            else:
+                face_helper = FaceRestoreHelper(1, face_size=512, crop_ratio=(1, 1), det_model='retinaface_resnet50', save_ext='png', use_parse=True, device=shared.device)
 
             if not cmd_opts.unload_gfpgan:
                 self.net = net
@@ -81,8 +90,10 @@ def setup_codeformer():
             for idx, cropped_face in enumerate(face_helper.cropped_faces):
                 cropped_face_t = img2tensor(cropped_face / 255., bgr2rgb=True, float32=True)
                 normalize(cropped_face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
-                cropped_face_t = cropped_face_t.unsqueeze(0).to(shared.device)
-
+                if shared.device.type == 'mps': # CodeFormer currently does not support mps backend
+                    cropped_face_t = cropped_face_t.unsqueeze(0).to(torch.device('cpu'))
+                else:
+                    cropped_face_t = cropped_face_t.unsqueeze(0).to(shared.device)
                 try:
                     with torch.no_grad():
                         output = net(cropped_face_t, w=w if w is not None else shared.opts.code_former_weight, adain=True)[0]
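
Taken together, the three hunks above apply one rule: when the configured device is mps, CodeFormer's network, its FaceRestoreHelper and the face tensors are placed on the CPU instead. A minimal self-contained sketch of that rule, assuming only PyTorch; the helper name and the preferred argument are illustrative and not part of the patch:

import torch

def codeformer_device(preferred: torch.device) -> torch.device:
    # CodeFormer cannot run on the Metal (mps) backend yet, so fall back to the
    # CPU whenever mps is selected; keep the caller's device in every other case.
    if preferred.type == 'mps':
        return torch.device('cpu')
    return preferred

# Usage sketch: each .to(shared.device) on the CodeFormer path would become
# .to(codeformer_device(shared.device)), which is what the if/else branches above do inline.
print(codeformer_device(torch.device('cpu')))  # -> cpu
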
From 5dc05c0d0dc6a0040b0beb93f082ab314513d069 Mon Sep 17 00:00:00 2001
From: Elias Oenal
Date: Sun, 11 Sep 2022 21:11:02 +0200
Subject: [PATCH 2/3] Implemented workaround to allow the use of seeds with
 the mps/metal backend. Fixed img2img's use of unsupported precision float64
 with mps backend.

---
 modules/processing.py | 38 +++++++++++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 7 deletions(-)

diff --git a/modules/processing.py b/modules/processing.py
index cf2e13d39..80bf7cc09 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1,3 +1,6 @@
+# Metal backend fixes written and placed
+# into the public domain by Elias Oenal
+
 import contextlib
 import json
 import math
@@ -105,18 +108,32 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
     for i, seed in enumerate(seeds):
         noise_shape = shape if seed_resize_from_h <= 0 or seed_resize_from_w <= 0 else (shape[0], seed_resize_from_h//8, seed_resize_from_w//8)
 
+        # PyTorch currently doesn't handle setting randomness correctly when the metal backend is used.
+        if shared.device.type == 'mps':
+            g = torch.Generator(device='cpu')
+
         subnoise = None
         if subseeds is not None:
             subseed = 0 if i >= len(subseeds) else subseeds[i]
-            torch.manual_seed(subseed)
-            subnoise = torch.randn(noise_shape, device=shared.device)
+            if shared.device.type == 'mps':
+                g.manual_seed(subseed)
+                subnoise = torch.randn(noise_shape, generator=g, device='cpu').to('mps')
+            else: # cpu or cuda
+                torch.manual_seed(subseed)
+                subnoise = torch.randn(noise_shape, device=shared.device)
 
         # randn results depend on device; gpu and cpu get different results for same seed;
         # the way I see it, it's better to do this on CPU, so that everyone gets same result;
         # but the original script had it like this, so I do not dare change it for now because
         # it will break everyone's seeds.
-        torch.manual_seed(seed)
-        noise = torch.randn(noise_shape, device=shared.device)
+        # When using the mps backend falling back to the cpu device is needed, since mps currently
+        # does not implement seeding properly.
+        if shared.device.type == 'mps':
+            g.manual_seed(seed)
+            noise = torch.randn(noise_shape, generator=g, device='cpu').to('mps')
+        else: # cpu or cuda
+            torch.manual_seed(seed)
+            x = torch.randn(shape, device=shared.device)
 
         if subnoise is not None:
             #noise = subnoise * subseed_strength + noise * (1 - subseed_strength)
@@ -127,8 +144,12 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
             # noise_shape = (64, 80)
             # shape = (64, 72)
 
-            torch.manual_seed(seed)
-            x = torch.randn(shape, device=shared.device)
+            if shared.device.type == 'mps':
+                g.manual_seed(seed)
+                x = torch.randn(shape, generator=g, device='cpu').to('mps')
+            else:
+                torch.manual_seed(seed)
+                x = torch.randn(shape, device=shared.device)
             dx = (shape[2] - noise_shape[2]) // 2 # -4
             dy = (shape[1] - noise_shape[1]) // 2
             w = noise_shape[2] if dx >= 0 else noise_shape[2] + 2 * dx
@@ -463,7 +484,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         if self.image_mask is not None:
             init_mask = latent_mask
             latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))
-            latmask = np.moveaxis(np.array(latmask, dtype=np.float64), 2, 0) / 255
+            if shared.device.type == 'mps': # mps backend does not support float64
+                latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255
+            else:
+                latmask = np.moveaxis(np.array(latmask, dtype=np.float64), 2, 0) / 255
             latmask = latmask[0]
             latmask = np.around(latmask)
             latmask = np.tile(latmask[None], (4, 1, 1))
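
The seeding workaround above draws every seeded tensor from an explicitly seeded CPU generator when the mps backend is active, then moves the result to the Metal device, so a given seed keeps producing the same latents. A self-contained sketch of that idea, assuming a PyTorch build with mps support; the function name and the demo shapes are illustrative, not code from the patch:

import torch

def seeded_randn(shape, seed, device):
    # mps does not honour manual seeding reliably, so draw the tensor on the CPU
    # with a seeded generator and transfer it to the target device afterwards.
    if device.type == 'mps':
        g = torch.Generator(device='cpu')
        g.manual_seed(seed)
        return torch.randn(shape, generator=g, device='cpu').to(device)
    # cpu/cuda keep the existing behaviour so previously shared seeds stay valid.
    torch.manual_seed(seed)
    return torch.randn(shape, device=device)

noise = seeded_randn((4, 64, 64), seed=1234, device=torch.device('cpu'))

Branching instead of moving every backend to CPU noise keeps the cpu and cuda paths unchanged, which is why the patch leaves existing seeds untouched; the float64 change at the end of the diff is independent and only narrows the numpy mask dtype to float32 on mps.
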
From b7f95869b4542d356a12da6860b1e6c227784560 Mon Sep 17 00:00:00 2001
From: Elias Oenal
Date: Mon, 12 Sep 2022 16:32:44 +0200
Subject: [PATCH 3/3] Refactored Metal/mps fixes.

---
 modules/codeformer_model.py | 21 ++++++---------------
 modules/processing.py       | 42 +++++++++++++++++++-----------------------
 modules/shared.py           |  2 ++
 3 files changed, 27 insertions(+), 38 deletions(-)

diff --git a/modules/codeformer_model.py b/modules/codeformer_model.py
index a75b84d78..c638cb4d4 100644
--- a/modules/codeformer_model.py
+++ b/modules/codeformer_model.py
@@ -1,6 +1,3 @@
-# Metal backend fixes written and placed
-# into the public domain by Elias Oenal
-
 import os
 import sys
 import traceback
@@ -50,25 +47,21 @@ def setup_codeformer():
         def __init__(self):
             self.net = None
             self.face_helper = None
+            if shared.device.type == 'mps': # CodeFormer currently does not support mps backend
+                shared.device_codeformer = torch.device('cpu')
 
         def create_models(self):
 
             if self.net is not None and self.face_helper is not None:
                 return self.net, self.face_helper
 
-            if shared.device.type == 'mps': # CodeFormer currently does not support mps backend
-                net = net_class(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9, connect_list=['32', '64', '128', '256']).to(torch.device('cpu'))
-            else:
-                net = net_class(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9, connect_list=['32', '64', '128', '256']).to(shared.device)
+            net = net_class(dim_embd=512, codebook_size=1024, n_head=8, n_layers=9, connect_list=['32', '64', '128', '256']).to(shared.device_codeformer)
             ckpt_path = load_file_from_url(url=pretrain_model_url, model_dir=os.path.join(path, 'weights/CodeFormer'), progress=True)
             checkpoint = torch.load(ckpt_path)['params_ema']
             net.load_state_dict(checkpoint)
             net.eval()
 
-            if shared.device.type == 'mps': # CodeFormer currently does not support mps backend
-                face_helper = FaceRestoreHelper(1, face_size=512, crop_ratio=(1, 1), det_model='retinaface_resnet50', save_ext='png', use_parse=True, device=torch.device('cpu'))
-            else:
-                face_helper = FaceRestoreHelper(1, face_size=512, crop_ratio=(1, 1), det_model='retinaface_resnet50', save_ext='png', use_parse=True, device=shared.device)
+            face_helper = FaceRestoreHelper(1, face_size=512, crop_ratio=(1, 1), det_model='retinaface_resnet50', save_ext='png', use_parse=True, device=shared.device_codeformer)
 
             if not cmd_opts.unload_gfpgan:
                 self.net = net
@@ -90,10 +83,8 @@ def setup_codeformer():
             for idx, cropped_face in enumerate(face_helper.cropped_faces):
                 cropped_face_t = img2tensor(cropped_face / 255., bgr2rgb=True, float32=True)
                 normalize(cropped_face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
-                if shared.device.type == 'mps': # CodeFormer currently does not support mps backend
-                    cropped_face_t = cropped_face_t.unsqueeze(0).to(torch.device('cpu'))
-                else:
-                    cropped_face_t = cropped_face_t.unsqueeze(0).to(shared.device)
+                cropped_face_t = cropped_face_t.unsqueeze(0).to(shared.device_codeformer)
+
                 try:
                     with torch.no_grad():
                         output = net(cropped_face_t, w=w if w is not None else shared.opts.code_former_weight, adain=True)[0]
diff --git a/modules/processing.py b/modules/processing.py
index 80bf7cc09..542d1136a 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -1,6 +1,3 @@
-# Metal backend fixes written and placed
-# into the public domain by Elias Oenal
-
 import contextlib
 import json
 import math
@@ -109,17 +106,19 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
         noise_shape = shape if seed_resize_from_h <= 0 or seed_resize_from_w <= 0 else (shape[0], seed_resize_from_h//8, seed_resize_from_w//8)
 
         # PyTorch currently doesn't handle setting randomness correctly when the metal backend is used.
+        generator = torch
         if shared.device.type == 'mps':
-            g = torch.Generator(device='cpu')
+            shared.device_seed_type = 'cpu'
+            generator = torch.Generator(device=shared.device_seed_type)
 
         subnoise = None
         if subseeds is not None:
             subseed = 0 if i >= len(subseeds) else subseeds[i]
-            if shared.device.type == 'mps':
-                g.manual_seed(subseed)
-                subnoise = torch.randn(noise_shape, generator=g, device='cpu').to('mps')
-            else: # cpu or cuda
-                torch.manual_seed(subseed)
+            generator.manual_seed(subseed)
+
+            if shared.device.type != shared.device_seed_type:
+                subnoise = torch.randn(noise_shape, generator=generator, device=shared.device_seed_type).to(shared.device)
+            else:
                 subnoise = torch.randn(noise_shape, device=shared.device)
 
         # randn results depend on device; gpu and cpu get different results for same seed;
@@ -128,12 +127,11 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
         # it will break everyone's seeds.
         # When using the mps backend falling back to the cpu device is needed, since mps currently
         # does not implement seeding properly.
-        if shared.device.type == 'mps':
-            g.manual_seed(seed)
-            noise = torch.randn(noise_shape, generator=g, device='cpu').to('mps')
-        else: # cpu or cuda
-            torch.manual_seed(seed)
-            x = torch.randn(shape, device=shared.device)
+        generator.manual_seed(seed)
+        if shared.device.type != shared.device_seed_type:
+            noise = torch.randn(noise_shape, generator=generator, device=shared.device_seed_type).to(shared.device)
+        else:
+            noise = torch.randn(noise_shape, device=shared.device)
 
         if subnoise is not None:
             #noise = subnoise * subseed_strength + noise * (1 - subseed_strength)
@@ -143,12 +141,10 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
             #noise = torch.nn.functional.interpolate(noise.unsqueeze(1), size=shape[1:], mode="bilinear").squeeze()
             # noise_shape = (64, 80)
             # shape = (64, 72)
-
-            if shared.device.type == 'mps':
-                g.manual_seed(seed)
-                x = torch.randn(shape, generator=g, device='cpu').to('mps')
+            generator.manual_seed(seed)
+            if shared.device.type != shared.device_seed_type:
+                x = torch.randn(shape, generator=generator, device=shared.device_seed_type).to(shared.device)
             else:
-                torch.manual_seed(seed)
                 x = torch.randn(shape, device=shared.device)
             dx = (shape[2] - noise_shape[2]) // 2 # -4
             dy = (shape[1] - noise_shape[1]) // 2
             w = noise_shape[2] if dx >= 0 else noise_shape[2] + 2 * dx
@@ -484,10 +480,10 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         if self.image_mask is not None:
             init_mask = latent_mask
             latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))
+            precision = np.float64
             if shared.device.type == 'mps': # mps backend does not support float64
-                latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255
-            else:
-                latmask = np.moveaxis(np.array(latmask, dtype=np.float64), 2, 0) / 255
+                precision = np.float32
+            latmask = np.moveaxis(np.array(latmask, dtype=precision), 2, 0) / 255
             latmask = latmask[0]
             latmask = np.around(latmask)
             latmask = np.tile(latmask[None], (4, 1, 1))
diff --git a/modules/shared.py b/modules/shared.py
index 9eeb64e38..5312768bc 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -46,6 +46,8 @@ parser.add_argument("--ui-config-file", type=str, help="filename to use for ui c
 cmd_opts = parser.parse_args()
 
 device = get_optimal_device()
+device_codeformer = device
+device_seed_type = device
 batch_cond_uncond = cmd_opts.always_batch_cond_uncond or not (cmd_opts.lowvram or cmd_opts.medvram)
 parallel_processing_allowed = not cmd_opts.lowvram and not cmd_opts.medvram
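
The refactor replaces the scattered mps checks with two module-level settings, shared.device_codeformer and shared.device_seed_type, picked once and then compared against shared.device at the call sites. A condensed sketch of that configuration step, assuming PyTorch 1.12+ for torch.backends.mps; the initialisation shown here is simplified and not the committed code:

import torch

# Choose the compute device once, then derive the special-purpose devices from it.
device = torch.device('mps') if torch.backends.mps.is_available() else torch.device('cpu')

# CodeFormer models and tensors are created on the CPU when the main device is mps.
device_codeformer = torch.device('cpu') if device.type == 'mps' else device

# Seeded noise is drawn on this device and moved to `device` only if the two differ.
device_seed_type = 'cpu' if device.type == 'mps' else device.type

# Call sites then reduce to plain lookups and comparisons, e.g.:
#   net.to(device_codeformer)
#   if device.type != device_seed_type: draw on device_seed_type, then .to(device)
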