From c7e0e28ccd5c5075cc6b9c637df02864bd468c2f Mon Sep 17 00:00:00 2001
From: AUTOMATIC <16777216c@gmail.com>
Date: Mon, 12 Sep 2022 20:09:32 +0300
Subject: [PATCH] changes for #294

---
 modules/devices.py    | 17 +++++++++++++++++
 modules/processing.py | 35 +++++------------------------------
 modules/shared.py     |  2 --
 3 files changed, 22 insertions(+), 32 deletions(-)

diff --git a/modules/devices.py b/modules/devices.py
index a93a245b0..e4430e1ad 100644
--- a/modules/devices.py
+++ b/modules/devices.py
@@ -31,3 +31,20 @@ def enable_tf32():
 
 
 errors.run(enable_tf32, "Enabling TF32")
+
+
+device = get_optimal_device()
+device_codeformer = cpu if has_mps else device
+
+
+def randn(seed, shape):
+    # Pytorch currently doesn't handle setting randomness correctly when the metal backend is used.
+    if device.type == 'mps':
+        generator = torch.Generator(device=cpu)
+        generator.manual_seed(seed)
+        noise = torch.randn(shape, generator=generator, device=cpu).to(device)
+        return noise
+
+    torch.manual_seed(seed)
+    return torch.randn(shape, device=device)
+
diff --git a/modules/processing.py b/modules/processing.py
index 1e6745ccd..23b0c08f0 100644
--- a/modules/processing.py
+++ b/modules/processing.py
@@ -103,33 +103,17 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
     for i, seed in enumerate(seeds):
         noise_shape = shape if seed_resize_from_h <= 0 or seed_resize_from_w <= 0 else (shape[0], seed_resize_from_h//8, seed_resize_from_w//8)
 
-        # Pytorch currently doesn't handle seeting randomness correctly when the metal backend is used.
-        generator = torch
-        if shared.device.type == 'mps':
-            shared.device_seed_type = 'cpu'
-            generator = torch.Generator(device=shared.device_seed_type)
-
         subnoise = None
         if subseeds is not None:
             subseed = 0 if i >= len(subseeds) else subseeds[i]
 
-            generator.manual_seed(subseed)
-            if shared.device.type != shared.device_seed_type:
-                subnoise = torch.randn(noise_shape, generator=generator, device=shared.device_seed_type).to(shared.device)
-            else:
-                subnoise = torch.randn(noise_shape, device=shared.device)
+            subnoise = devices.randn(subseed, noise_shape)
 
         # randn results depend on device; gpu and cpu get different results for same seed;
         # the way I see it, it's better to do this on CPU, so that everyone gets same result;
         # but the original script had it like this, so I do not dare change it for now because
         # it will break everyone's seeds.
-        # When using the mps backend falling back to the cpu device is needed, since mps currently
-        # does not implement seeding properly.
-        generator.manual_seed(seed)
-        if shared.device.type != shared.device_seed_type:
-            noise = torch.randn(noise_shape, generator=generator, device=shared.device_seed_type).to(shared.device)
-        else:
-            noise = torch.randn(noise_shape, device=shared.device)
+        noise = devices.randn(seed, noise_shape)
 
         if subnoise is not None:
             #noise = subnoise * subseed_strength + noise * (1 - subseed_strength)
@@ -137,14 +121,8 @@ def create_random_tensors(shape, seeds, subseeds=None, subseed_strength=0.0, see
 
         if noise_shape != shape:
             #noise = torch.nn.functional.interpolate(noise.unsqueeze(1), size=shape[1:], mode="bilinear").squeeze()
-            # noise_shape = (64, 80)
-            # shape = (64, 72)
-            generator.manual_seed(seed)
-            if shared.device.type != shared.device_seed_type:
-                x = torch.randn(shape, generator=generator, device=shared.device_seed_type).to(shared.device)
-            else:
-                x = torch.randn(shape, device=shared.device)
-            dx = (shape[2] - noise_shape[2]) // 2 # -4
+            x = devices.randn(seed, shape)
+            dx = (shape[2] - noise_shape[2]) // 2
             dy = (shape[1] - noise_shape[1]) // 2
             w = noise_shape[2] if dx >= 0 else noise_shape[2] + 2 * dx
             h = noise_shape[1] if dy >= 0 else noise_shape[1] + 2 * dy
@@ -482,10 +460,7 @@ class StableDiffusionProcessingImg2Img(StableDiffusionProcessing):
         if self.image_mask is not None:
             init_mask = latent_mask
             latmask = init_mask.convert('RGB').resize((self.init_latent.shape[3], self.init_latent.shape[2]))
-            precision = np.float64
-            if shared.device.type == 'mps': # mps backend does not support float64
-                precision = np.float32
-            latmask = np.moveaxis(np.array(latmask, dtype=precision), 2, 0) / 255
+            latmask = np.moveaxis(np.array(latmask, dtype=np.float32), 2, 0) / 255
             latmask = latmask[0]
             latmask = np.around(latmask)
             latmask = np.tile(latmask[None], (4, 1, 1))
diff --git a/modules/shared.py b/modules/shared.py
index 9002141af..ea1c879bf 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -49,8 +49,6 @@ parser.add_argument("--opt-channelslast", action='store_true', help="change memo
 
 cmd_opts = parser.parse_args()
 device = get_optimal_device()
-device_codeformer = device
-device_seed_type = device
 batch_cond_uncond = cmd_opts.always_batch_cond_uncond or not (cmd_opts.lowvram or cmd_opts.medvram)
 parallel_processing_allowed = not cmd_opts.lowvram and not cmd_opts.medvram
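For context, the devices.randn helper introduced above boils down to one pattern: when running on Apple's Metal (MPS) backend, seed a CPU torch.Generator, draw the noise on the CPU, and move the result to the target device; everywhere else, seed normally and draw on the device directly. The sketch below is a minimal standalone illustration of that pattern, not the repository module itself; the device selection and the make_noise name are stand-ins chosen for the example.

import torch

# Stand-in device pick for the example; the webui resolves this via get_optimal_device().
has_mps = getattr(torch, 'has_mps', False)
device = torch.device("mps") if has_mps else torch.device("cuda" if torch.cuda.is_available() else "cpu")


def make_noise(seed, shape):
    # Mirrors the devices.randn pattern: MPS cannot be seeded reliably, so the
    # noise is drawn from a seeded CPU generator and then moved to the device.
    if device.type == 'mps':
        generator = torch.Generator(device='cpu')
        generator.manual_seed(seed)
        return torch.randn(shape, generator=generator, device='cpu').to(device)

    torch.manual_seed(seed)
    return torch.randn(shape, device=device)


# Same seed, same latent noise, whichever backend is active.
a = make_noise(1234, (4, 64, 64))
b = make_noise(1234, (4, 64, 64))
assert torch.equal(a.cpu(), b.cpu())

Because the seeding branch now lives in one place, create_random_tensors can call devices.randn unconditionally, which is what allows the device_seed_type global and the duplicated MPS checks to be dropped from modules/shared.py and modules/processing.py.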