Refactored the deepbooru module to improve speed when running multiple interrogations in a row. Added the option to generate deepbooru tags for textual inversion preprocessing.

2024-06-07 21:20:49 +00:00 · 2022-10-09 23:58:18 -05:00 · 2022-10-09 23:58:18 -05:00 · 1f92336be7
commit 1f92336be7
parent 45fbd1c5fe
3 changed files with 122 additions and 36 deletions
--- a/modules/deepbooru.py
+++ b/modules/deepbooru.py
@ -1,21 +1,74 @@
 import os.path
 from concurrent.futures import ProcessPoolExecutor
-from multiprocessing import get_context
+import multiprocessing
-def _load_tf_and_return_tags(pil_image, threshold):
+def get_deepbooru_tags(pil_image, threshold=0.5):
    """
    This method is for running only one image at a time for simple use.  Used to the img2img interrogate.
    """
    from modules import shared  # prevents circular reference
    create_deepbooru_process(threshold)
    shared.deepbooru_process_return["value"] = -1
    shared.deepbooru_process_queue.put(pil_image)
    while shared.deepbooru_process_return["value"] == -1:
        time.sleep(0.2)
    release_process()
    return ret
 def deepbooru_process(queue, deepbooru_process_return, threshold):
    """
    Loop that runs inside the deepbooru process.

    The model and its tags are loaded once.  After that every item taken from the queue is tagged and the
    result is stored under "value" in deepbooru_process_return, until the string "QUIT" is received.
    """
    loaded_model, tag_list = get_deepbooru_tags_model()
    while True:
        # keep monitoring the queue for the next image for as long as the process is running
        item = queue.get()
        if item == "QUIT":
            return
        deepbooru_process_return["value"] = get_deepbooru_tags_from_model(loaded_model, tag_list, item, threshold)
 def create_deepbooru_process(threshold=0.5):
    """
    Starts the deepbooru process and stores its handles on modules.shared.

    Images are sent into the process through a manager queue, so several images can be processed in a row
    without reloading the model or creating a new process.  The tags come back through a manager dictionary:
    its "value" entry starts out as -1, and whoever puts an image on the queue should wait until that entry
    has been replaced with the tags.
    """
    from modules import shared  # prevents circular reference

    manager = multiprocessing.Manager()
    shared.deepbooru_process_manager = manager

    image_queue = manager.Queue()
    shared.deepbooru_process_queue = image_queue

    result_holder = manager.dict()
    shared.deepbooru_process_return = result_holder
    result_holder["value"] = -1

    worker = multiprocessing.Process(
        target=deepbooru_process,
        args=(image_queue, result_holder, threshold),
    )
    shared.deepbooru_process = worker
    worker.start()
 def release_process():
    """
    Shuts the deepbooru process down so the memory it used is returned.

    Sends "QUIT" through the queue, waits for the process to finish and then clears the deepbooru handles
    stored on modules.shared.
    """
    from modules import shared  # prevents circular reference

    shared.deepbooru_process_queue.put("QUIT")
    shared.deepbooru_process.join()

    # clear the handles in this order: queue, process, return dictionary, manager
    for handle_name in (
        "deepbooru_process_queue",
        "deepbooru_process",
        "deepbooru_process_return",
        "deepbooru_process_manager",
    ):
        setattr(shared, handle_name, None)
 def get_deepbooru_tags_model():
    import deepdanbooru as dd
    import tensorflow as tf
    import numpy as np
    this_folder = os.path.dirname(__file__)
    model_path = os.path.abspath(os.path.join(this_folder, '..', 'models', 'deepbooru'))
    if not os.path.exists(os.path.join(model_path, 'project.json')):
        # there is no point importing these every time
        import zipfile
        from basicsr.utils.download_util import load_file_from_url
-        load_file_from_url(r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip",
+        load_file_from_url(
-                           model_path)
+            r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip",
            model_path)
        with zipfile.ZipFile(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"), "r") as zip_ref:
            zip_ref.extractall(model_path)
        os.remove(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"))
@ -24,7 +77,13 @@ def _load_tf_and_return_tags(pil_image, threshold):
    model = dd.project.load_model_from_project(
        model_path, compile_model=True
    )
    return model, tags
 def get_deepbooru_tags_from_model(model, tags, pil_image, threshold=0.5):
    import deepdanbooru as dd
    import tensorflow as tf
    import numpy as np
    width = model.input_shape[2]
    height = model.input_shape[1]
    image = np.array(pil_image)
@ -57,17 +116,4 @@ def _load_tf_and_return_tags(pil_image, threshold):
    print('\n'.join(sorted(result_tags_print, reverse=True)))
-    return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ')
+    return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ')
 def subprocess_init_no_cuda():
    """
    Sets the CUDA_VISIBLE_DEVICES environment variable of the current process to "-1".
    """
    import os
    os.environ.update({"CUDA_VISIBLE_DEVICES": "-1"})
 def get_deepbooru_tags(pil_image, threshold=0.5):
    """
    Runs _load_tf_and_return_tags for one image inside a process pool and returns its result.

    The pool is created with the 'spawn' context and uses subprocess_init_no_cuda as its initializer.
    """
    spawn_context = get_context('spawn')
    pool = ProcessPoolExecutor(initializer=subprocess_init_no_cuda, mp_context=spawn_context)
    with pool:
        future = pool.submit(_load_tf_and_return_tags, pil_image, threshold)
        return future.result()  # will rethrow any exceptions
--- a/modules/textual_inversion/preprocess.py
+++ b/modules/textual_inversion/preprocess.py
@ -3,11 +3,14 @@ from PIL import Image, ImageOps
 import platform
 import sys
 import tqdm
 import time
 from modules import shared, images
 from modules.shared import opts, cmd_opts
 if cmd_opts.deepdanbooru:
    import modules.deepbooru as deepbooru
-
+def preprocess(process_src, process_dst, process_flip, process_split, process_caption, process_caption_deepbooru=False):
 def preprocess(process_src, process_dst, process_flip, process_split, process_caption):
    size = 512
    src = os.path.abspath(process_src)
    dst = os.path.abspath(process_dst)
@ -24,10 +27,21 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca
    if process_caption:
        shared.interrogator.load()
    if process_caption_deepbooru:
        deepbooru.create_deepbooru_process()
    def save_pic_with_caption(image, index):
        if process_caption:
            caption = "-" + shared.interrogator.generate_caption(image)
            caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png")
        elif process_caption_deepbooru:
            shared.deepbooru_process_return["value"] = -1
            shared.deepbooru_process_queue.put(image)
            while shared.deepbooru_process_return["value"] == -1:
                time.sleep(0.2)
            caption = "-" + shared.deepbooru_process_return["value"]
            caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png")
            shared.deepbooru_process_return["value"] = -1
        else:
            caption = filename
            caption = os.path.splitext(caption)[0]
@ -79,6 +93,10 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca
    if process_caption:
        shared.interrogator.send_blip_to_ram()
    if process_caption_deepbooru:
        deepbooru.release_process()
 def sanitize_caption(base_path, original_caption, suffix):
    operating_system = platform.system().lower()
    if (operating_system == "windows"):
--- a/modules/ui.py
+++ b/modules/ui.py
@ -1034,6 +1034,9 @@ def create_ui(wrap_gradio_gpu_call):
                        process_flip = gr.Checkbox(label='Create flipped copies')
                        process_split = gr.Checkbox(label='Split oversized images into two')
                        process_caption = gr.Checkbox(label='Use BLIP caption as filename')
                        if cmd_opts.deepdanbooru:
                            process_caption_deepbooru = gr.Checkbox(label='Use deepbooru caption as filename')
                    with gr.Row():
                        with gr.Column(scale=3):
@ -1086,21 +1089,40 @@ def create_ui(wrap_gradio_gpu_call):
            ]
        )
-        run_preprocess.click(
+        if cmd_opts.deepdanbooru:
-            fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]),
+            # if process_caption_deepbooru is None, it will cause an error, as a result only include it if it is enabled
-            _js="start_training_textual_inversion",
+            run_preprocess.click(
-            inputs=[
+                fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]),
-                process_src,
+                _js="start_training_textual_inversion",
-                process_dst,
+                inputs=[
-                process_flip,
+                    process_src,
-                process_split,
+                    process_dst,
-                process_caption,
+                    process_flip,
-            ],
+                    process_split,
-            outputs=[
+                    process_caption,
-                ti_output,
+                    process_caption_deepbooru,
-                ti_outcome,
+                ],
-            ],
+                outputs=[
-        )
+                    ti_output,
                    ti_outcome,
                ],
            )
        else:
            run_preprocess.click(
                fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.preprocess, extra_outputs=[gr.update()]),
                _js="start_training_textual_inversion",
                inputs=[
                    process_src,
                    process_dst,
                    process_flip,
                    process_split,
                    process_caption,
                ],
                outputs=[
                    ti_output,
                    ti_outcome,
                ],
            )
        train_embedding.click(
            fn=wrap_gradio_gpu_call(modules.textual_inversion.ui.train_embedding, extra_outputs=[gr.update()]),