victorchall · nawnie · Jun 2, 2023 · Jun 2, 2023 · Jul 28, 2023
diff --git a/EveryDream_Tools_4_colab.ipynb b/EveryDream_Tools_4_colab.ipynb
diff --git a/NEW_EveryDream_Tools_4_colab.ipynb b/NEW_EveryDream_Tools_4_colab.ipynb
diff --git a/balance_data.py b/balance_data.py
@@ -0,0 +1,34 @@
+import os
+from tqdm import tqdm
+from pathlib import Path
+import argparse
+
+def count_images(input_dir):
+    """Count image files per folder under ``input_dir``.
+
+    Walks ``input_dir`` recursively. A file counts as an image when its
+    lower-cased name ends with .png, .jpg, .jpeg, .gif or .webp.
+
+    Returns a ``(folder_counts, average)`` tuple: ``folder_counts`` maps each
+    folder path holding at least one image to its image count, and
+    ``average`` is the mean count over those folders (``0`` when there are none).
+    """
+    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
+    per_folder = {}
+    for current_dir, _subdirs, filenames in os.walk(input_dir):
+        matches = [name for name in filenames if name.lower().endswith(image_suffixes)]
+        # Folders without images are left out so they do not drag the average down
+        if matches:
+            per_folder[current_dir] = len(matches)
+
+    if not per_folder:
+        return per_folder, 0
+    return per_folder, sum(per_folder.values()) / len(per_folder)
+
+def write_multiplier(input_dir):
+    """Write a ``multiply.txt`` into every image folder under ``input_dir``.
+
+    The value written for a folder is the average image count (as returned by
+    ``count_images``) divided by that folder's own image count. An existing
+    ``multiply.txt`` is overwritten.
+    """
+    folder_counts, avg_count = count_images(input_dir)
+    # The context manager closes the progress bar even if a write fails part-way
+    with tqdm(total=len(folder_counts), desc="Writing multipliers", position=0, leave=True) as progress_bar:
+        for folder, count in folder_counts.items():
+            multiplier = avg_count / count if count > 0 else 0
+            with open(Path(folder) / 'multiply.txt', 'w') as f:
+                f.write(str(multiplier))
+            progress_bar.update(1)
+
+if __name__ == "__main__":
+    # Command-line entry point: one positional argument naming the directory to process.
+    cli = argparse.ArgumentParser(
+        description="Calculate image multiplication factors for subfolders",
+    )
+    cli.add_argument(
+        "input_dir",
+        metavar="input_dir",
+        type=str,
+        help="the input directory to calculate multipliers",
+    )
+
+    write_multiplier(cli.parse_args().input_dir)
diff --git a/balance_data_Readme.txt b/balance_data_Readme.txt
@@ -0,0 +1,39 @@
+# Data set Balancer via Image Multiplier Calculator
+
+This script calculates the multiplier that should be applied to the number of images in each subfolder of a given directory, to equalize the number of images across all subfolders. It writes the calculated multiplier to a text file named `multiply.txt` in each subfolder. 
+
+For example, if the average number of images across all subfolders is 50, and a particular subfolder has 25 images, then the multiplier for that folder would be 2. Conversely, if a folder has 100 images, the multiplier would be 0.5. 
+
+## Requirements
+- Python 3.7 or higher
+- tqdm library (`pip install tqdm`)
+
+## Usage
+
+```bash
+python balance_data.py /path/to/your/directory
+```
+
+Replace `/path/to/your/directory` with the path to the directory you want to process.
+
+## Output
+
+The script will create a `multiply.txt` file in every folder under the input directory (including the input directory itself) that contains at least one image. This file will contain a single number, which is the multiplier for that folder.
+
+## Progress
+
+The script uses a progress bar to indicate its progress. The progress bar updates after each subfolder's multiplier is calculated and written.
+
+## How it Works
+
+The script works by first traversing the input directory and counting the number of images in each folder. It then calculates the average number of images across all folders that contain at least one image.
+
+Next, the script calculates a multiplier for each of those folders by dividing the average count by the number of images in that folder. Folders that contain no images are skipped: they do not affect the average and no `multiply.txt` is written for them.
+
+Finally, the script writes each subfolder's multiplier to a `multiply.txt` file in that subfolder.
+
+## Limitations
+
+This script counts only files with the following extensions as images: .png, .jpg, .jpeg, .gif, .webp. 
+
+If the script is run multiple times on the same directory, it will overwrite the existing `multiply.txt` files.
diff --git a/bulk rename_Readme.txt b/bulk rename_Readme.txt
@@ -0,0 +1,42 @@
+# Bulk Image Renaming Utility
+
+## Overview
+The Image Renaming Utility is a command-line Python script that renames the files in a specified directory and its subdirectories. Every file found is renamed, not only images. Each new file name is built from the path of the directory that contains the file: colons and backslashes in the path are replaced with a comma followed by a space, and underscores are replaced with hyphens. The script also removes a specific string from the new names and appends an underscore and an index number to each one, keeping the original file extension.
+
+## Requirements
+- Python 3.6 or later
+
+## Installation
+1. Download the script `bulk_rename.py` to your desired folder.
+2. Ensure you have Python 3.6 or later installed. You can check your Python version by running `python --version` in your command prompt or terminal.
+
+## Usage
+Run the script in the command prompt or terminal with the desired options:
+
+```
+python bulk_rename.py [options]
+```
+
+### Options
+```
+--img_dir <path>       Path to the image directory (default: 'input')
+```
+
+### Example
+```
+python bulk_rename.py --img_dir images
+```
+
+This command will rename image files in the 'images' directory according to the specified rules.
+
+## Customization
+To customize the renaming rules, modify the following lines in the script:
+- Replace the specific string you want to remove: `new_root = new_root.replace("C, , Users, shawn, Desktop,", "")`
+- Change the characters being replaced: `new_root = root.replace(":", ", ").replace("\\", ", ").replace("_", "-")`
+
+## Notes
+- Ensure you have the necessary permissions to read and write to the input directory and the image files within it.
+- Be cautious when running the script on important files, as the renaming process is irreversible. Consider creating a backup before using the utility.
+
+## License
+This project is open source and available under the [MIT License](https://opensource.org/licenses/MIT).
diff --git a/bulk_rename.py b/bulk_rename.py
@@ -0,0 +1,36 @@
+import os
+import argparse
+
+# Set up argument parser
+def get_args(**parser_kwargs):
+    """Parse the command line and return the resulting namespace.
+
+    Keyword arguments are forwarded unchanged to ``argparse.ArgumentParser``.
+    The only option defined is ``--img_dir`` (default ``"input"``).
+    """
+    cli = argparse.ArgumentParser(**parser_kwargs)
+    img_dir_option = {
+        "type": str,
+        "default": "input",
+        "help": "path to image directory (default: 'input')",
+    }
+    cli.add_argument("--img_dir", **img_dir_option)
+    return cli.parse_args()
+
+# Get the input directory from parsed arguments
+args = get_args()
+input_directory = args.img_dir
+
+# Walk every directory under the input directory and rename the files in it.
+for root, dirs, files in os.walk(input_directory):
+    # Build the file-name prefix from the directory path: ":" and path
+    # separators become ", " and "_" becomes "-". "/" is replaced as well,
+    # because a separator left in the prefix makes os.path.join below point
+    # into a sub-directory that does not exist.
+    new_root = root.replace(":", ", ").replace("\\", ", ").replace("/", ", ").replace("_", "-")
+    # Remove the hard-coded prefix "C, , Users, shawn, Desktop," (a Windows
+    # desktop path as it looks after the replacements above)
+    new_root = new_root.replace("C, , Users, shawn, Desktop,", "")
+
+    # Sorted so that the index assigned to each file is reproducible
+    for i, file in enumerate(sorted(files)):
+        old_path = os.path.join(root, file)
+        new_name = new_root + "_" + str(i + 1) + os.path.splitext(file)[1]
+        new_path = os.path.join(root, new_name)
+
+        # Already has its target name (e.g. the script was run before)
+        if new_path == old_path:
+            continue
+        # Never clobber another file: os.rename silently replaces it on POSIX
+        if os.path.exists(new_path):
+            print(f"Skipping {old_path}: {new_path} already exists")
+            continue
+
+        # Rename the file
+        os.rename(old_path, new_path)
+
diff --git a/caption.py b/caption.py
@@ -0,0 +1,170 @@
+"""
+Copyright [2022-2023] Victor C Hall
+
+Licensed under the GNU Affero General Public License;
+You may not use this code except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.gnu.org/licenses/agpl-3.0.en.html
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import os
+
+from PIL import Image
+import argparse
+import requests
+from transformers import Blip2Processor, Blip2ForConditionalGeneration, GitProcessor, GitForCausalLM, AutoModel, AutoProcessor
+
+import torch
+from  pynvml import *
+
+import time
+from colorama import Fore, Style
+
+import nltk
+from nltk.tokenize import word_tokenize
+from nltk.tag import pos_tag
+
+# Download NLTK data at import time; presumably these are the resources that
+# word_tokenize and pos_tag (imported above) need. TODO confirm.
+nltk.download('punkt')
+nltk.download('averaged_perceptron_tagger')
+
+# Lower-case file extensions (with leading dot) that main() treats as images.
+SUPPORTED_EXT = [".jpg", ".png", ".jpeg", ".bmp", ".jfif", ".webp"]
+
+
+
+def get_gpu_memory_map():
+    """Return the memory currently in use on GPU 0.
+
+    Only device index 0 is queried, through NVML (pynvml).
+
+    Returns
+    -------
+    usage: float
+        Used device memory as reported by NVML, divided by 1024 twice
+        (i.e. in MiB if NVML reports bytes).
+    """
+    nvmlInit()
+    try:
+        handle = nvmlDeviceGetHandleByIndex(0)
+        info = nvmlDeviceGetMemoryInfo(handle)
+        return info.used/1024/1024
+    finally:
+        # Balance the nvmlInit() above so repeated calls do not pile up initialisations
+        nvmlShutdown()
+
+def create_blip2_processor(model_name, device, dtype=torch.float16, cache_dir=None):
+    """Load a BLIP2 processor and model and move the model to ``device``.
+
+    ``model_name`` is passed to ``from_pretrained`` for both the processor and
+    the model; ``dtype`` is used as the model's ``torch_dtype`` and
+    ``cache_dir`` is forwarded to both loaders.
+
+    Returns a ``(processor, model)`` tuple with the model in eval mode.
+    """
+    processor = Blip2Processor.from_pretrained(model_name, cache_dir=cache_dir)
+    # Use the model_name argument rather than the module-level ``args`` so the
+    # processor and the weights always come from the same checkpoint and the
+    # function also works when no global ``args`` exists.
+    model = Blip2ForConditionalGeneration.from_pretrained(
+        model_name, torch_dtype=dtype, cache_dir=cache_dir
+    )
+    model.to(device)
+    model.eval()
+    print(f"BLIP2 Model loaded: {model_name}")
+    return processor, model
+
+
+def create_git_processor(model_name, device, dtype=torch.float16, cache_dir=None):
+    """Load a GIT processor and model and move the model to ``device``.
+
+    ``model_name`` is passed to ``from_pretrained`` for both the processor and
+    the model; ``dtype`` is used as the model's ``torch_dtype`` and
+    ``cache_dir`` is forwarded to both loaders.
+
+    Returns a ``(processor, model)`` tuple with the model in eval mode.
+    """
+    processor = GitProcessor.from_pretrained(model_name, cache_dir=cache_dir)
+    # Use the model_name argument rather than the module-level ``args`` so the
+    # processor and the weights always come from the same checkpoint and the
+    # function also works when no global ``args`` exists.
+    model = GitForCausalLM.from_pretrained(
+        model_name, torch_dtype=dtype, cache_dir=cache_dir
+    )
+    model.to(device)
+    model.eval()
+    print(f"GIT Model loaded: {model_name}")
+    return processor, model
+
+
+def create_auto_processor(model_name, device, dtype=torch.float16):
+    """Load a processor and model through the ``Auto*`` classes.
+
+    ``model_name`` is passed to ``from_pretrained`` for both the processor and
+    the model; ``dtype`` is used as the model's ``torch_dtype``.
+
+    Returns a ``(processor, model)`` tuple with the model on ``device`` and in
+    eval mode.
+    """
+    processor = AutoProcessor.from_pretrained(model_name)
+    # Use the model_name argument rather than the module-level ``args`` so the
+    # processor and the weights always come from the same checkpoint and the
+    # function also works when no global ``args`` exists.
+    model = AutoModel.from_pretrained(
+        model_name, torch_dtype=dtype
+    )
+    model.to(device)
+    model.eval()
+    print("Auto Model loaded")
+    return processor, model
+
+def replace_first_noun_with_folder_name(caption, folder_name):
+    """Return ``caption`` with its first noun swapped for ``folder_name``.
+
+    The caption is tokenized and POS-tagged; the first token whose tag starts
+    with "N" is replaced. The tokens are then re-joined with single spaces, so
+    the result is returned in tokenized form even when nothing was replaced.
+    """
+    words = []
+    swapped = False
+    for token, tag in pos_tag(word_tokenize(caption)):
+        if not swapped and tag.startswith("N"):
+            words.append(folder_name)
+            swapped = True
+        else:
+            words.append(token)
+    return " ".join(words)
+
+def main(args):
+    """Caption every supported image under ``args.data_root``.
+
+    Loads the model selected by ``args.model``, walks ``args.data_root``
+    recursively, generates a caption for each file whose extension is in
+    SUPPORTED_EXT and writes it to a ``.txt`` file next to the image.
+
+    Reads ``args.data_root``, ``args.Blip_location``, ``args.model``,
+    ``args.replace_subject``, ``args.force_cpu`` and ``args.max_new_tokens``.
+    """
+    device = "cuda" if torch.cuda.is_available() and not args.force_cpu else "cpu"
+    # Half precision only on the GPU; use float32 whenever we end up on the
+    # CPU, including when CUDA is simply unavailable.
+    dtype = torch.float16 if device == "cuda" else torch.float32
+
+    cache_dir = os.path.join(args.Blip_location, 'cache')
+    os.makedirs(cache_dir, exist_ok=True)
+
+    if "salesforce/blip2-" in args.model.lower():
+        print(f"Using BLIP2 model: {args.model}")
+        processor, model = create_blip2_processor(args.model, device, dtype, cache_dir=cache_dir)
+    elif "microsoft/git-" in args.model.lower():
+        print(f"Using GIT model: {args.model}")
+        processor, model = create_git_processor(args.model, device, dtype, cache_dir=cache_dir)
+    else:
+        # try to use auto model?  doesn't work with blip/git
+        processor, model = create_auto_processor(args.model, device, dtype)
+
+    # os.walk all files in args.data_root recursively
+    for root, dirs, files in os.walk(args.data_root):
+        for file in files:
+            # Skip anything that is not a supported image type
+            ext = os.path.splitext(file)[1]
+            if ext.lower() not in SUPPORTED_EXT:
+                continue
+
+            full_file_path = os.path.join(root, file)
+            # The context manager closes the image file once it has been preprocessed
+            with Image.open(full_file_path) as image:
+                start_time = time.time()
+                inputs = processor(images=image, return_tensors="pt")
+
+            # Move everything to the device; only floating-point tensors are cast to dtype
+            inputs = {
+                key: tensor.to(device, dtype) if torch.is_floating_point(tensor) else tensor.to(device)
+                for key, tensor in inputs.items()
+            }
+
+            # max_new_tokens is a generation setting, so it is passed to generate()
+            generated_ids = model.generate(**inputs, max_new_tokens=args.max_new_tokens)
+            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
+            folder_name = os.path.basename(root)
+            if args.replace_subject:
+                modified_caption = replace_first_noun_with_folder_name(generated_text, folder_name)
+            else:
+                modified_caption = generated_text
+            print(f"file: {file}, caption: {modified_caption}")
+            exec_time = time.time() - start_time
+            if device == "cuda":
+                print(f"  Time for last caption: {exec_time} sec.  GPU memory used: {get_gpu_memory_map()} MB")
+            else:
+                # NVML is not queried on the CPU path, so a machine without an
+                # NVIDIA driver can still finish and write its captions
+                print(f"  Time for last caption: {exec_time} sec.")
+
+            # get bare name
+            name = os.path.splitext(full_file_path)[0]
+            # NOTE(review): this tests the extension-less path, not the caption
+            # file, so an existing "<name>.txt" is overwritten. Confirm whether
+            # skipping already-captioned images was intended.
+            if not os.path.exists(name):
+                with open(f"{name}.txt", "w") as f:
+                    f.write(modified_caption)
+
+if __name__ == "__main__":
+    # Banner: models listed as supported, with their stated memory needs.
+    print(f"{Fore.CYAN}** Current supported models:{Style.RESET_ALL}")
+    for supported_line in (
+        "     microsoft/git-base-textcaps",
+        "     microsoft/git-large-textcaps",
+        "     microsoft/git-large-r-textcaps",
+        "     Salesforce/blip2-opt-2.7b - (9GB VRAM or recommend 32GB sys RAM)",
+        "     Salesforce/blip2-opt-2.7b-coco - (9GB VRAM or recommend 32GB sys RAM)",
+        "     Salesforce/blip2-opt-6.7b - (16.5GB VRAM or recommend 64GB sys RAM)",
+        "     Salesforce/blip2-opt-6.7b-coco - (16.5GB VRAM or recommend 64GB sys RAM)",
+    ):
+        print(supported_line)
+    print()
+    print(f"{Fore.CYAN} * The following will likely not work on any consumer GPUs or require huge sys RAM on CPU:{Style.RESET_ALL}")
+    for heavy_line in (
+        "     salesforce/blip2-flan-t5-xl",
+        "     salesforce/blip2-flan-t5-xl-coco",
+        "     salesforce/blip2-flan-t5-xxl",
+    ):
+        print(heavy_line)
+
+    # Command-line options; the parsed namespace is bound to the module-level name ``args``.
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--data_root", type=str, default="input",
+        help="Path to images",
+    )
+    parser.add_argument(
+        "--Blip_location", type=str, default=os.getcwd(),
+        help="Path to Blip Models",
+    )
+    parser.add_argument(
+        "--model", type=str, default="salesforce/blip2-opt-2.7b",
+        help="model from huggingface, ex. 'salesforce/blip2-opt-2.7b'",
+    )
+    parser.add_argument(
+        "--replace_subject", action="store_true", default=False,
+        help="Replace the first noun in the generated caption with the folder name",
+    )
+    parser.add_argument(
+        "--force_cpu", action="store_true", default=False,
+        help="force using CPU even if GPU is available, may be useful to run huge models if you have a lot of system memory",
+    )
+    parser.add_argument(
+        "--max_new_tokens", type=int, default=24,
+        help="max length for generated captions",
+    )
+    args = parser.parse_args()
+
+    print(f"** Using model: {args.model}")
+    print(f"** Captioning files in: {args.data_root}")
+    main(args)