Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
452 changes: 452 additions & 0 deletions EveryDream_Tools_4_colab.ipynb

Large diffs are not rendered by default.

561 changes: 561 additions & 0 deletions NEW_EveryDream_Tools_4_colab.ipynb

Large diffs are not rendered by default.

34 changes: 34 additions & 0 deletions balance_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import os
from tqdm import tqdm
from pathlib import Path
import argparse

def count_images(input_dir):
    """Count the image files held directly by each folder under ``input_dir``.

    The tree rooted at ``input_dir`` is walked (the root folder included).
    A file counts as an image when its lower-cased name ends with one of
    ``.png``, ``.jpg``, ``.jpeg``, ``.gif`` or ``.webp``. Folders that hold
    no images are left out of the result entirely.

    Returns:
        A ``(folder_counts, average)`` tuple. ``folder_counts`` maps each
        folder path (as yielded by ``os.walk``) to its image count, and
        ``average`` is the mean count over those folders, or ``0`` when no
        folder contains an image.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.webp')
    folder_counts = {}

    for current_dir, _subdirs, filenames in os.walk(input_dir):
        matches = [name for name in filenames if name.lower().endswith(image_suffixes)]
        if matches:
            folder_counts[current_dir] = len(matches)

    # Guard the division: an empty tree has no folders to average over.
    if not folder_counts:
        return folder_counts, 0
    return folder_counts, sum(folder_counts.values()) / len(folder_counts)

def write_multiplier(input_dir):
    """Write a ``multiply.txt`` balancing factor into every image folder.

    For each folder reported by ``count_images(input_dir)`` the multiplier is
    ``average_count / folder_count``, so folders with fewer images than the
    average get a factor above 1 and larger folders get a factor below 1.
    The value is written as plain text to ``<folder>/multiply.txt``,
    overwriting any existing file.

    Args:
        input_dir: Root directory whose image folders should be balanced.
    """
    folder_counts, avg_count = count_images(input_dir)

    # Using the bar as a context manager guarantees it is closed (and the
    # terminal line restored) even when one of the writes below raises.
    with tqdm(total=len(folder_counts), desc="Writing multipliers", position=0, leave=True) as progress_bar:
        for folder, count in folder_counts.items():
            # count_images only records folders with at least one image, so
            # the zero guard is purely defensive against division by zero.
            multiplier = avg_count / count if count > 0 else 0
            with open(Path(folder) / 'multiply.txt', 'w') as f:
                f.write(str(multiplier))
            progress_bar.update(1)

if __name__ == "__main__":
    # Command-line entry point: a single positional argument names the
    # directory tree whose folders should receive multiply.txt files.
    cli = argparse.ArgumentParser(
        description="Calculate image multiplication factors for subfolders"
    )
    cli.add_argument(
        "input_dir",
        metavar="input_dir",
        type=str,
        help="the input directory to calculate multipliers",
    )
    write_multiplier(cli.parse_args().input_dir)
39 changes: 39 additions & 0 deletions balance_data_Readme.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Data set Balancer via Image Multiplier Calculator

This script calculates the multiplier that should be applied to the number of images in each subfolder of a given directory, to equalize the number of images across all subfolders. It writes the calculated multiplier to a text file named `multiply.txt` in each subfolder.

For example, if the average number of images across all subfolders is 50, and a particular subfolder has 25 images, then the multiplier for that folder would be 2. Conversely, if a folder has 100 images, the multiplier would be 0.5.

## Requirements
- Python 3.7 or higher
- tqdm library (`pip install tqdm`)

## Usage

```bash
python balance_data.py /path/to/your/directory
```

Replace `/path/to/your/directory` with the path to the directory you want to process.

## Output

The script creates a `multiply.txt` file in every folder under the input directory (the input directory itself included) that directly contains at least one image. Each file contains a single number: the multiplier for that folder.

## Progress

The script uses a progress bar to indicate its progress. The progress bar updates after each subfolder's multiplier is calculated and written.

## How it Works

The script works by first traversing the input directory and counting the number of images in each subfolder. It then calculates the average number of images across all subfolders.

Next, the script calculates a multiplier for each subfolder by dividing the average count by the number of images in that subfolder. Folders that contain no images are skipped: they are not included in the average and no `multiply.txt` file is written for them.

Finally, the script writes each subfolder's multiplier to a `multiply.txt` file in that subfolder.

## Limitations

This script counts only files with the following extensions as images: .png, .jpg, .jpeg, .gif, .webp.

If the script is run multiple times on the same directory, it will overwrite the existing `multiply.txt` files.
42 changes: 42 additions & 0 deletions bulk rename_Readme.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Bulk Image Renaming Utility

## Overview
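The Image Renaming Utility is a command-line Python script that renames the files in a specified directory and its subdirectories. Each file's new name is built from the path of the folder that contains it: colons and backslashes in the path are replaced with a comma followed by a space, and underscores are replaced with hyphens. The script also removes a specific hard-coded string from the resulting name, then appends an underscore, an index number (starting at 1 within each folder), and the file's original extension. The original file name itself is not kept.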

## Requirements
- Python 3.6 or later

## Installation
1. Download the script `bulk_rename.py` to your desired folder.
2. Ensure you have Python 3.6 or later installed. You can check your Python version by running `python --version` in your command prompt or terminal.

## Usage
Run the script in the command prompt or terminal with the desired options:

```
python bulk_rename.py [options]
```

### Options
```
--img_dir <path> Path to the image directory (default: 'input')
```

### Example
```
python bulk_rename.py --img_dir images
```

This command will rename image files in the 'images' directory according to the specified rules.

## Customization
To customize the renaming rules, modify the following lines in the script:
- Replace the specific string you want to remove: `new_root = new_root.replace("C, , Users, shawn, Desktop,", "")`
- Change the characters being replaced: `new_root = root.replace(":", ", ").replace("\\", ", ").replace("_", "-")`

## Notes
- Ensure you have the necessary permissions to read and write to the input directory and the image files within it.
- Be cautious when running the script on important files, as the renaming process is irreversible. Consider creating a backup before using the utility.

## License
This project is open source and available under the [MIT License](https://opensource.org/licenses/MIT).
36 changes: 36 additions & 0 deletions bulk_rename.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os
import argparse

# Set up argument parser
def get_args(**parser_kwargs):
    """Parse the command line and return the resulting namespace.

    Any keyword arguments are forwarded unchanged to
    ``argparse.ArgumentParser``. The only option defined is ``--img_dir``,
    a string that defaults to ``"input"``.
    """
    cli = argparse.ArgumentParser(**parser_kwargs)
    cli.add_argument("--img_dir", type=str, default="input", help="path to image directory (default: 'input')")
    return cli.parse_args()

# Get the input directory from parsed arguments
args = get_args()
input_directory = args.img_dir

# Walk the tree; every file is renamed in place to
# "<flattened folder path>_<index><original extension>".
for root, dirs, files in os.walk(input_directory):
    # Flatten the folder path into a name prefix: ":" and "\" each become
    # ", " and "_" becomes "-".
    new_root = root.replace(":", ", ").replace("\\", ", ").replace("_", "-")
    # "/" has to be flattened as well. Otherwise the new name still contains
    # a path separator and os.rename() would target a directory that does not
    # exist (always on POSIX, and on Windows when forward slashes are used).
    new_root = new_root.replace("/", ", ")
    # Remove the machine-specific prefix left over from "C:\Users\shawn\Desktop\"
    new_root = new_root.replace("C, , Users, shawn, Desktop,", "")

    # Rename each file; the index restarts at 1 in every folder
    for i, file in enumerate(files):
        old_path = os.path.join(root, file)
        new_name = new_root + "_" + str(i + 1) + os.path.splitext(file)[1]
        new_path = os.path.join(root, new_name)

        # Already has its target name (e.g. the script was run before).
        if new_path == old_path:
            continue
        # os.rename() silently replaces an existing file on POSIX, which
        # would destroy another image; leave both files untouched instead.
        if os.path.exists(new_path):
            print(f"Skipping {old_path}: {new_path} already exists")
            continue

        # Rename the file
        os.rename(old_path, new_path)

170 changes: 170 additions & 0 deletions caption.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
"""
Copyright [2022-2023] Victor C Hall

Licensed under the GNU Affero General Public License;
You may not use this code except in compliance with the License.
You may obtain a copy of the License at

https://www.gnu.org/licenses/agpl-3.0.en.html

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import os

from PIL import Image
import argparse
import requests
from transformers import Blip2Processor, Blip2ForConditionalGeneration, GitProcessor, GitForCausalLM, AutoModel, AutoProcessor

import torch
from pynvml import *

import time
from colorama import Fore, Style

import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

# Fetch NLTK data at import time; presumably these are the resources that
# word_tokenize and pos_tag (used by replace_first_noun_with_folder_name)
# rely on -- confirm against the NLTK documentation.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Lower-case file extensions (with leading dot) that main() treats as images.
SUPPORTED_EXT = [".jpg", ".png", ".jpeg", ".bmp", ".jfif", ".webp"]



def get_gpu_memory_map():
    """Return the memory currently in use on GPU 0.

    Returns
    -------
    usage: float
        Used memory of device index 0 in MB (bytes / 1024 / 1024), or ``0.0``
        when NVML cannot be queried (for example on a host without an NVIDIA
        driver, which is a normal situation when running with --force_cpu).
    """
    try:
        nvmlInit()
    except NVMLError:
        # No usable NVML: this figure is informational only, so report zero
        # rather than aborting a captioning run.
        return 0.0

    try:
        handle = nvmlDeviceGetHandleByIndex(0)
        info = nvmlDeviceGetMemoryInfo(handle)
        return info.used / 1024 / 1024
    except NVMLError:
        return 0.0
    finally:
        # Balance the nvmlInit() above so repeated calls do not keep
        # accumulating NVML initialisations.
        nvmlShutdown()

def create_blip2_processor(model_name, device, dtype=torch.float16, cache_dir=None):
    """Load a BLIP2 processor and model and prepare the model for inference.

    Args:
        model_name: Hugging Face model id passed to ``from_pretrained``.
        device: Device the model is moved to (e.g. ``"cuda"`` or ``"cpu"``).
        dtype: Torch dtype the weights are loaded with.
        cache_dir: Optional directory used as the download cache.

    Returns:
        A ``(processor, model)`` tuple; the model is on ``device`` and in
        eval mode.
    """
    processor = Blip2Processor.from_pretrained(model_name, cache_dir=cache_dir)
    # Load the weights named by the parameter. Reading the global ``args``
    # here only worked when the file was run as a script, and ignored the
    # caller's argument.
    model = Blip2ForConditionalGeneration.from_pretrained(
        model_name, torch_dtype=dtype, cache_dir=cache_dir
    )
    model.to(device)
    model.eval()
    print(f"BLIP2 Model loaded: {model_name}")
    return processor, model


def create_git_processor(model_name, device, dtype=torch.float16, cache_dir=None):
    """Load a GIT processor and model and prepare the model for inference.

    Args:
        model_name: Hugging Face model id passed to ``from_pretrained``.
        device: Device the model is moved to (e.g. ``"cuda"`` or ``"cpu"``).
        dtype: Torch dtype the weights are loaded with.
        cache_dir: Optional directory used as the download cache.

    Returns:
        A ``(processor, model)`` tuple; the model is on ``device`` and in
        eval mode.
    """
    processor = GitProcessor.from_pretrained(model_name, cache_dir=cache_dir)
    # Load the weights named by the parameter. Reading the global ``args``
    # here only worked when the file was run as a script, and ignored the
    # caller's argument.
    model = GitForCausalLM.from_pretrained(
        model_name, torch_dtype=dtype, cache_dir=cache_dir
    )
    model.to(device)
    model.eval()
    print(f"GIT Model loaded: {model_name}")
    return processor, model


def create_auto_processor(model_name, device, dtype=torch.float16, cache_dir=None):
    """Load a processor and model through the transformers Auto classes.

    Args:
        model_name: Hugging Face model id passed to ``from_pretrained``.
        device: Device the model is moved to (e.g. ``"cuda"`` or ``"cpu"``).
        dtype: Torch dtype the weights are loaded with.
        cache_dir: Optional directory used as the download cache. Defaults to
            ``None`` (library default), matching the BLIP2/GIT loaders.

    Returns:
        A ``(processor, model)`` tuple; the model is on ``device`` and in
        eval mode.
    """
    processor = AutoProcessor.from_pretrained(model_name, cache_dir=cache_dir)
    # Load the weights named by the parameter. Reading the global ``args``
    # here only worked when the file was run as a script, and ignored the
    # caller's argument.
    model = AutoModel.from_pretrained(
        model_name, torch_dtype=dtype, cache_dir=cache_dir
    )
    model.to(device)
    model.eval()
    print("Auto Model loaded")
    return processor, model

def replace_first_noun_with_folder_name(caption, folder_name):
    """Swap the first noun of ``caption`` for ``folder_name``.

    The caption is tokenized and part-of-speech tagged; the first token whose
    tag starts with "N" is replaced by ``folder_name``. The tokens are then
    re-joined with single spaces, so the result is the space-separated token
    sequence rather than the caption's original spacing.
    """
    words = []
    swapped = False
    for token, tag in pos_tag(word_tokenize(caption)):
        if not swapped and tag.startswith("N"):
            words.append(folder_name)
            swapped = True
        else:
            words.append(token)
    return " ".join(words)

def main(args):
    """Caption every supported image under ``args.data_root``.

    Loads the model named by ``args.model`` (BLIP2, GIT, or an Auto-class
    fallback chosen by the model id), walks ``args.data_root`` recursively
    and, for each file whose extension is in ``SUPPORTED_EXT``, generates a
    caption and writes it to a ``.txt`` file next to the image.

    Args:
        args: Parsed command-line namespace. Reads ``force_cpu``,
            ``Blip_location``, ``model``, ``data_root``, ``replace_subject``
            and ``max_new_tokens``.
    """
    device = "cuda" if torch.cuda.is_available() and not args.force_cpu else "cpu"
    # Half precision only on the GPU: when CUDA is unavailable the model runs
    # on the CPU even without --force_cpu, and must then use float32.
    dtype = torch.float16 if device == "cuda" else torch.float32

    cache_dir = os.path.join(args.Blip_location, 'cache')
    os.makedirs(cache_dir, exist_ok=True)

    model_id = args.model.lower()
    if "salesforce/blip2-" in model_id:
        print(f"Using BLIP2 model: {args.model}")
        processor, model = create_blip2_processor(args.model, device, dtype, cache_dir=cache_dir)
    elif "microsoft/git-" in model_id:
        print(f"Using GIT model: {args.model}")
        processor, model = create_git_processor(args.model, device, dtype, cache_dir=cache_dir)
    else:
        # try to use auto model? doesn't work with blip/git
        processor, model = create_auto_processor(args.model, device, dtype)

    # os.walk all files in args.data_root recursively
    for root, dirs, files in os.walk(args.data_root):
        folder_name = os.path.basename(root)
        for file in files:
            # get file extension
            ext = os.path.splitext(file)[1]
            if ext.lower() not in SUPPORTED_EXT:
                continue

            full_file_path = os.path.join(root, file)
            # Close the image file as soon as it has been preprocessed.
            with Image.open(full_file_path) as image:
                start_time = time.time()
                inputs = processor(images=image, return_tensors="pt")

            # Only floating-point tensors are cast to the model dtype; integer
            # tensors (if a processor returns any) are just moved to the device.
            inputs = {
                key: tensor.to(device, dtype) if tensor.is_floating_point() else tensor.to(device)
                for key, tensor in inputs.items()
            }

            # max_new_tokens is a generation setting; it has to be given to
            # generate() (not to the processor) to limit the caption length.
            generated_ids = model.generate(**inputs, max_new_tokens=args.max_new_tokens)
            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()

            if args.replace_subject:
                modified_caption = replace_first_noun_with_folder_name(generated_text, folder_name)
            else:
                modified_caption = generated_text
            print(f"file: {file}, caption: {modified_caption}")
            exec_time = time.time() - start_time
            print(f" Time for last caption: {exec_time} sec. GPU memory used: {get_gpu_memory_map()} MB")

            # get bare name
            name = os.path.splitext(full_file_path)[0]
            # NOTE(review): this tests the extension-less path, not
            # f"{name}.txt", so existing caption files are overwritten.
            # Confirm whether skipping existing captions was intended.
            if not os.path.exists(name):
                with open(f"{name}.txt", "w") as f:
                    f.write(modified_caption)

if __name__ == "__main__":
    # Banner: list the model ids this script is known to work with.
    supported_models = (
        " microsoft/git-base-textcaps",
        " microsoft/git-large-textcaps",
        " microsoft/git-large-r-textcaps",
        " Salesforce/blip2-opt-2.7b - (9GB VRAM or recommend 32GB sys RAM)",
        " Salesforce/blip2-opt-2.7b-coco - (9GB VRAM or recommend 32GB sys RAM)",
        " Salesforce/blip2-opt-6.7b - (16.5GB VRAM or recommend 64GB sys RAM)",
        " Salesforce/blip2-opt-6.7b-coco - (16.5GB VRAM or recommend 64GB sys RAM)",
    )
    oversized_models = (
        " salesforce/blip2-flan-t5-xl",
        " salesforce/blip2-flan-t5-xl-coco",
        " salesforce/blip2-flan-t5-xxl",
    )

    print(f"{Fore.CYAN}** Current supported models:{Style.RESET_ALL}")
    print("\n".join(supported_models))
    print()
    print(f"{Fore.CYAN} * The following will likely not work on any consumer GPUs or require huge sys RAM on CPU:{Style.RESET_ALL}")
    print("\n".join(oversized_models))

    # Command-line options; the parsed namespace is handed to main().
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_root", type=str, default="input",
        help="Path to images",
    )
    parser.add_argument(
        "--Blip_location", type=str, default=os.getcwd(),
        help="Path to Blip Models",
    )
    parser.add_argument(
        "--model", type=str, default="salesforce/blip2-opt-2.7b",
        help="model from huggingface, ex. 'salesforce/blip2-opt-2.7b'",
    )
    parser.add_argument(
        "--replace_subject", action="store_true", default=False,
        help="Replace the first noun in the generated caption with the folder name",
    )
    parser.add_argument(
        "--force_cpu", action="store_true", default=False,
        help="force using CPU even if GPU is available, may be useful to run huge models if you have a lot of system memory",
    )
    parser.add_argument(
        "--max_new_tokens", type=int, default=24,
        help="max length for generated captions",
    )
    args = parser.parse_args()

    print(f"** Using model: {args.model}")
    print(f"** Captioning files in: {args.data_root}")
    main(args)
Loading