diff --git a/EveryDream_Tools_4_colab.ipynb b/EveryDream_Tools_4_colab.ipynb new file mode 100644 index 0000000..b18f94a --- /dev/null +++ b/EveryDream_Tools_4_colab.ipynb @@ -0,0 +1,452 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uJfwih4wAVgw" + }, + "source": [ + "# Please read the documentation here before you start.\n", + "\n", + "I suggest reading this doc before you connect to your runtime to avoid using credits or being charged while you figure it out.\n", + "\n", + "[link to old readme, new one is a wip](doc/AUTO_CAPTION.md)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RJxfSai-8pkD", + "cellView": "form" + }, + "outputs": [], + "source": [ + "#@markdown # Install Dependencies and connect your Gdrive\n", + "#@markdown This will take a couple minutes, be patient and watch the output for \"DONE!\"\n", + "from IPython.display import clear_output\n", + "import subprocess\n", + "from tqdm.notebook import tqdm\n", + "\n", + "print(\"downloading the required repos to use\")\n", + "#downloading repos\n", + "\n", + "!git clone https://github.com/nawnie/EveryDream.git\n", + "\n", + "#@markdown Creates /content/drive/MyDrive/everydreamlogs/ckpt\n", + "Mount_to_Gdrive = True #@param{type:\"boolean\"} \n", + "\n", + "if Mount_to_Gdrive:\n", + " from google.colab import drive\n", + " drive.mount('/content/drive')\n", + "# Set working directory\n", + "\n", + "%cd EveryDream\n", + "\n", + "!git clone https://github.com/salesforce/BLIP scripts/BLIP\n", + "!pip install -q git+https://github.com/huggingface/transformers \n", + "\n", + "clear_output()\n", + "\n", + "print(\"DONE! 
now, installing dependcies, this may seem to freeze for a moment at the start do not worry\")\n", + "\n", + "packages = [\n", + "\n", + "# install requirements\n", + "'pandas>=1.3.5',\n", + "'timm',\n", + "'fairscale==0.4.4',\n", + "'diffusers[torch]==0.14.0',\n", + "'timm',\n", + "'aiofiles',\n", + "'colorama',\n", + "'pynvml'\n", + "'tensorrt'\n", + "]\n", + "\n", + "for package in tqdm(packages, desc='Installing packages', unit='package'):\n", + " if isinstance(package, tuple):\n", + " package_name, extra_index_url = package\n", + " cmd = f\"pip install -q {package_name} --extra-index-url {extra_index_url}\"\n", + " else:\n", + " cmd = f\"pip install -q {package}\"\n", + " \n", + " subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)\n", + "\n", + "!pip install -q nltk #oddly this does not install correctly with above process\n", + "clear_output()\n", + "\n", + "clear_output()\n", + "print(\"DONE! installing dependcies make sure we are using python 3.10.x\")\n", + "!python --version" + ] + }, + { + "cell_type": "code", + "source": [ + "#@title Upload your input images or video into the EveryDream/input folder\n", + "#@markdown Run the following cell to create an upload button, allowing you to upload your images directly to this folder. 
\n", + "#@markdown * it is faster to simply right click the input folder in the file browser available on the left toolbar\n", + "from google.colab import files\n", + "\n", + "uploaded = files.upload()\n", + "\n", + "for name, data in uploaded.items():\n", + " with open(f\"input/{name}\", \"wb\") as f:\n", + " f.write(data)\n", + " print(f\"Uploaded file: {name}\")" + ], + "metadata": { + "cellView": "form", + "id": "pvLcqUEobmAV" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "#Blip, Blip-COCO, T5-FLAN, and Git Captioner\n", + "_____________________________________\n", + "\n", + "\"Blip\", \"Blip-COCO\", \"T5-FLAN\", and \"Git Captioner\" are four transformer-based models developed for image captioning.\n", + "\n", + "* \"Blip\" is trained on a large and diverse dataset of image captions, and is known for its strong performance on a wide range of image captioning tasks and datasets.\n", + "\n", + "* \"Blip-COCO\" is a variant of \"Blip\" that has been fine-tuned on the COCO dataset, a popular benchmark for image captioning. Fine-tuning on COCO can improve a model's performance specifically on this dataset, but may not generalize as well to other datasets or tasks.\n", + "\n", + " * The COCO dataset consists of more than 330,000 images with five captions each, and is known for its high-quality captions and diversity of image types. \n", + "\n", + "\n", + "* \"T5-FLAN\" is similar to \"Blip\" in that it is trained on a large and diverse dataset of image captions. However, it uses a different architecture called the \"FLAN\" (Feature-wise Linear Attention) transformer. 
The FLAN transformer is designed to better capture long-range dependencies and improve the modeling of feature interactions in the encoder, which can lead to better performance on certain tasks.\n", + "\n", + "* \"Git Captioner\" is a model that generates image captions using a combination of computer vision and natural language processing techniques. It leverages pre-trained models for object detection and recognition, and then generates captions using a transformer-based language model.\n", + "\n", + "When choosing among these models, consider your specific task or dataset needs. If you require a model that can perform well across a wide range of image captioning tasks and datasets, \"Blip\" or \"T5-FLAN\" may be a good choice. If you require strong performance specifically on the COCO dataset, consider fine-tuning \"Blip-COCO\". If you prefer a model that leverages object detection and recognition in addition to language modeling, consider \"Git Captioner\".\n", + "\n", + "In addition to image captioning, \"T5-FLAN\" and \"Git Captioner\" have also been applied to tasks such as image generation and visual question answering.\n", + "\n" + ], + "metadata": { + "id": "NPWIluxMhAm8" + } + }, + { + "cell_type": "code", + "source": [ + "#@title Blip2 uses alot of ram, this cell will clear our ram usage\n", + "#@markdown Run this cell before captioning and if your model crashes while loading.\n", + "import gc\n", + "gc.collect()" + ], + "metadata": { + "cellView": "form", + "id": "23gKI8QGJTyM" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4TAICahl-RPn", + "cellView": "form" + }, + "outputs": [], + "source": [ + "#@title Dream Captioner\n", + "input_folder = \"\" #@param {type:\"string\"}\n", + "#@markdown * The location of images to be captoned\n", + "model_name = \"Salesforce/blip2-opt-2.7b\" #@param [\"microsoft/git-large-r-textcaps\", \"microsoft/git-large-textcaps\", 
\"microsoft/git-base-textcaps\", \"Salesforce/blip2-opt-2.7b\", \"Salesforce/blip2-opt-2.7b-coco\", \"Salesforce/blip2-flan-t5-xl\"] {allow-input: true}\n", + "#@markdown * Select a model from the drop down menu, the ones on this menu have been tested to run in this colab enviroment.\n", + "model_storage = \"/content/drive/MyDrive/Blip\" #@param {type:\"string\"}\n", + "#@markdown * Choose where to save these models on the Gdrive\n", + "token_limit = 12 #@param {type:\"slider\", min:10, max:38, step:1}\n", + "#@markdown * Token length\n", + "replace_subject = True #@param {type:\"boolean\"}\n", + "#@markdown * This will find the first noun and replace it with the name of the folder it was in\n", + "#@markdown * EXAMPLE: if i caption images in a folder named Peter Griffin the first noun (person place or thing) in that caption will be replaced by \"Peter Griffin\"\n", + "use_cpu = False #@param {type:\"boolean\"}\n", + "#@markdown * this probably should not be used with colab but its there to try without a gpu\n", + "\n", + "rename = \"\"\n", + "if replace_subject:\n", + " rename = \"--replace_subject\"\n", + "\n", + "cpu = \"\"\n", + "if use_cpu:\n", + " cpu = \"--force_cpu\"\n", + "\n", + "!python caption.py \\\n", + "$cpu \\\n", + "$rename \\\n", + "--model $model_name \\\n", + "--data_root \"$input_folder\" \\\n", + "--Blip_location \"$model_storage\" \\\n", + "--max_new_tokens $token_limit" + ] + }, + { + "cell_type": "code", + "source": [ + "#@title Extract Frames from video\n", + "\n", + "#@markdown Here we will use the folder input_vid \n", + "!mkdir output/vid \n", + "\n", + "!python /scripts/extract_video_frames.py \\\n", + "--vid_dir input \\\n", + "--out_dir output/vid \\\n", + "--format png \\\n", + "--interval 10" + ], + "metadata": { + "id": "RDuBL4k8Avz-", + "cellView": "form" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#@title Move the images to the input folder for captioning\n", + "!cp -r 
output/vid input" + ], + "metadata": { + "id": "Uv8wAHSQAvrm", + "cellView": "form" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Laion Downloader\n", + "\n", + "* --laion_dir: directory with laion parquet files, default is ./laion\n", + "\n", + "* --search_text: csv of words with AND logic, ex \\\"photo,man,dog\\\"\n", + "\n", + "* --out_dir: directory to download files to, I've defaulted this to inputs so they can be captioned \n", + "\n", + "* --log_dir: directory for logs, if omitted will not log, logs may be large!\n", + "\n", + "* --column: column to search for matches, default is 'TEXT', but you could use 'URL' if you wanted\n", + "\n", + "* --limit: max number of matching images to download, warning: may be slightly imprecise due to concurrency and http errors, default is 100\n", + "\n", + "* --min_hw: min height AND width of image to download, default is 512\n", + " \n", + "* --force: forces a full download of all images, even if no search is provided, USE CAUTION!\n", + "\n", + "* --parquet_skip: skips the first n parquet files on disk, useful to resume\n", + " \n", + "* --verbose: additional logging of URL and TEXT \n", + " \n", + "* --test: skips downloading, for checking filters, use with \"--verbose\"\n" + ], + "metadata": { + "id": "wY2f2LkPGSVa" + } + }, + { + "cell_type": "code", + "source": [ + "!python scripts/download_laion.py \\\n", + "--laion_dir ./laion \\\n", + "--search_text \"photo,man,dog\" \\\n", + "#--out_dir input \\\n", + "#--log_dir logs \\\n", + "#--column TEXT \\\n", + "#--limit 100 \\\n", + "#--min_hw 512 \\\n", + "#--force False \\\n", + "#--parquet_skip 0 \\\n", + "#--Verbose False \\\n", + "#--test not \\\n" + ], + "metadata": { + "id": "cxw60TTmEy2C" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Here we can take our now captioned images and replace generic terms with our subjects\n", + "\n", + "* --find: will 
search for a word, in this case man\n", + "\n", + "* --replace: will replace the found word with, in this case, bob smith\n", + "\n", + "* --append_only: this will allow us to add a tag at the end " + ], + "metadata": { + "id": "EBdLelNpDjYc" + } + }, + { + "cell_type": "code", + "source": [ + "!python scripts/filename_replace.py \\\n", + "--img_dir output \\\n", + "--find \"man\" \\\n", + "--replace \"bob smith\"" + ], + "metadata": { + "id": "6Y1md3OHAvhw" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Now we can choose to create text files based on our file names. This is useful for images with very long descriptions or tag lists: Windows has a limit of 256 characters, and files will not transfer correctly to a Windows program if they are longer. Moving these files in a zip is fine, however, and causes no issues.\n" + ], + "metadata": { + "id": "W0MspWmXJQuc" + } + }, + { + "cell_type": "code", + "source": [ + "!python scripts/createtxtfromfilename.py" + ], + "metadata": { + "id": "BpvenvyQJr9b" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Compress our images " + ], + "metadata": { + "id": "boVkDsiWJ_-P" + } + }, + { + "cell_type": "code", + "source": [ + "!python scripts/compress_img.py \\\n", + "--img_dir output \\\n", + "--out_dir output/compressed_images \\\n", + "--max_mp 1.5 \n", + "#--overwrite False \\\n", + "#--Quality 95 \\\n", + "#--noresize False \\\n", + "#--delete \\" + ], + "metadata": { + "id": "F6QYfylhKAII" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HBrWnu1C_lN9" + }, + "source": [ + "## Download your DataSet from EveryDream/output\n", + "\n", + "If you're on a colab you can use the cell below to push your output to your Gdrive." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ldW2sDLcAVgz" + }, + "outputs": [], + "source": [ + "\n", + "!mkdir /content/drive/MyDrive/Auto_Data_sets\n", + "!cp -r output/ /content/drive/MyDrive/Auto_Data_sets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B-HFqbP4AVgz" + }, + "source": [ + "## If not on colab/gdrive, the following will zip up your files for extraction\n", + "\n", + "You'll still need to use your runtime's own download feature to download the zip.\n", + "\n", + "![output zip](https://github.com/victorchall/EveryDream/blob/main/demo/output_zip.png?raw=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SVa80mrKAVg0" + }, + "outputs": [], + "source": [ + "import patoolib\n", + "from google.colab import files\n", + "import os\n", + "\n", + "Zip_Location = \"/content/drive/MyDrive/output\" #@param {type:\"string\"}\n", + "#@markdown * this is the location containing your captioned images\n", + "\n", + "!mkdir output/zip\n", + "!zip -r output/zip/output.zip \"$Zip_Location\"\n", + "\n", + "if os.path.exists(output/zip/output.zip):\n", + " files.download(output/zip/output.zip)\n", + " !rm output/zip/output.zip\n", + "else:\n", + " print(\"Error: File not found at specified path.\")\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [], + "machine_shape": "hm", + "gpuType": "T4", + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3.10.5 ('.venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.5" + }, + "vscode": { + "interpreter": { + "hash": "faf4a6abb601e3a9195ce3e9620411ceec233a951446de834cdf28542d2d93b4" + } + }, + "accelerator": "GPU", + "gpuClass": "standard" + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/NEW_EveryDream_Tools_4_colab.ipynb b/NEW_EveryDream_Tools_4_colab.ipynb new file mode 100644 index 
0000000..2b41fe0 --- /dev/null +++ b/NEW_EveryDream_Tools_4_colab.ipynb @@ -0,0 +1,561 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uJfwih4wAVgw" + }, + "source": [ + "# Please read the documentation here before you start.\n", + "\n", + "I suggest reading this doc before you connect to your runtime to avoid using credits or being charged while you figure it out.\n", + "\n", + "[link to old readme, new one is a wip](doc/AUTO_CAPTION.md)" + ] + }, + { + "cell_type": "code", + "source": [ + "#@title # Install python 3.10 and connect Gdrive\n", + "#@markdown # This will show a runtime error, it's ok, it's on purpose to restart the kernel to update python.\n", + "import os\n", + "import time\n", + "import sys\n", + "from IPython.display import clear_output\n", + "\n", + "\n", + "#@markdown Optional connect Gdrive But strongly recommended\n", + "#@markdown This will let you put all your training data and checkpoints directly on your drive. 
Much faster/easier to continue later, less setup time.\n", + "\n", + "#@markdown Creates /content/drive/MyDrive/everydreamlogs/ckpt\n", + "Mount_to_Gdrive = True #@param{type:\"boolean\"} \n", + "\n", + "if Mount_to_Gdrive:\n", + " from google.colab import drive\n", + " drive.mount('/content/drive')\n", + "\n", + " !mkdir -p /content/drive/MyDrive/everydreamlogs/ckpt\n", + "\n", + "# Define a custom function to display a progress bar\n", + "def display_progress_bar(progress, total, prefix=\"\"):\n", + " sys.stdout.write(f\"\\r{prefix}[{'=' * progress}>{' ' * (total - progress - 1)}] {progress + 1}/{total}\")\n", + " sys.stdout.flush()\n", + "\n", + "total_steps = 10\n", + "current_step = 0\n", + "\n", + "!pip install patool --progress-bar on --quiet\n", + "current_step += 1\n", + "display_progress_bar(current_step, total_steps, \"install progress:\")\n", + "\n", + "!pip install transformers==4.25.1 --progress-bar on --quiet\n", + "current_step += 1\n", + "display_progress_bar(current_step, total_steps, \"install progress:\")\n", + "\n", + "\n", + "!pip install watchdog --progress-bar on --quiet\n", + "current_step += 1\n", + "display_progress_bar(current_step, total_steps, \"install progress:\")\n", + "\n", + "!pip install matplotlib --progress-bar on --quiet\n", + "current_step += 1\n", + "display_progress_bar(current_step, total_steps, \"install progress:\")\n", + "\n", + "# Install the alive-package library\n", + "!pip install alive-progress --progress-bar on --quiet\n", + "current_step += 1\n", + "display_progress_bar(current_step, total_steps, \"install progress:\")\n", + "\n", + "\n", + "# Install the tqdm library\n", + "!pip install tqdm --progress-bar on --quiet\n", + "current_step += 1\n", + "display_progress_bar(current_step, total_steps, \"install progress:\")\n", + "\n", + "# Download the py310.sh script\n", + "!wget https://github.com/korakot/kora/releases/download/v0.10/py310.sh -q\n", + "current_step += 1\n", + "display_progress_bar(current_step, 
total_steps, \"install progress:\")\n", + "\n", + "# Run the py310.sh script\n", + "try:\n", + " output = os.popen('bash ./py310.sh -b -f -p /usr/local 2>&1').read()\n", + " total_lines = len(output.splitlines())\n", + " for i, line in enumerate(output.splitlines()):\n", + " clear_output(wait=True)\n", + " display_progress_bar(i, total_lines, \"install progress:\")\n", + "except Exception as e:\n", + " print(str(e))\n", + "\n", + "current_step += 1\n", + "display_progress_bar(current_step, total_steps, \"install progress:\")\n", + "\n", + "# Install the py310 kernel\n", + "!python -m ipykernel install --name \"py310\" --user > /dev/null 2>&1\n", + "current_step += 1\n", + "display_progress_bar(current_step, total_steps, \"install progress:\")\n", + "\n", + "# Clear output\n", + "!rm /content/py310.sh\n", + "current_step += 1\n", + "display_progress_bar(current_step, total_steps, \"install progress:\")\n", + "clear_output()\n", + "time.sleep(1) #needed to clear is before kill\n", + "os.kill(os.getpid(), 9)\n", + "print(\"\\nInstallation completed.\")\n" + ], + "metadata": { + "id": "Z_ZHfnQ52dg9", + "cellView": "form" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RJxfSai-8pkD", + "cellView": "form" + }, + "outputs": [], + "source": [ + "#@markdown # Finish Install Dependencies into the new python\n", + "#@markdown This will take a couple minutes, be patient and watch the output for \"DONE!\"\n", + "from IPython.display import clear_output\n", + "import subprocess\n", + "from tqdm.notebook import tqdm\n", + "\n", + "print(\"downloading the required repos to use\")\n", + "#downloading repos\n", + "\n", + "!git clone https://github.com/victorchall/EveryDream.git\n", + "\n", + "# Set working directory\n", + "\n", + "%cd EveryDream\n", + "\n", + "!git clone https://github.com/salesforce/BLIP scripts/BLIP\n", + "!wget -O caption.py 
https://raw.githubusercontent.com/nawnie/New-Ed-Tools/main/caption.py\n", + "!pip install git+https://github.com/huggingface/transformers \n", + "\n", + "clear_output()\n", + "\n", + "print(\"DONE! now, installing dependcies, this may seem to freeze for a moment at the start do not worry\")\n", + "\n", + "packages = [\n", + " ('torch==1.13.1+cu117 torchvision==0.14.1+cu117', 'https://download.pytorch.org/whl/cu117'),\n", + " 'diffusers[torch]==0.13.0',\n", + " 'pynvml==11.4.1',\n", + " 'bitsandbytes==0.35.0',\n", + " 'pandas>=1.3.5'\n", + " 'aiofiles'\n", + " 'timm'\n", + " 'fairscale==0.4.4'\n", + " 'ftfy==6.1.1',\n", + " 'aiohttp==3.8.3',\n", + " 'tensorboard>=2.11.0',\n", + " 'protobuf==3.20.1',\n", + " 'wandb==0.13.6',\n", + " 'pyre-extensions==0.0.23',\n", + " 'xformers==0.0.16',\n", + " 'pytorch-lightning==1.6.5',\n", + " 'OmegaConf==2.2.3',\n", + " 'numpy==1.23.5',\n", + " 'colorama',\n", + " 'keyboard',\n", + " 'triton',\n", + " 'lion-pytorch'\n", + "]\n", + "\n", + "for package in tqdm(packages, desc='Installing packages', unit='package'):\n", + " if isinstance(package, tuple):\n", + " package_name, extra_index_url = package\n", + " cmd = f\"pip install -q {package_name} --extra-index-url {extra_index_url}\"\n", + " else:\n", + " cmd = f\"pip install -q {package}\"\n", + " \n", + " subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)\n", + "\n", + "!pip install nltk #oddly this does not install correctly with above process\n", + "\n", + "clear_output()\n", + "\n", + "\n", + "!python utils/get_yamls.py\n", + "clear_output()\n", + "print(\"DONE! installing dependcies make sure we are using python 3.10.x\")\n", + "!python --version\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "source": [ + "#@title Upload your input images or video into the EveryDream/input folder\n", + "#@markdown Run the following cell to create an upload button, allowing you to upload your images directly to this folder. 
\n", + "#@markdown * it is faster to simply right click the input folder in the file browser available on the left toolbar\n", + "from google.colab import files\n", + "\n", + "uploaded = files.upload()\n", + "\n", + "for name, data in uploaded.items():\n", + " with open(f\"input/{name}\", \"wb\") as f:\n", + " f.write(data)\n", + " print(f\"Uploaded file: {name}\")\n", + "\n" + ], + "metadata": { + "cellView": "form", + "id": "pvLcqUEobmAV" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "#Blip, Blip-COCO, T5-FLAN, and Git Captioner\n", + "_____________________________________\n", + "\n", + "\"Blip\", \"Blip-COCO\", \"T5-FLAN\", and \"Git Captioner\" are four transformer-based models developed for image captioning.\n", + "\n", + "* \"Blip\" is trained on a large and diverse dataset of image captions, and is known for its strong performance on a wide range of image captioning tasks and datasets.\n", + "\n", + "* \"Blip-COCO\" is a variant of \"Blip\" that has been fine-tuned on the COCO dataset, a popular benchmark for image captioning. Fine-tuning on COCO can improve a model's performance specifically on this dataset, but may not generalize as well to other datasets or tasks.\n", + "\n", + " * The COCO dataset consists of more than 330,000 images with five captions each, and is known for its high-quality captions and diversity of image types. \n", + "\n", + "\n", + "* \"T5-FLAN\" is similar to \"Blip\" in that it is trained on a large and diverse dataset of image captions. However, it uses a different architecture called the \"FLAN\" (Feature-wise Linear Attention) transformer. 
The FLAN transformer is designed to better capture long-range dependencies and improve the modeling of feature interactions in the encoder, which can lead to better performance on certain tasks.\n", + "\n", + "* \"Git Captioner\" is a model that generates image captions using a combination of computer vision and natural language processing techniques. It leverages pre-trained models for object detection and recognition, and then generates captions using a transformer-based language model.\n", + "\n", + "When choosing among these models, consider your specific task or dataset needs. If you require a model that can perform well across a wide range of image captioning tasks and datasets, \"Blip\" or \"T5-FLAN\" may be a good choice. If you require strong performance specifically on the COCO dataset, consider fine-tuning \"Blip-COCO\". If you prefer a model that leverages object detection and recognition in addition to language modeling, consider \"Git Captioner\".\n", + "\n", + "In addition to image captioning, \"T5-FLAN\" and \"Git Captioner\" have also been applied to tasks such as image generation and visual question answering.\n", + "\n" + ], + "metadata": { + "id": "NPWIluxMhAm8" + } + }, + { + "cell_type": "code", + "source": [ + "#@title Blip2 uses alot of ram, this cell will clear our ram usage\n", + "#@markdown Run this cell before captioning and if your model crashes while loading.\n", + "import gc\n", + "gc.collect()" + ], + "metadata": { + "cellView": "form", + "id": "23gKI8QGJTyM" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4TAICahl-RPn", + "cellView": "form" + }, + "outputs": [], + "source": [ + "#@title Dream Captioner\n", + "input_folder = \"/content/EveryDream/input\" #@param {type:\"string\"}\n", + "#@markdown * The location of images to be captoned\n", + "model_name = \"Salesforce/blip2-opt-2.7b-coco\" #@param [\"microsoft/git-large-r-textcaps\", 
\"microsoft/git-large-textcaps\", \"microsoft/git-base-textcaps\", \"Salesforce/blip2-opt-2.7b\", \"Salesforce/blip2-opt-2.7b-coco\", \"Salesforce/blip2-flan-t5-xl\"]\n", + "#@markdown * Select a model from the drop down menu, the ones on this menu have been tested to run in this colab enviroment.\n", + "model_storage = \"/content/\" #@param {type:\"string\"}\n", + "#@markdown * Choose where to save these models on the Gdrive\n", + "token_limit = 24 #@param {type:\"slider\", min:10, max:38, step:1}\n", + "#@markdown * Token length\n", + "replace_subject = True #@param {type:\"boolean\"}\n", + "#@markdown * This will find the first noun and replace it with the name of the folder it was in\n", + "#@markdown * EXAMPLE: if i caption images in a folder named Peter Griffin the first noun (person place or thing) in that caption will be replaced by \"Peter Griffin\"\n", + "use_cpu = False #@param {type:\"boolean\"}\n", + "#@markdown * this probably should not be used with colab but its there to try without a gpu\n", + "\n", + "rename = \"\"\n", + "if replace_subject:\n", + " rename = \"--replace_subject\"\n", + "\n", + "cpu = \"\"\n", + "if use_cpu:\n", + " cpu = \"--force_cpu\"\n", + "\n", + "!python caption.py \\\n", + "$cpu \\\n", + "$rename \\\n", + "--model $model_name \\\n", + "--data_root \"$input_folder\" \\\n", + "--Blip_location \"$model_storage\" \\\n", + "--max_new_tokens $token_limit\n" + ] + }, + { + "cell_type": "code", + "source": [ + "#@title Extract Frames from video\n", + "\n", + "#@markdown Here we will use the folder input_vid \n", + "!mkdir output/vid \n", + "\n", + "!python /scripts/extract_video_frames.py \\\n", + "--vid_dir input \\\n", + "--out_dir output/vid \\\n", + "--format png \\\n", + "--interval 10 " + ], + "metadata": { + "id": "RDuBL4k8Avz-", + "cellView": "form" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#@title Move the images to the input folder for captioning\n", + "!cp -r 
output/vid input" + ], + "metadata": { + "id": "Uv8wAHSQAvrm", + "cellView": "form" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Laion Downloader\n", + "\n", + "* --laion_dir: directory with laion parquet files, default is ./laion\n", + "\n", + "* --search_text: csv of words with AND logic, ex \\\"photo,man,dog\\\"\n", + "\n", + "* --out_dir: directory to download files to, I've defaulted this to inputs so they can be captioned \n", + "\n", + "* --log_dir: directory for logs, if omitted will not log, logs may be large!\n", + "\n", + "* --column: column to search for matches, default is 'TEXT', but you could use 'URL' if you wanted\n", + "\n", + "* --limit: max number of matching images to download, warning: may be slightly imprecise due to concurrency and http errors, default is 100\n", + "\n", + "* --min_hw: min height AND width of image to download, default is 512\n", + " \n", + "* --force: forces a full download of all images, even if no search is provided, USE CAUTION!\n", + "\n", + "* --parquet_skip: skips the first n parquet files on disk, useful to resume\n", + " \n", + "* --verbose: additional logging of URL and TEXT \n", + " \n", + "* --test: skips downloading, for checking filters, use with \"--verbose\"\n" + ], + "metadata": { + "id": "wY2f2LkPGSVa" + } + }, + { + "cell_type": "code", + "source": [ + "!python scripts/download_laion.py \\\n", + "--laion_dir ./laion \\\n", + "--search_text \"photo,man,dog\" \\\n", + "#--out_dir input \\\n", + "#--log_dir logs \\\n", + "#--column TEXT \\\n", + "#--limit 100 \\\n", + "#--min_hw 512 \\\n", + "#--force False \\\n", + "#--parquet_skip 0 \\\n", + "#--Verbose False \\\n", + "#--test not \\\n" + ], + "metadata": { + "id": "cxw60TTmEy2C" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Here we can take our now captioned images and replace generic terms with our subjects\n", + "\n", + "* --find: will 
search for a word, in this case man\n", + "\n", + "* --replace: will replace the found word with, in this case, bob smith\n", + "\n", + "* --append_only: this will allow us to add a tag at the end " + ], + "metadata": { + "id": "EBdLelNpDjYc" + } + }, + { + "cell_type": "code", + "source": [ + "!python scripts/filename_replace.py \\\n", + "--img_dir output \\\n", + "--find \"man\" \\\n", + "--replace \"bob smith\"" + ], + "metadata": { + "id": "6Y1md3OHAvhw" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Now we can choose to create text files based on our file names. This is useful for images with very long descriptions or tag lists: Windows has a limit of 256 characters, and files will not transfer correctly to a Windows program if they are longer. Moving these files in a zip is fine, however, and causes no issues.\n" + ], + "metadata": { + "id": "W0MspWmXJQuc" + } + }, + { + "cell_type": "code", + "source": [ + "!python scripts/createtxtfromfilename.py" + ], + "metadata": { + "id": "BpvenvyQJr9b" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Compress our images " + ], + "metadata": { + "id": "boVkDsiWJ_-P" + } + }, + { + "cell_type": "code", + "source": [ + "!python scripts/compress_img.py \\\n", + "--img_dir output \\\n", + "--out_dir output/compressed_images \\\n", + "--max_mp 1.5 \n", + "#--overwrite False \\\n", + "#--Quality 95 \\\n", + "#--noresize False \\\n", + "#--delete \\" + ], + "metadata": { + "id": "F6QYfylhKAII" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HBrWnu1C_lN9" + }, + "source": [ + "## Download your DataSet from EveryDream/output\n", + "\n", + "If you're on a colab you can use the cell below to push your output to your Gdrive." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ldW2sDLcAVgz" + }, + "outputs": [], + "source": [ + "\n", + "!mkdir /content/drive/MyDrive/Auto_Data_sets\n", + "!cp -r output/ /content/drive/MyDrive/Auto_Data_sets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B-HFqbP4AVgz" + }, + "source": [ + "## If not on colab/gdrive, the following will zip up your files for extraction\n", + "\n", + "You'll still need to use your runtime's own download feature to download the zip.\n", + "\n", + "![output zip](https://github.com/victorchall/EveryDream/blob/main/demo/output_zip.png?raw=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SVa80mrKAVg0" + }, + "outputs": [], + "source": [ + "import patoolib\n", + "from google.colab import files\n", + "import os\n", + "\n", + "Zip_Location = \"/content/drive/MyDrive/output\" #@param {type:\"string\"}\n", + "#@markdown * this is the location containing your captioned images\n", + "\n", + "!mkdir output/zip\n", + "!zip -r output/zip/output.zip \"$Zip_Location\"\n", + "\n", + "if os.path.exists(output/zip/output.zip):\n", + " files.download(output/zip/output.zip)\n", + " !rm output/zip/output.zip\n", + "else:\n", + " print(\"Error: File not found at specified path.\")\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [], + "machine_shape": "hm", + "include_colab_link": true + }, + "kernelspec": { + "display_name": "Python 3.10.5 ('.venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.5" + }, + "vscode": { + "interpreter": { + "hash": "faf4a6abb601e3a9195ce3e9620411ceec233a951446de834cdf28542d2d93b4" + } + }, + "accelerator": "GPU", + "gpuClass": "standard" + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/balance_data.py b/balance_data.py new file mode 100644 index 0000000..e1124eb --- /dev/null +++ b/balance_data.py @@ 
-0,0 +1,34 @@ +import os +from tqdm import tqdm +from pathlib import Path +import argparse + +def count_images(input_dir): + folder_counts = {} + total_images = 0 + total_folders = 0 + for root, dirs, files in os.walk(input_dir): + image_count = sum(1 for file in files if file.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp'))) + if image_count > 0: + folder_counts[root] = image_count + total_images += image_count + total_folders += 1 + + return folder_counts, total_images / total_folders if total_folders > 0 else 0 + +def write_multiplier(input_dir): + folder_counts, avg_count = count_images(input_dir) + progress_bar = tqdm(total=len(folder_counts), desc="Writing multipliers", position=0, leave=True) + for folder, count in folder_counts.items(): + multiplier = avg_count / count if count > 0 else 0 + with open(Path(folder) / 'multiply.txt', 'w') as f: + f.write(str(multiplier)) + progress_bar.update(1) + progress_bar.close() + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Calculate image multiplication factors for subfolders") + parser.add_argument("input_dir", metavar="input_dir", type=str, help="the input directory to calculate multipliers") + args = parser.parse_args() + + write_multiplier(args.input_dir) diff --git a/balance_data_Readme.txt b/balance_data_Readme.txt new file mode 100644 index 0000000..7f34f72 --- /dev/null +++ b/balance_data_Readme.txt @@ -0,0 +1,39 @@ +# Data set Balancer via Image Multiplier Calculator + +This script calculates the multiplier that should be applied to the number of images in each subfolder of a given directory, to equalize the number of images across all subfolders. It writes the calculated multiplier to a text file named `multiply.txt` in each subfolder. + +For example, if the average number of images across all subfolders is 50, and a particular subfolder has 25 images, then the multiplier for that folder would be 2. Conversely, if a folder has 100 images, the multiplier would be 0.5. 
+ +## Requirements +- Python 3.7 or higher +- tqdm library (`pip install tqdm`) + +## Usage + +```bash +python balance_data.py /path/to/your/directory +``` + +Replace `/path/to/your/directory` with the path to the directory you want to process. + +## Output + +The script will create a `multiply.txt` file in each folder under the input directory that contains at least one image. This file will contain a single number, which is the multiplier for that folder. + +## Progress + +The script uses a progress bar to indicate its progress. The progress bar updates after each subfolder's multiplier is calculated and written. + +## How it Works + +The script works by first traversing the input directory and counting the number of images in each subfolder. It then calculates the average number of images across all subfolders that contain images. + +Next, the script calculates a multiplier for each subfolder by dividing the average count by the number of images in that subfolder. Subfolders that contain no images are skipped: they do not count toward the average and no `multiply.txt` is written for them. + +Finally, the script writes each subfolder's multiplier to a `multiply.txt` file in that subfolder. + +## Limitations + +This script counts only files with the following extensions as images: .png, .jpg, .jpeg, .gif, .webp. + +If the script is run multiple times on the same directory, it will overwrite the existing `multiply.txt` files. \ No newline at end of file diff --git a/bulk rename_Readme.txt b/bulk rename_Readme.txt new file mode 100644 index 0000000..6d8bc81 --- /dev/null +++ b/bulk rename_Readme.txt @@ -0,0 +1,42 @@ +# Bulk Image Renaming Utility + +## Overview +The Image Renaming Utility is a command-line Python script that renames files in a specified directory and its subdirectories. The script builds each new name from the path of the directory containing the file, replacing colons and backslashes with a comma followed by a space, and underscores with hyphens. It also removes a specific string from the new names and adds an index number to each file.
+ +## Requirements +- Python 3.6 or later + +## Installation +1. Download the script `bulk_rename.py` to your desired folder. +2. Ensure you have Python 3.6 or later installed. You can check your Python version by running `python --version` in your command prompt or terminal. + +## Usage +Run the script in the command prompt or terminal with the desired options: + +``` +python bulk_rename.py [options] +``` + +### Options +``` +--img_dir Path to the image directory (default: 'input') +``` + +### Example +``` +python bulk_rename.py --img_dir images +``` + +This command will rename every file in the 'images' directory and its subdirectories according to the specified rules. + +## Customization +To customize the renaming rules, modify the following lines in the script: +- Replace the specific string you want to remove: `new_root = new_root.replace("C, , Users, shawn, Desktop,", "")` +- Change the characters being replaced: `new_root = root.replace(":", ", ").replace("\\", ", ").replace("_", "-")` + +## Notes +- Ensure you have the necessary permissions to read and write to the input directory and the image files within it. +- Be cautious when running the script on important files, as the renaming process is irreversible. Consider creating a backup before using the utility. + +## License +This project is open source and available under the [MIT License](https://opensource.org/licenses/MIT).
\ No newline at end of file diff --git a/bulk_rename.py b/bulk_rename.py new file mode 100644 index 0000000..966235c --- /dev/null +++ b/bulk_rename.py @@ -0,0 +1,36 @@ +import os +import argparse + +# Set up argument parser +def get_args(**parser_kwargs): + """Get command-line options.""" + parser = argparse.ArgumentParser(**parser_kwargs) + parser.add_argument( + "--img_dir", + type=str, + default="input", + help="path to image directory (default: 'input')", + ) + args = parser.parse_args() + return args + +# Get the input directory from parsed arguments +args = get_args() +input_directory = args.img_dir + +# Iterate through directories and files +for root, dirs, files in os.walk(input_directory): + # Replace ":" and "\\" with a space + new_root = root.replace(":", ", ").replace("\\", ", ").replace("_", "-") + # Remove the string "c Users shawn Desktop" + new_root = new_root.replace("C, , Users, shawn, Desktop,", "") + + # Iterate through files and rename them + for i, file in enumerate(files): + old_path = os.path.join(root, file) + new_name = new_root + "_" + str(i + 1) + os.path.splitext(file)[1] + new_path = os.path.join(root, new_name) + + # Rename the file + os.rename(old_path, new_path) + diff --git a/caption.py b/caption.py new file mode 100644 index 0000000..f5068bc --- /dev/null +++ b/caption.py @@ -0,0 +1,170 @@ +""" +Copyright [2022-2023] Victor C Hall + +Licensed under the GNU Affero General Public License; +You may not use this code except in compliance with the License. +You may obtain a copy of the License at + + https://www.gnu.org/licenses/agpl-3.0.en.html + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +import os + +from PIL import Image +import argparse +import requests +from transformers import Blip2Processor, Blip2ForConditionalGeneration, GitProcessor, GitForCausalLM, AutoModel, AutoProcessor + +import torch +from pynvml import * + +import time +from colorama import Fore, Style + +import nltk +from nltk.tokenize import word_tokenize +from nltk.tag import pos_tag + +nltk.download('punkt') +nltk.download('averaged_perceptron_tagger') + +SUPPORTED_EXT = [".jpg", ".png", ".jpeg", ".bmp", ".jfif", ".webp"] + + + +def get_gpu_memory_map(): + """Get the current gpu usage. + Returns + ------- + usage: dict + Keys are device ids as integers. + Values are memory usage as integers in MB. + """ + nvmlInit() + handle = nvmlDeviceGetHandleByIndex(0) + info = nvmlDeviceGetMemoryInfo(handle) + return info.used/1024/1024 + +def create_blip2_processor(model_name, device, dtype=torch.float16, cache_dir=None): + processor = Blip2Processor.from_pretrained(model_name, cache_dir=cache_dir) + model = Blip2ForConditionalGeneration.from_pretrained( + args.model, torch_dtype=dtype, cache_dir=cache_dir + ) + model.to(device) + model.eval() + print(f"BLIP2 Model loaded: {model_name}") + return processor, model + + +def create_git_processor(model_name, device, dtype=torch.float16, cache_dir=None): + processor = GitProcessor.from_pretrained(model_name, cache_dir=cache_dir) + model = GitForCausalLM.from_pretrained( + args.model, torch_dtype=dtype, cache_dir=cache_dir + ) + model.to(device) + model.eval() + print(f"GIT Model loaded: {model_name}") + return processor, model + + +def create_auto_processor(model_name, device, dtype=torch.float16): + processor = AutoProcessor.from_pretrained(model_name) + model = AutoModel.from_pretrained( + args.model, torch_dtype=dtype + ) + model.to(device) + model.eval() + print("Auto Model loaded") + return processor, model + +def replace_first_noun_with_folder_name(caption, folder_name): + tagged_caption = pos_tag(word_tokenize(caption)) + for idx, 
(word, pos) in enumerate(tagged_caption): + if pos.startswith("N"): + tagged_caption[idx] = (folder_name, pos) + break + return " ".join([word for word, _ in tagged_caption]) + +def main(args): + device = "cuda" if torch.cuda.is_available() and not args.force_cpu else "cpu" + dtype = torch.float32 if args.force_cpu else torch.float16 + + + cache_dir = os.path.join(args.Blip_location, 'cache') + + if not os.path.exists(cache_dir): + os.makedirs(cache_dir) + + if "salesforce/blip2-" in args.model.lower(): + print(f"Using BLIP2 model: {args.model}") + processor, model = create_blip2_processor(args.model, device, dtype, cache_dir=cache_dir) + elif "microsoft/git-" in args.model.lower(): + print(f"Using GIT model: {args.model}") + processor, model = create_git_processor(args.model, device, dtype, cache_dir=cache_dir) + else: + # try to use auto model? doesn't work with blip/git + processor, model = create_auto_processor(args.model, device, dtype) + + # os.walk all files in args.data_root recursively + for root, dirs, files in os.walk(args.data_root): + for file in files: + # get file extension + ext = os.path.splitext(file)[1] + if ext.lower() in SUPPORTED_EXT: + full_file_path = os.path.join(root, file) + image = Image.open(full_file_path) + start_time = time.time() + + inputs = processor(images=image, return_tensors="pt", max_new_tokens=args.max_new_tokens) + inputs = {key: tensor.to(device, dtype) for key, tensor in inputs.items()} + + generated_ids = model.generate(**inputs) + generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip() + folder_name = os.path.basename(root) + if args.replace_subject: + modified_caption = replace_first_noun_with_folder_name(generated_text, folder_name) + else: + modified_caption = generated_text + print(f"file: {file}, caption: {modified_caption}") + exec_time = time.time() - start_time + print(f" Time for last caption: {exec_time} sec. 
GPU memory used: {get_gpu_memory_map()} MB") + + # get bare name + name = os.path.splitext(full_file_path)[0] + if not os.path.exists(name): + with open(f"{name}.txt", "w") as f: + f.write(modified_caption) + +if __name__ == "__main__": + print(f"{Fore.CYAN}** Current supported models:{Style.RESET_ALL}") + print(" microsoft/git-base-textcaps") + print(" microsoft/git-large-textcaps") + print(" microsoft/git-large-r-textcaps") + print(" Salesforce/blip2-opt-2.7b - (9GB VRAM or recommend 32GB sys RAM)") + print(" Salesforce/blip2-opt-2.7b-coco - (9GB VRAM or recommend 32GB sys RAM)") + print(" Salesforce/blip2-opt-6.7b - (16.5GB VRAM or recommend 64GB sys RAM)") + print(" Salesforce/blip2-opt-6.7b-coco - (16.5GB VRAM or recommend 64GB sys RAM)") + print() + print(f"{Fore.CYAN} * The following will likely not work on any consumer GPUs or require huge sys RAM on CPU:{Style.RESET_ALL}") + print(" salesforce/blip2-flan-t5-xl") + print(" salesforce/blip2-flan-t5-xl-coco") + print(" salesforce/blip2-flan-t5-xxl") + + parser = argparse.ArgumentParser() + parser.add_argument("--data_root", type=str, default="input", help="Path to images") + parser.add_argument("--Blip_location", type=str, default=os.getcwd(), help="Path to Blip Models") + parser.add_argument("--model", type=str, default="salesforce/blip2-opt-2.7b", help="model from huggingface, ex. 
'salesforce/blip2-opt-2.7b'") + parser.add_argument("--replace_subject", action="store_true", default=False, help="Replace the first noun in the generated caption with the folder name") + parser.add_argument("--force_cpu", action="store_true", default=False, help="force using CPU even if GPU is available, may be useful to run huge models if you have a lot of system memory") + parser.add_argument("--max_new_tokens", type=int, default=24, help="max length for generated captions") + args = parser.parse_args() + + print(f"** Using model: {args.model}") + print(f"** Captioning files in: {args.data_root}") + main(args) diff --git a/dupe_finder.py b/dupe_finder.py new file mode 100644 index 0000000..c2291e7 --- /dev/null +++ b/dupe_finder.py @@ -0,0 +1,90 @@ +import os +from PIL import Image +from tqdm import tqdm +import numpy as np +import sys +import argparse +from imagehash import average_hash, phash +from pathlib import Path + +def hash_image(image_path, accurate=False): + image = Image.open(image_path) + image = image.convert("L").resize((8, 8), Image.ANTIALIAS) # Convert the image to grayscale and resize it + + if accurate: + original_hash = str(phash(image)) + flipped_hash = str(phash(image.transpose(Image.FLIP_LEFT_RIGHT))) + else: + original_hash = str(average_hash(image)) + flipped_hash = str(average_hash(image.transpose(Image.FLIP_LEFT_RIGHT))) + + return original_hash, flipped_hash + +def find_duplicates(input_dir, quick=False, accurate=False): + image_files = [] + for dirpath, dirnames, filenames in os.walk(input_dir): + for filename in filenames: + if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')): + full_path = os.path.join(dirpath, filename) + image_files.append(full_path) + + duplicates = [] + image_hashes = {} + + print("Hashing images...") + hashing_progress_bar = tqdm(total=len(image_files), desc="Hashing", position=0, leave=True) + + save_interval = int(len(image_files) * 0.1) # Save progress every 10% + if save_interval == 0: 
+ save_interval = 1 + + hash_file_path = os.path.join(input_dir, "image_hashes.txt") + + for i, file1 in enumerate(image_files): + original_hash, flipped_hash = hash_image(file1, accurate) + if original_hash in image_hashes: + duplicates.append((file1, image_hashes[original_hash])) + elif flipped_hash in image_hashes: + duplicates.append((file1, image_hashes[flipped_hash])) + else: + image_hashes[original_hash] = file1 + image_hashes[flipped_hash] = file1 + + # Save progress every 10% + if i % save_interval == 0: + with open(hash_file_path, "w") as hash_file: + for hash_str, file_path in image_hashes.items(): + hash_file.write(f"{hash_str},{file_path}\n") + + hashing_progress_bar.update(1) + + hashing_progress_bar.close() + + print("Duplicates found:") + for duplicate, original in duplicates: + print(f"Duplicate: {duplicate}, Original: {original}") + + # Move duplicates to a new folder + move_duplicates(duplicates, input_dir) + +def move_duplicates(duplicates, input_dir): + dupe_dir = os.path.join(input_dir, "duplicates") + if not os.path.exists(dupe_dir): + os.makedirs(dupe_dir) + + for dupe, original in duplicates: + dupe_file_path = Path(dupe) + new_file_path = os.path.join(dupe_dir, dupe_file_path.name) + os.rename(dupe, new_file_path) + + print(f"Moved {len(duplicates)} duplicate files to {dupe_dir}") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Find and move duplicate images in a directory.") + parser.add_argument("input_dir", metavar="input_dir", type=str, help="the input directory to search for duplicates") + parser.add_argument("--quick", action="store_true", help="use quick comparison method (average hash)") + parser.add_argument("--accurate", action="store_true", help="use accurate comparison method (perceptual hash)") + args = parser.parse_args() + + find_duplicates(args.input_dir, args.quick, args.accurate) + diff --git a/dupe_finder_Readme.txt b/dupe_finder_Readme.txt new file mode 100644 index 0000000..eee1870 --- 
/dev/null +++ b/dupe_finder_Readme.txt @@ -0,0 +1,35 @@ +# Duplicate Image Finder + +This script helps you find and move duplicate images in a given directory. It supports various image formats, including PNG, JPG, JPEG, GIF, and WEBP. The script can compare images by their content or by their names in a quick mode. + +## Requirements + +- Python 3.6 or higher +- Pillow (PIL) library +- tqdm, numpy, and imagehash libraries + +To install the required libraries, run: `pip install pillow tqdm numpy imagehash` + +## Usage + +To use the script, navigate to the directory containing the script and run the following command: `python dupe_finder.py <input_dir> [--quick] [--accurate]` + +- <input_dir>: The input directory containing the images you want to check for duplicates. +- --quick: (Optional) Use this flag to enable quick mode, which compares images by their names instead of their content. + +The script will create a folder named "duplicates" within the input directory and move the detected duplicate images into it. +This is done instead of deleting so the user can double-check the results. + +## Example + +To find and move duplicate images in the "C:\Users\username\Desktop\Data_set" directory, run: +python dupe_finder.py "C:\Users\username\Desktop\Data_set" + +To use quick mode for the same directory, run: +python dupe_finder.py "C:\Users\username\Desktop\Data_set" --quick + +## How it works + +The script first indexes the image hashes and stores them in a file named "image_hashes.txt" within the input directory. It saves the progress every 10% to ensure that most of the progress is retained in case of a crash. The script then compares the images to find duplicates and moves them to the "duplicates" folder. + +In quick mode, the script compares images by their names instead of their content, which can be faster but far less accurate.
(uses string before _ in filenames) \ No newline at end of file diff --git a/image compress_Readme.txt b/image compress_Readme.txt new file mode 100644 index 0000000..4614f99 --- /dev/null +++ b/image compress_Readme.txt @@ -0,0 +1,47 @@ +# Image Compression Utility + +## Overview +The Image Compression Utility is a command-line Python script that compresses images in a specified directory and its subdirectories to a maximum megapixel size. The script reads JPEG, PNG, and WebP files and always writes its output as WebP. The tool can also fix image orientation, and optionally delete the original files after processing. + +## Requirements +- Python 3.7 or later +- Pillow library: Install by running `pip install pillow` + +## Installation +1. Download the script `image_compress.py` to your desired folder. +2. Ensure you have Python 3.7 or later installed. You can check your Python version by running `python --version` in your command prompt or terminal. +3. Install the Pillow library by running `pip install pillow`. + +## Usage +Run the script in the command prompt or terminal with the desired options: + +``` +python image_compress.py [options] +``` + +### Options +``` +--img_dir Path to the image directory (default: 'input') +--out_dir Path to the output directory (default: IMG_DIR) +--max_mp Maximum megapixels (default: 1.5) +--quality Save quality (default: 95, range: 0-100, suggested: 90+) +--overwrite Overwrite files in the output directory +--noresize Do not resize, just fix orientation +--delete Delete original files after processing (default: True) +``` + +### Example +``` +python image_compress.py --img_dir images --out_dir compressed --max_mp 2 --quality 90 --overwrite --delete +``` + +This command will compress images in the 'images' directory, save the compressed images as WebP files to the 'compressed' directory, with a maximum size of 2 megapixels, a quality of 90, and delete the original files after processing.
+ +## Notes +- The supported image formats are JPEG, PNG, and WebP. +- The script utilizes multi-threading for better performance on multi-core processors. +- Ensure you have the necessary permissions to read and write to the input and output directories, as well as the image files within them. + +## License +Original Code by Victorchall add relevant license here +-- this just adds multi folder function to the original code \ No newline at end of file diff --git a/image_compress.py b/image_compress.py new file mode 100644 index 0000000..ed2efb2 --- /dev/null +++ b/image_compress.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 + +"""Compress images in a folder to a maximum megapixel size.""" + +import argparse +import asyncio +import os +from concurrent.futures import ThreadPoolExecutor, as_completed +from glob import iglob +from multiprocessing import cpu_count +from queue import Queue + +from PIL import Image, ImageFile, ImageOps + +# Prevent errors from halting the script. +ImageFile.LOAD_TRUNCATED_IMAGES = True +Image.warnings.simplefilter("error", Image.DecompressionBombWarning) + +VERSION = "2.0" +SHORT_DESCRIPTION = "Compress images in a directory." 
+SUPPORTED_EXTENSIONS = [".jpg", ".jpeg", ".png", ".webp"] + + +def get_args(**parser_kwargs): + """Get command-line options.""" + parser = argparse.ArgumentParser(**parser_kwargs) + parser.add_argument( + "--img_dir", + type=str, + default="input", + help="path to image directory (default: 'input')", + ) + parser.add_argument( + "--out_dir", + type=str, + default=None, + help="path to output directory (default: IMG_DIR)", + ) + parser.add_argument( + "--max_mp", + type=float, + default=1.5, + help="maximum megapixels (default: 1.5)", + ) + parser.add_argument( + "--quality", + type=int, + default=95, + help="save quality (default: 95, range: 0-100, suggested: 90+)", + ) + parser.add_argument( + "--overwrite", + action="store_true", + default=False, + help="overwrite files in output directory", + ) + parser.add_argument( + "--noresize", + action="store_true", + default=False, + help="do not resize, just fix orientation", + ) + parser.add_argument( + "--delete", + action="store_true", + default=True, + help="delete original files after processing", + ) + args = parser.parse_args() + args.out_dir = args.out_dir or args.img_dir + args.max_mp = args.max_mp * 1024000 + return args + + +def images(img_dir): + """Return each image in the input directory and its subdirectories.""" + for file in iglob(f"{img_dir}/**/*.*", recursive=True): + if file.lower().endswith(tuple(SUPPORTED_EXTENSIONS)): + yield file + + + +def inline(msg, newline=False): + """Print a message on the same line.""" + msg = f"\r{msg}" + msg += " " * (79 - len(msg)) + print(msg, end="\n" if newline else "", flush=True) + + +def launch_workers(queue, args): + """Launch a pool of workers.""" + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + tasks = [loop.create_task(worker(queue, args)) for _ in range(10)] + loop.run_until_complete(asyncio.wait(tasks)) + + +async def open_img(path): + """Open an image.""" + loop = asyncio.get_running_loop() + try: + return await loop.run_in_executor(None, 
Image.open, path) + except Exception as err: + inline(f"[!] Error Opening: {path} - {err}", True) + return None + + +def oversize(img, max_mp): + """Check if an image is larger than the maximum size.""" + return (img.width * img.height) > max_mp + + +async def process(image, args): + """Process an image.""" + outfile = image.replace(args.img_dir, args.out_dir).replace( + os.path.splitext(image)[1], ".webp" + ) + if args.overwrite or not os.path.exists(outfile): + img = await open_img(image) + if img: + newimg = transpose(img) + if not args.noresize and oversize(newimg, args.max_mp): + newimg = shrink(newimg, args) + if newimg != img: + await save_img(newimg, outfile, args) + if args.delete and outfile != image: + os.remove(image) + + +def slow_save(path, args, img): + """Save an image.""" + try: + img.save(path, "webp", quality=args.quality) + inline(f"[+] Compressed: {path}") + except Exception as err: + inline(f"[!] Error Saving: {path} - {err}", True) + + +async def save_img(img, path, args): + """Save an image.""" + loop = asyncio.get_running_loop() + await loop.run_in_executor(None, slow_save, path, args, img) + + +def scan_path(queue, args): + """Scan the input directory for images.""" + inline("[*] Scanning for images...", True) + for image in images(args.img_dir): + inline(f"[+] {image}") + queue.put(image) + + +def shrink(img, args): + """Shrink an image.""" + hw = img.size + ratio = args.max_mp / (hw[0]*hw[1]) + newhw = (int(hw[0]*ratio**0.5), int(hw[1]*ratio**0.5)) + + try: + return img.resize(newhw, Image.BICUBIC) + except Exception as err: + inline(f"[!] 
Error Shrinking: {img.filename} - {err}", True) + return img + + +def start_compression(queue, args): + """Start the compression process.""" + inline("[*] Compressing images...", True) + inline("[-] (scanning...)") + with ThreadPoolExecutor() as executor: + workers = { + executor.submit(launch_workers, queue, args): None + for _ in range(cpu_count()) + } + for _ in as_completed(workers): + pass + inline("[!] Done!", True) + + +def transpose(img): + """Transpose an image.""" + try: + return ImageOps.exif_transpose(img) + except Exception as err: + inline(f"[!] Error Transposing: {img.filename} - {err}", True) + return img + + +async def worker(queue, args): + """Handle images from the queue until they're gone.""" + while not queue.empty(): + image = queue.get() + await process(image, args) + + +def main(): + """Run the program.""" + queue = Queue() + args = get_args(description=SHORT_DESCRIPTION) + inline(f"[>] Image Compression Utility v{VERSION}", True) + scan_path(queue, args) + start_compression(queue, args) + + +if __name__ == "__main__": + main() diff --git a/text replace tool_Readme.txt b/text replace tool_Readme.txt new file mode 100644 index 0000000..42778eb --- /dev/null +++ b/text replace tool_Readme.txt @@ -0,0 +1,27 @@ +# Text Replace Tool + +## Overview +The Text Replace Tool is a simple Python application with a graphical user interface (GUI) that allows you to find and replace text strings in all text files within a chosen directory. The application uses the tkinter library for creating the GUI, and the os and re libraries for handling file operations and text replacement. + +## Requirements +- Python 3.6 or later + +## Installation +1. Download the script `text_replace_tool.py` to your desired folder. +2. Ensure you have Python 3.6 or later installed. You can check your Python version by running `python --version` in your command prompt or terminal. + +## Usage +1. 
Run the script by navigating to the folder containing `text_replace_tool.py` and executing the command `python text_replace_tool.py` in your command prompt or terminal. +2. The Text Replace Tool window will open. Click the "Browse" button to choose the directory containing the text files you want to process. +3. Enter the text you want to find in the "Find:" field, and the text you want to replace it with in the "Replace:" field. +4. Click the "Rename" button to start the text replacement process. +5. A progress bar will show the progress of the operation. Once the process is complete, a "Done" message box will appear. +6. You can either repeat the process for another directory and text strings or close the application. + +## Notes +- The application only processes text files with the `.txt` extension. +- Regular expressions are not supported in the find and replace fields. The text entered in these fields will be treated as plain text. +- Ensure you have the necessary permissions to read and write to the chosen directory and the text files within it. + +## License +This project is open source and available under the [MIT License](https://opensource.org/licenses/MIT). 
\ No newline at end of file diff --git a/text_replace_tool.py b/text_replace_tool.py new file mode 100644 index 0000000..8477c47 --- /dev/null +++ b/text_replace_tool.py @@ -0,0 +1,76 @@ +import os +import re +import tkinter as tk +from tkinter import filedialog, messagebox, ttk + +class App(tk.Tk): + def __init__(self): + super().__init__() + self.title("Text Replace") + self.geometry("350x200") + self.create_widgets() + + def create_widgets(self): + self.directory_label = tk.Label(self, text="Directory:") + self.directory_label.grid(row=0, column=0, padx=5, pady=5) + + self.directory_entry = tk.Entry(self, width=30) + self.directory_entry.grid(row=0, column=1, padx=5, pady=5) + + self.browse_button = tk.Button(self, text="Browse", command=self.browse_directory) + self.browse_button.grid(row=0, column=2, padx=5, pady=5) + + self.find_label = tk.Label(self, text="Find:") + self.find_label.grid(row=1, column=0, padx=5, pady=5) + + self.find_entry = tk.Entry(self, width=30) + self.find_entry.grid(row=1, column=1, padx=5, pady=5) + + self.replace_label = tk.Label(self, text="Replace:") + self.replace_label.grid(row=2, column=0, padx=5, pady=5) + + self.replace_entry = tk.Entry(self, width=30) + self.replace_entry.grid(row=2, column=1, padx=5, pady=5) + + self.rename_button = tk.Button(self, text="Rename", command=self.rename_files) + self.rename_button.grid(row=3, column=1, padx=5, pady=5) + + self.progress = ttk.Progressbar(self, orient=tk.HORIZONTAL, length=200, mode='determinate') + self.progress.grid(row=4, column=0, columnspan=3, padx=5, pady=5) + + def browse_directory(self): + directory = filedialog.askdirectory() + self.directory_entry.delete(0, tk.END) + self.directory_entry.insert(0, directory) + + def rename_files(self): + directory = self.directory_entry.get() + find_text = self.find_entry.get() + replace_text = self.replace_entry.get() + + if not all((directory, find_text, replace_text)): + messagebox.showwarning("Warning", "Please fill in all fields.") + 
return + + text_files = [f for f in os.listdir(directory) if f.endswith('.txt')] + total_files = len(text_files) + self.progress['maximum'] = total_files + + for i, filename in enumerate(text_files, start=1): + file_path = os.path.join(directory, filename) + with open(file_path, 'r') as file: + file_contents = file.read() + + new_contents = re.sub(find_text, replace_text, file_contents) + + with open(file_path, 'w') as file: + file.write(new_contents) + + self.progress['value'] = i + self.update_idletasks() + + messagebox.showinfo("Done", "Text replacement completed.") + +if __name__ == "__main__": + app = App() + app.mainloop() diff --git a/text_to_filename.py b/text_to_filename.py new file mode 100644 index 0000000..4e00746 --- /dev/null +++ b/text_to_filename.py @@ -0,0 +1,48 @@ +import os +import argparse +from pathlib import Path +from tqdm import tqdm + +def rename_images(input_directory): + # Counter initialization + counter = 1 + + for root, _, files in os.walk(input_directory): + # Filter out image and text files + image_files = [f for f in files if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp'))] + text_files = [f for f in files if f.lower().endswith('.txt')] + + # Create a dictionary with the base names of the files as keys and the file names as values + base_names = {} + for f in image_files + text_files: + base_name = os.path.splitext(f)[0] + if base_name not in base_names: + base_names[base_name] = [] + base_names[base_name].append(f) + + # Rename image files with the contents of the corresponding text files and add a counter to avoid duplicate file names + for base_name, file_names in tqdm(base_names.items(), desc="Renaming images", unit="image"): + if len(file_names) == 2: + image_file = [f for f in file_names if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp'))][0] + text_file = [f for f in file_names if f.lower().endswith('.txt')][0] + + with open(os.path.join(root, text_file), 'r') as f: + 
new_image_name = f.read().strip() + + # Remove illegal characters from the new_image_name + illegal_characters = ['<', '>', ':', '"', '/', '\\', '|', '?', '*'] + for char in illegal_characters: + new_image_name = new_image_name.replace(char, '') + + new_image_name_with_counter = f"{new_image_name}_{counter}{Path(image_file).suffix}" + os.rename(os.path.join(root, image_file), os.path.join(root, new_image_name_with_counter)) + + counter += 1 + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Rename image files with the contents of the corresponding text files.") + parser.add_argument("input_directory", help="The directory containing the image and text files.") + args = parser.parse_args() + + rename_images(args.input_directory) diff --git a/text_to_filename_Reademe.txt b/text_to_filename_Reademe.txt new file mode 100644 index 0000000..2a215c1 --- /dev/null +++ b/text_to_filename_Reademe.txt @@ -0,0 +1,32 @@ +# Text to Filename + +This Python script allows you to rename image files based on the contents of corresponding text files in a given directory. It supports various image formats, including .png, .jpg, .jpeg, .gif, .webp, and .bmp. + +## Usage + +To use this script, simply run it from the command line and provide the input directory containing the image and text files as an argument: + +## How it works + +The script performs the following steps: + +1. Lists all files in the input directory. +2. Filters out image and text files. +3. Creates a dictionary with the base names of the files as keys and the file names as values. +4. Renames image files with the contents of the corresponding text files and adds a counter to avoid duplicate file names. + + +## Requirements + +This script requires the following Python libraries: + +- os +- argparse +- pathlib +- tqdm + +Make sure to install these libraries before running the script. + +## License + +This project is licensed under the MIT License. \ No newline at end of file