DivyenduDutta · DivyenduDutta · Sep 2, 2025 · Sep 2, 2025 · Sep 2, 2025 · Sep 2, 2025
diff --git a/README.md b/README.md
@@ -1,5 +1,54 @@
 # TinyDiffusion
-Optimized Diffusion for Edge Devices
+
+<div align="center">
+  <img src="tinydiffusion_icon.png" alt="tinydiffusion" width="120" height="120">
+
+  <p><em>Benchmarking and Optimizing Stable Diffusion for Edge Devices.</em></p>
+
+  <p>
+    <a href="https://github.com/DivyenduDutta/TinyDiffusion/blob/master/LICENSE"><img src="https://img.shields.io/github/license/DivyenduDutta/TinyDiffusion?style=flat-square" alt="License"></a>
+    <a href="https://github.com/psf/black"><img src="https://img.shields.io/badge/code%20style-black-000000.svg" alt="Black"></a>
+  </p>
+
+  <p>
+    <a href="#introduction">Introduction</a> •
+    <a href="#setup">Setup</a> •
+    <a href="#quick-start">Quick Start</a> •
+    <a href="#results">Results</a> •
+    <a href="#analysis">Analysis</a>
+  </p>
+</div>
+
+## Introduction
+
+This repository benchmarks Stable Diffusion UNet inference performance across different runtimes. The focus is on comparing:
+
+- Native PyTorch UNet (GPU) execution
+- ONNXRuntime UNet (GPU) execution
+- ONNXRuntime UNet (CPU) execution
+
+The goal is to understand the trade-offs in inference speed, CPU/GPU memory usage, and runtime stability when exporting Stable Diffusion components to ONNX and running them with onnxruntime.
+
+### Features
+
+**ONNX Export**: Export the Stable Diffusion UNet model from Hugging Face’s diffusers library into an ONNX graph.
+
+**Flexible Inference**: Run inference on CPU or GPU with onnxruntime or fall back to native PyTorch.
+
+**Benchmarking Suite**: Collect detailed metrics including:
+- Average inference time & standard deviation
+- CPU memory usage (Resident Set Size)
+- GPU memory allocation
+
+**Results Logging**: Save benchmarking results to CSV, with the ability to append new results across runs.
+
+**Visualization**: Generate plots comparing performance across backends for quick insights.
+
+### Why ONNX?
+
+[ONNX](https://onnx.ai/) allows exporting deep learning models into a framework-agnostic format. With onnxruntime, models can run on multiple backends (CPU, CUDA, TensorRT, DirectML, etc.) without depending on PyTorch. While this repo shows that ONNX on CPU can be useful for portability, we also observe that PyTorch often outperforms ONNX on GPU for Stable Diffusion UNet inference.
+
+## Setup
 
 Add the project root ie, Folder containing this README to PYTHONPATH whichever way you want. One way would be to create a .env and write the following in it
 ```
@@ -11,6 +60,45 @@ Another option would be to run `$env:PYTHONPATH = \full\path\to\projectroot` in
 
 Install pytorch, torchvision via `pip install torch==2.6.0 torchvision==0.21.0 --index-url https://download.pytorch.org/whl/cu118` - Conda doesnt install GPU version on Windows.
 
-##### Sanity
+### Sanity
 
 Before committing changes run `pre-commit run --all-files` or `pre-commit run --file <file1>, <file2> ...`
+
+## Quick Start
+
+### Generating Benchmarks
+
+Execute
+```bash
+notebooks/baseline_generation.ipynb
+```
+to benchmark the PyTorch version of Stable Diffusion's UNet from Hugging Face.
+
+
+Run
+```bash
+python tinydiffusion/src/onnx_export.py
+```
+to export the UNet to an ONNX graph.
+
+
+Then execute
+```bash
+notebooks/onnxruntime_generation.ipynb
+```
+to benchmark the ONNXRuntime version of UNet on GPU and CPU.
+
+### Visualizing
+
+Once all the benchmarking results are populated in `results/benchmarks/benchmark_results.csv`, run
+```bash
+python tinydiffusion/src/benchmark_visualizer.py
+```
+to generate the visualization plots for comparison.
+
+## Results
+
+<img src="https://github.com/DivyenduDutta/TinyDiffusion/blob/master/results/benchmarks/benchmark_comparison.png" alt="Results">
+
+## Analysis
+
+For a detailed analysis of these results, see [Analysis.md](https://github.com/DivyenduDutta/TinyDiffusion/blob/master/Analysis.md).
diff --git a/results/benchmarks/benchmark_comparison.png b/results/benchmarks/benchmark_comparison.png
diff --git a/tinydiffusion/src/benchmark_visualizer.py b/tinydiffusion/src/benchmark_visualizer.py
@@ -0,0 +1,61 @@
+import os
+import pandas as pd
+import matplotlib.pyplot as plt
+
+from tinydiffusion.utils.csv_utils import load_results_from_csv, BENCHMARK_SAVE_PATH
+from tinydiffusion.utils.logger import LoggerConfig
+
+LOGGER = LoggerConfig().logger
+
+
+def visualize_benchmark_results(df: pd.DataFrame) -> None:
+    """
+    Visualize benchmark results from a DataFrame.
+
+    Args:
+        df (pd.DataFrame): DataFrame containing benchmark results.
+    """
+    # Set figure with 3 subplots (stacked vertically)
+    fig, axes = plt.subplots(3, 1, figsize=(10, 12), sharex=True)
+
+    #  Inference time
+    axes[0].bar(
+        df["desc"], df["avg_inference_time"], yerr=df["std_inference_time"], capsize=5
+    )
+    axes[0].set_ylabel("Inference Time (s)")
+    axes[0].set_title("Benchmark Comparison")
+    axes[0].grid(axis="y", linestyle="--", alpha=0.7)
+
+    # CPU memory usage
+    axes[1].bar(
+        df["desc"],
+        df["avg_cpu_mem_usage"],
+        yerr=df["std_cpu_mem_usage"],
+        capsize=5,
+        color="orange",
+    )
+    axes[1].set_ylabel("CPU Memory (MB)")
+    axes[1].grid(axis="y", linestyle="--", alpha=0.7)
+
+    # GPU memory usage
+    axes[2].bar(
+        df["desc"],
+        df["avg_gpu_mem_usage"],
+        yerr=df["std_gpu_mem_usage"],
+        capsize=5,
+        color="green",
+    )
+    axes[2].set_ylabel("GPU Memory (MB)")
+    axes[2].grid(axis="y", linestyle="--", alpha=0.7)
+
+    plt.xticks(rotation=15)
+    plt.tight_layout()
+    viz_save_path = os.path.join(BENCHMARK_SAVE_PATH, "benchmark_comparison.png")
+    plt.savefig(viz_save_path)
+    LOGGER.info(f"Saved benchmark visualization to {viz_save_path}")
+
+
+if __name__ == "__main__":
+    df = load_results_from_csv()
+
+    visualize_benchmark_results(df)
diff --git a/tinydiffusion/utils/csv_utils.py b/tinydiffusion/utils/csv_utils.py
@@ -6,7 +6,7 @@
 LOGGER = LoggerConfig().logger
 
+# Directory holding the benchmark outputs. It is built relative to this
+# module's own location (two levels up, then results/benchmarks) instead of
+# the current working directory.
 BENCHMARK_SAVE_PATH = os.path.join(
-    os.path.dirname(os.getcwd()), "results", "benchmarks"
+    os.path.dirname(__file__), "..", "..", "results", "benchmarks"
 )
+# Runs at import time; exist_ok=True makes it a no-op when the directory
+# already exists.
 os.makedirs(BENCHMARK_SAVE_PATH, exist_ok=True)
 
@@ -25,3 +25,23 @@ def save_results_to_csv(results: list) -> None:
 
     df.to_csv(csv_path, mode="a", header=not file_exists, index=False)
     LOGGER.info(f"Saved benchmark results to {csv_path}")
+
+
+def load_results_from_csv() -> pd.DataFrame:
+    """
+    Load benchmark results from a CSV file.
+
+    Returns:
+        pd.DataFrame: A DataFrame containing the benchmark results.
+
+    Raises:
+        FileNotFoundError: If the CSV file does not exist.
+    """
+    csv_path = os.path.join(BENCHMARK_SAVE_PATH, "benchmark_results.csv")
+    if os.path.isfile(csv_path):
+        df = pd.read_csv(csv_path)
+        LOGGER.info(f"Loaded benchmark results from {csv_path}")
+        return df
+    else:
+        LOGGER.warning(f"No benchmark results found at {csv_path}")
+        raise FileNotFoundError(f"Benchmark results CSV not found at {csv_path}.")
diff --git a/tinydiffusion_icon.png b/tinydiffusion_icon.png