diff --git a/.gitignore b/.gitignore
index 8dda3f8..655922b 100755
--- a/.gitignore
+++ b/.gitignore
@@ -87,3 +87,16 @@ target/
 
 # Mypy cache
 .mypy_cache/
+
+# Claude Code working state
+.claude/
+
+# Training logs
+/logs/*.log
+
+# Model checkpoints downloaded from Colab (super_resolution.h5 in checkpoints/
+# is already tracked; this only catches root-level .h5 backups)
+/best_*.h5
+
+# Data archives at repo root
+/*.zip
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..2e056b8
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,90 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+PCBSegClassNet is a TensorFlow-based deep learning project for PCB (Printed Circuit Board) component segmentation and classification. It uses the FICS PCB Image Collection (FPIC) dataset.
+
+The two tasks are handled by separate model variants sharing the same encoder:
+- **Segmentation**: `PCBSegNet` — segments all 25 component classes on a full PCB image
+- **Classification**: `PCBClassNet` — classifies individual cropped component images
+
+## Environment Setup
+
+```bash
+conda create -n pscn python=3.8
+conda activate pscn
+conda install pip
+pip install -r requirements.txt
+```
+
+Key dependencies: `tensorflow-gpu==2.11`, `albumentations`, `pyyaml`, `tqdm`, `pandas`.
+
+## Commands
+
+All training commands must be run from the `src/` directory.
+
+**Train segmentation** (100 epochs):
+```bash
+python train_segmentation.py -opt cfs/pscn_seg.yml -epoch 100
+```
+
+**Evaluate segmentation** (loads best checkpoint, skips training):
+```bash
+python train_segmentation.py -opt cfs/pscn_seg.yml -epoch 0
+```
+
+**Train classification** (100 epochs):
+```bash
+python train_classification.py -opt cfs/pscn_class.yml -epoch 100
+```
+
+**Evaluate classification**:
+```bash
+python train_classification.py -opt cfs/pscn_class.yml -epoch 0
+```
+
+**Data preparation** (run from `src/data/`):
+```bash
+# Create HSI+CLAHE images, masks, and classification crops
+python create_mask.py -i ../../data/pcb_image/ -a ../../data/smd_annotation/ -id ../../data/segmentation/images -ad ../../data/segmentation/masks -cd ../../data/classification/images/
+
+# Create patches (768px) and split train/test
+python create_patches.py -i ../../data/segmentation/images/ -m ../../data/segmentation/masks -cd ../../data/classification/images/ -ps 768
+```
+
+## Architecture
+
+### Encoder (shared by both tasks)
+Built in `src/models/blocks.py`, the encoder has three stages:
+1. **Learning Module** — three conv/depthwise-separable conv blocks with stride 2, producing feature maps at 3 scales (`learning_layer1`, `learning_layer2`, `learning_layer3`)
+2. **Feature Extractor** — three `bottleneck_block` stages (MobileNetV2-style residual bottlenecks) followed by a `pyramid_pooling_block` (PSPNet-style)
+3. **Fusion Module** — fuses the learning module output with the upsampled feature extractor output
+
+### Segmentation Decoder (`get_decoder` in `blocks.py`)
+- Applies `tem_block` (Texture Enhancement Module: channel attention + cosine-similarity-based spatial attention) to encoder output
+- Two upsampling steps with skip connections from `learning_layer2` and `learning_layer1`
+- Final `Conv2D(num_classes)` + softmax
+
+### Classification Head (`get_classification` in `blocks.py`)
+- `GlobalAveragePooling2D` on encoder output → `Dense(128, relu)` → `Dense(num_classes, softmax)`
+
+### Loss
+Segmentation uses **DISLoss** (`src/models/loss.py`): sum of Dice loss + Jaccard loss + SSIM loss. Classification uses standard `categorical_crossentropy`.
+
+## Configuration
+
+Training hyperparameters and data paths are controlled by YAML files in `src/cfs/`:
+- `pscn_seg.yml` — segmentation config (25 classes, Adam lr=1e-4, batch=16, input 512×512)
+- `pscn_class.yml` — classification config (25 classes, Adam lr=1e-4, batch=16, input 512×512)
+
+Checkpoints are saved to `checkpoints/best_seg.h5` and `checkpoints/best_class.h5`. Logs go to `logs/app.log`.
+
+## Data
+
+25 PCB component classes: R, C, U, Q, J, L, RA, D, RN, TP, IC, P, CR, M, BTN, FB, CRA, SW, T, F, V, LED, S, QA, JP.
+
+The segmentation masks use specific RGB color values per class (defined in `src/data/dataloader.py::color_values`). When modifying mask generation, ensure colors match this mapping exactly.
+
+The FPIC dataset requires access codes from the dataset authors — it is not freely downloadable.
diff --git a/notebooks/README.md b/notebooks/README.md
new file mode 100644
index 0000000..f1ee2f8
--- /dev/null
+++ b/notebooks/README.md
@@ -0,0 +1,69 @@
+# Colab Training
+
+`colab_train.ipynb` is a self-contained notebook that runs the **full pipeline** end-to-end on a Colab GPU runtime: data preprocessing (mask generation + patches + train/val split) → segmentation training → classification training.
+
+## Quickstart
+
+1. **Get the raw FPIC dataset** (request access codes from the dataset authors — see top-level [README.md](../README.md)).
+2. **Zip raw inputs** and upload to Drive:
+    ```powershell
+    Compress-Archive -Path data\pcb_image, data\smd_annotation -DestinationPath data_raw.zip -Force
+    ```
+    Place at `MyDrive/PCBSegClassNet/data_raw.zip` (~7 GB).
+3. **Open the notebook in Colab**:
+    ```
+    https://colab.research.google.com/github/<your-fork>/PCBSegClassNet/blob/colab/notebooks/colab_train.ipynb
+    ```
+4. **Runtime → Change runtime type → GPU** (T4 is enough; High-RAM not needed), then run cells top to bottom.
+
+## What the notebook does
+
+| Section | Purpose |
+|---|---|
+| 1 | `nvidia-smi` GPU sanity |
+| 2 | Clone this repo (`colab` branch) |
+| 3 | Install TF 2.15 + dependencies (TF 2.15 is the last release on Keras 2; Keras 3 from TF 2.16+ breaks this codebase's `tf.keras.backend.{dot,transpose}` calls) |
+| 4 | Mount Drive, unzip `data_raw.zip` to local Colab disk |
+| 5 | `create_mask.py` — polygon masks + classification crops (EDSR super-resolution, GPU) |
+| 6 | `create_patches.py` — 768 px patches + 80/20 train/val split (CPU) |
+| 7 | Set up Drive checkpoint directory for persistence across sessions |
+| 8 | Segmentation training (5 epochs sanity → 80 epochs full → mirror checkpoint to Drive) |
+| 9 | Classification training (same pattern) |
+| 10 | Optional: re-evaluate from Drive checkpoints in a fresh session |
+
+## Why preprocess on Colab?
+
+- Raw inputs (~7 GB) are smaller than the processed dataset (~18 GB) — easier to transfer to Drive.
+- Reproducibility: anyone with raw data + this notebook can recreate the exact training set without trusting an opaque processed zip.
+- Easy to iterate on preprocessing knobs (e.g. patch size) without re-uploading.
+
+If you already have a processed dataset zip, you can skip sections 5–6 and unzip it directly into `data/` instead.
+
+## Why TF 2.15?
+
+- This repo uses `tf.keras.backend.dot` / `backend.transpose` and `tf.keras.activations.softmax(tensor)` patterns that broke in Keras 3.
+- TF 2.15 is the **last TF release on Keras 2**; Keras 3 starts at TF 2.16.
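+- Earlier this notebook tried to pin TF 2.10 via `condacolab`, but Colab's base Python keeps moving past 3.10 and TF 2.10's wheel matrix doesn't follow. TF 2.15 ships wheels for Python 3.9–3.11, so the notebook installs it into a side-by-side `python3.11` interpreter and runs every script with `!python3.11 ...`; the Colab kernel itself (Python 3.12) never imports TensorFlow.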
+
+## VRAM notes
+
+| GPU | Comfortable batch size at 512×512 input |
+|---|---|
+| T4 (16 GB) | 16 |
+| A100 (40 GB) | 32+ |
+| L4 (24 GB) | 16-24 |
+| RTX 4060 Ti (8 GB) | 4-8 (and even 8 OOMs in this codebase due to SSIM gradient) |
+
+The default `batch_size: 16` in `cfs/pscn_seg.yml` works on all Colab GPUs.
+
+## Epoch budget
+
+The notebook runs:
+- **Sanity 5 epochs** before each full run, so you catch NaN losses or OOMs in <1 hour.
+- **Full 80 epochs** for both segmentation and classification.
+
+80 + 80 ≈ 18 hours on an L4, fitting inside Colab Pro's 24 h session limit with margin. The original paper trained for 100 epochs; 80 leaves a safety buffer for the inevitable Drive-mount / preprocessing time at the start of a session. If you want closer to paper-faithful runs, push to 100 once you've seen one full run complete.
+
+## Session persistence
+
+Colab wipes `/content` on disconnect but Drive persists. The notebook copies the best checkpoint to Drive after each training run; section 10 shows how to restore it in a new session for evaluation.
diff --git a/notebooks/colab_train.ipynb b/notebooks/colab_train.ipynb
new file mode 100644
index 0000000..962f7e3
--- /dev/null
+++ b/notebooks/colab_train.ipynb
@@ -0,0 +1,351 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# PCBSegClassNet — Colab Training\n",
+    "\n",
+    "End-to-end pipeline on Google Colab GPU: data preprocessing (mask generation + patches + train/val split) → segmentation training → classification training.\n",
+    "\n",
+    "**Why Colab?** Local 8 GB GPUs (e.g. RTX 4060 Ti) are too tight for `batch=16` at 512×512 input — the segmentation decoder activation alone is ~4 GB. Colab T4 (16 GB) and above handle it comfortably.\n",
+    "\n",
+    "## Before you run\n",
+    "1. **Runtime → Change runtime type → GPU** (T4 is enough; High-RAM not needed).\n",
+    "2. Have `data_raw.zip` ready in Drive at `MyDrive/PCBSegClassNet/data_raw.zip` (~7 GB; contains `pcb_image/` + `smd_annotation/`).\n",
+    "3. Mount Drive when prompted in §4."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. GPU sanity check"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!nvidia-smi"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Clone the repo\n",
+    "\n",
+    "If you forked the repo, change the URL to your fork."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%cd /content\n",
+    "!rm -rf PCBSegClassNet\n",
+    "!git clone -b colab https://github.com/ironmanizawesome/PCBSegClassNet.git\n",
+    "%cd PCBSegClassNet"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Install TF 2.15 + dependencies into Python 3.11\n",
+    "\n",
+    "TF 2.15 is the last release on Keras 2 (Keras 3 starts at TF 2.16, which breaks `tf.keras.backend.{dot,transpose}` and other patterns this codebase relies on). But TF 2.15 wheels only target Python 3.9–3.11, while Colab's notebook kernel runs on Python 3.12.\n",
+    "\n",
+    "Workaround: some Colab images (e.g. the T4 base) already ship a system `python3.11` binary at `/usr/local/bin/python3.11`; where it is missing (e.g. the L4 base), the cell below installs it from the deadsnakes PPA. Install TF 2.15 + deps **into that interpreter** and run all training scripts via `!python3.11 ...`. The notebook kernel itself stays on 3.12 — that's fine, we never `import tensorflow` from it.\n",
+    "\n",
+    "> 🔑 We can't use `tensorflow[and-cuda]==2.15.0` here — that extra pins `tensorrt-libs==8.6.1`, which is no longer available on PyPI (only 9.x remains). Installing the cudnn / cublas / cuda-runtime / etc. wheels directly is enough; TensorRT is only needed for `tf.experimental.tensorrt` inference, not training."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": "# If python3.11 doesn't exist on this Colab image (e.g. L4 base), install it first.\n# T4 base already has /usr/local/bin/python3.11 so this is a no-op there.\n!command -v python3.11 >/dev/null || { \\\n    apt-get update -q && \\\n    apt-get install -y software-properties-common && \\\n    add-apt-repository -y ppa:deadsnakes/ppa && \\\n    apt-get update -q && \\\n    apt-get install -y python3.11 python3.11-dev python3.11-distutils && \\\n    curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11; \\\n}\n\n# TF + non-CUDA python deps\n!python3.11 -m pip install -q tensorflow==2.15.0 albumentations==1.4.18 opencv-python-headless pyyaml tqdm pandas scikit-learn\n\n# CUDA libs pinned to versions matching TF 2.15. The latest nvidia-cudnn-cu12 is\n# 9.x which TF 2.15 cannot dlopen (it links against libcudnn.so.8 specifically),\n# so version pinning is mandatory here.\n!python3.11 -m pip install -q \\\n    nvidia-cudnn-cu12==8.9.4.25 \\\n    nvidia-cublas-cu12==12.2.5.6 \\\n    nvidia-cuda-cupti-cu12==12.2.142 \\\n    nvidia-cuda-nvrtc-cu12==12.2.140 \\\n    nvidia-cuda-runtime-cu12==12.2.140 \\\n    nvidia-cufft-cu12==11.0.8.103 \\\n    nvidia-curand-cu12==10.3.3.141 \\\n    nvidia-cusolver-cu12==11.5.2.141 \\\n    nvidia-cusparse-cu12==12.1.2.141 \\\n    nvidia-nccl-cu12==2.16.5"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": "# Verify TF + GPU under Python 3.11 (the interpreter that will actually run training).\n# `tf.keras.__version__` raises AttributeError on TF 2.15 due to a lazy_loader quirk,\n# so we deliberately don't print it here. Skip-tested: works on both T4 and L4 images.\n!python3.11 -c \"import sys, tensorflow as tf; print('Python:', sys.version.split()[0]); print('TF:', tf.__version__); print('GPU:', tf.config.list_physical_devices('GPU'))\""
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Mount Drive and unpack the raw FPIC archive\n",
+    "\n",
+    "This notebook does the **entire data prep pipeline** (mask generation + patches + train/val split) in Colab so you only need to upload the raw FPIC images + annotations (~7 GB) instead of the processed dataset (~18 GB).\n",
+    "\n",
+    "### Data layout expected on Drive\n",
+    "Zip the **raw** FPIC images + annotations together:\n",
+    "\n",
+    "```\n",
+    "/MyDrive/PCBSegClassNet/\n",
+    "    data_raw.zip                  ← contains: pcb_image/*.png  +  smd_annotation/*.csv\n",
+    "    checkpoints/                  ← (optional, for resume / saved best models)\n",
+    "```\n",
+    "\n",
+    "To make the zip on a Windows host:\n",
+    "\n",
+    "```powershell\n",
+    "Compress-Archive -Path data\\pcb_image, data\\smd_annotation -DestinationPath data_raw.zip -Force\n",
+    "```\n",
+    "\n",
+    "Why unzip to local disk and not stream from Drive? Reading thousands of small files through the Drive mount is extremely slow (API throttling). Always unpack to `/content` for training."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from google.colab import drive\n",
+    "drive.mount(\"/content/drive\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "RAW_ZIP = \"/content/drive/MyDrive/PCBSegClassNet/data_raw.zip\"\n",
+    "\n",
+    "import os, time\n",
+    "assert os.path.exists(RAW_ZIP), f\"Not found: {RAW_ZIP}\"\n",
+    "\n",
+    "%cd /content/PCBSegClassNet\n",
+    "!mkdir -p data\n",
+    "t0 = time.time()\n",
+    "!unzip -q -o {RAW_ZIP} -d data/\n",
+    "print(f\"Unzip done in {time.time()-t0:.1f}s\")\n",
+    "\n",
+    "!echo \"--- pcb_image:\"; ls data/pcb_image/ | wc -l\n",
+    "!echo \"--- smd_annotation:\"; ls data/smd_annotation/ | wc -l"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Generate masks + classification crops (`create_mask.py`)\n",
+    "\n",
+    "Runs through all annotation CSVs, fills polygon masks per component class, and writes:\n",
+    "- `data/segmentation/images/` — HSI + CLAHE preprocessed PCB images\n",
+    "- `data/segmentation/masks/` — RGB masks (color-encoded per class)\n",
+    "- `data/classification/images/<CLASS>/` — individual component crops upscaled with the EDSR super-resolution model in `checkpoints/super_resolution.h5`\n",
+    "\n",
+    "GPU-accelerated via the EDSR forward pass. Expect ~10–30 minutes depending on Colab GPU."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%cd /content/PCBSegClassNet/src/data\n",
+    "!python3.11 create_mask.py \\\n",
+    "    -i ../../data/pcb_image/ \\\n",
+    "    -a ../../data/smd_annotation/ \\\n",
+    "    -id ../../data/segmentation/images \\\n",
+    "    -ad ../../data/segmentation/masks \\\n",
+    "    -cd ../../data/classification/images/\n",
+    "\n",
+    "!echo \"--- segmentation/images: $(ls ../../data/segmentation/images 2>/dev/null | wc -l)\"\n",
+    "!echo \"--- segmentation/masks:  $(ls ../../data/segmentation/masks  2>/dev/null | wc -l)\"\n",
+    "!echo \"--- classification crops total: $(find ../../data/classification/images -type f | wc -l)\"\n",
+    "!echo \"--- classification classes:\"; ls ../../data/classification/images 2>/dev/null"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6. Cut 768 px patches and split into train/val (`create_patches.py`)\n",
+    "\n",
+    "Cuts the full PCB images + masks into 768×768 patches and moves the patches + classification crops into `train/` and `val/` subfolders (80/20 split). Pure CPU work, ~5 minutes.\n",
+    "\n",
+    "After this cell, the dataset layout matches what the training scripts expect:\n",
+    "\n",
+    "```\n",
+    "data/segmentation/train/{images,masks}/*.png\n",
+    "data/segmentation/val/{images,masks}/*.png\n",
+    "data/classification/train/<CLASS>/*.png\n",
+    "data/classification/val/<CLASS>/*.png\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%cd /content/PCBSegClassNet/src/data\n",
+    "!python3.11 create_patches.py \\\n",
+    "    -i ../../data/segmentation/images/ \\\n",
+    "    -m ../../data/segmentation/masks \\\n",
+    "    -cd ../../data/classification/images/ \\\n",
+    "    -ps 768\n",
+    "\n",
+    "!echo \"--- seg train: $(ls ../../data/segmentation/train/images 2>/dev/null | wc -l) images / $(ls ../../data/segmentation/train/masks 2>/dev/null | wc -l) masks\"\n",
+    "!echo \"--- seg val:   $(ls ../../data/segmentation/val/images   2>/dev/null | wc -l) images / $(ls ../../data/segmentation/val/masks   2>/dev/null | wc -l) masks\"\n",
+    "!echo \"--- class train: $(find ../../data/classification/train -type f 2>/dev/null | wc -l) crops\"\n",
+    "!echo \"--- class val:   $(find ../../data/classification/val   -type f 2>/dev/null | wc -l) crops\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
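+    "## 7. Set up the Drive checkpoint directory\n",
+    "\n",
+    "Colab local disk is wiped on session end, so the best model files are copied back to Drive after each training run. This cell only defines `DRIVE_CKPT_DIR` and creates the folder; the backup and restore cells in §8–§10 depend on that variable, so run it in every session."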
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "DRIVE_CKPT_DIR = \"/content/drive/MyDrive/PCBSegClassNet/checkpoints\"\n",
+    "!mkdir -p {DRIVE_CKPT_DIR}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 8. Train segmentation\n",
+    "\n",
+    "Default config in `cfs/pscn_seg.yml` is `batch_size=16`, `epochs` controlled by `-epoch`.\n",
+    "\n",
+    "**First run a 5-epoch sanity pass.** If loss is finite and val_dice_coef is improving, kick off the full 80 epochs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": "# Sanity check: 5 epochs\n# TF_GPU_ALLOCATOR=cuda_malloc_async reduces fragmentation OOMs on 16 GB GPUs\n# (the SSIM gradient in DISLoss spikes a 416 MB tensor that can fail to fit\n# even on T4 16 GB without async allocator). On L4 24 GB it's belt-and-braces.\n%cd /content/PCBSegClassNet/src\n!TF_GPU_ALLOCATOR=cuda_malloc_async python3.11 train_segmentation.py -opt cfs/pscn_seg.yml -epoch 5"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": "# Full training run (80 epochs)\n%cd /content/PCBSegClassNet/src\n!TF_GPU_ALLOCATOR=cuda_malloc_async python3.11 train_segmentation.py -opt cfs/pscn_seg.yml -epoch 80"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Backup the best seg checkpoint to Drive\n",
+    "!cp /content/PCBSegClassNet/checkpoints/best_seg.h5 {DRIVE_CKPT_DIR}/best_seg.h5\n",
+    "!ls -la {DRIVE_CKPT_DIR}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "source": "### 8b. (Optional) Fine-tune segmentation from best checkpoint\n\nThe first full run leaves `lr` at the `min_lr` (1e-5) of `ReduceLROnPlateau`. To squeeze more out of the model, resume from `best_seg.h5` with `pscn_seg_finetune.yml` — same architecture but `lr=1e-5` start and `min_lr=1e-6` so the plateau callback can step down further.\n\nIf you're running this in a fresh session, the first cell below restores `best_seg.h5` from Drive into Colab local disk (the resume flag loads weights from there).",
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "source": "# Make sure best_seg.h5 exists locally (restore from Drive if fresh session)\n!mkdir -p /content/PCBSegClassNet/checkpoints\n!test -f /content/PCBSegClassNet/checkpoints/best_seg.h5 || cp {DRIVE_CKPT_DIR}/best_seg.h5 /content/PCBSegClassNet/checkpoints/\n!ls -la /content/PCBSegClassNet/checkpoints/best_seg.h5\n\n# Fine-tune: 20 epochs, lr=1e-5 → min_lr=1e-6, resume from best_seg.h5\n%cd /content/PCBSegClassNet/src\n!TF_GPU_ALLOCATOR=cuda_malloc_async python3.11 train_segmentation.py -opt cfs/pscn_seg_finetune.yml -epoch 20 -resume",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "code",
+   "source": "# Backup fine-tuned best_seg.h5 to Drive (ModelCheckpoint overwrites in place\n# whenever val_dice_coef improves, so this captures the best of the two runs)\n!cp /content/PCBSegClassNet/checkpoints/best_seg.h5 {DRIVE_CKPT_DIR}/best_seg.h5\n!ls -la {DRIVE_CKPT_DIR}",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 9. Train classification"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": "# Sanity check: 5 epochs\n%cd /content/PCBSegClassNet/src\n!TF_GPU_ALLOCATOR=cuda_malloc_async python3.11 train_classification.py -opt cfs/pscn_class.yml -epoch 5"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": "# Full training run (80 epochs)\n%cd /content/PCBSegClassNet/src\n!TF_GPU_ALLOCATOR=cuda_malloc_async python3.11 train_classification.py -opt cfs/pscn_class.yml -epoch 80"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Backup the best classification checkpoint to Drive\n",
+    "!cp /content/PCBSegClassNet/checkpoints/best_class.h5 {DRIVE_CKPT_DIR}/best_class.h5\n",
+    "!ls -la {DRIVE_CKPT_DIR}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 10. (Optional) Evaluate without retraining\n",
+    "\n",
+    "Pass `-epoch 0` to skip training; the script will load `best_*.h5` from `checkpoints/` and run `model.evaluate(val_dataset)`. Make sure the checkpoint is in `/content/PCBSegClassNet/checkpoints/` (copy it back from Drive if you reconnected)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": "# Restore checkpoints from Drive after a fresh session\n!mkdir -p /content/PCBSegClassNet/checkpoints\n!cp {DRIVE_CKPT_DIR}/best_seg.h5 /content/PCBSegClassNet/checkpoints/ 2>/dev/null || echo 'no seg ckpt'\n!cp {DRIVE_CKPT_DIR}/best_class.h5 /content/PCBSegClassNet/checkpoints/ 2>/dev/null || echo 'no class ckpt'\n\n%cd /content/PCBSegClassNet/src\n!TF_GPU_ALLOCATOR=cuda_malloc_async python3.11 train_segmentation.py -opt cfs/pscn_seg.yml -epoch 0"
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "name": "PCBSegClassNet — Colab Training",
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/src/cfs/pscn_seg_finetune.yml b/src/cfs/pscn_seg_finetune.yml
new file mode 100644
index 0000000..7ca6ddc
--- /dev/null
+++ b/src/cfs/pscn_seg_finetune.yml
@@ -0,0 +1,86 @@
+# ------------------------------------------------------------------------
+# Copyright (c) 2023 CandleLabAI. All Rights Reserved.
+# ------------------------------------------------------------------------
+# Fine-tune config: same model, smaller learning rate. Use with -resume so
+# weights are loaded from best_seg.h5 before training continues.
+# general settings
+name: PCBSegClassNet
+model_type: SegmentationModel
+
+datasets:
+  train:
+    name: FPIC
+    type: Segmentation
+    data_images: ../data/segmentation/train/images/
+    data_masks: ../data/segmentation/train/masks/
+
+    img_size_h: 512
+    img_size_w: 512
+
+    # data loader
+    use_shuffle: true
+    batch_size: 16
+
+  val:
+    name: FPIC
+    type: Segmentation
+    data_images: ../data/segmentation/val/images/
+    data_masks: ../data/segmentation/val/masks/
+
+    img_size_h: 512
+    img_size_w: 512
+
+    use_shuffle: false
+    batch_size: 1
+
+# path
+path:
+  checkpoint_network: ../checkpoints/best_seg.h5
+  log_file: ../logs/app.log
+
+# training settings
+train:
+  optim:
+    type: Adam
+    lr: 0.00001       # 1e-5 — start where the previous run left off
+    betas: [0.9, 0.9]
+
+  callbacks:
+    modelcheckpoint:
+      type: ModelCheckpoint
+      monitor: val_dice_coef
+      mode: max
+      verbose: 1
+      save_best_only: true
+      save_weights_only: false
+
+    reducelronplateau:
+      type: ReduceLROnPlateau
+      monitor: val_loss
+      mode: min
+      verbose: 1
+      factor: 0.1
+      patience: 15
+      min_lr: 0.000001   # 1e-6 — allow finer adjustments than the initial run
+
+  num_classes: 25
+
+  # losses
+  loss:
+    type: DISLoss
+
+  # metrics
+  metric:
+    DICE:
+      type: dice_coef
+    IoU:
+      type: jacard_coef
+
+# val settings
+val:
+  # metrics
+  metric:
+    DICE:
+      type: DiceCoef
+    IoU:
+      type: IoU
diff --git a/src/models/network.py b/src/models/network.py
index 8a1da70..15253b5 100755
--- a/src/models/network.py
+++ b/src/models/network.py
@@ -52,7 +52,7 @@ def build(self):
         """
         build encoder and final model
         """
-        encoder = get_encoder(self.image_height, self.image_width)
+        encoder, _, _ = get_encoder(self.image_height, self.image_width)
         model = get_classification(encoder, self.num_classes)
         return model
 
diff --git a/src/train_segmentation.py b/src/train_segmentation.py
index d51e2bb..fbda5d3 100755
--- a/src/train_segmentation.py
+++ b/src/train_segmentation.py
@@ -34,10 +34,14 @@ def parse_config():
                         type=int,
                         default=1,
                         help="number of epochs.")
+    parser.add_argument("-resume",
+                        action="store_true",
+                        help="resume training from existing best checkpoint.")
     args = parser.parse_args()
 
     opt = parse(args.opt)
     opt["train"]["total_epochs"] = args.epoch
+    opt["train"]["resume"] = args.resume
     return opt
 
 def init_log(opt):
@@ -110,6 +114,12 @@ def main():
             )
         )
 
+    # resume from existing checkpoint if requested
+    import os
+    if opt["train"].get("resume") and os.path.exists(opt["path"]["checkpoint_network"]):
+        logger.info(f"Resuming from {opt['path']['checkpoint_network']}")
+        model.load_weights(opt["path"]["checkpoint_network"])
+
     # training model
     if opt["train"]["total_epochs"] > 0:
         logger.info(f"Training for {opt['train']['total_epochs']} epochs")