204 changes: 195 additions & 9 deletions biapy/config/config.py
@@ -1412,6 +1412,178 @@ def __init__(self, job_dir: str, job_identifier: str):
#
_C.MODEL.TORCHVISION_MODEL_NAME = ""

#
# BIAPY BACKEND MODELS
#
# Architecture of the network. Possible values are:
# * Semantic segmentation: 'unet', 'resunet', 'resunet++', 'attention_unet', 'multiresunet', 'seunet', 'resunet_se', 'unetr', 'unext_v1', 'unext_v2'
# * Instance segmentation: 'unet', 'resunet', 'resunet++', 'attention_unet', 'multiresunet', 'seunet', 'resunet_se', 'unetr', 'unext_v1', 'unext_v2'
# * Detection: 'unet', 'resunet', 'resunet++', 'attention_unet', 'multiresunet', 'seunet', 'resunet_se', 'unetr', 'unext_v1', 'unext_v2'
# * Denoising: 'unet', 'resunet', 'resunet++', 'attention_unet', 'seunet', 'resunet_se', 'unext_v1', 'unext_v2', 'nafnet'
# * Super-resolution: 'edsr', 'rcan', 'dfcan', 'wdsr', 'unet', 'resunet', 'resunet++', 'seunet', 'resunet_se', 'attention_unet', 'multiresunet', 'unext_v1', 'unext_v2'
# * Self-supervision: 'unet', 'resunet', 'resunet++', 'attention_unet', 'multiresunet', 'seunet', 'resunet_se', 'unetr', 'edsr', 'rcan', 'dfcan', 'wdsr', 'vit', 'mae', 'unext_v1', 'unext_v2'
# * Classification: 'simple_cnn', 'vit', 'efficientnet_b[0-7]' (only 2D)
# * Image to image: 'edsr', 'rcan', 'dfcan', 'wdsr', 'unet', 'resunet', 'resunet++', 'seunet', 'resunet_se', 'attention_unet', 'unetr', 'multiresunet', 'unext_v1', 'unext_v2'
_C.MODEL.ARCHITECTURE = "unet"
# Number of feature maps on each level of the network.
_C.MODEL.FEATURE_MAPS = [16, 32, 64, 128, 256]
# Dropout values to use. Set to 0 to disable dropout. When using it with 'ViT' or 'unetr',
# a list with just one number must be provided
_C.MODEL.DROPOUT_VALUES = [0.0, 0.0, 0.0, 0.0, 0.0]
# Normalization layer (one of 'bn', 'sync_bn', 'in', 'gn' or 'none').
_C.MODEL.NORMALIZATION = "bn"
# Kernel size
_C.MODEL.KERNEL_SIZE = 3
# Upsampling layer to use in the model. Options: ["upsampling", "convtranspose"]
_C.MODEL.UPSAMPLE_LAYER = "convtranspose"
# Activation function to use throughout the model
_C.MODEL.ACTIVATION = "ELU"
# Number of classes including the background class (which should use label 0)
_C.DATA.N_CLASSES = 2
# Downsampling to be made in Z. This value will be the third integer of the MaxPooling operation. When facing
# anisotropic datasets, set it to get better performance
_C.MODEL.Z_DOWN = [0, 0, 0, 0]
# For each level of the model (U-Net levels), set to true or false if the dimensions of the feature maps are isotropic.
_C.MODEL.ISOTROPY = [True, True, True, True, True]
# Include extra convolutional layers with a larger kernel at the beginning and end of the U-Net-like model.
_C.MODEL.LARGER_IO = False
# Checkpoint: set to True to load previous training weights (needed for inference or for fine-tuning)
_C.MODEL.LOAD_CHECKPOINT = False
# Whether to load only the model's weights from the checkpoint, or also the optimizer, epochs and loss_scaler.
_C.MODEL.LOAD_CHECKPOINT_ONLY_WEIGHTS = True
# Decide which checkpoint to load from the job's dir if PATHS.CHECKPOINT_FILE is ''.
# Options: 'best_on_val' or 'last_on_train'
_C.MODEL.LOAD_CHECKPOINT_EPOCH = "best_on_val"
# Whether to load the model from the checkpoint instead of building it following 'MODEL.ARCHITECTURE' when 'MODEL.SOURCE' is "biapy"
_C.MODEL.LOAD_MODEL_FROM_CHECKPOINT = True
# Format of the output checkpoint. Options are 'pth' (native PyTorch format) or 'safetensors' (https://github.com/huggingface/safetensors)
_C.MODEL.OUT_CHECKPOINT_FORMAT = "pth"
# Skip loading those layers whose shape does not match the given checkpoint. If this is set to False, a regular load
# will be done, which will fail if a layer mismatch is found. Only works when 'MODEL.LOAD_MODEL_FROM_CHECKPOINT' is True
_C.MODEL.SKIP_UNMATCHED_LAYERS = False
# Frequency (in epochs) at which to save an extra checkpoint of the model, apart from the ones saved with
# LOAD_CHECKPOINT_ONLY_WEIGHTS. Set it to -1 to disable it.
_C.MODEL.SAVE_CKPT_FREQ = -1
# Number of ConvNeXtBlocks in each level.
_C.MODEL.CONVNEXT_LAYERS = [2, 2, 2, 2, 2]
# Maximum Stochastic Depth probability for the U-NeXt model.
_C.MODEL.CONVNEXT_SD_PROB = 0.1
# Layer Scale parameter for the U-NeXt model.
_C.MODEL.CONVNEXT_LAYER_SCALE = 1e-6
# Size of the stem kernel in the U-NeXt model.
_C.MODEL.CONVNEXT_STEM_K_SIZE = 2
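# --- Illustrative sketch, not part of the diff: one way the backend-model options above could be combined
# --- for an anisotropic 3D dataset. Values are hypothetical and `example` is only a local yacs CfgNode,
# --- not a real BiaPy configuration (users normally override these defaults through a YAML file).
from yacs.config import CfgNode as CN

example = CN()
example.MODEL = CN()
example.MODEL.ARCHITECTURE = "resunet"
example.MODEL.FEATURE_MAPS = [16, 32, 64, 128, 256]        # one entry per U-Net level
example.MODEL.Z_DOWN = [1, 1, 2, 2]                        # one entry per level transition; pool Z only in the deeper levels
example.MODEL.ISOTROPY = [False, False, True, True, True]  # shallow levels treated as anisotropic
example.MODEL.DROPOUT_VALUES = [0.0, 0.0, 0.1, 0.1, 0.2]
example.MODEL.NORMALIZATION = "bn"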

# TRANSFORMERS MODELS
# Type of model. Options are "custom", "vit_base_patch16", "vit_large_patch16" and "vit_huge_patch16". With the "custom" setting
# the rest of the ViT parameters can be modified; the other options set them automatically.
_C.MODEL.VIT_MODEL = "custom"
# Size of the patches that are extracted from the input image.
_C.MODEL.VIT_TOKEN_SIZE = 16
# Dimension of the embedding space
_C.MODEL.VIT_EMBED_DIM = 768
# Number of transformer encoder layers
_C.MODEL.VIT_NUM_LAYERS = 12
# Number of heads in the multi-head attention layer.
_C.MODEL.VIT_NUM_HEADS = 12
# Size of the dense layers of the final classifier. This value will multiply 'VIT_EMBED_DIM'
_C.MODEL.VIT_MLP_RATIO = 4.0
# Normalization layer epsilon
_C.MODEL.VIT_NORM_EPS = 1e-6

# Dimension of the embedding space for the MAE decoder
_C.MODEL.MAE_DEC_HIDDEN_SIZE = 512
# Number of transformer decoder layers
_C.MODEL.MAE_DEC_NUM_LAYERS = 8
# Number of heads in the multi-head attention layer.
_C.MODEL.MAE_DEC_NUM_HEADS = 16
# Size of the dense layers of the final classifier
_C.MODEL.MAE_DEC_MLP_DIMS = 2048
# Type of the masking strategy. Options: ["grid", "random"]
_C.MODEL.MAE_MASK_TYPE = "grid"
# Fraction of the input image to mask (applied only when MODEL.MAE_MASK_TYPE == "random"). Value between 0 and 1.
_C.MODEL.MAE_MASK_RATIO = 0.5
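# --- Illustrative sketch, not part of the diff: the defaults above match a ViT-Base-style encoder
# --- (embedding dim 768, 12 layers, 12 heads). With VIT_MODEL = "custom" these fields can be changed by
# --- hand; the smaller encoder and random MAE masking below are hypothetical values, not recommendations.
from yacs.config import CfgNode as CN

vit_example = CN()
vit_example.MODEL = CN()
vit_example.MODEL.VIT_MODEL = "custom"
vit_example.MODEL.VIT_TOKEN_SIZE = 16
vit_example.MODEL.VIT_EMBED_DIM = 384     # smaller embedding than the ViT-Base default
vit_example.MODEL.VIT_NUM_LAYERS = 6
vit_example.MODEL.VIT_NUM_HEADS = 6
vit_example.MODEL.MAE_MASK_TYPE = "random"
vit_example.MODEL.MAE_MASK_RATIO = 0.75   # mask 75% of the input; only used with the "random" strategy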

# UNETR
# Multiple of the transformer encoder layers from which the skip connection signal is extracted
_C.MODEL.UNETR_VIT_HIDD_MULT = 3
# Number of filters in the first layer of the UNETR decoder. In each layer the previous number of filters is doubled.
_C.MODEL.UNETR_VIT_NUM_FILTERS = 16
# Decoder activation
_C.MODEL.UNETR_DEC_ACTIVATION = "relu"
# Decoder convolutions' kernel size
_C.MODEL.UNETR_DEC_KERNEL_SIZE = 3

# Specific for SR models based on U-Net architectures. Options are ["pre", "post"]
_C.MODEL.UNET_SR_UPSAMPLE_POSITION = "pre"

# RCAN
# Number of RG modules
_C.MODEL.RCAN_RG_BLOCK_NUM = 10
# Number of RCAB modules in each RG block
_C.MODEL.RCAN_RCAB_BLOCK_NUM = 20
# Filters in the convolutions
_C.MODEL.RCAN_CONV_FILTERS = 16
# Channel reduction ratio for channel attention
_C.MODEL.RCAN_REDUCTION_RATIO = 16
# Whether to keep the upscaling layer or not.
_C.MODEL.RCAN_UPSCALING_LAYER = True

# These parameters can be used as a template for building custom HRNet versions
_C.MODEL.HRNET = CN()
# Whether to downsample the input in Z or not
_C.MODEL.HRNET.Z_DOWN = True
# Type of block to use in HRNet. Options: 'BASIC', 'BOTTLENECK', 'CONVNEXT_V1' and 'CONVNEXT_V2'
_C.MODEL.HRNET.BLOCK_TYPE = 'BASIC'
# Head type to use in HRNet. Options: "OCR", "ASPP", "PSP", "FCN"
_C.MODEL.HRNET.HEAD_TYPE = "FCN"
# Indicate whether to use a custom configuration for HRNet or a predefined one. If set to True,
# MODEL.HRNET.STAGE2, MODEL.HRNET.STAGE3 and MODEL.HRNET.STAGE4 will be used. If False, the configuration
# will be set depending on the selected architecture (see MODEL.ARCHITECTURE)
_C.MODEL.HRNET.CUSTOM = False

# These stages are used for HRNet18, HRNet32, HRNet48 and HRNet64
_C.MODEL.HRNET.STAGE2 = CN()
_C.MODEL.HRNET.STAGE2.NUM_MODULES = 1
_C.MODEL.HRNET.STAGE2.NUM_BRANCHES = 2
_C.MODEL.HRNET.STAGE2.NUM_BLOCKS = [4, 4]
_C.MODEL.HRNET.STAGE2.NUM_CHANNELS = [18, 36]
_C.MODEL.HRNET.STAGE3 = CN()
_C.MODEL.HRNET.STAGE3.NUM_MODULES = 4
_C.MODEL.HRNET.STAGE3.NUM_BRANCHES = 3
_C.MODEL.HRNET.STAGE3.NUM_BLOCKS = [4, 4, 4]
_C.MODEL.HRNET.STAGE3.NUM_CHANNELS = [18, 36, 72]
_C.MODEL.HRNET.STAGE4 = CN()
_C.MODEL.HRNET.STAGE4.NUM_MODULES = 3
_C.MODEL.HRNET.STAGE4.NUM_BRANCHES = 4
_C.MODEL.HRNET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
_C.MODEL.HRNET.STAGE4.NUM_CHANNELS = [18, 36, 72, 144]
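# --- Illustrative sketch, not part of the diff: with MODEL.HRNET.CUSTOM = True the stage options above are
# --- read directly. The values below are hypothetical and follow the usual HRNet widening pattern, where an
# --- HRNet-W48-style network doubles the channel count at every extra branch (48, 96, 192, 384).
from yacs.config import CfgNode as CN

hrnet_example = CN()
hrnet_example.MODEL = CN()
hrnet_example.MODEL.HRNET = CN()
hrnet_example.MODEL.HRNET.CUSTOM = True
hrnet_example.MODEL.HRNET.STAGE2 = CN({"NUM_MODULES": 1, "NUM_BRANCHES": 2, "NUM_BLOCKS": [4, 4], "NUM_CHANNELS": [48, 96]})
hrnet_example.MODEL.HRNET.STAGE3 = CN({"NUM_MODULES": 4, "NUM_BRANCHES": 3, "NUM_BLOCKS": [4, 4, 4], "NUM_CHANNELS": [48, 96, 192]})
hrnet_example.MODEL.HRNET.STAGE4 = CN({"NUM_MODULES": 3, "NUM_BRANCHES": 4, "NUM_BLOCKS": [4, 4, 4, 4], "NUM_CHANNELS": [48, 96, 192, 384]})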

_C.MODEL.STUNET = CN()
# Variant of the STUNet model. Options are: 'small', 'base', 'large'
_C.MODEL.STUNET.VARIANT = 'base'
# Whether to use a version of STUNet pretrained on ImageNet
_C.MODEL.STUNET.PRETRAINED = False

# NAFNet
_C.MODEL.NAFNET = CN()
# Base number of channels (width) used in the first layer and base levels.
_C.MODEL.NAFNET.WIDTH = 16
# Number of NAFBlocks stacked at the bottleneck (deepest level).
_C.MODEL.NAFNET.MIDDLE_BLK_NUM = 12
# Number of NAFBlocks assigned to each downsampling level of the encoder.
_C.MODEL.NAFNET.ENC_BLK_NUMS = [2, 2, 4, 8]
# Number of NAFBlocks assigned to each upsampling level of the decoder.
_C.MODEL.NAFNET.DEC_BLK_NUMS = [2, 2, 2, 2]
# Channel expansion factor for the depthwise convolution within the gating unit.
_C.MODEL.NAFNET.DW_EXPAND = 2
# Expansion factor for the hidden layer within the feed-forward network.
_C.MODEL.NAFNET.FFN_EXPAND = 2
# Discriminator architecture
_C.MODEL.NAFNET.ARCHITECTURE_D = "patchgan"
# Discriminator PATCHGAN
_C.MODEL.NAFNET.PATCHGAN = CN()
# Number of initial convolutional filters in the first layer of the discriminator.
_C.MODEL.NAFNET.PATCHGAN.BASE_FILTERS = 64
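# --- Illustrative sketch, not part of the diff: channel widths implied by NAFNET.WIDTH and the number of
# --- encoder levels, assuming the usual NAFNet behaviour of doubling the channel count at every
# --- downsampling step (an assumption stated here for orientation only).
width = 16                   # NAFNET.WIDTH
enc_blk_nums = [2, 2, 4, 8]  # NAFNET.ENC_BLK_NUMS, one entry per encoder level
level_widths = [width * (2 ** i) for i in range(len(enc_blk_nums) + 1)]
print(level_widths)          # [16, 32, 64, 128, 256]; the last entry is the bottleneck width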

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# 6. Loss definition options
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -1474,26 +1646,37 @@ def __init__(self, job_dir: str, job_identifier: str):
_C.LOSS.CONTRAST.PROJ_DIM = 256
_C.LOSS.CONTRAST.PIXEL_UPD_FREQ = 10

# Fine-grained GAN loss composition. Set any weight to 0.0 to disable that term.
# Used when LOSS.TYPE == "CYCLEGAN".
_C.LOSS.CYCLEGAN = CN()
# Weight for adversarial BCE term.
_C.LOSS.CYCLEGAN.LAMBDA_GAN = 1.0
# Weight for L1 reconstruction term.
_C.LOSS.CYCLEGAN.LAMBDA_RECON = 10.0
# Weight for MSE reconstruction term.
_C.LOSS.CYCLEGAN.DELTA_MSE = 0.0
# Weight for VGG perceptual term.
_C.LOSS.CYCLEGAN.ALPHA_PERCEPTUAL = 0.0
# Weight for SSIM term.
_C.LOSS.CYCLEGAN.GAMMA_SSIM = 1.0
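# --- Illustrative sketch, not part of the diff: one way the weights above could be combined into a single
# --- generator objective. `weights` stands for a node like LOSS.CYCLEGAN; the perceptual and SSIM terms are
# --- omitted and would be added in the same weighted fashion. This is not BiaPy's actual loss code.
import torch
import torch.nn.functional as F

def generator_loss_sketch(disc_pred_fake, reconstruction, target, weights):
    loss = torch.zeros((), device=reconstruction.device)
    if weights.LAMBDA_GAN > 0:    # adversarial term: push the discriminator towards "real" for fake outputs
        loss = loss + weights.LAMBDA_GAN * F.binary_cross_entropy_with_logits(
            disc_pred_fake, torch.ones_like(disc_pred_fake))
    if weights.LAMBDA_RECON > 0:  # L1 reconstruction term
        loss = loss + weights.LAMBDA_RECON * F.l1_loss(reconstruction, target)
    if weights.DELTA_MSE > 0:     # MSE reconstruction term
        loss = loss + weights.DELTA_MSE * F.mse_loss(reconstruction, target)
    return loss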

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# 7. Training phase options
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
_C.TRAIN = CN()
_C.TRAIN.ENABLE = False
# Enable verbosity
_C.TRAIN.VERBOSE = False
# Optimizer to use. Possible values: "SGD", "ADAM" or "ADAMW"
_C.TRAIN.OPTIMIZER = "SGD"
# Learning rate
_C.TRAIN.LR = 1.0e-4
# Optimizer(s) to use. Possible values: "SGD", "ADAM" or "ADAMW".
_C.TRAIN.OPTIMIZER = ["SGD"]
# Learning rate(s).
_C.TRAIN.LR = [1.0e-4]
# Weight decay
_C.TRAIN.W_DECAY = 0.02
# Coefficients used for computing running averages of the gradient and its square. Used in ADAM and ADAMW optimizers
_C.TRAIN.OPT_BETAS = (0.9, 0.999)
_C.TRAIN.OPT_BETAS = [[0.9, 0.999]]
# Batch size
_C.TRAIN.BATCH_SIZE = 2
# If memory or the number of GPUs is limited, use this variable to maintain the effective batch size, which is
# batch_size (per gpu) * nodes * (gpus per node) * accum_iter.
_C.TRAIN.ACCUM_ITER = 1
# Number of epochs to train the model
_C.TRAIN.EPOCHS = 360
# Epochs to wait with no validation data improvement until the training is stopped
@@ -1509,6 +1692,9 @@ def __init__(self, job_dir: str, job_identifier: str):
# * Classification: 'accuracy', 'top-5-accuracy'
# * Image to image: "psnr", "mae", "mse", "ssim"
_C.TRAIN.METRICS = []

# Gradient clipping max norm applied per optimizer. 0 = disabled.
_C.TRAIN.GRADIENT_CLIP_NORM = 0.0
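# --- Illustrative sketch, not part of the diff: with the now list-valued training options, entries are
# --- matched by position (one per optimizer / parameter group). The two-optimizer setup below (e.g. a
# --- generator/discriminator pair) uses hypothetical values and a local CfgNode, not a full BiaPy config.
from yacs.config import CfgNode as CN

train_example = CN()
train_example.OPTIMIZER = ["ADAMW", "ADAM"]              # one optimizer per parameter group
train_example.LR = [1.0e-4, 4.0e-4]                      # matching learning rates, by position
train_example.OPT_BETAS = [[0.9, 0.999], [0.5, 0.999]]   # matching beta pairs (ADAM/ADAMW only)
train_example.GRADIENT_CLIP_NORM = 1.0                   # max norm applied per optimizer; 0.0 disables clipping
train_example.LR_SCHEDULER = CN()
train_example.LR_SCHEDULER.MIN_LR = [1.0e-6, 1.0e-6]     # lower LR bounds, also one entry per optimizer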

# Callbacks
# Value to monitor in order to decide which epoch's checkpoint is the best to save. Currently not used.
@@ -1526,7 +1712,7 @@ def __init__(self, job_dir: str, job_identifier: str):
_C.TRAIN.LR_SCHEDULER = CN()
_C.TRAIN.LR_SCHEDULER.NAME = "" # Possible options: 'warmupcosine', 'reduceonplateau', 'onecycle'
# Lower bound on the learning rate used in 'warmupcosine' and 'reduceonplateau'
_C.TRAIN.LR_SCHEDULER.MIN_LR = -1.0
_C.TRAIN.LR_SCHEDULER.MIN_LR = [-1.0]

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# 7.1.1 Reduce on plateau options
7 changes: 3 additions & 4 deletions biapy/data/generators/__init__.py
@@ -251,7 +251,7 @@ def create_train_val_augmentors(
dic["zflip"] = cfg.AUGMENTOR.ZFLIP
if cfg.PROBLEM.TYPE == "INSTANCE_SEG":
dic["instance_problem"] = True
elif cfg.PROBLEM.TYPE == "DENOISING":
elif cfg.PROBLEM.TYPE == "DENOISING" and cfg.MODEL.ARCHITECTURE != 'nafnet':
dic["n2v"] = True
dic["n2v_perc_pix"] = cfg.PROBLEM.DENOISING.N2V_PERC_PIX
dic["n2v_manipulator"] = cfg.PROBLEM.DENOISING.N2V_MANIPULATOR
@@ -297,7 +297,7 @@ def create_train_val_augmentors(
)
if cfg.PROBLEM.TYPE == "INSTANCE_SEG":
dic["instance_problem"] = True
elif cfg.PROBLEM.TYPE == "DENOISING":
elif cfg.PROBLEM.TYPE == "DENOISING" and cfg.MODEL.ARCHITECTURE != 'nafnet':
dic["n2v"] = True
dic["n2v_perc_pix"] = cfg.PROBLEM.DENOISING.N2V_PERC_PIX
dic["n2v_manipulator"] = cfg.PROBLEM.DENOISING.N2V_MANIPULATOR
Expand All @@ -317,7 +317,7 @@ def create_train_val_augmentors(
)

# Training dataset
total_batch_size = cfg.TRAIN.BATCH_SIZE * get_world_size() * cfg.TRAIN.ACCUM_ITER
total_batch_size = cfg.TRAIN.BATCH_SIZE * get_world_size()
training_samples = len(train_generator)

# ---- Choose num_workers for this DataLoader ----
@@ -352,7 +352,6 @@ def worker_init_fn(worker_id):

num_training_steps_per_epoch = training_samples // total_batch_size
print(f"Train/val generators with {num_workers} workers")
print("Accumulate grad iterations: %d" % cfg.TRAIN.ACCUM_ITER)
print("Effective batch size: %d" % total_batch_size)
print("Sampler_train = %s" % str(sampler_train))
train_dataset = DataLoader(
101 changes: 57 additions & 44 deletions biapy/engine/__init__.py
@@ -21,7 +21,7 @@ def prepare_optimizer(
cfg: CN,
model_without_ddp: nn.Module | nn.parallel.DistributedDataParallel,
steps_per_epoch: int,
) -> Tuple[Optimizer, Scheduler | None]:
) -> Tuple[list[Optimizer], list[Scheduler | None]]:
"""
Create and configure the optimizer and learning rate scheduler for the given model.

@@ -40,50 +40,63 @@ def prepare_optimizer(

Returns
-------
optimizer : Optimizer
Configured optimizer for the model.
lr_scheduler : Scheduler or None
Configured learning rate scheduler, or None if not specified.
optimizers : List[Optimizer]
Configured optimizers for the models.
lr_schedulers : List[Scheduler | None]
Configured learning rate schedulers, or None if not specified.
"""
lr = cfg.TRAIN.LR if cfg.TRAIN.LR_SCHEDULER.NAME != "warmupcosine" else cfg.TRAIN.LR_SCHEDULER.MIN_LR
opt_args = {}
if cfg.TRAIN.OPTIMIZER in ["ADAM", "ADAMW"]:
opt_args["betas"] = cfg.TRAIN.OPT_BETAS
optimizer = timm.optim.create_optimizer_v2(
model_without_ddp,
opt=cfg.TRAIN.OPTIMIZER,
lr=lr,
weight_decay=cfg.TRAIN.W_DECAY,
**opt_args,
)
print(optimizer)

# Learning rate schedulers
lr_scheduler = None
if cfg.TRAIN.LR_SCHEDULER.NAME != "":
if cfg.TRAIN.LR_SCHEDULER.NAME == "reduceonplateau":
lr_scheduler = ReduceLROnPlateau(
optimizer,
patience=cfg.TRAIN.LR_SCHEDULER.REDUCEONPLATEAU_PATIENCE,
factor=cfg.TRAIN.LR_SCHEDULER.REDUCEONPLATEAU_FACTOR,
min_lr=cfg.TRAIN.LR_SCHEDULER.MIN_LR,
)
elif cfg.TRAIN.LR_SCHEDULER.NAME == "warmupcosine":
lr_scheduler = WarmUpCosineDecayScheduler(
lr=cfg.TRAIN.LR,
min_lr=cfg.TRAIN.LR_SCHEDULER.MIN_LR,
warmup_epochs=cfg.TRAIN.LR_SCHEDULER.WARMUP_COSINE_DECAY_EPOCHS,
epochs=cfg.TRAIN.EPOCHS,
)
elif cfg.TRAIN.LR_SCHEDULER.NAME == "onecycle":
lr_scheduler = OneCycleLR(
optimizer,
cfg.TRAIN.LR,
epochs=cfg.TRAIN.EPOCHS,
steps_per_epoch=steps_per_epoch,
)

return optimizer, lr_scheduler

optimizers = []
lr_schedulers = []

if hasattr(model_without_ddp, 'param_groups'):
param_groups = model_without_ddp.param_groups
else:
param_groups = [[p for p in model_without_ddp.parameters()]]

for i in range(len(cfg.TRAIN.OPTIMIZER)):
lr = cfg.TRAIN.LR if cfg.TRAIN.LR_SCHEDULER.NAME != "warmupcosine" else cfg.TRAIN.LR_SCHEDULER.MIN_LR
opt_args = {}
if cfg.TRAIN.OPTIMIZER[i] in ["ADAM", "ADAMW"]:
opt_args["betas"] = cfg.TRAIN.OPT_BETAS[i]
optimizer = timm.optim.create_optimizer_v2(
param_groups[i],
opt=cfg.TRAIN.OPTIMIZER[i],
lr=lr[i],
weight_decay=cfg.TRAIN.W_DECAY,
**opt_args,
)
print(optimizer)
optimizers.append(optimizer)

# Learning rate schedulers
lr_scheduler = None
if cfg.TRAIN.LR_SCHEDULER.NAME != "":
if cfg.TRAIN.LR_SCHEDULER.NAME == "reduceonplateau":
lr_scheduler = ReduceLROnPlateau(
optimizer,
patience=cfg.TRAIN.LR_SCHEDULER.REDUCEONPLATEAU_PATIENCE,
factor=cfg.TRAIN.LR_SCHEDULER.REDUCEONPLATEAU_FACTOR,
min_lr=cfg.TRAIN.LR_SCHEDULER.MIN_LR[i],
)
elif cfg.TRAIN.LR_SCHEDULER.NAME == "warmupcosine":
lr_scheduler = WarmUpCosineDecayScheduler(
lr=cfg.TRAIN.LR[i],
min_lr=cfg.TRAIN.LR_SCHEDULER.MIN_LR[i],
warmup_epochs=cfg.TRAIN.LR_SCHEDULER.WARMUP_COSINE_DECAY_EPOCHS,
epochs=cfg.TRAIN.EPOCHS,
)
elif cfg.TRAIN.LR_SCHEDULER.NAME == "onecycle":
lr_scheduler = OneCycleLR(
optimizer,
cfg.TRAIN.LR[i],
epochs=cfg.TRAIN.EPOCHS,
steps_per_epoch=steps_per_epoch,
)

lr_schedulers.append(lr_scheduler)

return optimizers, lr_schedulers
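# --- Illustrative sketch, not part of the diff: how a caller might consume the per-optimizer lists returned
# --- above. The helper below is hypothetical (the real BiaPy training loop lives elsewhere and is not shown
# --- in this PR); it only demonstrates the shape of the new API together with TRAIN.GRADIENT_CLIP_NORM.
# --- `nn` is the module's existing torch.nn import, as used in prepare_optimizer's signature.
def _example_training_step(cfg, optimizers, loss):
    """Hypothetical single training step using the lists returned by prepare_optimizer."""
    for opt in optimizers:
        opt.zero_grad()
    loss.backward()
    for opt in optimizers:
        if cfg.TRAIN.GRADIENT_CLIP_NORM > 0:  # new per-optimizer gradient clipping option
            params = [p for group in opt.param_groups for p in group["params"]]
            nn.utils.clip_grad_norm_(params, cfg.TRAIN.GRADIENT_CLIP_NORM)
        opt.step()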


def build_callbacks(cfg: CN) -> EarlyStopping | None: