Skip to content
177 changes: 91 additions & 86 deletions deepprofiler/__main__.py
Original file line number Diff line number Diff line change
@@ -1,116 +1,85 @@
"""Command-line interface for DeepProfiler.

Four subcommands are available, intended to be run in order:

1. ``setup`` — create the project directory structure under ``--root``.
2. ``prepare`` — compute per-plate illumination statistics and compress images
to 8-bit PNG (optional but recommended for large datasets).
3. ``profile`` — extract per-cell deep learning features using the Cell
Painting CNN v1 checkpoint and write ``.npz`` files.
4. ``split`` — split the metadata index into N parts for parallel profiling
across multiple machines or jobs.

Typical usage::

deepprofiler --root=/data/project --config=config.json --exp=run1 profile

See README.md and the DeepProfiler Handbook for full configuration details.
"""

import copy
import json
import os

import click

import deepprofiler.dataset.compression
import deepprofiler.dataset.illumination_statistics
import deepprofiler.dataset.image_dataset
import deepprofiler.dataset.indexing
import deepprofiler.dataset.illumination_statistics
import deepprofiler.dataset.metadata
import deepprofiler.dataset.utils
import deepprofiler.profiling
import deepprofiler.dataset.image_dataset
import deepprofiler.learning.training
import deepprofiler.learning.profiling
import deepprofiler.learning.optimization
import deepprofiler.download.normalize_bbbc021_metadata


# NOTE(review): this span appears to interleave two revisions of the same click group
# callback (two `def cli` signatures, two versions of the `dirs` table, two config-loading
# paths), and indentation has been lost. Comments below describe what each visible line
# does; they do not say which revision is current -- confirm against the repository.
# Main interaction point
@click.group()
@click.option("--root", prompt="Root directory for DeepProfiler experiment",
help="Root directory for DeepProfiler experiment",
type=click.Path(exists=True))
type=click.Path("r"))
@click.option("--config", default=None,
help="Path to existing config file (filename in project_root/inputs/config/)",
type=click.STRING)
help="Path to existing config file",
type=click.Path("r"))
@click.option("--cores", default=0,
help="Number of CPU cores for parallel processing (all=0) for prepare command",
help="Number of CPU cores for parallel processing (all=0)",
type=click.INT)
@click.option("--gpu", default="0",
help="GPU device id (the id can be checked with nvidia-smi)",
type=click.STRING)
@click.option("--exp", default="results",
help="Name of experiment, this folder will be created in project_root/outputs/",
type=click.STRING)
@click.option("--metadata", default='index.csv',
help="Metadata index filename in project_root/inputs/metadata/",
type=click.STRING)
@click.pass_context
# Signature variant that also receives the --exp, --gpu and --metadata option values.
def cli(context, root, config, exp, cores, gpu, metadata):
"""Configure paths and load the experiment config, then dispatch to a subcommand."""
# Signature variant that receives only --root, --config and --cores.
def cli(context, root, config, cores):
# Default directory table, keyed by a short name; every entry is derived from `root`.
dirs = {
"root": root,
# Variant built by string concatenation; result folders are nested under outputs/<exp>/.
"locations": root + "/inputs/locations/", # TODO: use os.path.join()
"config": root + "/inputs/config/",
"images": root + "/inputs/images/",
"metadata": root + "/inputs/metadata/",
"intensities": root + "/outputs/intensities/",
"compressed_images": root + "/outputs/compressed/images/",
"results": root + "/outputs/" + exp + "/",
"checkpoints": root + "/outputs/" + exp + "/checkpoint/",
"logs": root + "/outputs/" + exp + "/logs/",
"summaries": root + "/outputs/" + exp + "/summaries/",
"features": root + "/outputs/" + exp + "/features/"
# Variant built with os.path.join; training outputs live under outputs/training and
# there is no per-experiment folder.
"locations": os.path.join(root, "inputs", "locations"),
"config": os.path.join(root, "inputs", "config"),
"images": os.path.join(root, "inputs", "images"),
"metadata": os.path.join(root, "inputs", "metadata"),
"preprocessed": os.path.join(root, "inputs", "preprocessed"),
"pretrained": os.path.join(root, "inputs", "pretrained"),
"intensities": os.path.join(root, "outputs", "intensities"),
"compressed_images": os.path.join(root, "outputs", "compressed", "images"),
"compressed_metadata": os.path.join(root, "outputs", "compressed", "metadata"),
"training": os.path.join(root, "outputs", "training"),
"checkpoints": os.path.join(root, "outputs", "training", "checkpoint"),
"logs": os.path.join(root, "outputs", "training", "logs"),
"summaries": os.path.join(root, "outputs", "training", "summaries"),
"features": os.path.join(root, "outputs", "features")
}
# For the `setup` subcommand only the directory table is stored on the context and the
# callback returns before any config file is read.
if context.invoked_subcommand == 'setup':
context.obj["dirs"] = dirs
return
# Variant: an explicit --config path replaces the default config directory with the
# directory containing that file; otherwise fall back to config.json in dirs["config"].
if config is not None:

context.obj["config"] = {}
context.obj["config"]["paths"] = {}
context.obj["config"]["paths"]["config"] = config
dirs["config"] = os.path.dirname(os.path.abspath(config))
else:
config = os.path.join(dirs["config"], "config.json")

# Variant: --config is treated as a filename inside dirs["config"].
# NOTE(review): with the declared default of None this concatenation would raise
# TypeError before the `config is not None` test below -- confirm against the real file.
config = dirs["config"] + "/" + config
context.obj["cores"] = cores
context.obj["gpu"] = gpu
# Expose the --gpu value to downstream libraries through the environment.
os.environ["CUDA_VISIBLE_DEVICES"] = gpu
# Load configuration file
if config is not None and os.path.isfile(config):

if os.path.isfile(config):
with open(config, "r") as f:
params = json.load(f)

# Override paths defined by user
# Keys missing from the file's "paths" section are filled from `dirs`; keys present in
# the file replace the default in `dirs`. One variant stores values as-is, the other
# joins them with `root`.
if "paths" in params.keys():
for key, value in dirs.items():
if key not in params["paths"].keys():
params["paths"][key] = dirs[key]
params["paths"][key] = os.path.join(root, dirs[key])
else:
dirs[key] = params["paths"][key]
else:
# No "paths" section in the file: use a deep copy of the defaults.
params["paths"] = copy.deepcopy(dirs)
dirs[key] = os.path.join(root, params["paths"][key])

# Create the per-experiment output folders, but only when the root directory exists.
if os.path.isdir(dirs["root"]):
for k in ["results", "checkpoints", "logs", "summaries", "features"]:
os.makedirs(dirs[k], exist_ok=True)
else:
# Variant without the copy: params["paths"] is the same dict object as `dirs`.
params["paths"] = dirs

# Update references
# The index path is the metadata directory plus either the --metadata filename
# (concatenation variant) or the fixed name "index.csv" (os.path.join variant).
params["experiment_name"] = exp
params["paths"]["index"] = params["paths"]["metadata"] + metadata
params["paths"]["index"] = os.path.join(root, params["paths"]["metadata"], "index.csv")
context.obj["config"] = params
else:
raise Exception("Config does not exists; make sure that the file exists in /inputs/config/")

# Variant: build a Parallel helper from the loaded config and core count and store it
# on the context under "process".
process = deepprofiler.dataset.utils.Parallel(context.obj["config"], numProcs=context.obj["cores"])
context.obj["process"] = process
context.obj["dirs"] = dirs


# Optional tool: Create the support file and folder structure in a root directory
@cli.command(help='initialize folder structure of DeepProfiler project')
@cli.command()
@click.pass_context
def setup(context):
"""Create the project directory tree under the configured root."""
for path in context.obj["dirs"].values():
if not os.path.isdir(path):
print("Creating directory: ", path)
Expand All @@ -121,49 +90,85 @@ def setup(context):
context.obj["config"]["paths"] = context.obj["dirs"]


# Optional tool: Download and prepare the BBBC021 dataset
@cli.command()
@click.pass_context
def download_bbbc021(context):
    # Documented with comments rather than a docstring: click would otherwise
    # use the docstring as this command's --help text.
    #
    # Steps: invoke the `setup` command through the click context, pass the
    # same context to the BBBC021 metadata normalizer, then report completion.
    context.invoke(setup)
    normalize = deepprofiler.download.normalize_bbbc021_metadata.normalize_bbbc021_metadata
    normalize(context)
    print("BBBC021 download and preparation complete!")


# NOTE(review): two revisions of this command appear interleaved here (two @cli.command
# decorators, two `process =` assignments, two re-reads of the plate metadata) and
# indentation has been lost -- confirm which lines are current before relying on them.
# First tool: Compute illumination statistics and compress images
@cli.command(help='Run illumination correction and compression')
@cli.command()
@click.pass_context
def prepare(context):
"""Compute per-plate illumination statistics and compress images to 8-bit PNG."""
# Read the plates listed in the configured index file.
metadata = deepprofiler.dataset.metadata.read_plates(context.obj["config"]["paths"]["index"])
# Variant: build a Parallel helper locally from the config and the --cores value.
process = deepprofiler.dataset.utils.Parallel(context.obj["config"], numProcs=context.obj["cores"])
# Variant: reuse the helper stored on the context under "process".
process = context.obj["process"]
# First pass over the plates: illumination statistics.
process.compute(deepprofiler.dataset.illumination_statistics.calculate_statistics, metadata)
print("Illumination complete!")
# The plates are read again before the second pass (see the inline comment on these lines).
metadata = deepprofiler.dataset.metadata.read_plates(
context.obj["config"]["paths"]["index"]) # reinitialize generator
metadata = deepprofiler.dataset.metadata.read_plates(context.obj["config"]["paths"]["index"]) # reinitialize generator
# Second pass over the plates: compression.
process.compute(deepprofiler.dataset.compression.compress_plate, metadata)
# Write the compression index, then point the parent context's index path at
# compressed.csv inside the compressed_metadata directory.
deepprofiler.dataset.indexing.write_compression_index(context.obj["config"])
context.parent.obj["config"]["paths"]["index"] = os.path.join(context.obj["config"]["paths"]["compressed_metadata"], "compressed.csv")
print("Compression complete!")


# Second tool: Profile cells and extract features
@cli.command(help='run feature extraction')
# Optional learning tool: Optimize the hyperparameters of a model
@cli.command()
@click.option("--epoch", default=1)
@click.option("--seed", default=None)
@click.pass_context
def optimize(context, epoch, seed):
    # Documented with comments rather than a docstring: click would otherwise
    # use the docstring as this command's --help text.
    #
    # epoch / seed: values of the --epoch and --seed options, forwarded
    # unchanged to the Optimize constructor.
    parent_config = context.parent.obj["config"]
    if parent_config["prepare"]["compression"]["implement"]:
        # Compression is switched on in the config: redirect the parent
        # context's index and image paths to the compressed outputs.
        own_paths = context.obj["config"]["paths"]
        compressed_index = os.path.join(own_paths["compressed_metadata"], "compressed.csv")
        parent_config["paths"]["index"] = compressed_index
        parent_config["paths"]["images"] = own_paths["compressed_images"]

    config = context.obj["config"]
    metadata = deepprofiler.dataset.image_dataset.read_dataset(config)
    optim = deepprofiler.learning.optimization.Optimize(config, metadata, epoch, seed)
    optim.optimize()


# Second tool: Train a network
@cli.command()
@click.option("--epoch", default=1)
@click.option("--seed", default=None)
@click.pass_context
def train(context, epoch, seed):
    # Documented with comments rather than a docstring: click would otherwise
    # use the docstring as this command's --help text.
    #
    # epoch / seed: values of the --epoch and --seed options, forwarded
    # unchanged to learn_model.
    parent_config = context.parent.obj["config"]
    if parent_config["prepare"]["compression"]["implement"]:
        # Compression is switched on in the config: redirect the parent
        # context's index and image paths to the compressed outputs.
        own_paths = context.obj["config"]["paths"]
        compressed_index = os.path.join(own_paths["compressed_metadata"], "compressed.csv")
        parent_config["paths"]["index"] = compressed_index
        parent_config["paths"]["images"] = own_paths["compressed_images"]

    config = context.obj["config"]
    metadata = deepprofiler.dataset.image_dataset.read_dataset(config)
    deepprofiler.learning.training.learn_model(config, metadata, epoch, seed)


# NOTE(review): two revisions of this command appear interleaved here (two different
# read_dataset/profile call pairs at the end) and indentation has been lost -- confirm
# which lines are current before relying on them.
# Third tool: Profile cells and extract features
@cli.command()
@click.pass_context
@click.option("--part",
help="Part of index to process",
default=-1,
type=click.INT)
def profile(context, part):
"""Extract per-cell deep learning features and write .npz files."""
# When compression is enabled in the config, redirect the parent context's index and
# image paths to the compressed outputs.
if context.parent.obj["config"]["prepare"]["compression"]["implement"]:
context.parent.obj["config"]["paths"]["index"] = os.path.join(context.obj["config"]["paths"]["compressed_metadata"], "compressed.csv")
context.parent.obj["config"]["paths"]["images"] = context.obj["config"]["paths"]["compressed_images"]
config = context.obj["config"]
# A non-negative --part swaps "index.csv" in the index path for "index-NNN.csv"
# (zero-padded to three digits); the default of -1 leaves the index path untouched.
if part >= 0:
partfile = "index-{0:03d}.csv".format(part)
config["paths"]["index"] = context.obj["config"]["paths"]["index"].replace("index.csv", partfile)
# Variant: read the dataset with mode='profile' and call deepprofiler.profiling.profile.
dset = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"], mode='profile')
deepprofiler.profiling.profile(context.obj["config"], dset)
# Variant: read the dataset with default arguments and call deepprofiler.learning.profiling.profile.
metadata = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"])
deepprofiler.learning.profiling.profile(context.obj["config"], metadata)


# NOTE(review): two revisions of this command appear interleaved here (two @cli.command
# decorators) and indentation has been lost -- confirm which lines are current.
# Auxiliary tool: Split index in multiple parts
@cli.command(help='split metadata into multiple parts')
@cli.command()
@click.pass_context
@click.option("--parts",
help="Number of parts to split the index",
type=click.INT)
def split(context, parts):
"""Split the metadata index into N parts for parallel profiling jobs."""
# When compression is enabled in the config, redirect the parent context's index and
# image paths to the compressed outputs before splitting.
if context.parent.obj["config"]["prepare"]["compression"]["implement"]:
context.parent.obj["config"]["paths"]["index"] = os.path.join(context.obj["config"]["paths"]["compressed_metadata"], "compressed.csv")
context.parent.obj["config"]["paths"]["images"] = context.obj["config"]["paths"]["compressed_images"]
# --parts declares no default, so `parts` is presumably None when the option is omitted
# -- verify how split_index handles that.
deepprofiler.dataset.indexing.split_index(context.obj["config"], parts)

Expand Down
Loading
Loading