From 865df53f4f34c80813b6e3ed66169785f10ee797 Mon Sep 17 00:00:00 2001 From: Eric Boittier Date: Wed, 19 Jul 2023 23:46:02 +0200 Subject: [PATCH] Moved functions from main.py into respective files. Args parse added. Preproc and descriptor directives are available --- RESULTS.md | 9 +- benchmark.py | 239 ++++++++++++++++++++++++++ classifiers.py | 5 + configs.py | 91 ++++++++++ descriptors.py | 74 +++++++++ gzip_classifier.py | 2 - main.py | 406 ++++++--------------------------------------- models.py | 22 +++ regressors.py | 6 + tests.md | 0 10 files changed, 497 insertions(+), 357 deletions(-) create mode 100644 benchmark.py create mode 100644 classifiers.py create mode 100644 configs.py create mode 100644 descriptors.py create mode 100644 models.py create mode 100644 regressors.py create mode 100644 tests.md diff --git a/RESULTS.md b/RESULTS.md index 9b67bb6..7903e20 100644 --- a/RESULTS.md +++ b/RESULTS.md @@ -1,5 +1,6 @@ # Results Gzip-based Molecular Classification -|Data Set|Split |AUROC/RMSE (Valid)|F1/MAE (Valid)|AUROC/RMSE (Test)| F1/MAE (Test) | -|--------|------|------------------|--------------|-----------------|---------------| -|freesolv|random|0.456 +/- 0.094 |0.29 +/- 0.094|0.505 +/- 0.094 |0.308 +/- 0.094| -|delaney |random|1.242 +/- 0.14 |0.967 +/- 0.14|1.359 +/- 0.14 |1.056 +/- 0.14 | +|Data Set|Split |AUROC/RMSE (Valid)|F1/MAE (Valid) |AUROC/RMSE (Test)| F1/MAE (Test) | +|--------|------|------------------|---------------|-----------------|---------------| +|freesolv|random|0.588 +/- 0.244 |0.312 +/- 0.244|0.477 +/- 0.244 |0.305 +/- 0.244| +|delaney |random|1.303 +/- 0.085 |1.005 +/- 0.085|1.349 +/- 0.085 |1.029 +/- 0.085| +|lipo |random|0.926 +/- 0.041 |0.738 +/- 0.041|0.926 +/- 0.041 |0.743 +/- 0.041| diff --git a/benchmark.py b/benchmark.py new file mode 100644 index 0000000..643cd80 --- /dev/null +++ b/benchmark.py @@ -0,0 +1,239 @@ +from typing import List, Dict, Any, Tuple +from pytablewriter import MarkdownTableWriter +import numpy as np +import deepchem.molnet as mn +from sklearn.model_selection import train_test_split +from rdkit.Chem.AllChem import MolFromSmiles, MolToSmiles, MolToInchi + +from sklearn.metrics import ( + f1_score, + roc_auc_score, + mean_squared_error, + mean_absolute_error, +) +from sklearn.utils.class_weight import compute_class_weight + +from rdkit import rdBase + +blocker = rdBase.BlockLogs() + +from gzip_regressor import regress as gzip_regress +from gzip_classifier import classify as gzip_classify +from descriptors import preprocess + + + + +def write_table(results: List, model: str=None, descriptor: str=None) -> None: + values = [] + + for config, result in results: + values.append( + [ + config["dataset"], + config["splitter"], + result["valid_auroc"], + result["valid_f1"], + result["test_auroc"], + result["test_f1"], + ] + ) + + writer = MarkdownTableWriter( + table_name="Results Gzip-based Molecular Classification", + headers=[ + "Data Set", + "Split", + "AUROC/RMSE (Valid)", + "F1/MAE (Valid)", + "AUROC/RMSE (Test)", + "F1/MAE (Test)", + ], + value_matrix=values, + ) + + with open(f"results/RESULTS_{model}_{descriptor}.md", "w+") as f: + writer.stream = f + writer.write_table() + + if model == "gzip" and descriptor == "smiles": + writer.stream = f + writer.write_table() + + +def benchmark( + configs: List[Dict[str, Any]], + model: str, + classify=gzip_classify, + regress=gzip_regress, + preprocess_task="smiles")\ + -> None: + """Benchmark models on classification/regression tasks""" + results = [] + + for config in configs: + # Load data sets + run_results = [] + for _ in range(config["n"]): + tasks, X_train, y_train, X_valid, y_valid, X_test, y_test = molnet_loader( + config["dataset"], + splitter=config["splitter"], + preproc=config["preprocess"], + reload=False, + transformers=[], + ) + + if config["augment"] > 0: + X_train, y_train = augment(X_train, y_train, config["augment"]) + + if config["sub_sample"] > 0.0: + X_train, y_train = sub_sample(X_train, y_train, config["sub_sample"]) + + # Do classification + if config["task"] == "classification" and classify is not None: + # Get class weights + class_weights = [] + if config["is_imbalanced"]: + for y_task in y_train.T: + class_weights.append( + compute_class_weight( + "balanced", classes=sorted(list(set(y_task))), y=y_task + ) + ) + + # Run classification + valid_preds = classify( + X_train, y_train, X_valid, config["k"], class_weights + ) + test_preds = classify( + X_train, y_train, X_test, config["k"], class_weights + ) + + # Compute metrics + valid_auroc = roc_auc_score(y_valid, valid_preds) + valid_f1 = f1_score( + y_valid, + valid_preds, + average="micro", + ) + test_auroc = roc_auc_score(y_test, test_preds) + test_f1 = f1_score( + y_test, + test_preds, + average="micro", + ) + + print(f"\n{config['dataset']} ({len(tasks)} tasks)") + print(config) + print( + f"Valid AUROC: {valid_auroc}, Valid F1: {valid_f1} , Test AUROC: {test_auroc}, Test F1: {test_f1}" + ) + + run_results.append([valid_auroc, valid_f1, test_auroc, test_f1]) + + # Do regression + elif config["task"] == "regression" and regress is not None: + valid_preds = regress(X_train, y_train, X_valid, config["k"]) + test_preds = regress(X_train, y_train, X_test, config["k"]) + + # Compute metrics + valid_rmse = mean_squared_error(y_valid, valid_preds, squared=False) + valid_mae = mean_absolute_error( + y_valid, + valid_preds, + ) + test_rmse = mean_squared_error(y_test, test_preds, squared=False) + test_mae = mean_absolute_error( + y_test, + test_preds, + ) + + print(f"\n{config['dataset']} ({len(tasks)} tasks)") + print(config) + print( + f"Valid RMSE: {valid_rmse}, Valid MAE: {valid_mae} , Test RMSE: {test_rmse}, Test MAE: {test_mae}" + ) + + run_results.append([valid_rmse, valid_mae, test_rmse, test_mae]) + + run_results = np.array(run_results) + results_means = np.mean(run_results, axis=0) + results_stds = np.std(run_results, axis=0) + + results.append( + ( + config, + { + "valid_auroc": f"{round(results_means[0], 3)} +/- {round(results_stds[0], 3)}", + "valid_f1": f"{round(results_means[1], 3)} +/- {round(results_stds[0], 3)}", + "test_auroc": f"{round(results_means[2], 3)} +/- {round(results_stds[0], 3)}", + "test_f1": f"{round(results_means[3], 3)} +/- {round(results_stds[0], 3)}", + }, + ) + ) + + write_table(results, model=model, descriptor=preprocess_task) + + + + +def sub_sample( + X: np.array, Y: np.array, p: float = 0.5, seed=666 +) -> Tuple[np.array, np.array]: + X_sample, _, y_sample, _ = train_test_split( + X, + Y, + train_size=int(p * len(X)), + stratify=Y, + random_state=seed, + ) + return X_sample, y_sample + + +def augment(X: np.array, Y: np.array, n: int = 5) -> Tuple[np.array, np.array]: + X_aug = [] + y_aug = [] + + for x, y in zip(X, Y): + mol = MolFromSmiles(x) + for _ in range(n): + x_rand = MolToSmiles( + mol, + canonical=False, + doRandom=True, + kekuleSmiles=True, + allBondsExplicit=True, + allHsExplicit=True, + ) + + X_aug.append(x_rand) + y_aug.append(y) + + return np.array(X_aug), np.array(y_aug) + + + + +def molnet_loader( + name: str, preproc: bool = False, + preprocess_task: str = "smiles", **kwargs: object +) -> List[Tuple[str, np.array, np.array, np.array]]: + mn_loader = getattr(mn, f"load_{name}") + dc_set = mn_loader(**kwargs) + + tasks, dataset, _ = dc_set + train, valid, test = dataset + + X_train = np.array([preprocess(x, preproc, + preprocess_task=preprocess_task) for x in train.ids]) + y_train = np.array(train.y, dtype=int) + + X_valid = np.array([preprocess(x, preproc, + preprocess_task=preprocess_task) for x in valid.ids]) + y_valid = np.array(valid.y, dtype=int) + + X_test = np.array([preprocess(x, preproc, + preprocess_task=preprocess_task) for x in test.ids]) + y_test = np.array(test.y, dtype=int) + + return tasks, X_train, y_train, X_valid, y_valid, X_test, y_test diff --git a/classifiers.py b/classifiers.py new file mode 100644 index 0000000..d2a7761 --- /dev/null +++ b/classifiers.py @@ -0,0 +1,5 @@ +from gzip_classifier import classify as gzip_classify + +classifiers = { + "gzip": gzip_classify, +} \ No newline at end of file diff --git a/configs.py b/configs.py new file mode 100644 index 0000000..592630c --- /dev/null +++ b/configs.py @@ -0,0 +1,91 @@ + +dataset_configs = [ + { + "dataset": "freesolv", + "splitter": "random", + "task": "regression", + "k": 10, + "augment": 0, + "preprocess": True, + "sub_sample": 0.0, + "is_imbalanced": True, + "n": 4, + }, + { + "dataset": "delaney", + "splitter": "random", + "task": "regression", + "k": 10, + "augment": 0, + "preprocess": True, + "sub_sample": 0.0, + "is_imbalanced": True, + "n": 4, + }, + { + "dataset": "lipo", + "splitter": "random", + "task": "regression", + "k": 10, + "augment": 0, + "preprocess": True, + "sub_sample": 0.0, + "is_imbalanced": True, + "n": 4, + }, + # { + # "dataset": "sider", + # "splitter": "random", + # "task": "classification", + # "k": 5, + # "augment": 0, + # "preprocess": False, + # "sub_sample": 0.0, + # "is_imbalanced": True, + # "n": 4, + # }, + # { + # "dataset": "bbbp", + # "splitter": "scaffold", + # "task": "classification", + # "k": 5, + # "augment": 0, + # "preprocess": False, + # "sub_sample": 0.0, + # "is_imbalanced": True, + # "n": 1, + # }, + # { + # "dataset": "bace_classification", + # "splitter": "random", + # "task": "classification", + # "k": 5, + # "augment": 0, + # "preprocess": False, + # "sub_sample": 0.0, + # "is_imbalanced": True, + # "n": 4, + # }, + # { + # "dataset": "clintox", + # "splitter": "random", + # "task": "classification", + # "k": 5, + # "augment": 0, + # "preprocess": False, + # "sub_sample": 0.0, + # "is_imbalanced": True, + # "n": 4, + # }, + # { + # "dataset": "tox21", + # "splitter": "random", + # "task": "classification", + # "k": 5, + # "augment": 0, + # "preprocess": False, + # "sub_sample": 0.0, + # "is_imbalanced": True, + # "n": 4, + # }, + ] \ No newline at end of file diff --git a/descriptors.py b/descriptors.py new file mode 100644 index 0000000..03ccf13 --- /dev/null +++ b/descriptors.py @@ -0,0 +1,74 @@ +from typing import List, Dict, Any, Tuple +import deepchem.molnet as mn +import selfies as sf +import deepsmiles as ds +from mhfp.encoder import MHFPEncoder +from smiles_tokenizer import tokenize +from rdkit.Chem.AllChem import MolFromSmiles, MolToSmiles, MolToInchi + +def preprocess(smiles: str, + preproc: bool = False, + preprocess_task: str = "smiles" +) -> str: + """Preprocess SMILES string + Parameters + ---------- + smiles: str + SMILES string to preprocess + preproc: bool + Whether to preprocess + preprocess_task: str + Preprocessing task to perform + valid preprocessing tasks: + - smiles + - selfies + - deepsmiles TODO: Fix deepsmiles + - secfp + TODO: Add more preprocessing tasks + + Returns + ------- + str + Preprocessed SMILES string + """ + if not preproc: + return smiles + if preprocess_task == "smiles": + smiles = MolToSmiles( + MolFromSmiles(smiles), + kekuleSmiles=True, + allBondsExplicit=True, + allHsExplicit=True, + ) + return " ".join(tokenize(smiles)) + + elif preprocess_task == "selfies": + return sf.encoder(smiles) + + elif preprocess_task == "deepsmiles": + # TODO: Fix deepsmiles + # return ds.encode(smiles) + raise NotImplementedError(f"Preprocessing task {preprocess_task} not implemented.") + + elif preprocess_task == "secfp": + return to_secfp(smiles) + + else: + raise NotImplementedError(f"Preprocessing task {preprocess_task} not implemented.") + + +def to_secfp( + smiles: str, + radius: int = 3, + rings: bool = True, + kekulize: bool = True, + min_radius: int = 1, +) -> str: + return " ".join( + [ + str(s) + for s in MHFPEncoder.shingling_from_mol( + MolFromSmiles(smiles), radius, rings, kekulize, min_radius + ) + ] + ) diff --git a/gzip_classifier.py b/gzip_classifier.py index 5bb6265..ea6cfc1 100644 --- a/gzip_classifier.py +++ b/gzip_classifier.py @@ -38,8 +38,6 @@ def classify_(x1, X_train, y_train, k, class_weights=None): def classify(X_train, y_train, X_test, k, class_weights): - preds = [] - cpu_count = multiprocessing.cpu_count() with multiprocessing.Pool(cpu_count) as p: diff --git a/main.py b/main.py index a3505ba..235c085 100644 --- a/main.py +++ b/main.py @@ -1,355 +1,59 @@ -from typing import List, Dict, Any, Tuple -import numpy as np -import deepchem.molnet as mn -import selfies as sf -import deepsmiles as ds - -from mhfp.encoder import MHFPEncoder - -from sklearn.metrics import ( - f1_score, - roc_auc_score, - mean_squared_error, - mean_absolute_error, -) -from sklearn.utils.class_weight import compute_class_weight -from sklearn.model_selection import train_test_split - -from pytablewriter import MarkdownTableWriter - -from rdkit.Chem.AllChem import MolFromSmiles, MolToSmiles, MolToInchi -from rdkit import rdBase - -blocker = rdBase.BlockLogs() - -from gzip_classifier import classify -from gzip_regressor import regress -from smiles_tokenizer import tokenize - - -def to_secfp( - smiles: str, - radius: int = 3, - rings: bool = True, - kekulize: bool = True, - min_radius: int = 1, -) -> str: - return " ".join( - [ - str(s) - for s in MHFPEncoder.shingling_from_mol( - MolFromSmiles(smiles), radius, rings, kekulize, min_radius - ) - ] - ) - - -def write_table(results: List) -> None: - values = [] - - for config, result in results: - values.append( - [ - config["dataset"], - config["splitter"], - result["valid_auroc"], - result["valid_f1"], - result["test_auroc"], - result["test_f1"], - ] - ) - - writer = MarkdownTableWriter( - table_name="Results Gzip-based Molecular Classification", - headers=[ - "Data Set", - "Split", - "AUROC/RMSE (Valid)", - "F1/MAE (Valid)", - "AUROC/RMSE (Test)", - "F1/MAE (Test)", - ], - value_matrix=values, - ) - - with open("RESULTS.md", "w+") as f: - writer.stream = f - writer.write_table() - - -def sub_sample( - X: np.array, Y: np.array, p: float = 0.5, seed=666 -) -> Tuple[np.array, np.array]: - X_sample, _, y_sample, _ = train_test_split( - X, - Y, - train_size=int(p * len(X)), - stratify=Y, - random_state=seed, - ) - - return X_sample, y_sample - - -def augment(X: np.array, Y: np.array, n: int = 5) -> Tuple[np.array, np.array]: - X_aug = [] - y_aug = [] - - for x, y in zip(X, Y): - mol = MolFromSmiles(x) - for _ in range(n): - x_rand = MolToSmiles( - mol, - canonical=False, - doRandom=True, - kekuleSmiles=True, - allBondsExplicit=True, - allHsExplicit=True, - ) - - X_aug.append(x_rand) - y_aug.append(y) - - return np.array(X_aug), np.array(y_aug) - - -def preprocess(smiles: str, preproc: bool = False) -> str: - if not preproc: - return smiles - return to_secfp(smiles, min_radius=0) - return sf.encoder(smiles, strict=False) - return ds.Converter(rings=True, branches=True).encode(smiles) - - smiles = MolToSmiles( - MolFromSmiles(smiles), - kekuleSmiles=True, - allBondsExplicit=True, - allHsExplicit=True, - ) - - return " ".join(tokenize(smiles)) - - -def molnet_loader( - name: str, preproc: bool = False, **kwargs -) -> List[Tuple[str, np.array, np.array, np.array]]: - mn_loader = getattr(mn, f"load_{name}") - dc_set = mn_loader(**kwargs) - - tasks, dataset, _ = dc_set - train, valid, test = dataset - - X_train = np.array([preprocess(x, preproc) for x in train.ids]) - y_train = np.array(train.y, dtype=int) - - X_valid = np.array([preprocess(x, preproc) for x in valid.ids]) - y_valid = np.array(valid.y, dtype=int) - - X_test = np.array([preprocess(x, preproc) for x in test.ids]) - y_test = np.array(test.y, dtype=int) - - return tasks, X_train, y_train, X_valid, y_valid, X_test, y_test - - -def benchmark(configs: List[Dict[str, Any]]) -> None: - results = [] - - for config in configs: - # Load data sets - - run_results = [] - for _ in range(config["n"]): - tasks, X_train, y_train, X_valid, y_valid, X_test, y_test = molnet_loader( - config["dataset"], - splitter=config["splitter"], - preproc=config["preprocess"], - reload=False, - transformers=[], - ) - - if config["augment"] > 0: - X_train, y_train = augment(X_train, y_train, config["augment"]) - - if config["sub_sample"] > 0.0: - X_train, y_train = sub_sample(X_train, y_train, config["sub_sample"]) - - if config["task"] == "classification": - # Get class weights - class_weights = [] - if config["is_imbalanced"]: - for y_task in y_train.T: - class_weights.append( - compute_class_weight( - "balanced", classes=sorted(list(set(y_task))), y=y_task - ) - ) - - # Run classification - valid_preds = classify( - X_train, y_train, X_valid, config["k"], class_weights - ) - test_preds = classify( - X_train, y_train, X_test, config["k"], class_weights - ) - - # Compute metrics - valid_auroc = roc_auc_score(y_valid, valid_preds) - valid_f1 = f1_score( - y_valid, - valid_preds, - average="micro", - ) - test_auroc = roc_auc_score(y_test, test_preds) - test_f1 = f1_score( - y_test, - test_preds, - average="micro", - ) - - print(f"\n{config['dataset']} ({len(tasks)} tasks)") - print(config) - print( - f"Valid AUROC: {valid_auroc}, Valid F1: {valid_f1} , Test AUROC: {test_auroc}, Test F1: {test_f1}" - ) - - run_results.append([valid_auroc, valid_f1, test_auroc, test_f1]) - else: - valid_preds = regress(X_train, y_train, X_valid, config["k"]) - test_preds = regress(X_train, y_train, X_test, config["k"]) - - # Compute metrics - valid_rmse = mean_squared_error(y_valid, valid_preds, squared=False) - valid_mae = mean_absolute_error( - y_valid, - valid_preds, - ) - test_rmse = mean_squared_error(y_test, test_preds, squared=False) - test_mae = mean_absolute_error( - y_test, - test_preds, - ) - - print(f"\n{config['dataset']} ({len(tasks)} tasks)") - print(config) - print( - f"Valid RMSE: {valid_rmse}, Valid MAE: {valid_mae} , Test RMSE: {test_rmse}, Test MAE: {test_mae}" - ) - - run_results.append([valid_rmse, valid_mae, test_rmse, test_mae]) - - run_results = np.array(run_results) - results_means = np.mean(run_results, axis=0) - results_stds = np.std(run_results, axis=0) - - results.append( - ( - config, - { - "valid_auroc": f"{round(results_means[0], 3)} +/- {round(results_stds[0], 3)}", - "valid_f1": f"{round(results_means[1], 3)} +/- {round(results_stds[0], 3)}", - "test_auroc": f"{round(results_means[2], 3)} +/- {round(results_stds[0], 3)}", - "test_f1": f"{round(results_means[3], 3)} +/- {round(results_stds[0], 3)}", - }, - ) +from benchmark import benchmark +from configs import dataset_configs +from models import Models, gzip_classify, gzip_regress + + +def main(model=None, preprocess_task="smiles", tasks="all"): + models = Models() + regress = None + classify = None + + # create a list of models, classifiers and regressors + if model is None: + models = ["gzip"] + classify = gzip_classify + regress = gzip_regress + elif isinstance(model, str): + classify = [models.classifiers[model]] + regress = [models.regressors[model]] + models = [model] + elif isinstance(model, list): + # assume a list of strings + classify = [models.classifiers[m] for m in model] + regress = [models.regressors[m] for m in model] + models = model + + if tasks == "all": + print("Performing regression and classification benchmarks") + elif tasks == "regression": + print("Performing regression benchmarks") + classify = [None for _ in classify] + elif tasks == "classification": + print("Performing classification benchmarks") + regress = [None for _ in regress] + + # loop through the models and run the benchmark + for model, classifier, regressor, in zip(models, classify, regress): + benchmark( + dataset_configs, + model=model, + classify=classifier, + regress=regressor, + preprocess_task=preprocess_task, ) - write_table(results) - - -def main(): - benchmark( - [ - { - "dataset": "freesolv", - "splitter": "random", - "task": "regression", - "k": 10, - "augment": 0, - "preprocess": True, - "sub_sample": 0.0, - "is_imbalanced": True, - "n": 4, - }, - { - "dataset": "delaney", - "splitter": "random", - "task": "regression", - "k": 10, - "augment": 0, - "preprocess": True, - "sub_sample": 0.0, - "is_imbalanced": True, - "n": 4, - }, - { - "dataset": "lipo", - "splitter": "random", - "task": "regression", - "k": 10, - "augment": 0, - "preprocess": True, - "sub_sample": 0.0, - "is_imbalanced": True, - "n": 4, - }, - # { - # "dataset": "sider", - # "splitter": "random", - # "task": "classification", - # "k": 5, - # "augment": 0, - # "preprocess": False, - # "sub_sample": 0.0, - # "is_imbalanced": True, - # "n": 4, - # }, - # { - # "dataset": "bbbp", - # "splitter": "scaffold", - # "task": "classification", - # "k": 5, - # "augment": 0, - # "preprocess": False, - # "sub_sample": 0.0, - # "is_imbalanced": True, - # "n": 1, - # }, - # { - # "dataset": "bace_classification", - # "splitter": "random", - # "task": "classification", - # "k": 5, - # "augment": 0, - # "preprocess": False, - # "sub_sample": 0.0, - # "is_imbalanced": True, - # "n": 4, - # }, - # { - # "dataset": "clintox", - # "splitter": "random", - # "task": "classification", - # "k": 5, - # "augment": 0, - # "preprocess": False, - # "sub_sample": 0.0, - # "is_imbalanced": True, - # "n": 4, - # }, - # { - # "dataset": "tox21", - # "splitter": "random", - # "task": "classification", - # "k": 5, - # "augment": 0, - # "preprocess": False, - # "sub_sample": 0.0, - # "is_imbalanced": True, - # "n": 4, - # }, - ] - ) - if __name__ == "__main__": - main() + """ Run the benchmarks + """ + from argparse import ArgumentParser + + # command line arguments + arg_parser = ArgumentParser() + arg_parser.add_argument("--tasks", type=str, default="all") + arg_parser.add_argument("--model", type=str, default="gzip") + args = arg_parser.parse_args() + main( + model=args.model, + tasks=args.tasks + ) diff --git a/models.py b/models.py new file mode 100644 index 0000000..b71c0ed --- /dev/null +++ b/models.py @@ -0,0 +1,22 @@ +from classifiers import classifiers, gzip_classify +from regressors import regressors, gzip_regress + + +class Models: + def __init__(self): + self.classifiers = classifiers + self.regressors = regressors + + def get_classifier(self, name): + return self.classifiers[name] + + def get_regressor(self, name): + return self.regressors[name] + + def task(self, task: str): + if task == "classification": + return self.classifiers + elif task == "regression": + return self.regressors + else: + raise ValueError(f"Unknown task {task}.") \ No newline at end of file diff --git a/regressors.py b/regressors.py new file mode 100644 index 0000000..64116e6 --- /dev/null +++ b/regressors.py @@ -0,0 +1,6 @@ +from gzip_regressor import regress as gzip_regress + +regressors = { + "gzip": gzip_regress, +} + diff --git a/tests.md b/tests.md new file mode 100644 index 0000000..e69de29