From 8136f36313f955dd4108502b599f68c708f7a502 Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Mon, 23 Jun 2025 13:28:34 -0400 Subject: [PATCH 1/7] add click command to download mqtl files --- Dockerfile | 10 ++++++- app/__init__.py | 2 ++ app/commands.py | 79 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 app/commands.py diff --git a/Dockerfile b/Dockerfile index e006f5f..bfb6aa3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -42,7 +42,8 @@ RUN apt-get update && \ liblapack-dev \ libpcre2-dev \ xauth \ - vim + vim \ + zip # install plink RUN wget -O /tmp/plink.zip https://s3.amazonaws.com/plink1-assets/plink_linux_x86_64_20241022.zip \ @@ -50,6 +51,13 @@ RUN wget -O /tmp/plink.zip https://s3.amazonaws.com/plink1-assets/plink_linux_x8 && mv /tmp/plink /usr/local/bin/plink \ && rm /tmp/* +# install smr for mqtl querying +RUN wget https://yanglab.westlake.edu.cn/software/smr/download/smr-1.3.2-linux-x86_64.zip \ + && unzip smr-1.3.2-linux-x86_64.zip \ + && mv smr-1.3.2-linux-x86_64/smr /usr/local/bin/ \ + && rm smr-1.3.2-linux-x86_64.zip + + # Install Poetry # https://github.com/python-poetry/poetry/issues/6397#issuecomment-1236327500 ENV POETRY_HOME=/opt/poetry diff --git a/app/__init__.py b/app/__init__.py index ab3bd07..631cdbf 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -9,6 +9,7 @@ from flask_sitemap import Sitemap from flask_talisman import Talisman +from app.commands import register_cli from app.config import BaseConfig, DevConfig, ProdConfig ConfigClass = ProdConfig if BaseConfig.APP_ENV == "production" else DevConfig @@ -64,6 +65,7 @@ def create_app(config_class=ConfigClass): talisman.init_app(app, content_security_policy=app.config["CSP_POLICY"]) mongo.init_app(app) celery_init_app(app) + register_cli(app) with app.app_context(): from app import routes diff --git a/app/commands.py b/app/commands.py new file mode 100644 index 
0000000..18ed8d3 --- /dev/null +++ b/app/commands.py @@ -0,0 +1,79 @@ +from os import path +from pathlib import Path +import shutil +import tarfile +import zipfile + +import click +from flask import Flask, current_app +import requests +import tqdm + + +def register_cli(app: Flask): + @app.cli.command(name="download-smr-eqtl", help="Download mQTL files") + @click.option( + "--lite", + is_flag=True, + help="Download 'lite' version of the McRae et al. mQTL data (only SNPs with P < 1e-5 are included; 241 MB)", + ) + def download_smr_mqtl(lite): + # https://yanglab.westlake.edu.cn/software/smr/#mQTLsummarydata + file_list = [ + # Whole blood mQTL data set used in Hannon et al. (2018 AJHG).(121MB) + "https://yanglab.westlake.edu.cn/data/SMR/US_mQTLS_SMR_format.zip", + # 42MB + "https://yanglab.westlake.edu.cn/data/SMR/Hannon_Blood_dataset1.zip", + # 25MB + "https://yanglab.westlake.edu.cn/data/SMR/Hannon_Blood_dataset2.zip", + # https://yanglab.westlake.edu.cn/data/SMR/Hannon_FetalBrain.zip (4.8MB) + "https://yanglab.westlake.edu.cn/data/SMR/Hannon_FetalBrain.zip", + # mQTL summary data from a meta-analysis of samples of East Asian ancestry. (2.5GB) + "https://yanglab.westlake.edu.cn/data/SMR/EAS.tar.gz", + # mQTL summary data from a meta-analysis of samples of European ancestry. (3.7GB) + "https://yanglab.westlake.edu.cn/data/SMR/EUR.tar.gz", + # Brain-mMeta mQTL summary data (Qi et al. 2018 Nat Commun) in SMR binary (BESD) format: Brain-mMeta.tar.gz (893 MB) + "https://yanglab.westlake.edu.cn/data/SMR/Brain-mMeta.tar.gz", + ] + ( + # Lite version of the McRae et al. mQTL data (only SNPs with P < 1e-5 are included; 241 MB) + ["https://yanglab.westlake.edu.cn/data/SMR/LBC_BSGS_meta_lite.tar.gz"] + if lite + else [ + # McRae et al. 
mQTL summary data (7.5 GB) + "https://yanglab.westlake.edu.cn/data/SMR/LBC_BSGS_meta.tar.gz", + ] + ) + + data_dir = path.join(path.dirname(path.dirname(__file__)), "data", "smr_eqtl") + Path(data_dir).mkdir(exist_ok=True, parents=True) + + file_list.reverse() + + for file_url in file_list: + filename = path.basename(file_url) + _, ext = path.splitext(filename) + tmp_save_path = path.join(data_dir, filename) + with requests.get(file_url, stream=True) as r: + current_app.logger.info(f"Downloading {filename}...") + if r.status_code != 200: + r.raise_for_status() + raise RuntimeError( + f"Request to {file_url} returned status code {r.status_code}" + ) + file_size = int(r.headers.get("Content-Length", 0)) + desc = "(Unknown total file size)" if file_size == 0 else "" + with tqdm.tqdm.wrapattr( + r.raw, "read", total=file_size, desc=desc + ) as r_raw: + with open(tmp_save_path, "wb") as fd: + shutil.copyfileobj(r_raw, fd) + + if ext == ".zip": + with zipfile.ZipFile(tmp_save_path) as zf: + zf.extractall(data_dir) + Path(tmp_save_path).unlink() + elif ext == ".gz": + with tarfile.open(tmp_save_path) as tf: + tf.extractall(data_dir, filter="data") + Path(tmp_save_path).unlink() + tf.close() From f76a819c0f47fb8fd3874afae9caf0e8f66b243e Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Tue, 15 Jul 2025 10:36:42 -0400 Subject: [PATCH 2/7] sketch out mqtl query function --- app/commands.py | 11 +++-- app/utils/smr.py | 123 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 4 deletions(-) create mode 100644 app/utils/smr.py diff --git a/app/commands.py b/app/commands.py index 18ed8d3..1b4330b 100644 --- a/app/commands.py +++ b/app/commands.py @@ -11,7 +11,7 @@ def register_cli(app: Flask): - @app.cli.command(name="download-smr-eqtl", help="Download mQTL files") + @app.cli.command(name="download-smr-mqtl", help="Download mQTL files") @click.option( "--lite", is_flag=True, @@ -21,6 +21,7 @@ def 
download_smr_mqtl(lite): # https://yanglab.westlake.edu.cn/software/smr/#mQTLsummarydata file_list = [ # Whole blood mQTL data set used in Hannon et al. (2018 AJHG).(121MB) + # Saved as US_mQTLS_SMR_format "https://yanglab.westlake.edu.cn/data/SMR/US_mQTLS_SMR_format.zip", # 42MB "https://yanglab.westlake.edu.cn/data/SMR/Hannon_Blood_dataset1.zip", @@ -29,13 +30,17 @@ def download_smr_mqtl(lite): # https://yanglab.westlake.edu.cn/data/SMR/Hannon_FetalBrain.zip (4.8MB) "https://yanglab.westlake.edu.cn/data/SMR/Hannon_FetalBrain.zip", # mQTL summary data from a meta-analysis of samples of East Asian ancestry. (2.5GB) + # no particular tissue? saved as EAS "https://yanglab.westlake.edu.cn/data/SMR/EAS.tar.gz", # mQTL summary data from a meta-analysis of samples of European ancestry. (3.7GB) + # no particular tissue? saved as EUR "https://yanglab.westlake.edu.cn/data/SMR/EUR.tar.gz", # Brain-mMeta mQTL summary data (Qi et al. 2018 Nat Commun) in SMR binary (BESD) format: Brain-mMeta.tar.gz (893 MB) + # brain (from meta-analysis) "https://yanglab.westlake.edu.cn/data/SMR/Brain-mMeta.tar.gz", ] + ( # Lite version of the McRae et al. 
mQTL data (only SNPs with P < 1e-5 are included; 241 MB) + # peripheral blood ["https://yanglab.westlake.edu.cn/data/SMR/LBC_BSGS_meta_lite.tar.gz"] if lite else [ @@ -44,11 +49,9 @@ def download_smr_mqtl(lite): ] ) - data_dir = path.join(path.dirname(path.dirname(__file__)), "data", "smr_eqtl") + data_dir = path.join(path.dirname(path.dirname(__file__)), "data", "smr_mqtl") Path(data_dir).mkdir(exist_ok=True, parents=True) - file_list.reverse() - for file_url in file_list: filename = path.basename(file_url) _, ext = path.splitext(filename) diff --git a/app/utils/smr.py b/app/utils/smr.py new file mode 100644 index 0000000..908c73e --- /dev/null +++ b/app/utils/smr.py @@ -0,0 +1,123 @@ +import os +import re +import subprocess +from tempfile import NamedTemporaryFile +from typing import List, TypedDict + +import pandas as pd + +curr_dir = os.path.dirname(__file__) +data_dir = os.path.join(os.path.dirname(os.path.dirname(curr_dir)), "data", "smr_mqtl") + + +class SMRDataset(TypedDict): + base_filename: str + by_chr: bool + + +datasets: dict[str, SMRDataset] = { + "Brain-mMeta": { + "by_chr": False, + "base_filename": "Brain-mMeta", + }, + "EAS": { + "by_chr": True, + "base_filename": "EAS", + }, + "EUR": { + "by_chr": True, + "base_filename": "EUR", + }, + "Hannon et al. Blood dataset1": { + "by_chr": False, + "base_filename": "Aberdeen_Blood", + }, + "Hannon et al. Blood dataset2": { + "by_chr": False, + "base_filename": "UCL_Blood", + }, + "Hannon et al. 
FetalBrain": { + "by_chr": False, + "base_filename": "FB_Brain", + }, + # TODO: confirm + "LBC_BSGS_meta": { + "by_chr": True, + "base_filename": "bl_mqtl", + }, + "LBC_BSGS_meta_lite": { + "by_chr": True, + "base_filename": "bl_mqtl_lite", + }, + "US_mQTLS_SMR_format": { + "by_chr": False, + "base_filename": "US_Blood", + }, +} + + +def query_smr(chr: int, snps: List[str], dataset: str, thresh: float = 5.0e-8): + """Query mqtl data in smr format + + :param chr: The chromosome to query + :type chr: int + :param snps: A list of SNPS in format chr{chr}_{bp}_ref_alt + :type snps: List[str] + :param snps_rs: A list of the *same* SNPs in rsid format + :type snps_rs: List[str] + :param dataset: The dataset to query + :type dataset: str + :param thresh: The p-value threshold, defaults to 5e-8 + :type thresh: float + :raises FileNotFoundError: If the dataset does not exist + """ + if dataset not in datasets.keys(): + raise FileNotFoundError(f"Dataset {dataset} does not exist!") + dataset_dir = os.path.join(data_dir, dataset) + base_filepath = os.path.join(dataset_dir, dataset) + if datasets[dataset]["by_chr"]: + base_filepath = f"{base_filepath}_chr{chr}" + + regex = r"_(\d+)_" + + snp_poses = [int(re.findall(regex, snp)[0]) for snp in snps] + + start = min(snp_poses) // 1000 + end = max(snp_poses) // 1000 + 1 + + with NamedTemporaryFile("w") as f: + query = [ + "smr", + "--beqtl-summary", + base_filepath, + "--query", + str(thresh), + "--snp-chr", + str(chr), + "--from-snp-kb", + str(start), + "--to-snp-kb", + str(end), + "--out", + f.name, + ] + + subprocess.run(query, check=True) + + res = pd.read_csv(f"{f.name}.txt", sep="\t") + + res.sort_values(by="BP", inplace=True) + res["full_snp"] = res.apply( + lambda x: f"chr{str(x['Chr'])}" + + "_" + + str(x["BP"]) + + "_" + + x["A1"] + + "_" + + x["A2"], + axis=1, + ) + + filtered = res[res["full_snp"].isin(snps)] + + return filtered From 5ece539cf1c8c6792584c8628152a60c69e606b4 Mon Sep 17 00:00:00 2001 From: cklamann 
<12862284+cklamann@users.noreply.github.com> Date: Mon, 21 Jul 2025 11:26:43 -0400 Subject: [PATCH 3/7] add test for smr query --- .gitignore | 2 +- app/utils/smr.py | 87 +++++++++++++++++++----------- tests/utils/test_query_smr.py | 99 +++++++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+), 31 deletions(-) create mode 100644 tests/utils/test_query_smr.py diff --git a/.gitignore b/.gitignore index 35223bf..d0124b2 100644 --- a/.gitignore +++ b/.gitignore @@ -137,7 +137,7 @@ data/gwas.public.txt.gz data/uk_biobank/ data/mQTL_eQTM_SNPs_SAKNORM_For_CoLoc*.txt data/Note.txt -*test* +misc/**/test* data/slc9a3_gwas_mqtl.html data/*lookup_table.txt.gz .Rproj.user diff --git a/app/utils/smr.py b/app/utils/smr.py index 908c73e..461b9a2 100644 --- a/app/utils/smr.py +++ b/app/utils/smr.py @@ -56,20 +56,65 @@ class SMRDataset(TypedDict): } -def query_smr(chr: int, snps: List[str], dataset: str, thresh: float = 5.0e-8): +def run_smr_query( + query_path: str, chr: int, thresh: float, start: int, end: int +) -> pd.DataFrame: + """Query the data, save to a temporary file, and return as a dataframe + + :param query_path: The path to the data to be queried + :type query_path: str + :param chr: The chromosome to query + :type chr: int + :param thresh: The p-value threshold + :type thresh: float + :param start: The start bp to query + :type start: int + :param end: Then end bp to query + :type end: int + :return: The returned SNPs and pvalues + :rtype: pd.DataFrame + """ + with NamedTemporaryFile("w") as f: + query = [ + "smr", + "--beqtl-summary", + query_path, + "--query", + str(thresh), + "--snp-chr", + str(chr), + "--from-snp-kb", + str(start), + "--to-snp-kb", + str(end), + "--out", + f.name, + ] + + subprocess.run(query, check=True) + + return pd.read_csv(f"{f.name}.txt", sep="\t") + + +def query_smr( + chr: int, snps: List[str], dataset: str, thresh: float = 5.0e-8 +) -> pd.DataFrame: """Query mqtl data in smr format :param chr: The chromosome to query :type 
chr: int :param snps: A list of SNPS in format chr{chr}_{bp}_ref_alt :type snps: List[str] - :param snps_rs: A list of the *same* SNPs in rsid format - :type snps_rs: List[str] :param dataset: The dataset to query :type dataset: str :param thresh: The p-value threshold, defaults to 5e-8 :type thresh: float :raises FileNotFoundError: If the dataset does not exist + :return: The SNPs and pvalues as a dataframe with the following columns: + 'SNP', 'Chr', 'BP', 'A1', 'A2', 'Freq', 'Probe', 'Probe_Chr', + 'Probe_bp', 'Gene', 'Orientation', 'b', 'SE', 'p', 'full_snp' + Note that 'full_snp' is a combined column that takes the same format as those in ``snps`` + :rtype: pd.DataFrame """ if dataset not in datasets.keys(): raise FileNotFoundError(f"Dataset {dataset} does not exist!") @@ -85,39 +130,21 @@ def query_smr(chr: int, snps: List[str], dataset: str, thresh: float = 5.0e-8): start = min(snp_poses) // 1000 end = max(snp_poses) // 1000 + 1 - with NamedTemporaryFile("w") as f: - query = [ - "smr", - "--beqtl-summary", - base_filepath, - "--query", - str(thresh), - "--snp-chr", - str(chr), - "--from-snp-kb", - str(start), - "--to-snp-kb", - str(end), - "--out", - f.name, - ] - - subprocess.run(query, check=True) - - res = pd.read_csv(f"{f.name}.txt", sep="\t") + query_result = run_smr_query( + query_path=base_filepath, chr=chr, thresh=thresh, start=start, end=end + ) - res.sort_values(by="BP", inplace=True) - res["full_snp"] = res.apply( - lambda x: f"chr{str(x['Chr'])}" + query_result["full_snp"] = query_result.apply( + lambda df: f"chr{str(df['Chr'])}" + "_" - + str(x["BP"]) + + str(df["BP"]) + "_" - + x["A1"] + + df["A1"] + "_" - + x["A2"], + + df["A2"], axis=1, ) - filtered = res[res["full_snp"].isin(snps)] + filtered = query_result[query_result["full_snp"].isin(snps)] return filtered diff --git a/tests/utils/test_query_smr.py b/tests/utils/test_query_smr.py new file mode 100644 index 0000000..9a3cae3 --- /dev/null +++ b/tests/utils/test_query_smr.py @@ -0,0 +1,99 @@ 
+from unittest.mock import patch, Mock + +import pandas as pd + +from app.utils.smr import query_smr + +mock_result = pd.DataFrame( + [ + [ + "chr1:982513", + 1, + 982513, + "T", + "C", + 0.074442, + "cg24669183", + 1, + 534242, + pd.NA, + "N", + 0.127188, + 0.058496, + 0.029684, + ], + [ + "chr1:982513", + 1, + 982513, + "T", + "C", + 0.074442, + "cg12726839", + 1, + 845311, + pd.NA, + "N", + 0.180720, + 0.058765, + 0.002103, + ], + [ + "chr1:982513", + 1, + 982513, + "A", + "T", + 0.074442, + "cg12726839", + 1, + 845311, + pd.NA, + "N", + 0.180720, + 0.058765, + 0.002103, + ], + ], + columns=[ + "SNP", + "Chr", + "BP", + "A1", + "A2", + "Freq", + "Probe", + "Probe_Chr", + "Probe_bp", + "Gene", + "Orientation", + "b", + "SE", + "p", + ], +) + + +@patch("app.utils.smr.run_smr_query", return_value=mock_result) +def test_query_smr(mock: Mock): + """Test the query function with mock data, since smr files are not committed to source, + ensuring that filtering and query construction functions are correct.
+ """ + chr = 1 + snps = ["chr1_982513_T_C"] + dataset = "EUR" + thresh = 1 + + res = query_smr(chr, snps, dataset, thresh) + + assert len(res) == 2 + + assert len(res["full_snp"].isin(snps)) == 2 + + mock.assert_called_once_with( + query_path="/code/data/smr_mqtl/EUR/EUR_chr1", + thresh=1, + chr=1, + start=982, + end=983, + ) From cd0778181be91ced67207ab6c3fa30220816739f Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Mon, 21 Jul 2025 11:44:19 -0400 Subject: [PATCH 4/7] add assembly to smr query --- app/utils/smr.py | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/app/utils/smr.py b/app/utils/smr.py index 461b9a2..6a78bac 100644 --- a/app/utils/smr.py +++ b/app/utils/smr.py @@ -2,7 +2,7 @@ import re import subprocess from tempfile import NamedTemporaryFile -from typing import List, TypedDict +from typing import List, Literal, TypedDict import pandas as pd @@ -11,45 +11,55 @@ class SMRDataset(TypedDict): + assembly: Literal["hg19", "hg38"] base_filename: str by_chr: bool -datasets: dict[str, SMRDataset] = { +smr_datasets: dict[str, SMRDataset] = { "Brain-mMeta": { + "assembly": "hg38", "by_chr": False, "base_filename": "Brain-mMeta", }, "EAS": { + "assembly": "hg38", "by_chr": True, "base_filename": "EAS", }, "EUR": { + "assembly": "hg38", "by_chr": True, "base_filename": "EUR", }, "Hannon et al. Blood dataset1": { + "assembly": "hg19", "by_chr": False, "base_filename": "Aberdeen_Blood", }, "Hannon et al. Blood dataset2": { + "assembly": "hg19", "by_chr": False, "base_filename": "UCL_Blood", }, "Hannon et al. 
FetalBrain": { + "assembly": "hg19", "by_chr": False, "base_filename": "FB_Brain", }, # TODO: confirm "LBC_BSGS_meta": { + "assembly": "hg19", "by_chr": True, "base_filename": "bl_mqtl", }, "LBC_BSGS_meta_lite": { + "assembly": "hg19", "by_chr": True, "base_filename": "bl_mqtl_lite", }, "US_mQTLS_SMR_format": { + "assembly": "hg19", "by_chr": False, "base_filename": "US_Blood", }, @@ -97,7 +107,11 @@ def run_smr_query( def query_smr( - chr: int, snps: List[str], dataset: str, thresh: float = 5.0e-8 + chr: int, + snps: List[str], + dataset: str, + thresh: float = 5.0e-8, + assembly: Literal["hg19", "hg38"] = "hg38", ) -> pd.DataFrame: """Query mqtl data in smr format @@ -109,18 +123,27 @@ def query_smr( :type dataset: str :param thresh: The p-value threshold, defaults to 5e-8 :type thresh: float + :param assembly: The genome assembly to use, defaults to "hg38" + :type assembly: Literal["hg19", "hg38"] :raises FileNotFoundError: If the dataset does not exist + :raises ValueError: If the requested assembly does not match the dataset assembly :return: The SNPs and pvalues as a dataframe with the following columns: 'SNP', 'Chr', 'BP', 'A1', 'A2', 'Freq', 'Probe', 'Probe_Chr', 'Probe_bp', 'Gene', 'Orientation', 'b', 'SE', 'p', 'full_snp' Note that 'full_snp' is a combined column that takes the same format as those in ``snps`` :rtype: pd.DataFrame """ - if dataset not in datasets.keys(): + if dataset not in smr_datasets.keys(): raise FileNotFoundError(f"Dataset {dataset} does not exist!") + + if smr_datasets[dataset]["assembly"] != assembly: + raise ValueError( + f"Dataset {dataset} uses {smr_datasets[dataset]['assembly']} but {assembly} was requested!" 
+ ) + dataset_dir = os.path.join(data_dir, dataset) base_filepath = os.path.join(dataset_dir, dataset) - if datasets[dataset]["by_chr"]: + if smr_datasets[dataset]["by_chr"]: base_filepath = f"{base_filepath}_chr{chr}" regex = r"_(\d+)_" @@ -131,7 +154,11 @@ def query_smr( end = max(snp_poses) // 1000 + 1 query_result = run_smr_query( - query_path=base_filepath, chr=chr, thresh=thresh, start=start, end=end + query_path=base_filepath, + chr=chr, + thresh=thresh, + start=start, + end=end, ) query_result["full_snp"] = query_result.apply( From e32b1bfd02113c5b735e0ae14412ff28e423992d Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Wed, 23 Jul 2025 11:57:06 -0400 Subject: [PATCH 5/7] confirm lbc_bsgs filename --- app/utils/smr.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/utils/smr.py b/app/utils/smr.py index 6a78bac..ab6395b 100644 --- a/app/utils/smr.py +++ b/app/utils/smr.py @@ -47,7 +47,6 @@ class SMRDataset(TypedDict): "by_chr": False, "base_filename": "FB_Brain", }, - # TODO: confirm "LBC_BSGS_meta": { "assembly": "hg19", "by_chr": True, From 92e8b0e68f356473f838815477f1bbb915c80069 Mon Sep 17 00:00:00 2001 From: cklamann <12862284+cklamann@users.noreply.github.com> Date: Tue, 26 Aug 2025 13:13:53 -0400 Subject: [PATCH 6/7] add assembly to smr test --- tests/utils/test_query_smr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/test_query_smr.py b/tests/utils/test_query_smr.py index 9a3cae3..34a113a 100644 --- a/tests/utils/test_query_smr.py +++ b/tests/utils/test_query_smr.py @@ -84,7 +84,7 @@ def test_query_smr(mock: Mock): dataset = "EUR" thresh = 1 - res = query_smr(chr, snps, dataset, thresh) + res = query_smr(chr, snps, dataset, thresh, "hg38") assert len(res) == 2 From 42de7bc990c446da015f7c40c9d0ec885f194517 Mon Sep 17 00:00:00 2001 From: Mackenzie Ian Frew Date: Wed, 14 Jan 2026 15:12:52 -0500 Subject: [PATCH 7/7] Move SMR fetch to helper script --- Dockerfile | 10 
+--------- setup/setup-smr.sh | 7 +++++++ 2 files changed, 8 insertions(+), 9 deletions(-) create mode 100644 setup/setup-smr.sh diff --git a/Dockerfile b/Dockerfile index 3dbf11e..b4e1643 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,8 +21,7 @@ RUN apt-get update && \ r-base-dev \ xauth \ vim \ - wget \ - zip + wget # install plink RUN wget -O /tmp/plink.zip https://s3.amazonaws.com/plink1-assets/plink_linux_x86_64_20241022.zip \ @@ -39,13 +38,6 @@ RUN curl -f -L -O https://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/liftOve mkdir /usr/local/share/liftOver && \ mv hg38ToHg19.over.chain.gz hg19ToHg38.over.chain.gz /usr/local/share/liftOver/ -# install smr for mqtl querying -RUN wget https://yanglab.westlake.edu.cn/software/smr/download/smr-1.3.2-linux-x86_64.zip \ - && unzip smr-1.3.2-linux-x86_64.zip \ - && mv smr-1.3.2-linux-x86_64/smr /usr/local/bin/ \ - && rm smr-1.3.2-linux-x86_64.zip - - # Install Poetry # https://github.com/python-poetry/poetry/issues/6397#issuecomment-1236327500 ENV POETRY_HOME=/opt/poetry diff --git a/setup/setup-smr.sh b/setup/setup-smr.sh new file mode 100644 index 0000000..656fd2f --- /dev/null +++ b/setup/setup-smr.sh @@ -0,0 +1,7 @@ +#!/bin/bash +set -e + +wget https://yanglab.westlake.edu.cn/software/smr/download/smr-1.3.2-linux-x86_64.zip +unzip smr-1.3.2-linux-x86_64.zip +mv smr-1.3.2-linux-x86_64/smr misc/ +rm smr-1.3.2-linux-x86_64.zip