diff --git a/app/colocalization/payload.py b/app/colocalization/payload.py
index b0b55d6..34f71ad 100644
--- a/app/colocalization/payload.py
+++ b/app/colocalization/payload.py
@@ -15,8 +15,8 @@
 from app.utils.gtex import get_gtex_snp_matches, gene_names
 from app.utils import (
     get_session_filepath,
-    parse_region_text,
 )
+from app.utils.helpers import parse_region_text
 
 
 class SessionFiles:
diff --git a/app/colocalization/stages/read_gwas_file.py b/app/colocalization/stages/read_gwas_file.py
index d5fa82c..88f233b 100644
--- a/app/colocalization/stages/read_gwas_file.py
+++ b/app/colocalization/stages/read_gwas_file.py
@@ -229,11 +229,10 @@ def _infer_gwas_columns(
         self, payload: SessionPayload, gwas_data: pd.DataFrame
     ) -> pd.DataFrame:
         """
-        Given that the user wishes to infer the variant information using dbSNP,
-        validate and rename the columns in the DataFrame accordingly, if possible.
-
-        This assumes that the only columns provided in the GWAS file are "SNP" and "P", where "SNP" contains
-        snp IDs that can be looked up using dbSNP for variant information.
+        Look up CHROM/POS/REF/ALT from dbSNP based on information in the SNP column,
+        then validate and rename the columns in the DataFrame accordingly, if possible.
+        This assumes that "SNP" contains SNP IDs that can be looked up using dbSNP for variant information.
+        If the dataframe has columns already named CHROM, POS, REF, or ALT, they will be overwritten.
 
         Prerequisite: gwas_data columns are set as default values (eg. "SNP", "P", etc.)
         """
@@ -246,17 +245,25 @@ def _infer_gwas_columns(
         regionstr = payload.get_locus()
 
         variant_list = standardize_snps(list(gwas_data["SNP"]), regionstr, coordinate)
+
         if all(x == "." for x in variant_list):
             raise InvalidUsage(
                 f"None of the variants provided could be mapped to {regionstr}!",
                 status_code=410,
             )
+
         var_df = decompose_variant_list(variant_list)
-        gwas_data = pd.concat([var_df, gwas_data], axis=1)  # add CHROM, POS, REF, ALT
-        gwas_data = gwas_data.loc[
-            [str(x) != "." for x in list(gwas_data["CHROM"])]
-        ].copy()
-        gwas_data.reset_index(drop=True, inplace=True)
+
+        gwas_data = gwas_data.reset_index(drop=True).join(
+            var_df.reset_index(drop=True), lsuffix="_orig"
+        )
+
+        gwas_data.drop(
+            [c for c in gwas_data.columns if c.endswith("_orig")], axis=1, inplace=True
+        )
+
+        gwas_data = gwas_data.loc[gwas_data["CHROM"] != "."]
+
         return gwas_data
 
     def _validate_gwas_file(
diff --git a/app/colocalization/stages/report_gtex_data.py b/app/colocalization/stages/report_gtex_data.py
index 4ac79c1..d1e0dc0 100644
--- a/app/colocalization/stages/report_gtex_data.py
+++ b/app/colocalization/stages/report_gtex_data.py
@@ -51,7 +51,7 @@ def invoke(self, payload: SessionPayload) -> SessionPayload:
         elif gtex_version == "V10":
             gene = "ENSG00000174502.18"
 
-        snp_list = [asnp.split(";")[0] for asnp in payload.gwas_data_kept["SNP"]]
+        snp_list = payload.std_snp_list[payload.gwas_indices_kept]
 
         if len(gtex_tissues) > 0:
             for tissue in tqdm(gtex_tissues):
diff --git a/app/routes.py b/app/routes.py
index b467cb1..b34afcb 100644
--- a/app/routes.py
+++ b/app/routes.py
@@ -9,7 +9,6 @@
 import subprocess
 from datetime import datetime
 import re
-import pysam
 import glob
 import tarfile
 from typing import Dict, Optional, Tuple, List, Union
@@ -34,6 +33,7 @@
 from app.utils.gtex import get_gtex, get_gtex_data
 from app.utils.errors import InvalidUsage, ServerError
 from app.utils.numpy_encoder import NumpyEncoder
+from app.utils.helpers import parse_region_text
 
 from app import ext, mongo
 from app.cache import cache
@@ -148,71 +148,6 @@ def __getattribute__(self, name):
 valid_populations = ["EUR", "AFR", "EAS", "SAS", "AMR", "ASN", "NFE"]
 
 
-####################################
-# Helper functions
-####################################
-def parseRegionText(regiontext, build):
-    if build not in ["hg19", "hg38"]:
-        raise InvalidUsage(f"Unrecognized build: {build}", status_code=410)
-    regiontext = regiontext.strip().replace(" ", "").replace(",", "").replace("chr", "")
-    if not re.search(
-        r"^\d+:\d+-\d+$", regiontext.replace("X", "23").replace("x", "23")
-    ):
-        raise InvalidUsage(
-            f"Invalid coordinate format. {regiontext} e.g. 1:205,000,000-206,000,000",
-            status_code=410,
-        )
-    chrom = regiontext.split(":")[0].lower().replace("chr", "").upper()
-    pos = regiontext.split(":")[1]
-    startbp = pos.split("-")[0].replace(",", "")
-    endbp = pos.split("-")[1].replace(",", "")
-    chromLengths = pd.read_csv(
-        os.path.join(app.config["LF_DATA_FOLDER"], build + "_chrom_lengths.txt"),
-        sep="\t",
-        encoding="utf-8",
-    )
-    chromLengths.set_index("sequence", inplace=True)
-    if chrom in ["X", "x"] or chrom == "23":
-        chrom = 23
-        maxChromLength = chromLengths.loc["chrX", "length"]
-        try:
-            startbp = int(startbp)
-            endbp = int(endbp)
-        except Exception:
-            raise InvalidUsage(
-                f"Invalid coordinates input: {regiontext}", status_code=410
-            )
-    else:
-        try:
-            chrom = int(chrom)
-            if chrom == 23:
-                maxChromLength = chromLengths.loc["chrX", "length"]
-            else:
-                maxChromLength = chromLengths.loc["chr" + str(chrom), "length"]
-            startbp = int(startbp)
-            endbp = int(endbp)
-        except Exception:
-            raise InvalidUsage(
-                f"Invalid coordinates input {regiontext}", status_code=410
-            )
-    if chrom < 1 or chrom > 23:
-        raise InvalidUsage("Chromosome input must be between 1 and 23", status_code=410)
-    elif startbp > endbp:
-        raise InvalidUsage(
-            "Starting chromosome basepair position is greater than ending basepair position",
-            status_code=410,
-        )
-    elif startbp > maxChromLength or endbp > maxChromLength:
-        raise InvalidUsage("Start or end coordinates are out of range", status_code=410)
-    elif (endbp - startbp) > genomicWindowLimit:
-        raise InvalidUsage(
-            f"Entered region size is larger than {genomicWindowLimit / 10**6} Mbp",
-            status_code=410,
-        )
-    else:
-        return chrom, startbp, endbp
-
-
 def parseSNP(snp_text):
     """
     Extract chrom, position from string of format "chr{chrom}:{position}".
@@ -385,85 +320,6 @@ def buildSNPlist(df, chromcol, poscol, refcol, altcol, build):
     return snplist
 
 
-def fetchSNV(chrom, bp, ref, build):
-    variantid = "."
-
-    if ref is None or ref == ".":
-        ref = ""
-
-    # Ensure valid region:
-    try:
-        regiontxt = str(chrom) + ":" + str(bp) + "-" + str(int(bp) + 1)
-    except Exception:
-        raise InvalidUsage(f"Invalid input for {str(chrom):str(bp)}")
-    chrom, startbp, endbp = parseRegionText(regiontxt, build)
-    chrom = str(chrom).replace("chr", "").replace("23", "X")
-
-    # Load dbSNP151 SNP names from region indicated
-    dbsnp_filepath = ""
-    if build.lower() in ["hg38", "grch38"]:
-        suffix = "b38"
-        dbsnp_filepath = os.path.join(
-            app.config["LF_DATA_FOLDER"], "dbSNP151", "GRCh38p7", "All_20180418.vcf.gz"
-        )
-    else:
-        suffix = "b37"
-        dbsnp_filepath = os.path.join(
-            app.config["LF_DATA_FOLDER"], "dbSNP151", "GRCh37p13", "All_20180423.vcf.gz"
-        )
-
-    # Load variant info from dbSNP151
-    tbx = pysam.TabixFile(dbsnp_filepath)
-    varlist = []
-    for row in tbx.fetch(str(chrom), bp - 1, bp):
-        rowlist = str(row).split("\t")
-        chromi = rowlist[0].replace("chr", "")
-        posi = rowlist[1]
-        refi = rowlist[3]
-        alti = rowlist[4]
-        varstr = "_".join([chromi, posi, refi, alti, suffix])
-        varlist.append(varstr)
-
-    # Check if there is a match to an SNV with the provided info
-    if len(varlist) == 1:
-        variantid = varstr
-    elif len(varlist) > 1 and ref != "":
-        for v in varlist:
-            if v.split("_")[2] == ref:
-                variantid = v
-                break
-    return variantid
-
-
-def decomposeVariant(variant_list):
-    """
-    Parameters
-    ----------
-    variantid_list : list
-        list of str standardized variants in chr_pos_ref_alt_build format
-
-    Returns
-    -------
-    A pandas.dataframe with chromosome, pos, reference and alternate alleles columns
-    """
-    chromlist = [x.split("_")[0] if len(x.split("_")) == 5 else x for x in variant_list]
-    chromlist = [int(x) if x not in ["X", "."] else x for x in chromlist]
-    poslist = [
-        int(x.split("_")[1]) if len(x.split("_")) == 5 else x for x in variant_list
-    ]
-    reflist = [x.split("_")[2] if len(x.split("_")) == 5 else x for x in variant_list]
-    altlist = [x.split("_")[3] if len(x.split("_")) == 5 else x for x in variant_list]
-    df = pd.DataFrame(
-        {
-            DEFAULT_FORM_VALUE_DICT[FormID.CHROM_COL]: chromlist,
-            DEFAULT_FORM_VALUE_DICT[FormID.POS_COL]: poslist,
-            DEFAULT_FORM_VALUE_DICT[FormID.REF_COL]: reflist,
-            DEFAULT_FORM_VALUE_DICT[FormID.ALT_COL]: altlist,
-        }
-    )
-    return df
-
-
 def addVariantID(gwas_data, chromcol, poscol, refcol, altcol, build="hg19"):
     """
 
@@ -508,7 +364,7 @@ def subsetLocus(build, summaryStats, regiontext, chromcol, poscol, pcol):
     if regiontext == "":
         regiontext = DEFAULT_FORM_VALUE_DICT[FormID.LOCUS]
     #    print('Parsing region text')
-    chrom, startbp, endbp = parseRegionText(regiontext, build)
+    chrom, startbp, endbp = parse_region_text(regiontext, build)
     summaryStats = summaryStats.loc[
         [str(x) != "." for x in list(summaryStats[chromcol])]
     ].copy()
@@ -1147,7 +1003,7 @@ def get_multiple_regions(regionstext: str, build: str) -> List[Tuple[int, int, i
     regionstext = regionstext.splitlines()
     regions = []
     for regiontext in regionstext:
-        chrom, start, end = parseRegionText(regiontext, build)
+        chrom, start, end = parse_region_text(regiontext, build)
         regions.append((chrom, start, end))
     return regions
 
@@ -1729,7 +1585,9 @@ def setbasedtest():
 
     combine_lds = False
 
-    snps_used_in_test = []  # List of list of positions, one list per test; position is (chrom, bp) tuple
+    snps_used_in_test = (
+        []
+    )  # List of list of positions, one list per test; position is (chrom, bp) tuple
 
     # TODO: need to determine used SNPs AFTER tests are performed
 
diff --git a/app/utils/__init__.py b/app/utils/__init__.py
index 8e8456b..35438c1 100644
--- a/app/utils/__init__.py
+++ b/app/utils/__init__.py
@@ -15,6 +15,7 @@
 from werkzeug.datastructures import FileStorage
 
 from app.utils.errors import InvalidUsage
+from app.utils.helpers import parse_region_text
 from app.utils.variants import get_variants_by_region
 
 GENOMIC_WINDOW_LIMIT = 2e6
@@ -107,6 +108,36 @@ def decompose_variant_list(variant_list):
     return df
 
 
+def get_dbnsp_snps(chrom: str, startbp: int, endbp: int, build: str) -> List[str]:
+    """Fetch SNPs from dbSNP for a certain region and build.
+
+    :return: The SNPs as a list of strings, the first five values are CHROM (no prefix),
+        POS, ID, REF, ALT
+    :rtype: List[str]
+    """
+
+    if build.lower() in ["hg38", "grch38"]:
+        dbsnp_filepath = os.path.join(
+            app.config["LF_DATA_FOLDER"], "dbSNP151", "GRCh38p7", "All_20180418.vcf.gz"
+        )
+    else:
+        dbsnp_filepath = os.path.join(
+            app.config["LF_DATA_FOLDER"], "dbSNP151", "GRCh37p13", "All_20180423.vcf.gz"
+        )
+
+    # Load dbSNP file
+    tbx = pysam.TabixFile(dbsnp_filepath)  # type: ignore
+
+    result = []
+
+    for row in tbx.fetch(str(chrom), startbp, endbp):
+        rowlist = row.split("\t")
+        rowlist[0] = rowlist[0].replace("chr", "")
+        result.append(rowlist)
+
+    return result
+
+
 def standardize_snps(variantlist, regiontxt, build):
     """
     Input: Variant names in any of these formats: rsid, chrom_pos_ref_alt, chrom:pos_ref_alt, chrom:pos_ref_alt_b37/b38
@@ -126,60 +157,33 @@ def standardize_snps(variantlist, regiontxt, build):
 
     # Ensure valid region:
     chrom, startbp, endbp = parse_region_text(regiontxt, build)
+
+    variants_df = get_variants_by_region(int(startbp), int(endbp), str(chrom), "V10")
+
+    rsid_colname = (
+        "rs_id_dbSNP147_GRCh37p13"
+        if build.lower() in ["hg38", "grch38"]
+        else "rs_id_dbSNP151_GRCh38p7"
+    )
+
+    suffix = "b38" if build.lower() in ["hg38", "grch38"] else "b37"
+
     chrom = str(chrom).replace("23", "X")
+
+    dbsnp_snps = get_dbnsp_snps(chrom, startbp, endbp, build)
+
     if build.lower() in ["hg19", "grch37"]:
         raise InvalidUsage(
             "Cannot standardize SNPs to hg19; GTEx V7 is no longer available."
         )
 
-    # Lookup variants in GTEx V10 db
-    variants_df = get_variants_by_region(int(startbp), int(endbp), str(chrom), "V10")
-
-    # Load dbSNP151 SNP names from region indicated
-    dbsnp_filepath = ""
-    suffix = "b37"
-    if build.lower() in ["hg38", "grch38"]:
-        rsid_colname = "rs_id_dbSNP151_GRCh38p7"
-        suffix = "b38"
-        dbsnp_filepath = os.path.join(
-            app.config["LF_DATA_FOLDER"], "dbSNP151", "GRCh38p7", "All_20180418.vcf.gz"
-        )
-    else:
-        rsid_colname = "rs_id_dbSNP147_GRCh37p13"
-        suffix = "b37"
-        dbsnp_filepath = os.path.join(
-            app.config["LF_DATA_FOLDER"], "dbSNP151", "GRCh37p13", "All_20180423.vcf.gz"
-        )
-
-    # Load dbSNP file
-    # delayeddf = delayed(pd.read_csv)(dbsnp_filepath,skiprows=getNumHeaderLines(dbsnp_filepath),sep='\t')
-    # dbsnp = dd.from_delayed(delayeddf)
-
-    tbx = pysam.TabixFile(dbsnp_filepath)  # type: ignore
-    #    print('Compiling list of known variants in the region from dbSNP151')
-    chromcol = []
-    poscol = []
-    idcol = []
-    refcol = []
-    altcol = []
-    variantid = []  # in chr_pos_ref_alt_build format
     rsids = dict(
         {}
     )  # a multi-allelic variant rsid (key) can be represented in several variantid formats (values)
-    for row in tbx.fetch(str(chrom), startbp, endbp):
-        rowlist = str(row).split("\t")
-        chromi = rowlist[0].replace("chr", "")
-        posi = rowlist[1]
-        idi = rowlist[2]
-        refi = rowlist[3]
-        alti = rowlist[4]
+    for row in dbsnp_snps:
+        chromi, posi, idi, refi, alti = row[:5]
         varstr = "_".join([chromi, posi, refi, alti, suffix])
-        chromcol.append(chromi)
-        poscol.append(posi)
-        idcol.append(idi)
-        refcol.append(refi)
-        altcol.append(alti)
-        variantid.append(varstr)
+
         rsids[idi] = [varstr]
         altalleles = alti.split(
             ","
@@ -191,7 +195,6 @@ def standardize_snps(variantlist, regiontxt, build):
                 varstr = "_".join([chromi, posi, refi, altalleles[i + 1], suffix])
                 rsids[idi].append(varstr)
 
-    #    print('Cleaning and mapping list of variants')
     variantlist = [
         asnp.split(";")[0].replace(":", "_").replace(".", "") for asnp in variantlist
     ]  # cleaning up the SNP names a bit
@@ -200,9 +203,7 @@ def standardize_snps(variantlist, regiontxt, build):
         if variant == "":
             stdvariantlist.append(".")
             continue
-        variantstr = variant.replace("chr", "")
-        if re.search("^23_", variantstr):
-            variantstr = variantstr.replace("23_", "X_", 1)
+        variantstr = variant.replace("chr23_", "chrX_").replace("chr", "")
         if variantstr.startswith("rs"):
             try:
                 # Here's the difference from the first function version (we look at GTEx first)
@@ -223,7 +224,7 @@ def standardize_snps(variantlist, regiontxt, build):
             strlist = variantstr.split("_")
             strlist = list(filter(None, strlist))  # remove empty strings
             try:
-                achr, astart, aend = parse_region_text(
+                achr, astart, _ = parse_region_text(
                     strlist[0] + ":" + strlist[1] + "-" + str(int(strlist[1]) + 1),
                     build,
                 )
@@ -269,68 +270,6 @@ def standardize_snps(variantlist, regiontxt, build):
     return stdvariantlist
 
 
-def parse_region_text(regiontext, build):
-    if build not in ["hg19", "hg38"]:
-        raise InvalidUsage(f"Unrecognized build: {build}", status_code=410)
-    regiontext = regiontext.strip().replace(" ", "").replace(",", "").replace("chr", "")
-    if not re.search(
-        r"^\d+:\d+-\d+$", regiontext.replace("X", "23").replace("x", "23")
-    ):
-        raise InvalidUsage(
-            f"Invalid coordinate format. '{regiontext}' e.g. 1:205,000,000-206,000,000",
-            status_code=410,
-        )
-    chrom = regiontext.split(":")[0].lower().replace("chr", "").upper()
-    pos = regiontext.split(":")[1]
-    startbp = pos.split("-")[0].replace(",", "")
-    endbp = pos.split("-")[1].replace(",", "")
-    chromLengths = pd.read_csv(
-        os.path.join(app.config["LF_DATA_FOLDER"], build + "_chrom_lengths.txt"),
-        sep="\t",
-        encoding="utf-8",
-    )
-    chromLengths.set_index("sequence", inplace=True)
-    if chrom in ["X", "x"] or chrom == "23":
-        chrom = 23
-        maxChromLength = chromLengths.loc["chrX", "length"]
-        try:
-            startbp = int(startbp)
-            endbp = int(endbp)
-        except Exception:
-            raise InvalidUsage(
-                f"Invalid coordinates input: '{regiontext}'", status_code=410
-            )
-    else:
-        try:
-            chrom = int(chrom)
-            if chrom == 23:
-                maxChromLength = chromLengths.loc["chrX", "length"]
-            else:
-                maxChromLength = chromLengths.loc["chr" + str(chrom), "length"]
-            startbp = int(startbp)
-            endbp = int(endbp)
-        except Exception:
-            raise InvalidUsage(
-                f"Invalid coordinates input '{regiontext}'", status_code=410
-            )
-    if chrom < 1 or chrom > 23:
-        raise InvalidUsage("Chromosome input must be between 1 and 23", status_code=410)
-    elif startbp > endbp:
-        raise InvalidUsage(
-            "Starting chromosome basepair position is greater than ending basepair position",
-            status_code=410,
-        )
-    elif startbp > maxChromLength or endbp > maxChromLength:
-        raise InvalidUsage("Start or end coordinates are out of range", status_code=410)
-    elif (endbp - startbp) > GENOMIC_WINDOW_LIMIT:
-        raise InvalidUsage(
-            f"Entered region size is larger than {GENOMIC_WINDOW_LIMIT / 1e6} Mbp",
-            status_code=410,
-        )
-    else:
-        return chrom, startbp, endbp
-
-
 def fetch_snv(chrom, bp, ref, build):
     variantid = "."
 
@@ -342,31 +281,15 @@ def fetch_snv(chrom, bp, ref, build):
         regiontxt = str(chrom) + ":" + str(bp) + "-" + str(int(bp) + 1)
     except Exception:
         raise InvalidUsage(f"Invalid input for {str(chrom):str(bp)}")
-    chrom, startbp, endbp = parse_region_text(regiontxt, build)
+    chrom = parse_region_text(regiontxt, build)[0]
     chrom = str(chrom).replace("chr", "").replace("23", "X")
+    suffix = "b37" if build.lower() in ["hg38", "grch38"] else "b37"
 
-    # Load dbSNP151 SNP names from region indicated
-    dbsnp_filepath = ""
-    if build.lower() in ["hg38", "grch38"]:
-        suffix = "b38"
-        dbsnp_filepath = os.path.join(
-            app.config["LF_DATA_FOLDER"], "dbSNP151", "GRCh38p7", "All_20180418.vcf.gz"
-        )
-    else:
-        suffix = "b37"
-        dbsnp_filepath = os.path.join(
-            app.config["LF_DATA_FOLDER"], "dbSNP151", "GRCh37p13", "All_20180423.vcf.gz"
-        )
+    dbsnps = get_dbnsp_snps(chrom, bp - 1, bp, build)
 
-    # Load variant info from dbSNP151
-    tbx = pysam.TabixFile(dbsnp_filepath)  # type: ignore
     varlist = []
-    for row in tbx.fetch(str(chrom), bp - 1, bp):
-        rowlist = str(row).split("\t")
-        chromi = rowlist[0].replace("chr", "")
-        posi = rowlist[1]
-        refi = rowlist[3]
-        alti = rowlist[4]
+    for row in dbsnps:
+        chromi, posi, _, refi, alti = row[:5]
         varstr = "_".join([chromi, posi, refi, alti, suffix])
         varlist.append(varstr)
 
diff --git a/app/utils/gencode.py b/app/utils/gencode.py
index 31e26b1..d3a5c7f 100644
--- a/app/utils/gencode.py
+++ b/app/utils/gencode.py
@@ -6,7 +6,7 @@
 from typing import List
 
 import pandas as pd
-from app.utils import parse_region_text
+from app.utils.helpers import parse_region_text
 
 # /app/utils/gencode.py -> /data/
 DATA_FOLDER = os.path.abspath(
diff --git a/app/utils/helpers.py b/app/utils/helpers.py
index 73a8529..6da48e3 100644
--- a/app/utils/helpers.py
+++ b/app/utils/helpers.py
@@ -1,7 +1,14 @@
+import os
+import re
 from typing import List
 
+from flask import current_app as app
 import pandas as pd
 
+from app.utils.errors import InvalidUsage
+
+GENOMIC_WINDOW_LIMIT = 2e6
+
 
 def validate_chromosome(
     chr: str | int, prefix: str | None = "chr", x_y_numeric: bool = False
@@ -109,3 +116,65 @@ def adjust_snp_column(
         )
 
     return snps_df
+
+
+def parse_region_text(regiontext, build):
+    if build not in ["hg19", "hg38"]:
+        raise InvalidUsage(f"Unrecognized build: {build}", status_code=410)
+    regiontext = regiontext.strip().replace(" ", "").replace(",", "").replace("chr", "")
+    if not re.search(
+        r"^\d+:\d+-\d+$", regiontext.replace("X", "23").replace("x", "23")
+    ):
+        raise InvalidUsage(
+            f"Invalid coordinate format. '{regiontext}' e.g. 1:205,000,000-206,000,000",
+            status_code=410,
+        )
+    chrom = regiontext.split(":")[0].lower().replace("chr", "").upper()
+    pos = regiontext.split(":")[1]
+    startbp = pos.split("-")[0].replace(",", "")
+    endbp = pos.split("-")[1].replace(",", "")
+    chromLengths = pd.read_csv(
+        os.path.join(app.config["LF_DATA_FOLDER"], build + "_chrom_lengths.txt"),
+        sep="\t",
+        encoding="utf-8",
+    )
+    chromLengths.set_index("sequence", inplace=True)
+    if chrom in ["X", "x"] or chrom == "23":
+        chrom = 23
+        maxChromLength = chromLengths.loc["chrX", "length"]
+        try:
+            startbp = int(startbp)
+            endbp = int(endbp)
+        except Exception:
+            raise InvalidUsage(
+                f"Invalid coordinates input: '{regiontext}'", status_code=410
+            )
+    else:
+        try:
+            chrom = int(chrom)
+            if chrom == 23:
+                maxChromLength = chromLengths.loc["chrX", "length"]
+            else:
+                maxChromLength = chromLengths.loc["chr" + str(chrom), "length"]
+            startbp = int(startbp)
+            endbp = int(endbp)
+        except Exception:
+            raise InvalidUsage(
+                f"Invalid coordinates input '{regiontext}'", status_code=410
+            )
+    if chrom < 1 or chrom > 23:
+        raise InvalidUsage("Chromosome input must be between 1 and 23", status_code=410)
+    elif startbp > endbp:
+        raise InvalidUsage(
+            "Starting chromosome basepair position is greater than ending basepair position",
+            status_code=410,
+        )
+    elif startbp > maxChromLength or endbp > maxChromLength:
+        raise InvalidUsage("Start or end coordinates are out of range", status_code=410)
+    elif (endbp - startbp) > GENOMIC_WINDOW_LIMIT:
+        raise InvalidUsage(
+            f"Entered region size is larger than {GENOMIC_WINDOW_LIMIT/1e6} Mbp",
+            status_code=410,
+        )
+    else:
+        return chrom, startbp, endbp