From 694f70fff2a1a2a64c9097c13b5f1cc337a03517 Mon Sep 17 00:00:00 2001
From: nbtm-sh <nathan@nbti.net>
Date: Mon, 30 Jun 2025 12:47:18 +1000
Subject: [PATCH 1/5] feat(help): Remove doxx

---
 samplesheetutils/binaries/create_samplesheet.py | 2 +-
 samplesheetutils/binaries/sample_name.py        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/samplesheetutils/binaries/create_samplesheet.py b/samplesheetutils/binaries/create_samplesheet.py
index cfe5396..8a519de 100644
--- a/samplesheetutils/binaries/create_samplesheet.py
+++ b/samplesheetutils/binaries/create_samplesheet.py
@@ -26,7 +26,7 @@ def create_samplesheet():
     parser = argparse.ArgumentParser(
         prog="Create Samplesheet",
         description="Utility to create a samplesheet from directory, or AA string",
-        epilog="Written by Nathan Glades <n.glades@unsw.edu.au>")
+        epilog="Written by N.G @ unsw.edu.au")
 
     parser.add_argument('-a', '--aa-string', help='Single amino acid string', dest='aa_string')
     parser.add_argument('-m', '--msa-dir', help='Directory containing corresponding MSA files for samples', dest='msa_dir')
diff --git a/samplesheetutils/binaries/sample_name.py b/samplesheetutils/binaries/sample_name.py
index 5da0d7f..51bf822 100644
--- a/samplesheetutils/binaries/sample_name.py
+++ b/samplesheetutils/binaries/sample_name.py
@@ -15,7 +15,7 @@ def sample_name():
     parser = argparse.ArgumentParser(
         prog="Read sample name(s) from FASTA",
         description="Utility to read the sample name(s) from a FASTA file and print them to stdout",
-        epilog="Written by Nathan Glades <n.glades@unsw.edu.au>"
+        epilog="Written by N.G @ unsw.edu.au"
     )
     
     parser.add_argument('-i', '--index', help='Index of the sample you wish to output.\nIf unset, all sample names will be output. Acceptable inputs are an integer, -1 for the last sample, or a range (a:b)', default=None, dest='index')

From 3236839c28a3ee053fb518bd07eda5488c42f7c2 Mon Sep 17 00:00:00 2001
From: nbtm-sh <nathan@nbti.net>
Date: Mon, 30 Jun 2025 12:49:14 +1000
Subject: [PATCH 2/5] feat(help): Update affiliation

---
 samplesheetutils/binaries/create_samplesheet.py | 4 ++--
 samplesheetutils/binaries/sample_name.py        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/samplesheetutils/binaries/create_samplesheet.py b/samplesheetutils/binaries/create_samplesheet.py
index 8a519de..1b6bd44 100644
--- a/samplesheetutils/binaries/create_samplesheet.py
+++ b/samplesheetutils/binaries/create_samplesheet.py
@@ -19,14 +19,14 @@
 logging.basicConfig()
 
 def version():
-    print("")
+    print("samplesheet-utils 1.2.2")
 
 #if __name__ == "__main__":
 def create_samplesheet():
     parser = argparse.ArgumentParser(
         prog="Create Samplesheet",
         description="Utility to create a samplesheet from directory, or AA string",
-        epilog="Written by N.G @ unsw.edu.au")
+        epilog="Written by nbtm-sh @ unsw.edu.au")
 
     parser.add_argument('-a', '--aa-string', help='Single amino acid string', dest='aa_string')
     parser.add_argument('-m', '--msa-dir', help='Directory containing corresponding MSA files for samples', dest='msa_dir')
diff --git a/samplesheetutils/binaries/sample_name.py b/samplesheetutils/binaries/sample_name.py
index 51bf822..8777db5 100644
--- a/samplesheetutils/binaries/sample_name.py
+++ b/samplesheetutils/binaries/sample_name.py
@@ -15,7 +15,7 @@ def sample_name():
     parser = argparse.ArgumentParser(
         prog="Read sample name(s) from FASTA",
         description="Utility to read the sample name(s) from a FASTA file and print them to stdout",
-        epilog="Written by N.G @ unsw.edu.au"
+        epilog="Written by nbtm-sh @ unsw.edu.au"
     )
     
     parser.add_argument('-i', '--index', help='Index of the sample you wish to output.\nIf unset, all sample names will be output. Acceptable inputs are an integer, -1 for the last sample, or a range (a:b)', default=None, dest='index')

From 0617254bdfebd33d80c8689b54ca0e29873f05a6 Mon Sep 17 00:00:00 2001
From: nbtm-sh <nathan@nbti.net>
Date: Wed, 2 Jul 2025 13:07:37 +1000
Subject: [PATCH 3/5] feat(truncate-msa): Update truncation script

---
 samplesheetutils/binaries/truncate_msa.py | 106 ++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 samplesheetutils/binaries/truncate_msa.py

diff --git a/samplesheetutils/binaries/truncate_msa.py b/samplesheetutils/binaries/truncate_msa.py
new file mode 100644
index 0000000..999a7ab
--- /dev/null
+++ b/samplesheetutils/binaries/truncate_msa.py
@@ -0,0 +1,106 @@
+import argparse, tempfile, logging, os
+from samplesheetutils.utils.sample import *
+from samplesheetutils.utils.output import *
+from samplesheetutils.utils.fasta import *
+from samplesheetutils.utils.input import *
+from samplesheetutils.utils.alignment import *
+from samplesheetutils.utils.a3m import *
+
+# Module-level logger for this tool; its level is set in truncate_msa() from --debug.
+logger = logging.getLogger(__name__)
+logging.basicConfig()
+
+def version():
+    """Print the package name and version, matching create-samplesheet's output."""
+    print("samplesheet-utils 1.2.2")
+
+def truncate_msa():
+    """
+    Console entry point: gap out selected residue positions in an a3m file.
+
+    Reads the command line via argparse, loads the input a3m, rewrites the data of
+    every entry except the first, and writes the result to --output, or back to
+    the input file when --in-place is given.
+
+    Exits with status 1 when a positional argument is the '?' placeholder, the bounds
+    are not integers, the input cannot be opened, or it holds fewer than two entries.
+    """
+    parser = argparse.ArgumentParser(
+        prog="Truncate MSA",
+        description="Utility for truncating MSAs for targeting a specified region.",
+        epilog="Written by nbtm-sh @ unsw.edu.au")
+
+    parser.add_argument('input_file', default='?', help='Path to input file')
+    parser.add_argument('region_start', default='?', help='The index of the first residue to target.')
+    parser.add_argument('region_end', default='?', help='The index of the final residue to target.')
+    parser.add_argument('-o', '--output', help='Path to output file', default='output.a3m', dest='output')
+    parser.add_argument('-i', '--in-place', help='Replace the target file with the modified output (in-place)', default=False, action='store_true', dest='in_place')
+    parser.add_argument('-r', '--inverse', help='Invert output (delete residues within the target region)', default=False, action='store_true', dest='inverse')
+    parser.add_argument('--version', help='Show version number', default=False, action='store_true', dest='version')
+    parser.add_argument('--debug', help='Show debug output', default=False, action='store_true', dest='debug')
+
+    args = parser.parse_args()
+
+    logger.setLevel(logging.DEBUG if args.debug else logging.INFO)
+
+    if args.version:
+        version()
+        raise SystemExit(0)
+
+    if args.debug:
+        version()
+
+    # '?' is the placeholder default declared for the three positional arguments.
+    if '?' in (args.input_file, args.region_start, args.region_end):
+        logger.error("Please specify all input arguments. Use --help to display required arguments")
+        raise SystemExit(1)
+
+    logger.debug(f"Loading {args.input_file}...")
+    try:
+        r_start = int(args.region_start)
+        r_end = int(args.region_end)
+    except ValueError:
+        logger.error("Please enter only integers for region_start and region_end")
+        # Stop here; falling through would raise NameError on the unset bounds.
+        raise SystemExit(1)
+
+    logger.debug(f"Region Start: {r_start}, Region End: {r_end}")
+
+    try:
+        with open(args.input_file, "r") as fp:
+            logger.debug(f"Opened input file {args.input_file} for reading")
+            samples = read_a3m(fp, read_data=True)
+            logger.debug(f"Imported {len(samples)} samples.")
+    except FileNotFoundError:
+        logger.error(f"Input file {args.input_file} does not exist")
+        raise SystemExit(1)
+    except PermissionError:
+        logger.error(f"Input file {args.input_file} could not be opened due to a permission error")
+        raise SystemExit(1)
+
+    # Refuse empty input: writing it out with --in-place would truncate the file.
+    if not samples:
+        logger.error(f"Input file {args.input_file} does not contain any samples")
+        raise SystemExit(1)
+
+    # The first entry is always preserved, so a single-entry file needs no edit.
+    if len(samples) == 1:
+        logger.error("The a3m file only contains one sample. The first sample of the a3m file is always preserved, so no changes to the file are needed")
+        raise SystemExit(1)
+
+    # NOTE(review): positions are 0-based and both bounds are exclusive here, while
+    # the help text calls them the first/final residue to target. The default also
+    # gaps out positions inside the region although --inverse's help describes that
+    # as the inverted mode. Confirm the intended semantics before changing this.
+    for sample in samples[1:]:
+        sample.data = ''.join(
+            '-' if (r_start < pos < r_end) ^ args.inverse else residue
+            for pos, residue in enumerate(sample.data)
+        )
+
+    output_file = args.input_file if args.in_place else args.output
+    logger.debug(f"Opening output file {output_file} for writing...")
+    with open(output_file, "w") as fp:
+        make_a3m(fp, samples)
+
+    

From 0ecf62ecdad4b9318469887165445bf91f0a6fba Mon Sep 17 00:00:00 2001
From: nbtm-sh <nathan@nbti.net>
Date: Wed, 2 Jul 2025 13:08:03 +1000
Subject: [PATCH 4/5] feat(truncate-msa): Update setup.py

---
 setup.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index eef4bc7..12d2219 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 setup(
     name="samplesheetutils",
     version="1.2.2",
-    author="Nathan Glades",
+    author="nbtm-sh",
     author_email="n.glades@unsw.edu.au",
     packages=find_packages(),
     description="Collection of utilities for creating and transforming samplesheets and samples",
@@ -12,7 +12,8 @@
     entry_points={
         'console_scripts': [
             'create-samplesheet=samplesheetutils.binaries.create_samplesheet:create_samplesheet',
-            'sample-name=samplesheetutils.binaries.sample_name:sample_name'
+            'sample-name=samplesheetutils.binaries.sample_name:sample_name',
+            'truncate-msa=samplesheetutils.binaries.truncate_msa:truncate_msa'
         ]
     }
 )

From 70445caaba270868c19d7acf9eb45792d6c59ebd Mon Sep 17 00:00:00 2001
From: nbtm-sh <nathan@nbti.net>
Date: Wed, 2 Jul 2025 13:17:17 +1000
Subject: [PATCH 5/5] feat(truncate-msa): Add supporting libraries

---
 samplesheetutils/utils/a3m.py       | 52 +++++++++++++++++++++++++++++
 samplesheetutils/utils/alignment.py |  5 +++
 2 files changed, 57 insertions(+)
 create mode 100644 samplesheetutils/utils/a3m.py
 create mode 100644 samplesheetutils/utils/alignment.py

diff --git a/samplesheetutils/utils/a3m.py b/samplesheetutils/utils/a3m.py
new file mode 100644
index 0000000..227a48b
--- /dev/null
+++ b/samplesheetutils/utils/a3m.py
@@ -0,0 +1,52 @@
+from samplesheetutils.utils.sample import Sample
+from samplesheetutils.utils.alignment import Alignment
+from typing import Union
+import re 
+
+def read_a3m(fp, read_data=True):
+    """
+    Parse an a3m/FASTA-style stream into a list of Sample objects, one per '>' header.
+    fp: Open text file object. NOTE: it is closed before this function returns.
+    read_data: If false, sequence lines are skipped and each sample's data stays empty.
+    Lines that appear before the first header are ignored.
+    """
+    align_samples = []
+    current = None
+    # Streams without a .name attribute (e.g. io.StringIO) get a path of None.
+    source_path = getattr(fp, "name", None)
+
+    for line in fp:
+        if line.startswith(">"):
+            # A new header ends the previous record; its data may span several lines.
+            if current is not None:
+                align_samples.append(current)
+            current = Sample(line[1:].strip(), source_path, "")
+        elif read_data and current is not None:
+            current.data += line.strip()
+
+    if current is not None:
+        align_samples.append(current)
+
+    fp.close()
+
+    return align_samples
+
+
+def make_a3m(fp, sample: Union[Alignment, list], header='>', fixed_width=False, fixed_width_column_count=80):
+    """
+    Write one or more records to fp in A3M format: a header line, then the record's data.
+    sample: A single record or a list of records; each must expose .name and .data
+    header: The header character for each sample. It is recommended not to change this
+    fixed_width: If true, break each record's data every fixed_width_column_count characters
+    fixed_width_column_count: Number of characters per data line when fixed_width is set
+    """
+    records = sample if isinstance(sample, list) else [sample]
+    for record in records:
+        body = record.data
+        if fixed_width:
+            width = fixed_width_column_count
+            body = '\n'.join(body[i:i + width] for i in range(0, len(body), width))
+        # Terminate every record with a newline so the next header starts on its own line.
+        fp.write(f"{header}{record.name}\n{body}\n")
+    fp.flush()
+
diff --git a/samplesheetutils/utils/alignment.py b/samplesheetutils/utils/alignment.py
new file mode 100644
index 0000000..3112132
--- /dev/null
+++ b/samplesheetutils/utils/alignment.py
@@ -0,0 +1,5 @@
+class Alignment:
+    """Plain record for one alignment entry: its name, source path, and sequence data."""
+    def __init__(self, name, path, data, msa = None):
+        # NOTE: msa is accepted by the signature but is not stored on the instance.
+        self.name, self.path, self.data = name, path, data