From 3f00767ee775f6bfe2a515eea0599942a7c9af04 Mon Sep 17 00:00:00 2001 From: Dennis Haupt Date: Thu, 28 May 2026 19:10:54 +0200 Subject: [PATCH 1/7] Feat: Variant Identifier is a useful object. It contains already existing objects like Genomic position and describes a unique identifier of a variant --- src/VariantIdentifier.php | 77 ++++++++++++++++++ src/VariantIdentifierFormat.php | 23 ++++++ tests/VariantIdentifierTest.php | 137 ++++++++++++++++++++++++++++++++ 3 files changed, 237 insertions(+) create mode 100644 src/VariantIdentifier.php create mode 100644 src/VariantIdentifierFormat.php create mode 100644 tests/VariantIdentifierTest.php diff --git a/src/VariantIdentifier.php b/src/VariantIdentifier.php new file mode 100644 index 0000000..3f3a01b --- /dev/null +++ b/src/VariantIdentifier.php @@ -0,0 +1,77 @@ +genomicPosition = $genomicPosition; + $this->reference = $reference; + $this->alternate = $alternate; + } + + public static function parse(string $value): self + { + if (strpos($value, '/') !== false) { + return self::parseCanonical($value); + } + + return self::parseVCF($value); + } + + public function toString(VariantIdentifierFormat $format, NamingConvention $namingConvention): string + { + $chromosome = $this->genomicPosition->chromosome->toString($namingConvention); + $position = $this->genomicPosition->position; + $ref = $this->reference->toString(); + $alt = $this->alternate->toString(); + + switch ($format->value) { + case VariantIdentifierFormat::VCF: + return "{$chromosome}-{$position}-{$ref}-{$alt}"; + case VariantIdentifierFormat::CANONICAL: + return "{$chromosome}-{$position}-{$ref}/{$alt}"; + default: + throw new \InvalidArgumentException("No toString logic implemented for format: {$format->value}."); + } + } + + private static function parseCanonical(string $value): self + { + if (preg_match('/^(.+)-(\d+)-([ATGC]+)\/([ATGC]+)$/i', $value, $matches) === 0) { + throw new \InvalidArgumentException("Invalid canonical variant identifier format: 
{$value}."); + } + + assert(isset($matches[1], $matches[2], $matches[3], $matches[4])); + + return new self( + new GenomicPosition(new Chromosome($matches[1]), NucleotidePosition::fromOneBased(SafeCast::toInt($matches[2]))), + new DnaSequence($matches[3]), + new DnaSequence($matches[4]) + ); + } + + private static function parseVCF(string $value): self + { + if (preg_match('/^(.+)-(\d+)-([ATGC]+)-([ATGC]+)$/i', $value, $matches) === 0) { + throw new \InvalidArgumentException("Invalid VCF variant identifier format: {$value}."); + } + + assert(isset($matches[1], $matches[2], $matches[3], $matches[4])); + + return new self( + new GenomicPosition(new Chromosome($matches[1]), NucleotidePosition::fromOneBased(SafeCast::toInt($matches[2]))), + new DnaSequence($matches[3]), + new DnaSequence($matches[4]) + ); + } +} diff --git a/src/VariantIdentifierFormat.php b/src/VariantIdentifierFormat.php new file mode 100644 index 0000000..5a3713e --- /dev/null +++ b/src/VariantIdentifierFormat.php @@ -0,0 +1,23 @@ +value = $value; + break; + default: + throw new \InvalidArgumentException("Invalid variant identifier format: {$value}."); + } + } +} diff --git a/tests/VariantIdentifierTest.php b/tests/VariantIdentifierTest.php new file mode 100644 index 0000000..ba4dd5a --- /dev/null +++ b/tests/VariantIdentifierTest.php @@ -0,0 +1,137 @@ +toString(new VariantIdentifierFormat(VariantIdentifierFormat::VCF), new NamingConvention(NamingConvention::UCSC)) + ); + } + + public function testConstructAndToStringCanonical(): void + { + $variant = new VariantIdentifier( + new GenomicPosition(new Chromosome('chr1'), NucleotidePosition::fromOneBased(12345)), + new DnaSequence('A'), + new DnaSequence('ATT') + ); + + self::assertSame( + 'chr1-12345-A/ATT', + $variant->toString(new VariantIdentifierFormat(VariantIdentifierFormat::CANONICAL), new NamingConvention(NamingConvention::UCSC)) + ); + } + + public function testToStringEnsembl(): void + { + $variant = new VariantIdentifier( + new 
GenomicPosition(new Chromosome('chrX'), NucleotidePosition::fromOneBased(99999)), + new DnaSequence('GC'), + new DnaSequence('A') + ); + + self::assertSame( + 'X-99999-GC-A', + $variant->toString(new VariantIdentifierFormat(VariantIdentifierFormat::VCF), new NamingConvention(NamingConvention::ENSEMBL)) + ); + self::assertSame( + 'X-99999-GC/A', + $variant->toString(new VariantIdentifierFormat(VariantIdentifierFormat::CANONICAL), new NamingConvention(NamingConvention::ENSEMBL)) + ); + } + + #[DataProvider('parseVCFProvider')] + public function testParseVCF(string $input, string $expectedChromosome, int $expectedPosition, string $expectedRef, string $expectedAlt): void + { + $variant = VariantIdentifier::parse($input); + + self::assertSame($expectedChromosome, $variant->genomicPosition->chromosome->value()); + self::assertSame($expectedPosition, $variant->genomicPosition->position); + self::assertSame($expectedRef, $variant->reference->toString()); + self::assertSame($expectedAlt, $variant->alternate->toString()); + } + + /** @return iterable */ + public static function parseVCFProvider(): iterable + { + yield 'SNV with chr prefix' => ['chr1-12345-A-T', '1', 12345, 'A', 'T']; + yield 'SNV without chr prefix' => ['1-12345-A-T', '1', 12345, 'A', 'T']; + yield 'deletion' => ['chr7-5000-ATG-A', '7', 5000, 'ATG', 'A']; + yield 'insertion' => ['chrX-99999-A-ATCG', 'X', 99999, 'A', 'ATCG']; + yield 'chromosome 22' => ['chr22-100-G-C', '22', 100, 'G', 'C']; + yield 'mitochondrial' => ['chrM-8000-T-C', 'M', 8000, 'T', 'C']; + } + + #[DataProvider('parseCanonicalProvider')] + public function testParseCanonical(string $input, string $expectedChromosome, int $expectedPosition, string $expectedRef, string $expectedAlt): void + { + $variant = VariantIdentifier::parse($input); + + self::assertSame($expectedChromosome, $variant->genomicPosition->chromosome->value()); + self::assertSame($expectedPosition, $variant->genomicPosition->position); + self::assertSame($expectedRef, 
$variant->reference->toString()); + self::assertSame($expectedAlt, $variant->alternate->toString()); + } + + /** @return iterable */ + public static function parseCanonicalProvider(): iterable + { + yield 'SNV with chr prefix' => ['chr1-12345-A/T', '1', 12345, 'A', 'T']; + yield 'multi-base alternate' => ['chr1-12345-A/ATT', '1', 12345, 'A', 'ATT']; + yield 'without chr prefix' => ['12-500-G/CA', '12', 500, 'G', 'CA']; + } + + public function testParseInvalidVCFThrows(): void + { + $this->expectException(\InvalidArgumentException::class); + VariantIdentifier::parse('invalid'); + } + + public function testParseInvalidCanonicalThrows(): void + { + $this->expectException(\InvalidArgumentException::class); + VariantIdentifier::parse('invalid/stuff'); + } + + public function testRoundtripVCF(): void + { + $input = 'chr7-140753336-A-T'; + $variant = VariantIdentifier::parse($input); + + self::assertSame( + $input, + $variant->toString(new VariantIdentifierFormat(VariantIdentifierFormat::VCF), new NamingConvention(NamingConvention::UCSC)) + ); + } + + public function testRoundtripCanonical(): void + { + $input = 'chr7-140753336-A/T'; + $variant = VariantIdentifier::parse($input); + + self::assertSame( + $input, + $variant->toString(new VariantIdentifierFormat(VariantIdentifierFormat::CANONICAL), new NamingConvention(NamingConvention::UCSC)) + ); + } +} From 9545b90e774df44778590a45b4aaaaa4de02b70e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 28 May 2026 17:23:11 +0000 Subject: [PATCH 2/7] Allow Feat type in PR title validation --- .github/workflows/pr-title.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/pr-title.yml b/.github/workflows/pr-title.yml index 2f2bbf8..3b6c153 100644 --- a/.github/workflows/pr-title.yml +++ b/.github/workflows/pr-title.yml @@ -15,5 +15,19 @@ jobs: runs-on: ubuntu-latest steps: - uses: amannn/action-semantic-pull-request@v5 + with: + 
types: | + feat + Feat + fix + docs + style + refactor + perf + test + build + ci + chore + revert env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From cfe786a91634d892774b6b9ce969d0dbd5514a1a Mon Sep 17 00:00:00 2001 From: Dennis Haupt Date: Thu, 28 May 2026 19:24:04 +0200 Subject: [PATCH 3/7] fix: add @dataProvider annotations for PHPUnit 9 compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with Claude Code --- tests/VariantIdentifierTest.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/VariantIdentifierTest.php b/tests/VariantIdentifierTest.php index ba4dd5a..2d05f9f 100644 --- a/tests/VariantIdentifierTest.php +++ b/tests/VariantIdentifierTest.php @@ -60,6 +60,7 @@ public function testToStringEnsembl(): void ); } + /** @dataProvider parseVCFProvider */ #[DataProvider('parseVCFProvider')] public function testParseVCF(string $input, string $expectedChromosome, int $expectedPosition, string $expectedRef, string $expectedAlt): void { @@ -82,6 +83,7 @@ public static function parseVCFProvider(): iterable yield 'mitochondrial' => ['chrM-8000-T-C', 'M', 8000, 'T', 'C']; } + /** @dataProvider parseCanonicalProvider */ #[DataProvider('parseCanonicalProvider')] public function testParseCanonical(string $input, string $expectedChromosome, int $expectedPosition, string $expectedRef, string $expectedAlt): void { From 9e79fc7913e4cfd95e4b88aeab6fc41c91a62055 Mon Sep 17 00:00:00 2001 From: Dennis Haupt Date: Fri, 29 May 2026 11:29:15 +0200 Subject: [PATCH 4/7] Add tab format --- src/VariantIdentifier.php | 2 ++ src/VariantIdentifierFormat.php | 1 + 2 files changed, 3 insertions(+) diff --git a/src/VariantIdentifier.php b/src/VariantIdentifier.php index 3f3a01b..340d7e7 100644 --- a/src/VariantIdentifier.php +++ b/src/VariantIdentifier.php @@ -40,6 +40,8 @@ public function toString(VariantIdentifierFormat $format, NamingConvention $nami return "{$chromosome}-{$position}-{$ref}-{$alt}"; case 
VariantIdentifierFormat::CANONICAL: return "{$chromosome}-{$position}-{$ref}/{$alt}"; + case VariantIdentifierFormat::TAB: + return "{$chromosome}\t{$position}\t{$ref}\t{$alt}"; default: throw new \InvalidArgumentException("No toString logic implemented for format: {$format->value}."); } diff --git a/src/VariantIdentifierFormat.php b/src/VariantIdentifierFormat.php index 5a3713e..3e4a2c6 100644 --- a/src/VariantIdentifierFormat.php +++ b/src/VariantIdentifierFormat.php @@ -6,6 +6,7 @@ class VariantIdentifierFormat { public const VCF = 'VCF'; public const CANONICAL = 'CANONICAL'; + public const TAB = 'TAB'; public string $value; From 59be1b8fad7aed03e33e762a71d9ae7f2d92281c Mon Sep 17 00:00:00 2001 From: Dennis Haupt Date: Fri, 29 May 2026 15:34:14 +0200 Subject: [PATCH 5/7] dash instead of VCF --- src/VariantIdentifier.php | 6 +++--- src/VariantIdentifierFormat.php | 5 +++-- tests/VariantIdentifierTest.php | 6 +++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/VariantIdentifier.php b/src/VariantIdentifier.php index 340d7e7..5136b49 100644 --- a/src/VariantIdentifier.php +++ b/src/VariantIdentifier.php @@ -25,7 +25,7 @@ public static function parse(string $value): self return self::parseCanonical($value); } - return self::parseVCF($value); + return self::parseDash($value); } public function toString(VariantIdentifierFormat $format, NamingConvention $namingConvention): string @@ -36,7 +36,7 @@ public function toString(VariantIdentifierFormat $format, NamingConvention $nami $alt = $this->alternate->toString(); switch ($format->value) { - case VariantIdentifierFormat::VCF: + case VariantIdentifierFormat::DASH: return "{$chromosome}-{$position}-{$ref}-{$alt}"; case VariantIdentifierFormat::CANONICAL: return "{$chromosome}-{$position}-{$ref}/{$alt}"; @@ -62,7 +62,7 @@ private static function parseCanonical(string $value): self ); } - private static function parseVCF(string $value): self + private static function parseDash(string $value): self { if 
(preg_match('/^(.+)-(\d+)-([ATGC]+)-([ATGC]+)$/i', $value, $matches) === 0) { throw new \InvalidArgumentException("Invalid VCF variant identifier format: {$value}."); diff --git a/src/VariantIdentifierFormat.php b/src/VariantIdentifierFormat.php index 3e4a2c6..dffaab8 100644 --- a/src/VariantIdentifierFormat.php +++ b/src/VariantIdentifierFormat.php @@ -4,7 +4,7 @@ class VariantIdentifierFormat { - public const VCF = 'VCF'; + public const DASH = 'DASH'; public const CANONICAL = 'CANONICAL'; public const TAB = 'TAB'; @@ -13,8 +13,9 @@ class VariantIdentifierFormat public function __construct(string $value) { switch ($value) { - case self::VCF: + case self::DASH: case self::CANONICAL: + case self::TAB: $this->value = $value; break; default: diff --git a/tests/VariantIdentifierTest.php b/tests/VariantIdentifierTest.php index 2d05f9f..c6e2236 100644 --- a/tests/VariantIdentifierTest.php +++ b/tests/VariantIdentifierTest.php @@ -24,7 +24,7 @@ public function testConstructAndToStringVCF(): void self::assertSame( 'chr1-12345-A-T', - $variant->toString(new VariantIdentifierFormat(VariantIdentifierFormat::VCF), new NamingConvention(NamingConvention::UCSC)) + $variant->toString(new VariantIdentifierFormat(VariantIdentifierFormat::DASH), new NamingConvention(NamingConvention::UCSC)) ); } @@ -52,7 +52,7 @@ public function testToStringEnsembl(): void self::assertSame( 'X-99999-GC-A', - $variant->toString(new VariantIdentifierFormat(VariantIdentifierFormat::VCF), new NamingConvention(NamingConvention::ENSEMBL)) + $variant->toString(new VariantIdentifierFormat(VariantIdentifierFormat::DASH), new NamingConvention(NamingConvention::ENSEMBL)) ); self::assertSame( 'X-99999-GC/A', @@ -122,7 +122,7 @@ public function testRoundtripVCF(): void self::assertSame( $input, - $variant->toString(new VariantIdentifierFormat(VariantIdentifierFormat::VCF), new NamingConvention(NamingConvention::UCSC)) + $variant->toString(new VariantIdentifierFormat(VariantIdentifierFormat::DASH), new 
NamingConvention(NamingConvention::UCSC)) ); } From b38b2f8fc2bb9f1e3bbd6a532a74359cf10d161f Mon Sep 17 00:00:00 2001 From: Dennis Haupt Date: Fri, 29 May 2026 15:46:14 +0200 Subject: [PATCH 6/7] prevent Feat instead of feat --- .github/workflows/pr-title.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/pr-title.yml b/.github/workflows/pr-title.yml index 3b6c153..d2e5fe0 100644 --- a/.github/workflows/pr-title.yml +++ b/.github/workflows/pr-title.yml @@ -18,7 +18,6 @@ jobs: with: types: | feat - Feat fix docs style From d45441e16409f153081c0f3cc6c36aade03b17d3 Mon Sep 17 00:00:00 2001 From: Simon Bigelmayr Date: Fri, 29 May 2026 15:50:08 +0200 Subject: [PATCH 7/7] Apply suggestions from code review Co-authored-by: Simon Bigelmayr --- .github/workflows/pr-title.yml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/.github/workflows/pr-title.yml b/.github/workflows/pr-title.yml index d2e5fe0..2f2bbf8 100644 --- a/.github/workflows/pr-title.yml +++ b/.github/workflows/pr-title.yml @@ -15,18 +15,5 @@ jobs: runs-on: ubuntu-latest steps: - uses: amannn/action-semantic-pull-request@v5 - with: - types: | - feat - fix - docs - style - refactor - perf - test - build - ci - chore - revert env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}