Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
9aeaf12
feat: implement interop
dhaupt88 Apr 24, 2026
e27b9e7
feat: implement interop
dhaupt88 Apr 24, 2026
029de10
feat: implement interop
dhaupt88 Apr 24, 2026
d245f55
Claude review
dhaupt88 Apr 24, 2026
e05a782
Claude review
dhaupt88 Apr 24, 2026
3c47bf5
Claude review
dhaupt88 Apr 24, 2026
3eac2ab
Update shortcuts use fully names
dhaupt88 Apr 24, 2026
1f8ecfa
Multiline fix
dhaupt88 Apr 24, 2026
0c1e59c
Fix named arguments
dhaupt88 Apr 24, 2026
64d4ce6
Fix stan errors
dhaupt88 Apr 24, 2026
3c00457
Fix stan errors
dhaupt88 Apr 24, 2026
b9f2531
Merge remote-tracking branch 'origin/master' into interop
simbig Apr 27, 2026
032d8ba
fix: address review comments (no-brainer fixes)
simbig Apr 27, 2026
c6aef61
fix(phpstan): use @phpstan-ignore-line for cross-version compatibility
simbig Apr 27, 2026
2606058
fix: replace assert() with exceptions for runtime validation
simbig Apr 27, 2026
cdb249d
fix(phpstan): restore assert for preg_match offset access
simbig Apr 27, 2026
5e7981d
fix: validate required row keys in LaneResult::fromInterOpRow()
simbig Apr 27, 2026
8237496
refactor: extract MetaInfo payload type to class-level PHPDoc
simbig Apr 27, 2026
20e8e39
Apply php-cs-fixer changes
simbig Apr 27, 2026
0e640b7
fix(phpstan): use native preg_match for capture type narrowing
simbig Apr 27, 2026
b7a1f2a
Prevent Single End Sequencing Results
dhaupt88 Apr 29, 2026
2f59c68
Apply php-cs-fixer changes
dhaupt88 Apr 29, 2026
09be5e7
Add assertions
dhaupt88 Apr 29, 2026
b52b71a
Use non-index result as well
dhaupt88 Apr 30, 2026
bf6416a
Use non-index result as well
dhaupt88 Apr 30, 2026
2b977b7
Fix stan
dhaupt88 Apr 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions phpstan/php-below-8.1.neon
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,5 @@ parameters:
# Existing code with @phpstan-ignore that older versions don't understand
- message: '#Cannot access property \$name on SimpleXMLElement\|null\.#'
path: ../src/LightcyclerExportSheet/LightcyclerXmlParser.php
- message: '#array\{UnknownKey: .* given\.#'
path: ../tests/InterOp/RunParametersTest.php
22 changes: 22 additions & 0 deletions src/InterOp/ClusterStatistic.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?php declare(strict_types=1);

namespace MLL\Utils\InterOp;

/**
 * Value object bundling the cluster metrics of one result: density, clusters
 * passing filter and the two cluster counts.
 */
class ClusterStatistic
{
    /** Density as a value with its deviation. */
    public DeviationValue $density;

    /** Clusters passing filter as a value with its deviation. */
    public DeviationValue $clusterPassingFilter;

    /** Cluster count; in millions according to the property name. */
    public float $clusterCountMillions;

    /** Cluster count passing filter; in millions according to the property name. */
    public float $clusterCountPassingFilterMillions;

    /**
     * Stores the given metrics unchanged; no validation or conversion happens here.
     */
    public function __construct(
        DeviationValue $density,
        DeviationValue $clusterPassingFilter,
        float $clusterCountMillions,
        float $clusterCountPassingFilterMillions
    ) {
        $this->clusterCountPassingFilterMillions = $clusterCountPassingFilterMillions;
        $this->clusterCountMillions = $clusterCountMillions;
        $this->clusterPassingFilter = $clusterPassingFilter;
        $this->density = $density;
    }
}
43 changes: 43 additions & 0 deletions src/InterOp/DeviationValue.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
<?php declare(strict_types=1);

namespace MLL\Utils\InterOp;

use MLL\Utils\SafeCast;

/**
 * A numeric value paired with its deviation, as written in strings like "851 +/- 32".
 */
class DeviationValue
{
    public float $value;

    public float $deviation;

    public function __construct(float $value, float $deviation)
    {
        $this->deviation = $deviation;
        $this->value = $value;
    }

    /**
     * Splits a string of the form "<value> +/- <deviation>" (e.g. "851 +/- 32")
     * into its two numeric parts.
     *
     * Both parts may only consist of digits and dots. Every input that does not
     * match this form yields null; this includes "nan +/- nan", which occurs for
     * index reads.
     */
    public static function parse(string $raw): ?self
    {
        if (preg_match('/^([\d.]+)\s*\+\/-\s*([\d.]+)$/', $raw, $matches) !== 1) { // @phpstan-ignore-line theCodingMachineSafe.function (native preg_match needed for PHPStan 2.x capture type narrowing)
            return null;
        }

        // $matches[0] is the full match and not needed
        [, $valuePart, $deviationPart] = $matches;

        return new self(SafeCast::toFloat($valuePart), SafeCast::toFloat($deviationPart));
    }

    /**
     * Creates a new instance holding the arithmetic mean of both values and the
     * arithmetic mean of both deviations.
     */
    public static function average(self $a, self $b): self
    {
        $meanValue = ($a->value + $b->value) / 2;
        $meanDeviation = ($a->deviation + $b->deviation) / 2;

        return new self($meanValue, $meanDeviation);
    }
}
5 changes: 5 additions & 0 deletions src/InterOp/InterOpException.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?php declare(strict_types=1);

namespace MLL\Utils\InterOp;

/**
 * Exception type of the InterOp namespace; it carries no additional state or behaviour.
 */
class InterOpException extends \Exception {}
95 changes: 95 additions & 0 deletions src/InterOp/InterOpResult.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
<?php declare(strict_types=1);

namespace MLL\Utils\InterOp;

class InterOpResult
{
public LaneResult $resultsForRead1;

public LaneResult $resultsForRead2;
Comment on lines +7 to +9

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

resultsForRead1/resultsForRead2 → resultsForFirstDataRead/resultsForLastDataRead. For i100 these are Read 3/4, not Read 1/2.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Stimmt — bei i100 sind es Read 3/4. Vorschlag:

public LaneResult $resultsForFirstDataRead;
public LaneResult $resultsForLastDataRead;
public RunResult $resultsForRun;

Sollen wir umbenennen?


public RunResult $resultsForRun;

/**
 * Builds the results of both data reads and of the whole run from decoded InterOp data.
 *
 * The two data reads are determined from $summary, their rows are looked up in
 * $reads, and the run result is derived from both read results plus the
 * "Non-indexed" summary row.
 *
 * @param array<int, array<string, string>> $summary interop summary rows
 * @param array<string, array<int, array<string, string>>> $reads interop reads keyed by read name
 *
 * @throws InterOpException when the data reads or the "Non-indexed" row cannot be determined, or a data read has no rows
 */
public function __construct(array $summary, array $reads)
{
    [$dataRead1Tag, $dataRead2Tag] = self::findDataReadTags($summary);

    // Both reads are validated before the summary lookup, so the order of possible errors stays the same
    $read1Row = self::firstRowOfRead($reads, $dataRead1Tag);
    $read2Row = self::firstRowOfRead($reads, $dataRead2Tag);

    $nonIndexedRow = self::findNonIndexedRow($summary);

    $this->resultsForRead1 = LaneResult::fromInterOpRow($read1Row);
    $this->resultsForRead2 = LaneResult::fromInterOpRow($read2Row);
    $this->resultsForRun = RunResult::fromLaneResults($this->resultsForRead1, $this->resultsForRead2, $nonIndexedRow);
}

/**
 * Returns the first row stored for the given read.
 *
 * First row per read key is the Surface "-" aggregate across all tiles.
 *
 * @param array<string, array<int, array<string, string>>> $reads interop reads keyed by read name
 *
 * @return array<string, string>
 *
 * @throws InterOpException when the read is missing from $reads or has no rows
 */
private static function firstRowOfRead(array $reads, string $readTag): array
{
    $rows = $reads[$readTag] ?? null;
    if ($rows === null || $rows === []) {
        throw new InterOpException("Reads data missing or empty for: {$readTag}.");
    }

    // array_values() makes the lookup independent of the actual keys: a non-empty
    // array is not guaranteed to have an entry at key 0
    return array_values($rows)[0];
}

/**
 * Finds the two data (non-index) reads among the summary entries.
 *
 * Index reads have "(I)" suffix in their Level field (e.g. "Read 2 (I)").
 * Data reads lack this suffix. The "Non-indexed" and "Total" rows are skipped.
 * The data reads are returned in the order they appear in the summary, e.g.:
 * - MiSeq single-index: Read 1, Read 3
 * - MiSeq dual-index: Read 1, Read 4
 * - i100: Read 3, Read 4
 *
 * @param array<int, array<string, string>> $summary
 *
 * @return array{0: string, 1: string} Level of the first and of the second data read
 *
 * @throws InterOpException when an entry has no "Level" key or the summary does not contain exactly two data reads
 */
public static function findDataReadTags(array $summary): array
{
    // The number of summary rows depends on the indexing type (single or dual) and the sequencing type
    // (single-end or paired-end). Possible levels are the reads (Read 1 to Read 4), Non-indexed and Total.
    $dataReadTags = [];
    foreach ($summary as $entry) {
        // Fail with the domain exception instead of a warning or TypeError on malformed entries
        $level = $entry['Level'] ?? null;
        if ($level === null) {
            throw new InterOpException('Missing InterOp summary key: Level.');
        }

        if ($level === 'Non-indexed' || $level === 'Total') {
            continue;
        }

        // Index reads carry the "(I)" suffix, everything else is a data read
        if (substr($level, -3) !== '(I)') { // @phpstan-ignore-line theCodingMachineSafe.function (safe from PHP 8.0)
            $dataReadTags[] = $level;
        }
    }

    $count = count($dataReadTags);
    if ($count === 0 || $count > 2) {
        throw new InterOpException("Unlogic behaviour. Expect 2 data reads, found {$count}.");
    }
    if ($count === 1) {
        throw new InterOpException('Single-End Sequencing results are not implemented.');
    }

    return [$dataReadTags[0], $dataReadTags[1]];
}

/**
 * Returns the first summary row whose Level is exactly "Non-indexed".
 *
 * @param array<int, array<string, string>> $summary
 *
 * @return array<string, string>
 *
 * @throws InterOpException when the summary contains no such row
 */
public static function findNonIndexedRow(array $summary): array
{
    foreach ($summary as $row) {
        if ($row['Level'] !== 'Non-indexed') {
            continue;
        }

        return $row;
    }

    throw new InterOpException('No "Non-indexed" summary row found.');
}
}
119 changes: 119 additions & 0 deletions src/InterOp/LaneResult.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
<?php declare(strict_types=1);

namespace MLL\Utils\InterOp;

use MLL\Utils\SafeCast;

/**
 * Result of a single read: cluster statistics, quality metrics, intensity and yield.
 */
class LaneResult
{
    /** Keys that must be present in a row passed to fromInterOpRow(). */
    private const REQUIRED_ROW_KEYS = [
        'Density',
        'Cluster PF',
        'Aligned',
        'Error',
        'Intensity C1',
        'Legacy Phasing/Prephasing Rate',
        'Reads',
        'Reads PF',
        '%>=Q30',
        'Yield',
    ];

    public ClusterStatistic $clusterStatistic;

    public SequencingQualityControl $sequencingQualityControl;

    /** Value part of the "Intensity C1" column, converted to int. */
    public int $intensityCycle;

    /** Yield in kilobases (JSON float in gigabases * 1_000_000). */
    public int $yield;

    public function __construct(ClusterStatistic $clusterStatistic, SequencingQualityControl $sequencingQualityControl, int $intensityCycle, int $yield)
    {
        $this->clusterStatistic = $clusterStatistic;
        $this->sequencingQualityControl = $sequencingQualityControl;
        $this->intensityCycle = $intensityCycle;
        $this->yield = $yield;
    }

    /**
     * Builds a LaneResult from the first row (Surface "-") of a reads entry.
     *
     * NOTE(review): the SafeCast conversions are defined elsewhere; how they react
     * to non-numeric input is not visible from this class.
     *
     * @param array<string, string> $row single lane row from interop reads data
     *
     * @throws InterOpException when a required key is missing, a "value +/- deviation" column is not parseable,
     *                          or the phasing column is malformed or contains nan
     */
    public static function fromInterOpRow(array $row): self
    {
        $missingKeys = array_diff(self::REQUIRED_ROW_KEYS, array_keys($row));
        if ($missingKeys !== []) {
            throw new InterOpException('Missing InterOp row keys: ' . implode(', ', $missingKeys) . '.');
        }

        // Parsed in this order so the first reported problem stays deterministic
        $density = self::parseRequiredDeviation($row, 'Density');
        $clusterPassingFilter = self::parseRequiredDeviation($row, 'Cluster PF');
        $aligned = self::parseRequiredDeviation($row, 'Aligned');
        $error = self::parseRequiredDeviation($row, 'Error');
        $intensityCycle = self::parseRequiredDeviation($row, 'Intensity C1');

        $phasingParts = explode(' / ', $row['Legacy Phasing/Prephasing Rate']);
        if (count($phasingParts) !== 2) {
            throw new InterOpException("Expected 'phasing / prephasing' format, got: {$row['Legacy Phasing/Prephasing Rate']}.");
        }
        if ($phasingParts[0] === 'nan' || $phasingParts[1] === 'nan') {
            throw new InterOpException('Unexpected nan phasing rate for data read.');
        }

        $clusterStatistic = new ClusterStatistic(
            $density,
            $clusterPassingFilter,
            SafeCast::toFloat($row['Reads']),
            SafeCast::toFloat($row['Reads PF'])
        );

        $sequencingQualityControl = new SequencingQualityControl(
            SafeCast::toFloat($row['%>=Q30']),
            SafeCast::toFloat($phasingParts[0]),
            SafeCast::toFloat($phasingParts[1]),
            $aligned,
            $error
        );

        // Round before the integer conversion: multiplying a decimal fraction by 1000000 in binary
        // floating point can end up slightly beside the intended whole number
        $yieldKilobases = round(SafeCast::toFloat($row['Yield']) * 1000000);

        return new self(
            $clusterStatistic,
            $sequencingQualityControl,
            SafeCast::toInt($intensityCycle->value),
            SafeCast::toInt($yieldKilobases)
        );
    }

    /**
     * Combines two results into one.
     *
     * Deviation values, q30, phasing and prephasing are averaged; cluster counts
     * and yields are summed; the intensity is the integer division of the sum by 2.
     */
    public static function aggregate(self $a, self $b): self
    {
        $clusterStatistic = new ClusterStatistic(
            DeviationValue::average($a->clusterStatistic->density, $b->clusterStatistic->density),
            DeviationValue::average($a->clusterStatistic->clusterPassingFilter, $b->clusterStatistic->clusterPassingFilter),
            $a->clusterStatistic->clusterCountMillions + $b->clusterStatistic->clusterCountMillions,
            $a->clusterStatistic->clusterCountPassingFilterMillions + $b->clusterStatistic->clusterCountPassingFilterMillions
        );

        $sequencingQualityControl = new SequencingQualityControl(
            ($a->sequencingQualityControl->q30 + $b->sequencingQualityControl->q30) / 2,
            ($a->sequencingQualityControl->phasing + $b->sequencingQualityControl->phasing) / 2,
            ($a->sequencingQualityControl->prephasing + $b->sequencingQualityControl->prephasing) / 2,
            DeviationValue::average($a->sequencingQualityControl->aligned, $b->sequencingQualityControl->aligned),
            DeviationValue::average($a->sequencingQualityControl->error, $b->sequencingQualityControl->error)
        );

        return new self(
            $clusterStatistic,
            $sequencingQualityControl,
            intdiv($a->intensityCycle + $b->intensityCycle, 2),
            $a->yield + $b->yield
        );
    }

    /**
     * Parses the "value +/- deviation" column $key of $row and fails when it is not parseable.
     *
     * @param array<string, string> $row row that is known to contain $key
     *
     * @throws InterOpException when DeviationValue::parse() returns null for the column
     */
    private static function parseRequiredDeviation(array $row, string $key): DeviationValue
    {
        $parsed = DeviationValue::parse($row[$key]);
        if (! $parsed instanceof DeviationValue) {
            throw new InterOpException("Expected parseable {$key}, got: {$row[$key]}.");
        }

        return $parsed;
    }
}
37 changes: 37 additions & 0 deletions src/InterOp/MetaInfo.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<?php declare(strict_types=1);

namespace MLL\Utils\InterOp;

use function Safe\json_decode;

/**
 * Meta information decoded from a JSON document: run parameters, InterOp results and a UNC path.
 *
 * @phpstan-import-type MiSeqParams from RunParameters
 * @phpstan-import-type I100Params from RunParameters
 *
 * @phpstan-type MetaInfoPayload array{
 *     runParameters: array{RunParameters: MiSeqParams|I100Params},
 *     interop: array{
 *         summary: array<int, array<string, string>>,
 *         reads: array<string, array<int, array<string, string>>>,
 *     },
 *     uncPath: string,
 * }
 */
class MetaInfo
{
    public RunParameters $runParameters;

    public InterOpResult $interOpResult;

    public string $uncPath;

    /**
     * Decodes the JSON document and builds the typed objects from its sections.
     *
     * The decoded structure is only annotated as MetaInfoPayload, it is not checked here.
     *
     * @param string $json JSON document; expected to have the MetaInfoPayload shape
     */
    public function __construct(string $json)
    {
        /** @var MetaInfoPayload $payload */
        $payload = json_decode($json, true);

        $this->runParameters = new RunParameters($payload['runParameters']['RunParameters']);

        $interop = $payload['interop'];
        $this->interOpResult = new InterOpResult($interop['summary'], $interop['reads']);

        $this->uncPath = $payload['uncPath'];
    }
}
Loading
Loading