Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion src/Parser/Block/TableParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ public function isTableRow(string $line): bool
return false;
}

// Trailing whitespace after the closing pipe is insignificant (parity
// with carve-js / carve-rs); strip it before the structural checks.
$line = rtrim($line, " \t");

// Strip row attributes if present (|...|{.class})
$lineWithoutRowAttrs = $this->stripRowAttributes($line);

Expand Down Expand Up @@ -88,7 +92,29 @@ public function extractRowAttributes(string $line): array
*/
public function isSeparatorRow(string $line): bool
{
    // Trailing whitespace after the closing pipe is insignificant.
    $line = rtrim($line, " \t");

    // A separator row has to be wrapped in pipes (`|...|`), so it needs at
    // least two characters and a pipe at both ends.
    $len = strlen($line);
    if ($len < 2 || $line[0] !== '|' || $line[$len - 1] !== '|') {
        return false;
    }

    // Validate cell by cell instead of with one loose character-class regex
    // (`/^\|[\s:|-]+\|$/` plus "contains a dash"): that looser check also
    // accepted rows such as `|---||`, and returning on it first would leave
    // the stricter validation below unreachable.
    //
    // Every cell must be a delimiter cell: optional whitespace, an optional
    // leading ':', one or more '-', an optional trailing ':', optional
    // whitespace. An EMPTY cell (`|---||`) or any other content disqualifies
    // the row -- it is then an ordinary data row (matches carve-js/carve-rs).
    $cells = $this->parseTableCells($line);
    if ($cells === []) {
        return false;
    }
    foreach ($cells as $cell) {
        if (preg_match('/^\s*:?-+:?\s*$/', $cell) !== 1) {
            return false;
        }
    }

    return true;
}

/**
Expand Down Expand Up @@ -152,6 +178,9 @@ public function parseTableCells(string $line): array
// Strip row attributes first
$line = $this->stripRowAttributes($line);

// Trailing whitespace after the closing pipe is insignificant.
$line = rtrim($line, " \t");

// Remove leading and trailing |
$line = substr($line, 1, -1);

Expand Down Expand Up @@ -357,6 +386,9 @@ public function parseTableCellsRaw(string $line): array
// Strip row attributes first
$line = $this->stripRowAttributes($line);

// Trailing whitespace after the closing pipe is insignificant.
$line = rtrim($line, " \t");

// Must start with | to be a potential table row
if (!str_starts_with($line, '|')) {
return [];
Expand Down
4 changes: 3 additions & 1 deletion src/Parser/BlockParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -2750,7 +2750,9 @@ protected function tryParseTable(Node $parent, array $lines, int $start): ?int
// Strip row attributes for validation (|...|{.class} → |...|)
$lineWithoutRowAttrs = $this->tableParser->stripRowAttributes($currentLine);

if (!preg_match('/^\|.*\|$/', $lineWithoutRowAttrs)) {
// Trailing whitespace after the closing pipe is insignificant
// (parity with carve-js / carve-rs).
if (!preg_match('/^\|.*\|[ \t]*$/', $lineWithoutRowAttrs)) {
break;
}

Expand Down
56 changes: 56 additions & 0 deletions tests/TestCase/TableDelimiterRowTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
<?php

declare(strict_types=1);

namespace Djot\Test\TestCase;

use Djot\DjotConverter;
use PHPUnit\Framework\TestCase;

/**
 * Edge cases around the table delimiter (separator) row.
 *
 * Two rules are pinned down here:
 *
 * - whitespace following the closing pipe of a row does not matter;
 * - a delimiter row containing an empty cell (|---||) does not count as a
 *   delimiter row at all.
 *
 * Ported from carve-php (parity with carve-js / carve-rs).
 */
class TableDelimiterRowTest extends TestCase
{
    protected DjotConverter $converter;

    protected function setUp(): void
    {
        // Each test gets its own converter instance.
        $this->converter = new DjotConverter();
    }

    public function testSeparatorRowWithTrailingWhitespaceStillPromotesHeader(): void
    {
        // The separator line ends in whitespace after its closing pipe; it
        // must still turn the first row into a header row.
        $source = "| H | G |\n|---| \n| a | b |";

        $html = $this->converter->convert($source);

        self::assertStringContainsString('<table>', $html);
        self::assertStringContainsString('<th>H</th>', $html);
        self::assertStringContainsString('<th>G</th>', $html);
        self::assertStringNotContainsString('<p>', $html);
    }

    public function testDataRowWithTrailingWhitespaceStillParsesAsTable(): void
    {
        // A lone data row followed by whitespace is a table, not a paragraph.
        $source = '| a | ';

        $html = $this->converter->convert($source);

        self::assertStringContainsString('<td>a</td>', $html);
        self::assertStringNotContainsString('<p>', $html);
    }

    public function testSeparatorRowWithEmptyCellIsNotASeparator(): void
    {
        // The second cell of `|---||` is empty, which rules the line out as a
        // delimiter row, so the first row stays a plain data row.
        $source = "| H | G |\n|---||\n| a | b |";

        $html = $this->converter->convert($source);

        self::assertStringNotContainsString('<th>', $html);
        self::assertStringContainsString('<td>H</td>', $html);
    }
}
Loading