Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 12 additions & 89 deletions src/Parser/InlineParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -1369,74 +1369,6 @@ protected function parseSmartQuote(string $text, int $pos, string $quote): strin
return $this->closeSingleQuote;
}

/**
 * Find the matching single-quote closer for a potential opener at $openerPos.
 *
 * Every quote after $openerPos is classified as one of:
 *  - a potential opener: preceded by whitespace, followed by non-whitespace;
 *  - a potential closer: preceded by non-whitespace and followed by
 *    whitespace, punctuation/symbol, or the end of the string;
 *  - an apostrophe (ignored): followed by a digit, preceded by `]` or `)`,
 *    or sitting mid-word.
 *
 * Each closer pairs with the nearest preceding unmatched opener, so nested
 * pairs resolve innermost first. Only the closer for $openerPos is needed,
 * so instead of collecting every opener/closer and pairing them afterwards
 * the scan keeps a count of openers still waiting for a closer and stops as
 * soon as that count drops to zero. This is a single O(n) pass.
 *
 * NOTE(review): neighbours are inspected byte-wise, so a multibyte
 * punctuation character (e.g. an em dash) directly after a quote is not
 * recognised by the `/u` pattern below and the quote is treated as mid-word.
 * This is left unchanged to keep the classification as it was.
 *
 * @param string $text Text to scan (indexed by byte offset).
 * @param int $openerPos Byte offset of the candidate opening quote.
 *
 * @return int|null Byte offset of the matching closer, or null if there is none.
 */
protected function findMatchingSingleQuoteCloser(string $text, int $openerPos): ?int
{
    $length = strlen($text);

    // Openers still waiting for a closer. The candidate at $openerPos is the
    // outermost one, so the closer that brings this to zero is its match.
    $openCount = 1;

    for ($i = $openerPos + 1; $i < $length; $i++) {
        if ($text[$i] !== "'") {
            continue;
        }

        // A neighbour outside the string is read as a space, which makes a
        // quote at the very end of the text eligible as a closer.
        $prevChar = $text[$i - 1] ?? ' ';
        $nextChar = $text[$i + 1] ?? ' ';

        // A quote before a digit is always an apostrophe ('90s).
        if (ctype_digit($nextChar)) {
            continue;
        }

        // A quote directly after ] or ) is never treated as a quote mark.
        if ($prevChar === ']' || $prevChar === ')') {
            continue;
        }

        $nextIsSpace = ctype_space($nextChar);

        if (ctype_space($prevChar)) {
            // After whitespace: an opener when followed by non-whitespace,
            // otherwise a free-standing quote that is neither.
            if (!$nextIsSpace) {
                $openCount++;
            }

            continue;
        }

        // After non-whitespace: a closer only when followed by whitespace,
        // end of string, or punctuation; anything else is mid-word (Jane's).
        if (!$nextIsSpace && preg_match('/^[\p{P}\p{S}]/u', $nextChar) !== 1) {
            continue;
        }

        // This closer consumes the innermost open opener.
        $openCount--;
        if ($openCount === 0) {
            return $i;
        }
    }

    return null;
}

/**
* Build a cache of all single quote opener→closer matches for the text.
*
Expand All @@ -1453,10 +1385,14 @@ protected function buildSingleQuoteMatchCache(string $text): array
}

$length = strlen($text);
$openers = [];
$closers = [];
$matched = [];
$openerStack = [];

// Single pass: collect all potential openers and closers
// Single forward pass: classify each quote and pair a closer with the
// innermost still-open opener via a stack. The stack top is always the
// largest-index unmatched opener seen so far, so popping it reproduces
// the former "nearest preceding unmatched opener" pairing in O(n)
// instead of the previous O(n²) closer-by-opener scan.
for ($i = 0; $i < $length; $i++) {
if ($text[$i] !== "'") {
continue;
Expand Down Expand Up @@ -1493,28 +1429,15 @@ protected function buildSingleQuoteMatchCache(string $text): array
}

if ($prevIsSpace && !$nextIsSpace) {
// Potential opener
$openers[] = $i;
} elseif (!$prevIsSpace && $nextIsSpaceOrPunct) {
// Potential closer
$closers[] = $i;
// Potential opener - push onto the stack of open quotes
$openerStack[] = $i;
} elseif (!$prevIsSpace && $nextIsSpaceOrPunct && $openerStack) {
// Potential closer - pair with the innermost unmatched opener
$matched[array_pop($openerStack)] = $i;
}
// Mid-word quotes are skipped (apostrophes)
}

// Match openers with closers, innermost first
$matched = [];
foreach ($closers as $closer) {
for ($j = count($openers) - 1; $j >= 0; $j--) {
$opener = $openers[$j];
if ($opener < $closer && !isset($matched[$opener])) {
$matched[$opener] = $closer;

break;
}
}
}

return $matched;
}

Expand Down
42 changes: 42 additions & 0 deletions tests/TestCase/Parser/SingleQuoteMatchingTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<?php

declare(strict_types=1);

namespace Djot\Test\TestCase\Parser;

use Djot\DjotConverter;
use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\TestCase;

/**
 * Regression tests for single-quote opener/closer pairing.
 *
 * Each case renders a short input through DjotConverter and checks that the
 * expected curly-quote sequence appears in the resulting HTML, pinning the
 * pairing produced by InlineParser::buildSingleQuoteMatchCache().
 */
class SingleQuoteMatchingTest extends TestCase
{
    /**
     * Named datasets of [input, expected substring of the rendered HTML].
     *
     * @return array<string, array{0: string, 1: string}>
     */
    public static function quoteProvider(): array
    {
        $cases = [];

        // One balanced pair renders as an opening plus a closing curly quote.
        $cases['matched pair'] = ["'hello'", "\u{2018}hello\u{2019}"];

        // An opener-shaped quote without any later closer renders as an apostrophe.
        $cases['lone opener is apostrophe'] = ['say \'what', "say \u{2019}what"];

        // With nesting, the inner pair is closed before the outer one.
        $cases['nested pairs'] = ["'a 'b' c'", "\u{2018}a \u{2018}b\u{2019} c\u{2019}"];

        // A mid-word apostrophe does not disturb the pair that follows it.
        $cases['apostrophe then pair'] = ["it's a 'test'", "it\u{2019}s a \u{2018}test\u{2019}"];

        return $cases;
    }

    /**
     * The rendered HTML must contain the expected quote sequence.
     */
    #[DataProvider('quoteProvider')]
    public function testSingleQuotePairing(string $input, string $expected): void
    {
        $converter = new DjotConverter();
        $rendered = $converter->convert($input);

        self::assertStringContainsString($expected, $rendered);
    }
}
Loading