diff --git a/docs/guide/syntax.md b/docs/guide/syntax.md index 20a2e149..c4c31483 100644 --- a/docs/guide/syntax.md +++ b/docs/guide/syntax.md @@ -460,7 +460,52 @@ The underscore notation `[_]` is useful on mobile devices or in editors without #### List Item Attributes (Extension) -Attributes can be added to list items on the following indented line: +Attributes can be attached to a list item by placing them in curly braces +**immediately after the marker**, with no space before the brace (per the djot +proposal [jgm/djot#262](https://github.com/jgm/djot/pull/262)): + +**Input:** +```djot ++{.blue} A blue list item. ++{#id1 .highlight} Item with id and class. +1.{data-value="test"} Numbered item with a data attribute. +``` + + + + + + +Works with every marker type (bullet, ordered, parenthesized, roman, alpha, and +task lists). A **space** between the marker and the brace changes the meaning: +`+ {.blue}` makes the `{.blue}` ordinary item content (a block attribute for the +following block inside the item), not an attribute on the `
<li>`. + +::: tip Soft-deprecated alternative +Attributes can also be added on the following indented line. This older form +still attaches to the `
  • `, but the marker-adjacent form above is now the +preferred syntax. +::: **Input:** ```djot diff --git a/src/Parser/Block/ListParser.php b/src/Parser/Block/ListParser.php index 7e497b8c..75924365 100644 --- a/src/Parser/Block/ListParser.php +++ b/src/Parser/Block/ListParser.php @@ -46,31 +46,38 @@ class ListParser * * @param string $line The line to parse * - * @return array{type: string, marker: string, content: string, start?: int, checked?: bool, taskMarker?: string, style?: string, marker_indent?: int, ambiguous?: bool, alpha_start?: int, alpha_style?: string}|null + * Attributes in curly braces that immediately follow the marker (no space + * before the brace) attach to the list item itself, per the djot proposal + * jgm/djot#262, e.g. `+{.blue} text` or `(a){.bar} text`. They are returned + * raw (without the braces) in the `attrs` key; a space before the brace + * instead makes it ordinary item content (a block attribute), so it is not + * captured here. + * + * @return array{type: string, marker: string, content: string, start?: int, checked?: bool, taskMarker?: string, style?: string, marker_indent?: int, ambiguous?: bool, alpha_start?: int, alpha_style?: string, attrs?: string}|null */ public function parseListItemMarker(string $line): ?array { // Task list: - [.] where . is any single character // Standard markers: ' ' (unchecked), 'x'/'X' (checked) // Extended markers: '-' (cancelled), '/' (partial), '>' (deferred), etc. - if (preg_match('/^([-*+]) +\[(.)\] +(.*)$/', $line, $matches)) { - $taskMarker = $matches[2]; + if (preg_match('/^(?[-*+]) +\[(?.)\](?:\{(?[^{}]+)\})? 
+(?.*)$/', $line, $matches)) { + $taskMarker = $matches['task']; - return [ + return $this->withMarkerAttrs([ 'type' => ListBlock::TYPE_TASK, - 'marker' => $matches[1], - 'content' => $matches[3], + 'marker' => $matches['marker'], + 'content' => $matches['content'], 'checked' => strtolower($taskMarker) === 'x', 'taskMarker' => $taskMarker, - ]; + ], $matches['attrs']); } // Bullet list: -, +, or * // A marker followed by a space (or end of line) is a valid item; a bare // marker alone on its line is an empty item (djot allows marker + newline). - if (preg_match('/^([-*+])(?: +(.*))?$/', $line, $matches)) { + if (preg_match('/^([-*+])(?:\{([^{}]+)\})?(?: +(.*))?$/', $line, $matches)) { $marker = $matches[1]; - $content = $matches[2] ?? ''; + $content = $matches[3] ?? ''; // Don't treat as list if content ends with same marker (likely emphasis) if ($marker === '*' || $marker === '-') { @@ -83,34 +90,34 @@ public function parseListItemMarker(string $line): ?array } } - return [ + return $this->withMarkerAttrs([ 'type' => ListBlock::TYPE_BULLET, 'marker' => $marker, 'content' => $content, - ]; + ], $matches[2] ?? ''); } // Ordered list: 1. or 1) or (1) - bare marker (no content) is an empty item. - if (preg_match('/^(\d+)([.)])(?: +(.*))?$/', $line, $matches)) { - return [ + if (preg_match('/^(\d+)([.)])(?:\{([^{}]+)\})?(?: +(.*))?$/', $line, $matches)) { + return $this->withMarkerAttrs([ 'type' => ListBlock::TYPE_ORDERED, 'marker' => $matches[2], - 'content' => $matches[3] ?? '', + 'content' => $matches[4] ?? '', 'start' => (int)$matches[1], - ]; + ], $matches[3] ?? ''); } - if (preg_match('/^\((\d+)\)(?: +(.*))?$/', $line, $matches)) { - return [ + if (preg_match('/^\((\d+)\)(?:\{([^{}]+)\})?(?: +(.*))?$/', $line, $matches)) { + return $this->withMarkerAttrs([ 'type' => ListBlock::TYPE_ORDERED, 'marker' => '()', - 'content' => $matches[2] ?? '', + 'content' => $matches[3] ?? '', 'start' => (int)$matches[1], - ]; + ], $matches[2] ?? 
''); } // Roman numeral ordered list - if (preg_match('/^([ivxlcdmIVXLCDM]+)([.)])(?: +(.*))?$/', $line, $matches)) { + if (preg_match('/^([ivxlcdmIVXLCDM]+)([.)])(?:\{([^{}]+)\})?(?: +(.*))?$/', $line, $matches)) { $roman = $matches[1]; $isLower = ctype_lower($roman[0]); $start = $this->romanToInt(strtoupper($roman)); @@ -118,7 +125,7 @@ public function parseListItemMarker(string $line): ?array $result = [ 'type' => ListBlock::TYPE_ORDERED, 'marker' => $matches[2], - 'content' => $matches[3] ?? '', + 'content' => $matches[4] ?? '', 'start' => $start, 'style' => $isLower ? 'i' : 'I', ]; @@ -129,11 +136,11 @@ public function parseListItemMarker(string $line): ?array $result['alpha_style'] = $isLower ? 'a' : 'A'; } - return $result; + return $this->withMarkerAttrs($result, $matches[3] ?? ''); } } - if (preg_match('/^\(([ivxlcdmIVXLCDM]+)\)(?: +(.*))?$/', $line, $matches)) { + if (preg_match('/^\(([ivxlcdmIVXLCDM]+)\)(?:\{([^{}]+)\})?(?: +(.*))?$/', $line, $matches)) { $roman = $matches[1]; $isLower = ctype_lower($roman[0]); $start = $this->romanToInt(strtoupper($roman)); @@ -141,7 +148,7 @@ public function parseListItemMarker(string $line): ?array $result = [ 'type' => ListBlock::TYPE_ORDERED, 'marker' => '()', - 'content' => $matches[2] ?? '', + 'content' => $matches[3] ?? '', 'start' => $start, 'style' => $isLower ? 'i' : 'I', ]; @@ -152,37 +159,37 @@ public function parseListItemMarker(string $line): ?array $result['alpha_style'] = $isLower ? 'a' : 'A'; } - return $result; + return $this->withMarkerAttrs($result, $matches[2] ?? ''); } } // Alpha ordered list: a. or A. 
or a) or A) or (a) or (A) - if (preg_match('/^([a-zA-Z])([.)])(?: +(.*))?$/', $line, $matches)) { + if (preg_match('/^([a-zA-Z])([.)])(?:\{([^{}]+)\})?(?: +(.*))?$/', $line, $matches)) { $letter = $matches[1]; $isLower = ctype_lower($letter); $start = ord(strtolower($letter)) - ord('a') + 1; - return [ + return $this->withMarkerAttrs([ 'type' => ListBlock::TYPE_ORDERED, 'marker' => $matches[2], - 'content' => $matches[3] ?? '', + 'content' => $matches[4] ?? '', 'start' => $start, 'style' => $isLower ? 'a' : 'A', - ]; + ], $matches[3] ?? ''); } - if (preg_match('/^\(([a-zA-Z])\)(?: +(.*))?$/', $line, $matches)) { + if (preg_match('/^\(([a-zA-Z])\)(?:\{([^{}]+)\})?(?: +(.*))?$/', $line, $matches)) { $letter = $matches[1]; $isLower = ctype_lower($letter); $start = ord(strtolower($letter)) - ord('a') + 1; - return [ + return $this->withMarkerAttrs([ 'type' => ListBlock::TYPE_ORDERED, 'marker' => '()', - 'content' => $matches[2] ?? '', + 'content' => $matches[3] ?? '', 'start' => $start, 'style' => $isLower ? 'a' : 'A', - ]; + ], $matches[2] ?? ''); } // Definition list: : @@ -197,6 +204,26 @@ public function parseListItemMarker(string $line): ?array return null; } + /** + * Attach the raw marker-adjacent attribute string to a parsed marker result. + * + * Empty strings (no `{...}` after the marker) are dropped so the `attrs` key + * is only present when the item actually carries marker attributes. 
+ * + * @param array{type: string, marker: string, content: string, start?: int, checked?: bool, taskMarker?: string, style?: string, marker_indent?: int, ambiguous?: bool, alpha_start?: int, alpha_style?: string, attrs?: string} $result The parsed marker result + * @param string $attrs Raw attribute string captured after the marker (no braces) + * + * @return array{type: string, marker: string, content: string, start?: int, checked?: bool, taskMarker?: string, style?: string, marker_indent?: int, ambiguous?: bool, alpha_start?: int, alpha_style?: string, attrs?: string} + */ + protected function withMarkerAttrs(array $result, string $attrs): array + { + if ($attrs !== '') { + $result['attrs'] = $attrs; + } + + return $result; + } + /** * Disambiguate between roman numeral and alphabetical list styles. * diff --git a/src/Parser/BlockParser.php b/src/Parser/BlockParser.php index bbade6db..86c60e64 100644 --- a/src/Parser/BlockParser.php +++ b/src/Parser/BlockParser.php @@ -983,7 +983,7 @@ private function parseBlocksImpl(Node $parent, array $lines, int $indent): void if ( $marker !== '' && !str_contains(self::BLOCK_MARKER_CHARS, $marker) - && !($marker >= 'A' && preg_match('/^[ \t]*[A-Za-z]+[.)]([ \t]|$)/', $line) === 1) + && !($marker >= 'A' && preg_match('/^[ \t]*[A-Za-z]+[.)](?:\{[^{}]+\})?([ \t]|$)/', $line) === 1) ) { $i += $this->tryParseParagraph($parent, $lines, $i); @@ -2059,6 +2059,14 @@ protected function tryParseList(Node $parent, array $lines, int $start): ?int // Ordered list marker width = length of trimmed line - length of content // Examples: "1. " = 3, "10. " = 4, "(1) " = 4, "(10) " = 5 $markerWidth = strlen($trimmedLine) - strlen($itemContent); + // Marker-adjacent attributes (`1.{.x} item`) sit between the marker + // and the content but are not part of the content-indent column, so + // exclude them from the marker width. 
+ if (isset($itemInfo['attrs'])) { + /** @var string $markerAttrsRaw */ + $markerAttrsRaw = $itemInfo['attrs']; + $markerWidth -= strlen('{' . $markerAttrsRaw . '}'); + } } else { // Bullet and task lists use 2-char base marker ("- " or "* " or "+ ") $markerWidth = 2; @@ -2133,7 +2141,15 @@ protected function tryParseList(Node $parent, array $lines, int $start): ?int // is parsed as a standard djot block attribute for the following // block inside the item. This keeps the list / item intact instead // of terminating it on a mid-item {...} line. + // Marker-adjacent attributes (`+{.blue} item`, jgm/djot#262) attach to + // the
  • . They seed the item attributes; the soft-deprecated + // separate-line form below merges on top of them. $itemAttributes = []; + if (isset($itemInfo['attrs'])) { + /** @var string $markerAttrsRaw */ + $markerAttrsRaw = $itemInfo['attrs']; + $itemAttributes = AttributeParser::parseOrdered($markerAttrsRaw); + } $parseItemLinesAsBlocks = false; if ($i < $count) { $potentialAttrLine = $lines[$i]; @@ -2241,7 +2257,7 @@ protected function tryParseList(Node $parent, array $lines, int $start): ?int } } } else { - $itemAttributes = AttributeParser::parseOrdered($attrMatch[1]); + $itemAttributes = AttributeParser::parseAndMerge($itemAttributes, $attrMatch[1]); $i++; } } @@ -3578,14 +3594,21 @@ protected function startsNewBlockSignificant(string $line, ?array $lines = null, // is an opt-in markdown/chat-like mode, so a line-leading marker // interrupts without a blank line (it would otherwise drop a // genuine single-line or lazily-wrapped list). - if (isset($line[1]) && $line[1] === ' ') { + // Marker-adjacent attributes (`-{.x} item`) sit between the marker + // and the separating space, so skip an optional `{...}` block before + // looking for the content. + $afterMarker = substr($line, 1); + if (preg_match('/^\{[^{}]+\}/', $afterMarker, $attrMatch) === 1) { + $afterMarker = substr($afterMarker, strlen($attrMatch[0])); + } + if (isset($afterMarker[0]) && $afterMarker[0] === ' ') { // An empty list item (marker followed by only whitespace) // cannot interrupt an OPEN PARAGRAPH (CommonMark/djot rule) - // it folds into the paragraph instead of opening a stray empty //
  • . Paragraph interruption is the only caller that passes // $lines for lookahead; in every other context (heading // termination, etc.) an empty marker still starts a new block. - if ($lines === null || trim(substr($line, 2)) !== '') { + if ($lines === null || trim(substr($afterMarker, 1)) !== '') { return true; // Unordered list } } @@ -3617,7 +3640,10 @@ protected function startsNewBlockSignificant(string $line, ?array $lines = null, // a non-space char after the marker; other contexts keep starting // a block on a bare empty marker. if ($first === '1') { - $pattern = $lines === null ? '/^1[.)]\s/' : '/^1[.)]\s+\S/'; + // Allow optional marker-adjacent attributes: `1.{.x} item`. + $pattern = $lines === null + ? '/^1[.)](?:\{[^{}]+\})?\s/' + : '/^1[.)](?:\{[^{}]+\})?\s+\S/'; return preg_match($pattern, $line) === 1; } diff --git a/tests/TestCase/ListItemMarkerAttributesTest.php b/tests/TestCase/ListItemMarkerAttributesTest.php new file mode 100644 index 00000000..9b2123de --- /dev/null +++ b/tests/TestCase/ListItemMarkerAttributesTest.php @@ -0,0 +1,186 @@ +` element: + * + * +{.blue} A blue list item. + * (a){.bar} Ordered list item with an attribute. + * + * A space between the marker and the brace means the `{...}` is item *content* + * (and thus a block attribute for the following block), NOT a list-item + * attribute. That distinction is what these tests pin down. + * + * The older separate-indented-line form (`{...}` on its own line under the item) + * is soft-deprecated but still attaches to the `
  • ` for back-compat; the + * regression cases below guard that. + */ +class ListItemMarkerAttributesTest extends TestCase +{ + protected DjotConverter $converter; + + protected function setUp(): void + { + $this->converter = new DjotConverter(); + } + + // ==================== Bullet markers ==================== + + public function testBulletPlusClassAttribute(): void + { + $result = $this->converter->convert("+{.blue} A blue list item.\n"); + + $this->assertSame( + "\n", + $result, + ); + } + + public function testBulletHyphenIdAttribute(): void + { + $result = $this->converter->convert("-{#foo} item\n"); + + $this->assertSame( + "\n", + $result, + ); + } + + public function testBulletKeyValueAttribute(): void + { + $result = $this->converter->convert("-{data-x=\"y\"} item\n"); + + $this->assertSame( + "\n", + $result, + ); + } + + public function testBulletMultipleAttributes(): void + { + $result = $this->converter->convert("-{#foo .bar .baz} item\n"); + + $this->assertSame( + "\n", + $result, + ); + } + + public function testBareBulletMarkerWithAttribute(): void + { + $result = $this->converter->convert("+{.blue}\n"); + + $this->assertSame( + "\n", + $result, + ); + } + + // ==================== Ordered markers ==================== + + public function testOrderedNumericClassAttribute(): void + { + $result = $this->converter->convert("1.{.cls} item\n"); + + $this->assertSame( + "
      \n
    1. \nitem\n
    2. \n
    \n", + $result, + ); + } + + public function testOrderedParenAlphaClassAttribute(): void + { + $result = $this->converter->convert("(a){.bar} Ordered list item with an attribute.\n"); + + $this->assertSame( + "
      \n
    1. \nOrdered list item with an attribute.\n
    2. \n
    \n", + $result, + ); + } + + public function testAlphaDotClassAttribute(): void + { + $result = $this->converter->convert("a.{.cls} item\n"); + + $this->assertSame( + "
      \n
    1. \nitem\n
    2. \n
    \n", + $result, + ); + } + + public function testTaskListMarkerAttribute(): void + { + $result = $this->converter->convert("- [x]{.done} finish it\n"); + + $this->assertStringContainsString('class="done"', $result); + $this->assertStringContainsString('checked', $result); + } + + // ==================== Mixed lists ==================== + + public function testOnlyMarkedItemGetsAttribute(): void + { + $result = $this->converter->convert("- a\n-{.x} b\n- c\n"); + + $this->assertSame( + "\n", + $result, + ); + } + + // ==================== Adjacency rule (space => NOT an item attribute) ==================== + + public function testSpaceBeforeBraceIsNotListItemAttribute(): void + { + // A space between marker and brace means the brace is item content, so it + // must NOT become a class on the
  • . + $result = $this->converter->convert("- {.blue} text\n"); + + $this->assertStringNotContainsString('
  • ', $result); + } + + // ==================== Paragraph interruption (blocksInterruptParagraphs) ==================== + + public function testAttributedBulletInterruptsParagraph(): void + { + $converter = new DjotConverter(blocksInterruptParagraphs: true); + + $result = $converter->convert("para\n-{.x} item\n"); + + // Must interrupt the paragraph and open a list, exactly like `- item` does. + $this->assertStringContainsString('