Skip to content

Commit 5d3bae9

Browse files
committed
resolved copilot comments and added comments to markdown function
1 parent 58bfbac commit 5d3bae9

1 file changed

Lines changed: 55 additions & 18 deletions

File tree

src/powershell/private/core/ConvertTo-ZtMarkdown.ps1

Lines changed: 55 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -31,42 +31,79 @@ function ConvertTo-ZtMarkdown {
3131

3232
$md = $Html
3333

34-
# Convert anchor tags to Markdown links
34+
# Convert <a href="url">text</a> → [text](url)
35+
# <a\s+[^>]*href="([^"]*)"[^>]*> — opening <a> tag with any attributes; captures href value in group 1
36+
# ([^<]*) — captures the visible link text (no nested tags) in group 2
37+
# </a> — closing tag
3538
$md = [regex]::Replace($md, '<a\s+[^>]*href="([^"]*)"[^>]*>([^<]*)</a>', '[$2]($1)')
3639

37-
# Convert ordered list items with sequential numbering
38-
$liCounter = [ref]0
39-
$md = [regex]::Replace($md, '<li[^>]*>', {
40-
$liCounter.Value++
41-
"`n$($liCounter.Value). "
40+
# Process <ol> and <ul> blocks: numbered items for <ol>, '-' bullets for <ul>.
41+
# The counter resets to 0 for each list so separate <ol> blocks both start at 1.
42+
# (?si) — s: dot matches newlines (multi-line list content); i: case-insensitive tags
43+
# <(ol|ul)[^>]*> — opening list tag; captures tag name (ol/ul) in group 1
44+
# (.*?) — lazily captures everything inside the list in group 2
45+
# </\1> — closing tag matching the same tag name captured in group 1 (ol or ul)
46+
$md = [regex]::Replace($md, '(?si)<(ol|ul)[^>]*>(.*?)</\1>', {
47+
param($m)
48+
$isOrdered = $m.Groups[1].Value -ieq 'ol'
49+
$inner = $m.Groups[2].Value
50+
$idx = 0
51+
$liOut = [System.Text.StringBuilder]::new()
52+
$last = 0
53+
# (?si)<li[^>]*>(.*?)</li> — matches each list item; group 1 is the item content
54+
foreach ($li in ([regex]'(?si)<li[^>]*>(.*?)</li>').Matches($inner)) {
55+
$null = $liOut.Append($inner.Substring($last, $li.Index - $last))
56+
$content = $li.Groups[1].Value.Trim()
57+
if ($isOrdered) { $idx++; $null = $liOut.Append("`n$idx. $content") }
58+
else { $null = $liOut.Append("`n- $content") }
59+
$last = $li.Index + $li.Length
60+
}
61+
if ($last -lt $inner.Length) { $null = $liOut.Append($inner.Substring($last)) }
62+
# Remove any <li> or </li> tags not matched above (e.g. malformed HTML without a closing </li>)
63+
$processedInner = [regex]::Replace($liOut.ToString(), '</?li[^>]*>', '')
64+
return $processedInner + "`n"
4265
})
4366

44-
$md = $md -replace '</li>', ''
45-
$md = $md -replace '<[ou]l[^>]*>', ''
46-
$md = $md -replace '</[ou]l>', ''
67+
# <br\s*/?> — self-closing or open <br>, with optional whitespace before the slash
4768
$md = $md -replace '<br\s*/?>', "`n"
69+
70+
# </?p[^>]*> — opening or closing <p> with any attributes
71+
# </?div[^>]*> — opening or closing <div> with any attributes
4872
$md = $md -replace '</?p[^>]*>', "`n"
4973
$md = $md -replace '</?div[^>]*>', "`n"
74+
75+
# <(?:b|strong)[^>]*>([^<]*)</(?:b|strong)> — bold tags wrapping plain text; group 1 = content
76+
# (?:...) is a non-capturing group so $1 refers to the text content, not the tag name
5077
$md = $md -replace '<(?:b|strong)[^>]*>([^<]*)</(?:b|strong)>', '**$1**'
78+
79+
# <(?:i|em)[^>]*>([^<]*)</(?:i|em)> — italic tags wrapping plain text; group 1 = content
5180
$md = $md -replace '<(?:i|em)[^>]*>([^<]*)</(?:i|em)>', '*$1*'
5281

53-
# Strip any remaining HTML tags
82+
# <[^>]+> — any remaining HTML tag: one or more non-'>' characters between angle brackets
5483
$md = $md -replace '<[^>]+>', ''
5584

5685
# Decode HTML entities
57-
$md = $md -replace '&amp;', '&'
58-
$md = $md -replace '&lt;', '<'
59-
$md = $md -replace '&gt;', '>'
60-
$md = $md -replace '&quot;', '"'
61-
$md = $md -replace '&#39;', "'"
62-
$md = $md -replace '&nbsp;', ' '
63-
64-
# Convert bare URLs (not already wrapped in a Markdown link) → [url](url)
86+
$md = $md -replace '&amp;', '&'
87+
$md = $md -replace '&lt;', '<'
88+
$md = $md -replace '&gt;', '>'
89+
$md = $md -replace '&quot;', '"'
90+
$md = $md -replace '&#39;', "'"
91+
$md = $md -replace '&nbsp;', ' '
92+
93+
# Convert bare URLs that are not already inside a Markdown link → [url](url)
94+
# (?<!\() — negative lookbehind: not preceded by '(' (already a Markdown link)
95+
# (https?://[^\s<>"\[\]()]+?) — captures the URL lazily (stops before whitespace or special chars)
96+
# ([.,;]?) — optionally captures a trailing punctuation character
97+
# (?=\s|$) — lookahead: must be followed by whitespace or end of string
6598
$md = [regex]::Replace($md, '(?<!\()(https?://[^\s<>"\[\]()]+?)([.,;]?)(?=\s|$)', '[${1}](${1})${2}')
6699

67100
# Break inline numbered steps onto separate lines: "text 2. Word" → "text\n2. Word"
101+
# (?<=\S) — positive lookbehind: preceded by a non-whitespace character (mid-sentence)
102+
# (\d{1,2})\. — captures 1–2 digit number followed by a literal dot
103+
# ([A-Z]) — captures the uppercase first letter of the next step in group 2 (consumed by the match and re-inserted via $2; not a lookahead)
68104
$md = [regex]::Replace($md, '(?<=\S) (\d{1,2})\. ([A-Z])', "`n" + '$1. $2')
69105

106+
# Trim each line and remove blank lines
70107
$md = ($md -split "`n" | ForEach-Object { $_.Trim() } | Where-Object { $_ -ne '' }) -join "`n"
71108

72109
return $md

0 commit comments

Comments
 (0)