From 7411029850b72a188cf6a42f02f06369304e7418 Mon Sep 17 00:00:00 2001 From: Stephan Meijer Date: Wed, 1 Apr 2026 17:21:42 +0200 Subject: [PATCH 01/14] fix(tooling): add .doc, .html, .rtf extensions to check-duplicates.sh --- bin/check-duplicates.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/check-duplicates.sh b/bin/check-duplicates.sh index d6fa6b0..1849909 100755 --- a/bin/check-duplicates.sh +++ b/bin/check-duplicates.sh @@ -10,7 +10,7 @@ echo "" TMPFILE=$(mktemp) trap 'rm -f "$TMPFILE"' EXIT -find documents/ -type f \( -name "*.odt" -o -name "*.docx" -o -name "*.pdf" -o -name "*.epub" -o -name "*.md" \) -exec sha256sum {} \; | sort >"$TMPFILE" +find documents/ -type f \( -name "*.odt" -o -name "*.docx" -o -name "*.pdf" -o -name "*.epub" -o -name "*.md" -o -name "*.doc" -o -name "*.html" -o -name "*.rtf" \) -exec sha256sum {} \; | sort >"$TMPFILE" DUPLICATES=$(awk '{print $1}' "$TMPFILE" | uniq -d) From 373ec1a8c1deae107eaef895c6fff4f6afc40b02 Mon Sep 17 00:00:00 2001 From: Stephan Meijer Date: Wed, 1 Apr 2026 17:22:16 +0200 Subject: [PATCH 02/14] docs: add new format examples to README and CONTRIBUTING --- CONTRIBUTING.md | 5 +++++ README.md | 6 +++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1b28d27..aa96cdb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -84,6 +84,8 @@ documents/pdf/books/chapter-sample.pdf documents/odt/libreoffice/newsletter.odt documents/html/websites/landing-page.html documents/epub/books/sample-ebook.epub +documents/doc/apache-poi/test-document.doc +documents/rtf/pandoc/basic-formatting.rtf ``` ### Suggested groups @@ -96,6 +98,9 @@ documents/epub/books/sample-ebook.epub - `pandoc` - Pandoc test cases - `libreoffice` - LibreOffice examples - `microsoft` - Microsoft Office examples +- `standard-ebooks` - Standard Ebooks (CC0-licensed EPUB collection) +- `w3c` - W3C HTML test suite files +- `apache-poi` - Apache POI test documents (.doc format) 
Feel free to create new groups as needed! diff --git a/README.md b/README.md index 791112d..db1e9b7 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,10 @@ Examples: documents/markdown/examples/sample.md documents/pdf/academic/paper.pdf documents/odt/libreoffice/newsletter.odt + documents/epub/standard-ebooks/pride-and-prejudice.epub + documents/html/w3c/html5-spec-example.html + documents/doc/apache-poi/test-document.doc + documents/rtf/pandoc/basic-formatting.rtf ``` All attribution and metadata is tracked in [`ATTRIBUTION.json`](ATTRIBUTION.json). The [`ATTRIBUTION.md`](ATTRIBUTION.md) file is autogenerated from the JSON. @@ -55,7 +59,7 @@ Diverse documents with interesting features: - Complex formatting (tables, images, footnotes, equations) - Different languages and scripts - Edge cases and unusual structures -- Various file formats +- Various file formats (DOCX, ODT, Markdown, HTML, EPUB, legacy .doc, RTF) - Real-world examples ## Questions? From f0c11b72521ba1588c67b14d4f03ab423262e084 Mon Sep 17 00:00:00 2001 From: Stephan Meijer Date: Wed, 1 Apr 2026 17:27:08 +0200 Subject: [PATCH 03/14] feat(scrape): add scraper infrastructure with shared utilities --- scrape/__init__.py | 1 + scrape/requirements.txt | 4 + scrape/utils.py | 169 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 174 insertions(+) create mode 100644 scrape/__init__.py create mode 100644 scrape/requirements.txt create mode 100644 scrape/utils.py diff --git a/scrape/__init__.py b/scrape/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/scrape/__init__.py @@ -0,0 +1 @@ + diff --git a/scrape/requirements.txt b/scrape/requirements.txt new file mode 100644 index 0000000..cece3da --- /dev/null +++ b/scrape/requirements.txt @@ -0,0 +1,4 @@ +requests>=2.31.0 +beautifulsoup4>=4.12.0 +lxml>=4.9.0 +python-magic>=0.4.27 diff --git a/scrape/utils.py b/scrape/utils.py new file mode 100644 index 0000000..0494a28 --- /dev/null +++ b/scrape/utils.py @@ -0,0 +1,169 @@ 
+"""Shared utilities for DocSpec document corpus scrapers.""" + +import re +import time +import unicodedata +import urllib.robotparser +from pathlib import Path +from typing import Optional + +import requests + +# ── Rate limiting ───────────────────────────────────────────────────────────── + +_last_request_time: float = 0.0 + + +def rate_limit(delay: float = 2.0) -> None: + """Block until at least `delay` seconds have passed since the last call.""" + global _last_request_time + elapsed = time.monotonic() - _last_request_time + if elapsed < delay: + time.sleep(delay - elapsed) + _last_request_time = time.monotonic() + + +# ── Robots.txt compliance ───────────────────────────────────────────────────── + +_robots_cache: dict = {} + + +def check_robots_txt(url: str, user_agent: str = "*") -> bool: + """Return True if scraping `url` is allowed by robots.txt.""" + from urllib.parse import urlparse + + parsed = urlparse(url) + base = f"{parsed.scheme}://{parsed.netloc}" + if base not in _robots_cache: + rp = urllib.robotparser.RobotFileParser() + rp.set_url(f"{base}/robots.txt") + try: + rp.read() + except Exception: + _robots_cache[base] = None + return True + _robots_cache[base] = rp + rp = _robots_cache[base] + if rp is None: + return True + return rp.can_fetch(user_agent, url) + + +# ── License validation ──────────────────────────────────────────────────────── + +# CC-BY-ND defeats the purpose of conversion testing — reject it +_REJECTED_PATTERNS = ["-ND-", "-ND"] + + +def validate_license(license_id: str) -> bool: + """Return True if license is allowed (not CC-BY-ND, not empty).""" + if not license_id or not license_id.strip(): + return False + for pattern in _REJECTED_PATTERNS: + if pattern in license_id: + return False + return True + + +# ── Filename sanitization ───────────────────────────────────────────────────── + + +def sanitize_filename(name: str) -> str: + """Convert a filename to an ASCII-safe lowercase slug, preserving extension. 
+ + Examples: + "Hello World (2024) — Draft.doc" -> "hello-world-2024-draft.doc" + "Über-Dokument.rtf" -> "uber-dokument.rtf" + """ + stem, _, ext = name.rpartition(".") + if not stem: + stem, ext = ext, "" + + # Normalize unicode (é -> e, ü -> u, em-dash -> nothing, etc.) + stem = unicodedata.normalize("NFKD", stem) + stem = stem.encode("ascii", "ignore").decode("ascii") + + stem = stem.lower() + stem = re.sub(r"[^a-z0-9]+", "-", stem) + stem = re.sub(r"-+", "-", stem).strip("-") + + return f"{stem}.{ext.lower()}" if ext else stem + + +# ── File format validation ──────────────────────────────────────────────────── + +# Magic bytes constants +MAGIC_OLE2 = b"\xd0\xcf\x11\xe0" # .doc WW8 (OLE2 compound binary) +MAGIC_EPUB = b"PK\x03\x04" # .epub (ZIP-based) + + +def validate_file_format(filepath, expected_magic: bytes) -> bool: + """Return True if the file starts with expected_magic bytes.""" + p = Path(filepath) + if not p.exists(): + return False + with open(p, "rb") as f: + return f.read(len(expected_magic)) == expected_magic + + +def validate_rtf(filepath) -> bool: + """Return True if the file starts with {\\rtf (RTF magic prefix).""" + p = Path(filepath) + if not p.exists(): + return False + with open(p, "rb") as f: + return f.read(5).startswith(b"{\\rtf") + + +# ── File size check ─────────────────────────────────────────────────────────── + + +def check_file_size(filepath, min_bytes: int = 100) -> bool: + """Return True if the file exists and is at least min_bytes in size.""" + p = Path(filepath) + return p.exists() and p.stat().st_size >= min_bytes + + +# ── Download with retry ─────────────────────────────────────────────────────── + + +def download_file( + url: str, + dest, + delay: float = 2.0, + max_retries: int = 3, + timeout: int = 30, + session: Optional[requests.Session] = None, +) -> bool: + """Download url to dest with rate limiting and exponential backoff on 429. + + Returns True on success, False on failure. 
+ """ + dest = Path(dest) + dest.parent.mkdir(parents=True, exist_ok=True) + + sess = session or requests.Session() + sess.headers.setdefault( + "User-Agent", + "DocSpec-Corpus-Scraper/1.0 (https://github.com/docspec/documents)", + ) + + for attempt in range(max_retries): + rate_limit(delay) + try: + resp = sess.get(url, timeout=timeout, stream=True) + if resp.status_code == 429: + wait = 2**attempt * delay + print(f" Rate limited (429), waiting {wait:.1f}s…") + time.sleep(wait) + continue + resp.raise_for_status() + with open(dest, "wb") as f: + for chunk in resp.iter_content(chunk_size=8192): + f.write(chunk) + return True + except requests.RequestException as e: + print(f" Download failed (attempt {attempt + 1}/{max_retries}): {e}") + if attempt < max_retries - 1: + time.sleep(2**attempt) + return False From 419bbd4c056b7aafd79fa738987c7744f73184b6 Mon Sep 17 00:00:00 2001 From: Stephan Meijer Date: Wed, 1 Apr 2026 17:36:34 +0200 Subject: [PATCH 04/14] feat(rtf): add RTF corpus from test suites and archives --- ATTRIBUTION.json | 47 +- documents/rtf/libreoffice/165333.rtf | 3 + documents/rtf/libreoffice/165483.rtf | 3 + documents/rtf/libreoffice/165717.rtf | 3 + documents/rtf/libreoffice/165805.rtf | 3 + documents/rtf/libreoffice/background.rtf | 3 + documents/rtf/libreoffice/behind-doc.rtf | 3 + documents/rtf/libreoffice/chtoutline.rtf | 3 + .../rtf/libreoffice/classification-no.rtf | 3 + .../rtf/libreoffice/classification-yes.rtf | 3 + documents/rtf/libreoffice/column-break.rtf | 3 + .../libreoffice/cont-section-pagebreak.rtf | 3 + .../rtf/libreoffice/cp950listleveltext1.rtf | 3 + .../rtf/libreoffice/cp950listleveltext2.rtf | 3 + .../rtf/libreoffice/cp950listleveltext3.rtf | 3 + documents/rtf/libreoffice/default-values.rtf | 3 + documents/rtf/libreoffice/fdo44984.rtf | 3 + documents/rtf/libreoffice/fdo45182.rtf | 3 + documents/rtf/libreoffice/fdo45183.rtf | 3 + documents/rtf/libreoffice/fdo45190.rtf | 3 + documents/rtf/libreoffice/fdo46662.rtf | 3 + 
documents/rtf/libreoffice/fdo47036.rtf | 3 + documents/rtf/libreoffice/fdo47440.rtf | 3 + documents/rtf/libreoffice/fdo48442.rtf | 3 + documents/rtf/libreoffice/fdo49692.rtf | 3 + documents/rtf/libreoffice/fdo49892.rtf | 3 + documents/rtf/libreoffice/fdo49893-3.rtf | 3 + documents/rtf/libreoffice/fdo49893.rtf | 3 + documents/rtf/libreoffice/fdo52052.rtf | 3 + documents/rtf/libreoffice/fdo52066.rtf | 3 + documents/rtf/libreoffice/fdo53556.rtf | 3 + documents/rtf/libreoffice/fdo55525.rtf | 3 + documents/rtf/libreoffice/fdo57708.rtf | 3 + documents/rtf/libreoffice/fdo59953.rtf | 3 + documents/rtf/libreoffice/fdo66565.rtf | 3 + documents/rtf/libreoffice/fdo67365.rtf | 3 + documents/rtf/libreoffice/fdo68291-paste.rtf | 3 + documents/rtf/libreoffice/fdo74599.rtf | 3 + documents/rtf/libreoffice/fdo74823.rtf | 3 + documents/rtf/libreoffice/fdo79319.rtf | 3 + documents/rtf/libreoffice/fdo80742.rtf | 3 + documents/rtf/libreoffice/fdo81033.rtf | 3 + documents/rtf/libreoffice/fdo81944.rtf | 3 + documents/rtf/libreoffice/fdo82071.rtf | 3 + documents/rtf/libreoffice/fdo82114.rtf | 3 + documents/rtf/libreoffice/fdo82512.rtf | 3 + documents/rtf/libreoffice/fdo84685.rtf | 3 + documents/rtf/libreoffice/fdo85179.rtf | 3 + documents/rtf/libreoffice/fdo85812.rtf | 3 + documents/rtf/libreoffice/flip.rtf | 3 + .../rtf/libreoffice/groupshape-notext.rtf | 3 + .../rtf/libreoffice/groupshape-rotation.rtf | 3 + documents/rtf/libreoffice/groupshape.rtf | 3 + documents/rtf/libreoffice/hello.rtf | 3 + .../rtf/libreoffice/hidden-para-separator.rtf | 3 + documents/rtf/libreoffice/ink.rtf | 3 + documents/rtf/libreoffice/lndscpsxn.rtf | 3 + documents/rtf/libreoffice/n695479.rtf | 3 + documents/rtf/libreoffice/n823655.rtf | 3 + documents/rtf/libreoffice/n823675.rtf | 3 + documents/rtf/libreoffice/ole-inline.rtf | 3 + .../libreoffice/para-style-bottom-margin.rtf | 3 + .../paste-first-para-direct-format.rtf | 3 + documents/rtf/libreoffice/posh-leftright.rtf | 3 + documents/rtf/libreoffice/posh-posv.rtf 
| 3 + .../rtf/libreoffice/read-only-protect.rtf | 3 + documents/rtf/libreoffice/sbkeven.rtf | 3 + documents/rtf/libreoffice/sbkodd.rtf | 3 + .../rtf/libreoffice/section-pagebreak.rtf | 3 + documents/rtf/libreoffice/tblrepeat.rtf | 3 + documents/rtf/libreoffice/tdf104016.rtf | 3 + documents/rtf/libreoffice/tdf105511.rtf | 3 + documents/rtf/libreoffice/tdf108055.rtf | 3 + documents/rtf/libreoffice/tdf108943.rtf | 3 + documents/rtf/libreoffice/tdf108947.rtf | 3 + documents/rtf/libreoffice/tdf108951.rtf | 3 + documents/rtf/libreoffice/tdf112211-2.rtf | 3 + documents/rtf/libreoffice/tdf115153.rtf | 3 + documents/rtf/libreoffice/tdf115155.rtf | 3 + documents/rtf/libreoffice/tdf115242.rtf | 3 + documents/rtf/libreoffice/tdf115715.rtf | 3 + documents/rtf/libreoffice/tdf116265.rtf | 3 + documents/rtf/libreoffice/tdf116269.rtf | 3 + documents/rtf/libreoffice/tdf117246.rtf | 3 + documents/rtf/libreoffice/tdf117403.rtf | 3 + documents/rtf/libreoffice/tdf119599.rtf | 3 + documents/rtf/libreoffice/tdf122430.rtf | 3 + documents/rtf/libreoffice/tdf126173.rtf | 3 + documents/rtf/libreoffice/tdf128611.rtf | 3 + documents/rtf/libreoffice/tdf148544.rtf | 3 + documents/rtf/libreoffice/tdf152839.rtf | 3 + documents/rtf/libreoffice/tdf153196.rtf | 3 + documents/rtf/libreoffice/tdf158044.rtf | 3 + documents/rtf/libreoffice/tdf160553.rtf | 3 + documents/rtf/libreoffice/tdf162198.rtf | 3 + documents/rtf/libreoffice/tdf163003.rtf | 3 + documents/rtf/libreoffice/tdf165923.rtf | 3 + documents/rtf/libreoffice/tdf166191.rtf | 3 + documents/rtf/libreoffice/tdf167254.rtf | 3 + documents/rtf/libreoffice/tdf167710.rtf | 3 + documents/rtf/libreoffice/tdf169298.rtf | 3 + documents/rtf/libreoffice/tdf59454.rtf | 3 + documents/rtf/libreoffice/tdf59699.rtf | 3 + documents/rtf/libreoffice/tdf78506.rtf | 3 + documents/rtf/libreoffice/tdf81943.rtf | 3 + documents/rtf/libreoffice/tdf84684.rtf | 3 + documents/rtf/libreoffice/tdf90046.rtf | 3 + documents/rtf/libreoffice/tdf90097.rtf | 3 + 
documents/rtf/libreoffice/tdf90260-par.rtf | 3 + documents/rtf/libreoffice/tdf90315.rtf | 3 + documents/rtf/libreoffice/tdf91684.rtf | 3 + documents/rtf/libreoffice/tdf96308-tabpos.rtf | 3 + documents/rtf/libreoffice/tdf96326.rtf | 3 + documents/rtf/libreoffice/tdf99498.rtf | 3 + .../libreoffice/unbalanced-columns-compat.rtf | 3 + .../rtf/libreoffice/unbalanced-columns.rtf | 3 + documents/rtf/libreoffice/watermark.rtf | 3 + documents/rtf/libreoffice/wrap-distance.rtf | 3 + documents/rtf/pandoc/writer.rtf | 3 + scrape/rtf_gather.py | 539 ++++++++++++++++++ 120 files changed, 936 insertions(+), 4 deletions(-) create mode 100644 documents/rtf/libreoffice/165333.rtf create mode 100644 documents/rtf/libreoffice/165483.rtf create mode 100644 documents/rtf/libreoffice/165717.rtf create mode 100644 documents/rtf/libreoffice/165805.rtf create mode 100644 documents/rtf/libreoffice/background.rtf create mode 100644 documents/rtf/libreoffice/behind-doc.rtf create mode 100644 documents/rtf/libreoffice/chtoutline.rtf create mode 100644 documents/rtf/libreoffice/classification-no.rtf create mode 100644 documents/rtf/libreoffice/classification-yes.rtf create mode 100644 documents/rtf/libreoffice/column-break.rtf create mode 100644 documents/rtf/libreoffice/cont-section-pagebreak.rtf create mode 100644 documents/rtf/libreoffice/cp950listleveltext1.rtf create mode 100644 documents/rtf/libreoffice/cp950listleveltext2.rtf create mode 100644 documents/rtf/libreoffice/cp950listleveltext3.rtf create mode 100644 documents/rtf/libreoffice/default-values.rtf create mode 100644 documents/rtf/libreoffice/fdo44984.rtf create mode 100644 documents/rtf/libreoffice/fdo45182.rtf create mode 100644 documents/rtf/libreoffice/fdo45183.rtf create mode 100644 documents/rtf/libreoffice/fdo45190.rtf create mode 100644 documents/rtf/libreoffice/fdo46662.rtf create mode 100644 documents/rtf/libreoffice/fdo47036.rtf create mode 100644 documents/rtf/libreoffice/fdo47440.rtf create mode 100644 
documents/rtf/libreoffice/fdo48442.rtf create mode 100644 documents/rtf/libreoffice/fdo49692.rtf create mode 100644 documents/rtf/libreoffice/fdo49892.rtf create mode 100644 documents/rtf/libreoffice/fdo49893-3.rtf create mode 100644 documents/rtf/libreoffice/fdo49893.rtf create mode 100644 documents/rtf/libreoffice/fdo52052.rtf create mode 100644 documents/rtf/libreoffice/fdo52066.rtf create mode 100644 documents/rtf/libreoffice/fdo53556.rtf create mode 100644 documents/rtf/libreoffice/fdo55525.rtf create mode 100644 documents/rtf/libreoffice/fdo57708.rtf create mode 100644 documents/rtf/libreoffice/fdo59953.rtf create mode 100644 documents/rtf/libreoffice/fdo66565.rtf create mode 100644 documents/rtf/libreoffice/fdo67365.rtf create mode 100644 documents/rtf/libreoffice/fdo68291-paste.rtf create mode 100644 documents/rtf/libreoffice/fdo74599.rtf create mode 100644 documents/rtf/libreoffice/fdo74823.rtf create mode 100644 documents/rtf/libreoffice/fdo79319.rtf create mode 100644 documents/rtf/libreoffice/fdo80742.rtf create mode 100644 documents/rtf/libreoffice/fdo81033.rtf create mode 100644 documents/rtf/libreoffice/fdo81944.rtf create mode 100644 documents/rtf/libreoffice/fdo82071.rtf create mode 100644 documents/rtf/libreoffice/fdo82114.rtf create mode 100644 documents/rtf/libreoffice/fdo82512.rtf create mode 100644 documents/rtf/libreoffice/fdo84685.rtf create mode 100644 documents/rtf/libreoffice/fdo85179.rtf create mode 100644 documents/rtf/libreoffice/fdo85812.rtf create mode 100644 documents/rtf/libreoffice/flip.rtf create mode 100644 documents/rtf/libreoffice/groupshape-notext.rtf create mode 100644 documents/rtf/libreoffice/groupshape-rotation.rtf create mode 100644 documents/rtf/libreoffice/groupshape.rtf create mode 100644 documents/rtf/libreoffice/hello.rtf create mode 100644 documents/rtf/libreoffice/hidden-para-separator.rtf create mode 100644 documents/rtf/libreoffice/ink.rtf create mode 100644 documents/rtf/libreoffice/lndscpsxn.rtf create mode 
100644 documents/rtf/libreoffice/n695479.rtf create mode 100644 documents/rtf/libreoffice/n823655.rtf create mode 100644 documents/rtf/libreoffice/n823675.rtf create mode 100644 documents/rtf/libreoffice/ole-inline.rtf create mode 100644 documents/rtf/libreoffice/para-style-bottom-margin.rtf create mode 100644 documents/rtf/libreoffice/paste-first-para-direct-format.rtf create mode 100644 documents/rtf/libreoffice/posh-leftright.rtf create mode 100644 documents/rtf/libreoffice/posh-posv.rtf create mode 100644 documents/rtf/libreoffice/read-only-protect.rtf create mode 100644 documents/rtf/libreoffice/sbkeven.rtf create mode 100644 documents/rtf/libreoffice/sbkodd.rtf create mode 100644 documents/rtf/libreoffice/section-pagebreak.rtf create mode 100644 documents/rtf/libreoffice/tblrepeat.rtf create mode 100644 documents/rtf/libreoffice/tdf104016.rtf create mode 100644 documents/rtf/libreoffice/tdf105511.rtf create mode 100644 documents/rtf/libreoffice/tdf108055.rtf create mode 100644 documents/rtf/libreoffice/tdf108943.rtf create mode 100644 documents/rtf/libreoffice/tdf108947.rtf create mode 100644 documents/rtf/libreoffice/tdf108951.rtf create mode 100644 documents/rtf/libreoffice/tdf112211-2.rtf create mode 100644 documents/rtf/libreoffice/tdf115153.rtf create mode 100644 documents/rtf/libreoffice/tdf115155.rtf create mode 100644 documents/rtf/libreoffice/tdf115242.rtf create mode 100644 documents/rtf/libreoffice/tdf115715.rtf create mode 100644 documents/rtf/libreoffice/tdf116265.rtf create mode 100644 documents/rtf/libreoffice/tdf116269.rtf create mode 100644 documents/rtf/libreoffice/tdf117246.rtf create mode 100644 documents/rtf/libreoffice/tdf117403.rtf create mode 100644 documents/rtf/libreoffice/tdf119599.rtf create mode 100644 documents/rtf/libreoffice/tdf122430.rtf create mode 100644 documents/rtf/libreoffice/tdf126173.rtf create mode 100644 documents/rtf/libreoffice/tdf128611.rtf create mode 100644 documents/rtf/libreoffice/tdf148544.rtf create mode 
100644 documents/rtf/libreoffice/tdf152839.rtf create mode 100644 documents/rtf/libreoffice/tdf153196.rtf create mode 100644 documents/rtf/libreoffice/tdf158044.rtf create mode 100644 documents/rtf/libreoffice/tdf160553.rtf create mode 100644 documents/rtf/libreoffice/tdf162198.rtf create mode 100644 documents/rtf/libreoffice/tdf163003.rtf create mode 100644 documents/rtf/libreoffice/tdf165923.rtf create mode 100644 documents/rtf/libreoffice/tdf166191.rtf create mode 100644 documents/rtf/libreoffice/tdf167254.rtf create mode 100644 documents/rtf/libreoffice/tdf167710.rtf create mode 100644 documents/rtf/libreoffice/tdf169298.rtf create mode 100644 documents/rtf/libreoffice/tdf59454.rtf create mode 100644 documents/rtf/libreoffice/tdf59699.rtf create mode 100644 documents/rtf/libreoffice/tdf78506.rtf create mode 100644 documents/rtf/libreoffice/tdf81943.rtf create mode 100644 documents/rtf/libreoffice/tdf84684.rtf create mode 100644 documents/rtf/libreoffice/tdf90046.rtf create mode 100644 documents/rtf/libreoffice/tdf90097.rtf create mode 100644 documents/rtf/libreoffice/tdf90260-par.rtf create mode 100644 documents/rtf/libreoffice/tdf90315.rtf create mode 100644 documents/rtf/libreoffice/tdf91684.rtf create mode 100644 documents/rtf/libreoffice/tdf96308-tabpos.rtf create mode 100644 documents/rtf/libreoffice/tdf96326.rtf create mode 100644 documents/rtf/libreoffice/tdf99498.rtf create mode 100644 documents/rtf/libreoffice/unbalanced-columns-compat.rtf create mode 100644 documents/rtf/libreoffice/unbalanced-columns.rtf create mode 100644 documents/rtf/libreoffice/watermark.rtf create mode 100644 documents/rtf/libreoffice/wrap-distance.rtf create mode 100644 documents/rtf/pandoc/writer.rtf create mode 100644 scrape/rtf_gather.py diff --git a/ATTRIBUTION.json b/ATTRIBUTION.json index cf801fb..02dbe88 100644 --- a/ATTRIBUTION.json +++ b/ATTRIBUTION.json @@ -6,7 +6,9 @@ "author": "John MacFarlane and Pandoc contributors", "license": "GPL-2.0-or-later", "source": 
"https://github.com/jgm/pandoc", - "tags": ["pandoc"], + "tags": [ + "pandoc" + ], "donated": "2026-02-17", "notes": "Test set from Pandoc" }, @@ -17,7 +19,9 @@ "author": "John MacFarlane and Pandoc contributors", "license": "GPL-2.0-or-later", "source": "https://github.com/jgm/pandoc", - "tags": ["pandoc"], + "tags": [ + "pandoc" + ], "donated": "2026-02-17", "notes": "Test set from Pandoc" }, @@ -28,7 +32,10 @@ "author": "Vereniging van Nederlandse Gemeenten / Informatiebeveiligingsdienst voor gemeenten", "license": "CC-BY-NC-SA-4.0", "source": "https://www.informatiebeveiligingsdienst.nl/", - "tags": ["vng-ibd", "vng"], + "tags": [ + "vng-ibd", + "vng" + ], "donated": "2026-02-17", "notes": "Scraped from their website" }, @@ -39,8 +46,40 @@ "author": "DocSpec", "license": "CC-BY-NC-SA-4.0", "source": "https://github.com/docspec/documents", - "tags": ["docspec"], + "tags": [ + "docspec" + ], "donated": "2026-02-17", "notes": "Test fixtures originating from NLdoc" + }, + { + "format": "rtf", + "path": "documents/rtf/pandoc/*.rtf", + "title": "Pandoc RTF Test Suite", + "author": "John MacFarlane and Pandoc contributors", + "license": "GPL-2.0-or-later", + "source": "https://github.com/jgm/pandoc", + "tags": [ + "pandoc", + "rtf", + "test-fixtures" + ], + "donated": "2026-04-01", + "notes": "RTF test fixtures from the Pandoc document converter test suite" + }, + { + "format": "rtf", + "path": "documents/rtf/libreoffice/*.rtf", + "title": "LibreOffice Writer RTF Test Data", + "author": "The Document Foundation and LibreOffice contributors", + "license": "MPL-2.0", + "source": "https://github.com/LibreOffice/core", + "tags": [ + "libreoffice", + "rtf", + "test-fixtures" + ], + "donated": "2026-04-01", + "notes": "RTF import/export test data from LibreOffice Writer QA" } ] diff --git a/documents/rtf/libreoffice/165333.rtf b/documents/rtf/libreoffice/165333.rtf new file mode 100644 index 0000000..118b9a8 --- /dev/null +++ b/documents/rtf/libreoffice/165333.rtf @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7a453202adc062ed0323c78d5fd0ce5182af181ecaedc908f1ae3b480f77f22 +size 319 diff --git a/documents/rtf/libreoffice/165483.rtf b/documents/rtf/libreoffice/165483.rtf new file mode 100644 index 0000000..bf1387d --- /dev/null +++ b/documents/rtf/libreoffice/165483.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0202eae51297de1451e60025ca5d958c0cf3519542cc109e59cdcf0f665e9e63 +size 1511 diff --git a/documents/rtf/libreoffice/165717.rtf b/documents/rtf/libreoffice/165717.rtf new file mode 100644 index 0000000..a00a3b0 --- /dev/null +++ b/documents/rtf/libreoffice/165717.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:305a9526a0f4d90f190c60922b0e97b3def6d0d4cdfa4d09eef29e420628f2c4 +size 36742 diff --git a/documents/rtf/libreoffice/165805.rtf b/documents/rtf/libreoffice/165805.rtf new file mode 100644 index 0000000..0513d30 --- /dev/null +++ b/documents/rtf/libreoffice/165805.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc02761c5434d36f03593ec9d842698c8fa100ebee9f3e6e486564bf310506db +size 54946 diff --git a/documents/rtf/libreoffice/background.rtf b/documents/rtf/libreoffice/background.rtf new file mode 100644 index 0000000..255bab2 --- /dev/null +++ b/documents/rtf/libreoffice/background.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49dc9145bd9bb81540e9793cea7406da06d0a3b0fb26b2b9539d3244a2be5d85 +size 642 diff --git a/documents/rtf/libreoffice/behind-doc.rtf b/documents/rtf/libreoffice/behind-doc.rtf new file mode 100644 index 0000000..3427066 --- /dev/null +++ b/documents/rtf/libreoffice/behind-doc.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a460257cdb2a8269a767cc876fd945b2dce0e7e1520d8c0e0c5bf87bdfeed64e +size 1149 diff --git a/documents/rtf/libreoffice/chtoutline.rtf b/documents/rtf/libreoffice/chtoutline.rtf new file mode 100644 index 0000000..424f29d --- 
/dev/null +++ b/documents/rtf/libreoffice/chtoutline.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1050d67f96663eaf834fc6dcff8a79ebcbcf065307d165a5c6d4e05113c2dff5 +size 41999 diff --git a/documents/rtf/libreoffice/classification-no.rtf b/documents/rtf/libreoffice/classification-no.rtf new file mode 100644 index 0000000..73b5ef5 --- /dev/null +++ b/documents/rtf/libreoffice/classification-no.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e38ef292e81cf6c381d0d36ffcd51e1a658fecfcdcf6d758fadf2b0bded4cb7f +size 35 diff --git a/documents/rtf/libreoffice/classification-yes.rtf b/documents/rtf/libreoffice/classification-yes.rtf new file mode 100644 index 0000000..80e8a24 --- /dev/null +++ b/documents/rtf/libreoffice/classification-yes.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b78bd86ac053a4d6cbe034c258f0f9a5831f27c70c569298267e269b06ae8ef +size 2415 diff --git a/documents/rtf/libreoffice/column-break.rtf b/documents/rtf/libreoffice/column-break.rtf new file mode 100644 index 0000000..ad97f3d --- /dev/null +++ b/documents/rtf/libreoffice/column-break.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:225d61536c4095e4671750052b58e2e48cb47ea505c2fa59760a13a09c3bad9b +size 31 diff --git a/documents/rtf/libreoffice/cont-section-pagebreak.rtf b/documents/rtf/libreoffice/cont-section-pagebreak.rtf new file mode 100644 index 0000000..d6b910f --- /dev/null +++ b/documents/rtf/libreoffice/cont-section-pagebreak.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4196bc38e47ad01ba6f4b43acb1cbe292add927de9350291cab6e4b666e81e7 +size 173 diff --git a/documents/rtf/libreoffice/cp950listleveltext1.rtf b/documents/rtf/libreoffice/cp950listleveltext1.rtf new file mode 100644 index 0000000..60e166f --- /dev/null +++ b/documents/rtf/libreoffice/cp950listleveltext1.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:94dd93dd617f676e20fc301fc8677dd9c36c310bb45012bb69ed5e69b2428d8f +size 40212 diff --git a/documents/rtf/libreoffice/cp950listleveltext2.rtf b/documents/rtf/libreoffice/cp950listleveltext2.rtf new file mode 100644 index 0000000..b3d00ff --- /dev/null +++ b/documents/rtf/libreoffice/cp950listleveltext2.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:537e3bb00088d19bd356457b3c8e9f58b54da5ac3902ad784743eadd12876518 +size 35583 diff --git a/documents/rtf/libreoffice/cp950listleveltext3.rtf b/documents/rtf/libreoffice/cp950listleveltext3.rtf new file mode 100644 index 0000000..bd4cef7 --- /dev/null +++ b/documents/rtf/libreoffice/cp950listleveltext3.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deb02f7408f1b357953a280eeeff9c08ad74c0d3caa83474f44226da3951bb52 +size 35702 diff --git a/documents/rtf/libreoffice/default-values.rtf b/documents/rtf/libreoffice/default-values.rtf new file mode 100644 index 0000000..a54c7a7 --- /dev/null +++ b/documents/rtf/libreoffice/default-values.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:893f459f18d775ca852ef5a14ca8729a25a562baa875884a0965f5a57942e9a8 +size 363 diff --git a/documents/rtf/libreoffice/fdo44984.rtf b/documents/rtf/libreoffice/fdo44984.rtf new file mode 100644 index 0000000..d1368b6 --- /dev/null +++ b/documents/rtf/libreoffice/fdo44984.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1c612acde0fc848145d6c2d3cfa22a02a457a8784fb7285bbf6b52487687d8c +size 529 diff --git a/documents/rtf/libreoffice/fdo45182.rtf b/documents/rtf/libreoffice/fdo45182.rtf new file mode 100644 index 0000000..24fc0b0 --- /dev/null +++ b/documents/rtf/libreoffice/fdo45182.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca5120e569374a7ca9269a0bad9198138cef9b909a42a30db5b60b58d66c8aed +size 124 diff --git a/documents/rtf/libreoffice/fdo45183.rtf b/documents/rtf/libreoffice/fdo45183.rtf new file mode 
100644 index 0000000..0ac40d3 --- /dev/null +++ b/documents/rtf/libreoffice/fdo45183.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efa2ecfa2723fb1108cbc48ffa6fbba3bf76f98707a17b22c12574b22a992b71 +size 1517 diff --git a/documents/rtf/libreoffice/fdo45190.rtf b/documents/rtf/libreoffice/fdo45190.rtf new file mode 100644 index 0000000..6c8a9f5 --- /dev/null +++ b/documents/rtf/libreoffice/fdo45190.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95144d507f6a17754e9624fe4d7099c0d3960a5d4cb60f9de2a0db3a88c271b2 +size 97 diff --git a/documents/rtf/libreoffice/fdo46662.rtf b/documents/rtf/libreoffice/fdo46662.rtf new file mode 100644 index 0000000..066f665 --- /dev/null +++ b/documents/rtf/libreoffice/fdo46662.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86ff26485501eb72a327e1985c72b31ea4c5fbb519ac4cab5f49666c7eb84cb2 +size 1058 diff --git a/documents/rtf/libreoffice/fdo47036.rtf b/documents/rtf/libreoffice/fdo47036.rtf new file mode 100644 index 0000000..0311ca6 --- /dev/null +++ b/documents/rtf/libreoffice/fdo47036.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c331e48960ac2f1133ea1556f0027c811c9d77a4c02d42b756fc9e68fea78fa +size 3072 diff --git a/documents/rtf/libreoffice/fdo47440.rtf b/documents/rtf/libreoffice/fdo47440.rtf new file mode 100644 index 0000000..abe68d5 --- /dev/null +++ b/documents/rtf/libreoffice/fdo47440.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0621bbeb19693881babb8d800fa93f0e99963fce2b6dcd2c80643ffa62264ee +size 1103 diff --git a/documents/rtf/libreoffice/fdo48442.rtf b/documents/rtf/libreoffice/fdo48442.rtf new file mode 100644 index 0000000..cfb28bc --- /dev/null +++ b/documents/rtf/libreoffice/fdo48442.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b553298ddd926dd873c461090481adade31f454d2f6429aa4e47ed907832978 +size 598 diff --git 
a/documents/rtf/libreoffice/fdo49692.rtf b/documents/rtf/libreoffice/fdo49692.rtf new file mode 100644 index 0000000..9e0f448 --- /dev/null +++ b/documents/rtf/libreoffice/fdo49692.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bc8482ad3d03a5809ea103a7240beefc54f9332eabf41b17238bb794b1cc82c +size 255 diff --git a/documents/rtf/libreoffice/fdo49892.rtf b/documents/rtf/libreoffice/fdo49892.rtf new file mode 100644 index 0000000..ff69801 --- /dev/null +++ b/documents/rtf/libreoffice/fdo49892.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cc3b02420f56d1fa3e2fa97feb4314f6b723d2aa79298d17e3d8de22ca7af56 +size 3399 diff --git a/documents/rtf/libreoffice/fdo49893-3.rtf b/documents/rtf/libreoffice/fdo49893-3.rtf new file mode 100644 index 0000000..1563fda --- /dev/null +++ b/documents/rtf/libreoffice/fdo49893-3.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a73637b479b306648325901dda6e9b78d0876e17837a191cd69cc8525560ed65 +size 1497 diff --git a/documents/rtf/libreoffice/fdo49893.rtf b/documents/rtf/libreoffice/fdo49893.rtf new file mode 100644 index 0000000..00af5c6 --- /dev/null +++ b/documents/rtf/libreoffice/fdo49893.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5601e40daf210324e70cc8a55cf542f9f685d5cf6310769e44d5d73b8f871273 +size 1493 diff --git a/documents/rtf/libreoffice/fdo52052.rtf b/documents/rtf/libreoffice/fdo52052.rtf new file mode 100644 index 0000000..7827016 --- /dev/null +++ b/documents/rtf/libreoffice/fdo52052.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12f929f9016836edde5b7a7af69a9cfdbc1a95067406f0e24442d2c392d5f72c +size 830 diff --git a/documents/rtf/libreoffice/fdo52066.rtf b/documents/rtf/libreoffice/fdo52066.rtf new file mode 100644 index 0000000..7db5209 --- /dev/null +++ b/documents/rtf/libreoffice/fdo52066.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ec11b28f2140b81950c4eccc22ece6381058c820c6e1dcf12d1a89ec7459fb69 +size 194 diff --git a/documents/rtf/libreoffice/fdo53556.rtf b/documents/rtf/libreoffice/fdo53556.rtf new file mode 100644 index 0000000..3e3902a --- /dev/null +++ b/documents/rtf/libreoffice/fdo53556.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b943dea14bf32ca2bc2f4b199db23130bc6216855105e626f4fbfe815490ff00 +size 1156 diff --git a/documents/rtf/libreoffice/fdo55525.rtf b/documents/rtf/libreoffice/fdo55525.rtf new file mode 100644 index 0000000..0db0c9c --- /dev/null +++ b/documents/rtf/libreoffice/fdo55525.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2814a827ac371cac85c44ed21d77bb0c84697842bba48fc1a3cf1b494120d4a9 +size 196 diff --git a/documents/rtf/libreoffice/fdo57708.rtf b/documents/rtf/libreoffice/fdo57708.rtf new file mode 100644 index 0000000..6dab980 --- /dev/null +++ b/documents/rtf/libreoffice/fdo57708.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc48ca34fae0200f457467b3a3748bf7916e661c4f3c8fbc9c39209848e57174 +size 2279 diff --git a/documents/rtf/libreoffice/fdo59953.rtf b/documents/rtf/libreoffice/fdo59953.rtf new file mode 100644 index 0000000..6147d6f --- /dev/null +++ b/documents/rtf/libreoffice/fdo59953.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a421caa6693d5dc131f92190e870e939cf109ad13ccc37b6e1a62d2041b77edd +size 268 diff --git a/documents/rtf/libreoffice/fdo66565.rtf b/documents/rtf/libreoffice/fdo66565.rtf new file mode 100644 index 0000000..3d93268 --- /dev/null +++ b/documents/rtf/libreoffice/fdo66565.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:277c811d67e5265f8eb6b03dbe1a33fb8ef4e848808d49b377b5672c1f798d6d +size 3073 diff --git a/documents/rtf/libreoffice/fdo67365.rtf b/documents/rtf/libreoffice/fdo67365.rtf new file mode 100644 index 0000000..6776f78 --- /dev/null +++ b/documents/rtf/libreoffice/fdo67365.rtf @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30434996b4a83b643cc15169f145172d1eac2ac9eba851dff0cb35c6b7571b8b +size 2119 diff --git a/documents/rtf/libreoffice/fdo68291-paste.rtf b/documents/rtf/libreoffice/fdo68291-paste.rtf new file mode 100644 index 0000000..f796b8d --- /dev/null +++ b/documents/rtf/libreoffice/fdo68291-paste.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a98e26c4f0d7b85a8953b7a4f52800f5ef379b11f72bab276dbf02db865ddf08 +size 1400 diff --git a/documents/rtf/libreoffice/fdo74599.rtf b/documents/rtf/libreoffice/fdo74599.rtf new file mode 100644 index 0000000..1177f38 --- /dev/null +++ b/documents/rtf/libreoffice/fdo74599.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e58f6a9ecd8eec14cf01a412ba5280736db5d3f6e8f7b0ac3a1efe97fd268833 +size 75 diff --git a/documents/rtf/libreoffice/fdo74823.rtf b/documents/rtf/libreoffice/fdo74823.rtf new file mode 100644 index 0000000..9ef3eae --- /dev/null +++ b/documents/rtf/libreoffice/fdo74823.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cc03fe4f22e1d83154ecfba329cc2eb5d7e5142c29846c1f9a5f54c9bd8e3c2 +size 329 diff --git a/documents/rtf/libreoffice/fdo79319.rtf b/documents/rtf/libreoffice/fdo79319.rtf new file mode 100644 index 0000000..1fbe82f --- /dev/null +++ b/documents/rtf/libreoffice/fdo79319.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8a213322f23a1da75849755f9624283d77d80049ceeb764f5bb24e86c66913d +size 1433 diff --git a/documents/rtf/libreoffice/fdo80742.rtf b/documents/rtf/libreoffice/fdo80742.rtf new file mode 100644 index 0000000..6fed406 --- /dev/null +++ b/documents/rtf/libreoffice/fdo80742.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bba09fc6ad623a26db7a8cd10319e135f023ccb21e0dfbb1c1c58a928c997eec +size 82 diff --git a/documents/rtf/libreoffice/fdo81033.rtf b/documents/rtf/libreoffice/fdo81033.rtf new file mode 100644 index 
0000000..12e8445 --- /dev/null +++ b/documents/rtf/libreoffice/fdo81033.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be8ac5a8918db8105fec36bb67e61bb69ac1cdf31e212f48c994b921480c0213 +size 145 diff --git a/documents/rtf/libreoffice/fdo81944.rtf b/documents/rtf/libreoffice/fdo81944.rtf new file mode 100644 index 0000000..016e6ab --- /dev/null +++ b/documents/rtf/libreoffice/fdo81944.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b964763fe9328374d394176dc3de5d0af499d09359f916e6afd2e4426de5b55a +size 272 diff --git a/documents/rtf/libreoffice/fdo82071.rtf b/documents/rtf/libreoffice/fdo82071.rtf new file mode 100644 index 0000000..ca13f93 --- /dev/null +++ b/documents/rtf/libreoffice/fdo82071.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e9ef1dce840d47e154f3e894cac6cb5ee1506a4530fffa5354a8282e4825844 +size 1672 diff --git a/documents/rtf/libreoffice/fdo82114.rtf b/documents/rtf/libreoffice/fdo82114.rtf new file mode 100644 index 0000000..fc147f7 --- /dev/null +++ b/documents/rtf/libreoffice/fdo82114.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f5a93d5a2795ab9093b4ba3998390b97b6bf3e39c44238206fd273c31eed702 +size 517 diff --git a/documents/rtf/libreoffice/fdo82512.rtf b/documents/rtf/libreoffice/fdo82512.rtf new file mode 100644 index 0000000..724620f --- /dev/null +++ b/documents/rtf/libreoffice/fdo82512.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a01a790979bd62b805370eca9ac109da6c4573c8ce2047879a933cb37bcc370e +size 100 diff --git a/documents/rtf/libreoffice/fdo84685.rtf b/documents/rtf/libreoffice/fdo84685.rtf new file mode 100644 index 0000000..c8b1705 --- /dev/null +++ b/documents/rtf/libreoffice/fdo84685.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd326a59e52c1468b910be8d885f08bdd780b74fe73662facc8bb97d3cdffef2 +size 143 diff --git a/documents/rtf/libreoffice/fdo85179.rtf 
b/documents/rtf/libreoffice/fdo85179.rtf new file mode 100644 index 0000000..14003e1 --- /dev/null +++ b/documents/rtf/libreoffice/fdo85179.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0006b4b6226aa90aee2213626ba6fca91294c642601a208352c681ccb9480f0c +size 1866 diff --git a/documents/rtf/libreoffice/fdo85812.rtf b/documents/rtf/libreoffice/fdo85812.rtf new file mode 100644 index 0000000..d875cce --- /dev/null +++ b/documents/rtf/libreoffice/fdo85812.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84beff9e27717930543fb88785ee1e318300798029ff53541f95bba801f451b4 +size 937 diff --git a/documents/rtf/libreoffice/flip.rtf b/documents/rtf/libreoffice/flip.rtf new file mode 100644 index 0000000..c5e0649 --- /dev/null +++ b/documents/rtf/libreoffice/flip.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0be13e4fa316fcc3ff5b9d5fd74687a11f75a2300545efedd0d0d07ffc0f44f7 +size 1777 diff --git a/documents/rtf/libreoffice/groupshape-notext.rtf b/documents/rtf/libreoffice/groupshape-notext.rtf new file mode 100644 index 0000000..9b1b999 --- /dev/null +++ b/documents/rtf/libreoffice/groupshape-notext.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77a09332b80b61aa74441a63b4708f2d08f7ab9813822520fdd95d90010a0165 +size 884 diff --git a/documents/rtf/libreoffice/groupshape-rotation.rtf b/documents/rtf/libreoffice/groupshape-rotation.rtf new file mode 100644 index 0000000..d798174 --- /dev/null +++ b/documents/rtf/libreoffice/groupshape-rotation.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88141c7b3a9ec5505a2d6b685d42e243364feacd62cb7a4058873df050618604 +size 750 diff --git a/documents/rtf/libreoffice/groupshape.rtf b/documents/rtf/libreoffice/groupshape.rtf new file mode 100644 index 0000000..ac7fed7 --- /dev/null +++ b/documents/rtf/libreoffice/groupshape.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:807de8eb7716014ceb457ebbb8842a09171680f7e49c3b9bbbdfd3931b2d44fc +size 690 diff --git a/documents/rtf/libreoffice/hello.rtf b/documents/rtf/libreoffice/hello.rtf new file mode 100644 index 0000000..ed80a51 --- /dev/null +++ b/documents/rtf/libreoffice/hello.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad13b2886d25a64bc5456ae6ad8c600fc8c61fdd103583ccac0078dc0c95a82c +size 25 diff --git a/documents/rtf/libreoffice/hidden-para-separator.rtf b/documents/rtf/libreoffice/hidden-para-separator.rtf new file mode 100644 index 0000000..c96df76 --- /dev/null +++ b/documents/rtf/libreoffice/hidden-para-separator.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12691870e2564737b37ba39b5b8731c3943e9e8be71ab291a6077775e7a8ccf7 +size 13416 diff --git a/documents/rtf/libreoffice/ink.rtf b/documents/rtf/libreoffice/ink.rtf new file mode 100644 index 0000000..77ec2ee --- /dev/null +++ b/documents/rtf/libreoffice/ink.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b443d9bf56e7f65eb72bf711f4d7fd7da8f6ef8891ccd9b4d981bca6b887ce29 +size 4182 diff --git a/documents/rtf/libreoffice/lndscpsxn.rtf b/documents/rtf/libreoffice/lndscpsxn.rtf new file mode 100644 index 0000000..9e8322f --- /dev/null +++ b/documents/rtf/libreoffice/lndscpsxn.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:080a51d59ebc1b36cfc4b668ebee554859110b18f834347505d6af1407e12702 +size 235 diff --git a/documents/rtf/libreoffice/n695479.rtf b/documents/rtf/libreoffice/n695479.rtf new file mode 100644 index 0000000..71cfee2 --- /dev/null +++ b/documents/rtf/libreoffice/n695479.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc13faa8bc5b5f4c6012c8bf5794349f9ad9603a15f0c2fcd9bd4ab482e660a4 +size 858 diff --git a/documents/rtf/libreoffice/n823655.rtf b/documents/rtf/libreoffice/n823655.rtf new file mode 100644 index 0000000..2aeba39 --- /dev/null +++ 
b/documents/rtf/libreoffice/n823655.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1669d95f3a36e56211f24d4ab1edab29c29c5b7e4e305e7bb1ef9843cd9522c7 +size 609 diff --git a/documents/rtf/libreoffice/n823675.rtf b/documents/rtf/libreoffice/n823675.rtf new file mode 100644 index 0000000..572ab1c --- /dev/null +++ b/documents/rtf/libreoffice/n823675.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff0c1cfab1dea0796fcf3a9e7cd38a72302ed5c24ea68d273679158604804dc4 +size 561 diff --git a/documents/rtf/libreoffice/ole-inline.rtf b/documents/rtf/libreoffice/ole-inline.rtf new file mode 100644 index 0000000..3a5ba4d --- /dev/null +++ b/documents/rtf/libreoffice/ole-inline.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:719a9bb8700c46df4ba2b7e1165053e038433056cc89da7ef3d4c9617bcaa751 +size 19374 diff --git a/documents/rtf/libreoffice/para-style-bottom-margin.rtf b/documents/rtf/libreoffice/para-style-bottom-margin.rtf new file mode 100644 index 0000000..dff0e23 --- /dev/null +++ b/documents/rtf/libreoffice/para-style-bottom-margin.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e31867fe9229043a702689a49ff26e9a21e5d117b96af66bc1e1b7fae171b0 +size 186 diff --git a/documents/rtf/libreoffice/paste-first-para-direct-format.rtf b/documents/rtf/libreoffice/paste-first-para-direct-format.rtf new file mode 100644 index 0000000..d867f18 --- /dev/null +++ b/documents/rtf/libreoffice/paste-first-para-direct-format.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:518f8c029822a0309d15c0d7b69dc4cd060a7f3f18db54f75c7f30b8c75fcdc0 +size 2512 diff --git a/documents/rtf/libreoffice/posh-leftright.rtf b/documents/rtf/libreoffice/posh-leftright.rtf new file mode 100644 index 0000000..a22d9ab --- /dev/null +++ b/documents/rtf/libreoffice/posh-leftright.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:66f99d53b01281c6e968ddf95d3461b59ea59fa73b8625073ecfc4a22bcb1429 +size 619 diff --git a/documents/rtf/libreoffice/posh-posv.rtf b/documents/rtf/libreoffice/posh-posv.rtf new file mode 100644 index 0000000..53337fd --- /dev/null +++ b/documents/rtf/libreoffice/posh-posv.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4016358117c792644ca3c8d3ebdee80bd797f39fccdbfcb0ae1f3232a4098400 +size 316 diff --git a/documents/rtf/libreoffice/read-only-protect.rtf b/documents/rtf/libreoffice/read-only-protect.rtf new file mode 100644 index 0000000..9228453 --- /dev/null +++ b/documents/rtf/libreoffice/read-only-protect.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec346505be7c9e85a415a72b14d01d6eafcd450426d7d8287ebd7e9ec14ae12f +size 43057 diff --git a/documents/rtf/libreoffice/sbkeven.rtf b/documents/rtf/libreoffice/sbkeven.rtf new file mode 100644 index 0000000..b2504df --- /dev/null +++ b/documents/rtf/libreoffice/sbkeven.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c0c9a610565147b17974c7b969b254161406e79fbf69626be92ecebb25c84fa +size 564 diff --git a/documents/rtf/libreoffice/sbkodd.rtf b/documents/rtf/libreoffice/sbkodd.rtf new file mode 100644 index 0000000..0bbb7dc --- /dev/null +++ b/documents/rtf/libreoffice/sbkodd.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dee273e21998bcb2249a02fa6c4695ba1b24a90d6fb6a8fcad7cc99c74c6da0e +size 551 diff --git a/documents/rtf/libreoffice/section-pagebreak.rtf b/documents/rtf/libreoffice/section-pagebreak.rtf new file mode 100644 index 0000000..7d1f03b --- /dev/null +++ b/documents/rtf/libreoffice/section-pagebreak.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e888a5f6507f9a67b0416c9f39b84e131006c65d8005d5b6d90bbccf276ecfd +size 164 diff --git a/documents/rtf/libreoffice/tblrepeat.rtf b/documents/rtf/libreoffice/tblrepeat.rtf new file mode 100644 index 0000000..54e81a0 --- /dev/null 
+++ b/documents/rtf/libreoffice/tblrepeat.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eccd71f6aaaedb3d7f98b48059345fc08fd8b3be77a9b6ab76b2b95d6cea0b7 +size 112823 diff --git a/documents/rtf/libreoffice/tdf104016.rtf b/documents/rtf/libreoffice/tdf104016.rtf new file mode 100644 index 0000000..4eb7bf7 --- /dev/null +++ b/documents/rtf/libreoffice/tdf104016.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:816ec298bfca9336df95ff0908c8b57fce4c806df90713e7de11af915d055abf +size 893 diff --git a/documents/rtf/libreoffice/tdf105511.rtf b/documents/rtf/libreoffice/tdf105511.rtf new file mode 100644 index 0000000..e273e4c --- /dev/null +++ b/documents/rtf/libreoffice/tdf105511.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f10aa1f6c7f2cc8a24b20900adb8435396aa1dfaca62c10b3bbae58b71ac8146 +size 26 diff --git a/documents/rtf/libreoffice/tdf108055.rtf b/documents/rtf/libreoffice/tdf108055.rtf new file mode 100644 index 0000000..a97dbe9 --- /dev/null +++ b/documents/rtf/libreoffice/tdf108055.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d7954ec5d6579412c27b07786f9bb6dfed7776c5fd3f8c3a44daad65bf6d8a8 +size 45724 diff --git a/documents/rtf/libreoffice/tdf108943.rtf b/documents/rtf/libreoffice/tdf108943.rtf new file mode 100644 index 0000000..9ea5856 --- /dev/null +++ b/documents/rtf/libreoffice/tdf108943.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa03a82a30627e9a2ae5c86cf3440b2f3776b9788dda871ca70b31454dd9e88e +size 595 diff --git a/documents/rtf/libreoffice/tdf108947.rtf b/documents/rtf/libreoffice/tdf108947.rtf new file mode 100644 index 0000000..bed8f46 --- /dev/null +++ b/documents/rtf/libreoffice/tdf108947.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af1b2d99b695ff67e5bae0a0b4cccb71f1bd578065f2b9e65b6e76c87c69b52f +size 10123 diff --git a/documents/rtf/libreoffice/tdf108951.rtf 
b/documents/rtf/libreoffice/tdf108951.rtf new file mode 100644 index 0000000..ce1993c --- /dev/null +++ b/documents/rtf/libreoffice/tdf108951.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:521707dcf99e1d52424c9710f64dda6f3f775ea0e12b1d267dbb01022dd4bb80 +size 989 diff --git a/documents/rtf/libreoffice/tdf112211-2.rtf b/documents/rtf/libreoffice/tdf112211-2.rtf new file mode 100644 index 0000000..861d5f4 --- /dev/null +++ b/documents/rtf/libreoffice/tdf112211-2.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ea0c4497b6cfd5c04fc0bbe99ef1b3ee28ac8de4a0b791c00f2dc1389af9f11 +size 2786 diff --git a/documents/rtf/libreoffice/tdf115153.rtf b/documents/rtf/libreoffice/tdf115153.rtf new file mode 100644 index 0000000..fee32f4 --- /dev/null +++ b/documents/rtf/libreoffice/tdf115153.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0e2bf22b64645bfddd2df2f71a3c0d91c3354cbdd39e81d0b6c9897c74e95a3 +size 247 diff --git a/documents/rtf/libreoffice/tdf115155.rtf b/documents/rtf/libreoffice/tdf115155.rtf new file mode 100644 index 0000000..b60eccd --- /dev/null +++ b/documents/rtf/libreoffice/tdf115155.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:552363439921e54f7f314eefb1efd183b7ebd8e24f763405ffe65015bad5387b +size 856 diff --git a/documents/rtf/libreoffice/tdf115242.rtf b/documents/rtf/libreoffice/tdf115242.rtf new file mode 100644 index 0000000..f216bc6 --- /dev/null +++ b/documents/rtf/libreoffice/tdf115242.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0028e3bf688ab448b532fc2751b1849ec8edd84b90231e9d66ee7b21ae150ff +size 2745 diff --git a/documents/rtf/libreoffice/tdf115715.rtf b/documents/rtf/libreoffice/tdf115715.rtf new file mode 100644 index 0000000..2bf4fde --- /dev/null +++ b/documents/rtf/libreoffice/tdf115715.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d47916ed02631f2e9af59d541b5a6aed1f4a7fc73ecde3c4502fa976647bd6fb +size 1590 diff --git a/documents/rtf/libreoffice/tdf116265.rtf b/documents/rtf/libreoffice/tdf116265.rtf new file mode 100644 index 0000000..ceefe6d --- /dev/null +++ b/documents/rtf/libreoffice/tdf116265.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e93da5e712ccad97bfcaabaae8c350e757c6b3db7450266952cc05d2342e852d +size 872 diff --git a/documents/rtf/libreoffice/tdf116269.rtf b/documents/rtf/libreoffice/tdf116269.rtf new file mode 100644 index 0000000..1b7aa52 --- /dev/null +++ b/documents/rtf/libreoffice/tdf116269.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5abe0667e9bc7a6d0c2ffb03b37a646a5c8fbe683856c39bea8647b36ce12658 +size 745 diff --git a/documents/rtf/libreoffice/tdf117246.rtf b/documents/rtf/libreoffice/tdf117246.rtf new file mode 100644 index 0000000..0cb1388 --- /dev/null +++ b/documents/rtf/libreoffice/tdf117246.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f91d7ba0d8aa16db830ccb4436025e8fc9f0bd3ce3975700eac2e18841cb964 +size 98 diff --git a/documents/rtf/libreoffice/tdf117403.rtf b/documents/rtf/libreoffice/tdf117403.rtf new file mode 100644 index 0000000..e68b06a --- /dev/null +++ b/documents/rtf/libreoffice/tdf117403.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6d4600c62de0b245fccb73c4dcb51bc2c3a35095913d2915f941eff370470c8 +size 416 diff --git a/documents/rtf/libreoffice/tdf119599.rtf b/documents/rtf/libreoffice/tdf119599.rtf new file mode 100644 index 0000000..0b420f1 --- /dev/null +++ b/documents/rtf/libreoffice/tdf119599.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f78425a2adb2815b186d6835110ce44c59285a0b8c34567ed79ce9ce8c2b6b68 +size 168 diff --git a/documents/rtf/libreoffice/tdf122430.rtf b/documents/rtf/libreoffice/tdf122430.rtf new file mode 100644 index 0000000..9bec18a --- /dev/null +++ 
b/documents/rtf/libreoffice/tdf122430.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dc6c6ed473f2149e9c8e71c6f32af53840074dc1c011f08487d686e8be1f4a3 +size 1661 diff --git a/documents/rtf/libreoffice/tdf126173.rtf b/documents/rtf/libreoffice/tdf126173.rtf new file mode 100644 index 0000000..e0aebd4 --- /dev/null +++ b/documents/rtf/libreoffice/tdf126173.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90f11a2c3b331eb8f4be071f5e090ebf38377d7978d02e5079913db2093df4ef +size 1376 diff --git a/documents/rtf/libreoffice/tdf128611.rtf b/documents/rtf/libreoffice/tdf128611.rtf new file mode 100644 index 0000000..90d007f --- /dev/null +++ b/documents/rtf/libreoffice/tdf128611.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcaf0b4c81118f3d198e6060fd275b2cc55d688abbe1861b37b701a51e4c8d4f +size 554 diff --git a/documents/rtf/libreoffice/tdf148544.rtf b/documents/rtf/libreoffice/tdf148544.rtf new file mode 100644 index 0000000..997df08 --- /dev/null +++ b/documents/rtf/libreoffice/tdf148544.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:222e79d38483234049ca8f22c9563687fdb6118a3c5772f7a95e08dd93c0cfc2 +size 82348 diff --git a/documents/rtf/libreoffice/tdf152839.rtf b/documents/rtf/libreoffice/tdf152839.rtf new file mode 100644 index 0000000..2efdfb6 --- /dev/null +++ b/documents/rtf/libreoffice/tdf152839.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d1eb6e1d8f99b516f053fd63cb6c09f014f87a972dd0336a5cb0bf6687fd270 +size 165 diff --git a/documents/rtf/libreoffice/tdf153196.rtf b/documents/rtf/libreoffice/tdf153196.rtf new file mode 100644 index 0000000..224e8f4 --- /dev/null +++ b/documents/rtf/libreoffice/tdf153196.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7a745016b0061ef947d0ec6a807e08a62b1eb1761a261c22bcb4970698b622b +size 19738 diff --git a/documents/rtf/libreoffice/tdf158044.rtf 
b/documents/rtf/libreoffice/tdf158044.rtf new file mode 100644 index 0000000..b35d215 --- /dev/null +++ b/documents/rtf/libreoffice/tdf158044.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfc9264171b0687489a9d8f896d9fb083aac04b8b97905d6dcfc48d5c8189314 +size 807 diff --git a/documents/rtf/libreoffice/tdf160553.rtf b/documents/rtf/libreoffice/tdf160553.rtf new file mode 100644 index 0000000..cec5cfc --- /dev/null +++ b/documents/rtf/libreoffice/tdf160553.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:513280051d41e1278bbe2d19ab56d44dc73c9aeee01dd4b993eaa12311b0b031 +size 246 diff --git a/documents/rtf/libreoffice/tdf162198.rtf b/documents/rtf/libreoffice/tdf162198.rtf new file mode 100644 index 0000000..ce5c743 --- /dev/null +++ b/documents/rtf/libreoffice/tdf162198.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1738bd3343999a66f45769b0bba4a1eac766440430950062cc0ca2469633de2 +size 52835 diff --git a/documents/rtf/libreoffice/tdf163003.rtf b/documents/rtf/libreoffice/tdf163003.rtf new file mode 100644 index 0000000..c697214 --- /dev/null +++ b/documents/rtf/libreoffice/tdf163003.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:251c1cd9f4ec74a0ae3123ba9f4e1de1c5273b68bc373d420ba7bfc7b51cc9aa +size 58133 diff --git a/documents/rtf/libreoffice/tdf165923.rtf b/documents/rtf/libreoffice/tdf165923.rtf new file mode 100644 index 0000000..4bef04c --- /dev/null +++ b/documents/rtf/libreoffice/tdf165923.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc9b1d8c6eb0a7f4829e79b3b64003d0a1500ea816854bdebc4146d9618728a0 +size 55898 diff --git a/documents/rtf/libreoffice/tdf166191.rtf b/documents/rtf/libreoffice/tdf166191.rtf new file mode 100644 index 0000000..d005ab2 --- /dev/null +++ b/documents/rtf/libreoffice/tdf166191.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3acf0e3a776a71cd912c9dad38a807dc297a6a4bbcffe9e49ed3fedbf64cdb55 +size 314 diff --git a/documents/rtf/libreoffice/tdf167254.rtf b/documents/rtf/libreoffice/tdf167254.rtf new file mode 100644 index 0000000..b896574 --- /dev/null +++ b/documents/rtf/libreoffice/tdf167254.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0384834aa04f3b5b1e88dd1c427be854c5b3c4d8a73988f36cfb168e499ba776 +size 286 diff --git a/documents/rtf/libreoffice/tdf167710.rtf b/documents/rtf/libreoffice/tdf167710.rtf new file mode 100644 index 0000000..9b9992e --- /dev/null +++ b/documents/rtf/libreoffice/tdf167710.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5c6dbec2f85f17771980cb097974bb23fc6c1aaf924526324f58021e337c057 +size 60 diff --git a/documents/rtf/libreoffice/tdf169298.rtf b/documents/rtf/libreoffice/tdf169298.rtf new file mode 100644 index 0000000..a4f4585 --- /dev/null +++ b/documents/rtf/libreoffice/tdf169298.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e4f19c96307762d9b7b884b240bde24d5e53031e091317be14fcf1e6bdaa24b +size 188 diff --git a/documents/rtf/libreoffice/tdf59454.rtf b/documents/rtf/libreoffice/tdf59454.rtf new file mode 100644 index 0000000..0b66566 --- /dev/null +++ b/documents/rtf/libreoffice/tdf59454.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed5d539f705fb90163e9ac745df421b5de12adf280e36b786b6c61fe518e0779 +size 528 diff --git a/documents/rtf/libreoffice/tdf59699.rtf b/documents/rtf/libreoffice/tdf59699.rtf new file mode 100644 index 0000000..872703d --- /dev/null +++ b/documents/rtf/libreoffice/tdf59699.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5653909462e4eaf7efe38062cc804fd97ff5a30e38165a5b5b9ce793af3627a8 +size 119 diff --git a/documents/rtf/libreoffice/tdf78506.rtf b/documents/rtf/libreoffice/tdf78506.rtf new file mode 100644 index 0000000..340d50e --- /dev/null +++ 
b/documents/rtf/libreoffice/tdf78506.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:367ecd056a74535791df40c7793560b073123955bc44823e4fd89e2098f4fe4c +size 575 diff --git a/documents/rtf/libreoffice/tdf81943.rtf b/documents/rtf/libreoffice/tdf81943.rtf new file mode 100644 index 0000000..986df93 --- /dev/null +++ b/documents/rtf/libreoffice/tdf81943.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df52f01eac9296fbdcfab09194c2afa955bbdabe8455ac4e8f604e7d5b2aa020 +size 1590 diff --git a/documents/rtf/libreoffice/tdf84684.rtf b/documents/rtf/libreoffice/tdf84684.rtf new file mode 100644 index 0000000..62827ff --- /dev/null +++ b/documents/rtf/libreoffice/tdf84684.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e834975471e2170bacf2f360f3149584c002ef70bc5b85b8c75afc772281ab +size 4270 diff --git a/documents/rtf/libreoffice/tdf90046.rtf b/documents/rtf/libreoffice/tdf90046.rtf new file mode 100644 index 0000000..41ab320 --- /dev/null +++ b/documents/rtf/libreoffice/tdf90046.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34d7955d2f0044e4b62f84ffb193cef4425f0145d5162b06231d99ab130840b8 +size 196 diff --git a/documents/rtf/libreoffice/tdf90097.rtf b/documents/rtf/libreoffice/tdf90097.rtf new file mode 100644 index 0000000..ec58c7a --- /dev/null +++ b/documents/rtf/libreoffice/tdf90097.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a46e2ff10bff28d8d4747f3889103443f5796817cd1c2622db70881a85bfe8ed +size 1513 diff --git a/documents/rtf/libreoffice/tdf90260-par.rtf b/documents/rtf/libreoffice/tdf90260-par.rtf new file mode 100644 index 0000000..f43f9f4 --- /dev/null +++ b/documents/rtf/libreoffice/tdf90260-par.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d098223757f2565a7f77673b75e66eb356648dabb125103b2721c5838ed89bd5 +size 140 diff --git a/documents/rtf/libreoffice/tdf90315.rtf 
b/documents/rtf/libreoffice/tdf90315.rtf new file mode 100644 index 0000000..49c2a5a --- /dev/null +++ b/documents/rtf/libreoffice/tdf90315.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c18ebebd6eb3a98661b37f8c04d46168bdffdbb977be8a336fa19a4a895f6d72 +size 31 diff --git a/documents/rtf/libreoffice/tdf91684.rtf b/documents/rtf/libreoffice/tdf91684.rtf new file mode 100644 index 0000000..64cecfd --- /dev/null +++ b/documents/rtf/libreoffice/tdf91684.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fb26ae093fc8c94d988cb50364899236061bc09ff6f3906049ebf55c47b3526 +size 5294 diff --git a/documents/rtf/libreoffice/tdf96308-tabpos.rtf b/documents/rtf/libreoffice/tdf96308-tabpos.rtf new file mode 100644 index 0000000..c826b32 --- /dev/null +++ b/documents/rtf/libreoffice/tdf96308-tabpos.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a777418c792a6ae666d4bed78214bb1c8076bd6832a8175dc6ab9cbe8d8181db +size 246 diff --git a/documents/rtf/libreoffice/tdf96326.rtf b/documents/rtf/libreoffice/tdf96326.rtf new file mode 100644 index 0000000..14cbe9b --- /dev/null +++ b/documents/rtf/libreoffice/tdf96326.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7eb42ee0ba73f5ca59b3654352d914f3a122fb2e593b8fc8b33f8302aacbc72 +size 479 diff --git a/documents/rtf/libreoffice/tdf99498.rtf b/documents/rtf/libreoffice/tdf99498.rtf new file mode 100644 index 0000000..784d585 --- /dev/null +++ b/documents/rtf/libreoffice/tdf99498.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:737a293f69c498138b0d2c5bb2b6ffc6ea044b52284c99db5af3b19f504e273a +size 589 diff --git a/documents/rtf/libreoffice/unbalanced-columns-compat.rtf b/documents/rtf/libreoffice/unbalanced-columns-compat.rtf new file mode 100644 index 0000000..d598b13 --- /dev/null +++ b/documents/rtf/libreoffice/unbalanced-columns-compat.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:32b7672b7216dff9d5927c74b7d2369e9f78b513e9bc2c45a826bdda5281cc9b +size 179 diff --git a/documents/rtf/libreoffice/unbalanced-columns.rtf b/documents/rtf/libreoffice/unbalanced-columns.rtf new file mode 100644 index 0000000..c1d5ff2 --- /dev/null +++ b/documents/rtf/libreoffice/unbalanced-columns.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68b574f8c7cca2b53a0af153b5c9a15beac847f185c3f4f42c74a72e3f21553a +size 71 diff --git a/documents/rtf/libreoffice/watermark.rtf b/documents/rtf/libreoffice/watermark.rtf new file mode 100644 index 0000000..a022b28 --- /dev/null +++ b/documents/rtf/libreoffice/watermark.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48d62dcd959e737b06ebb8255780bcaaf1e88056ff9c3d5a21d3ff5cd3ddf9cb +size 69471 diff --git a/documents/rtf/libreoffice/wrap-distance.rtf b/documents/rtf/libreoffice/wrap-distance.rtf new file mode 100644 index 0000000..3f21026 --- /dev/null +++ b/documents/rtf/libreoffice/wrap-distance.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f37d52a320a94afa00995f33a607cc3b988671683ebbdc086c82ec94ea243b48 +size 2508 diff --git a/documents/rtf/pandoc/writer.rtf b/documents/rtf/pandoc/writer.rtf new file mode 100644 index 0000000..7cb2b58 --- /dev/null +++ b/documents/rtf/pandoc/writer.rtf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba1fbd61201c66a55682b1a9ece0fa5b6c7f76d6a0e1872206fbf7a8b5aef586 +size 60421 diff --git a/scrape/rtf_gather.py b/scrape/rtf_gather.py new file mode 100644 index 0000000..59b2ac0 --- /dev/null +++ b/scrape/rtf_gather.py @@ -0,0 +1,539 @@ +#!/usr/bin/env python3 +"""Gather RTF test files from open-source project test suites. + +Sources: + 1. Pandoc test suite (GPL-2.0-or-later) + 2. LibreOffice Writer rtfimport test data (MPL-2.0) + 3. LibreOffice Writer unit test data (MPL-2.0) + 4. 
import json
import sys
from pathlib import Path

import requests

# Allow running from repo root or scrape/ directory
REPO_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(REPO_ROOT / "scrape"))

from utils import (
    check_file_size,
    download_file,
    rate_limit,
    sanitize_filename,
    validate_rtf,
)

DOCS_DIR = REPO_ROOT / "documents" / "rtf"

# GitHub API base
GH_API = "https://api.github.com"
GH_RAW = "https://raw.githubusercontent.com"

SESSION = requests.Session()
SESSION.headers.update(
    {
        "User-Agent": "DocSpec-Corpus-Scraper/1.0 (https://github.com/docspec/documents)",
        "Accept": "application/vnd.github.v3+json",
    }
)


# ── Source definitions ────────────────────────────────────────────────────────


def _rtf_entries(payload: object) -> list:
    """Return the dict entries of *payload* whose ``name`` ends in ``.rtf``.

    Anything that is not a list of dicts (for example an error object
    returned in place of a listing) yields an empty list, so callers never
    need to type-check the decoded JSON themselves.
    """
    if not isinstance(payload, list):
        return []
    return [
        entry
        for entry in payload
        if isinstance(entry, dict)
        and (entry.get("name") or "").lower().endswith(".rtf")
    ]


def _list_rtf_in_directory(repo: str, api_path: str) -> list:
    """List the ``.rtf`` entries of one repository directory.

    Args:
        repo: ``owner/name`` of the GitHub repository.
        api_path: Directory path inside the repository.

    Returns:
        The matching entries of the contents listing, or ``[]`` when the
        request fails or the response is not a directory listing. A failure
        is reported on stdout and never raised, so one missing directory
        does not abort the whole run.
    """
    api_url = f"{GH_API}/repos/{repo}/contents/{api_path}"
    rate_limit(1.5)
    try:
        resp = SESSION.get(api_url, timeout=30)
        resp.raise_for_status()
        payload = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f" Warning: failed to list {api_url}: {e}")
        return []
    return _rtf_entries(payload)


def _search_rtf(repo: str, warn_prefix: str = "") -> list:
    """Find ``.rtf`` files anywhere in *repo* through GitHub code search.

    Args:
        repo: ``owner/name`` of the GitHub repository.
        warn_prefix: Text inserted before ``search`` in warning messages
            (e.g. ``"pyth "``).

    Returns:
        The matching search hits (first page only, up to 100), or ``[]`` on
        any failure.
    """
    rate_limit(1.5)
    # per_page=100 is the API maximum; the default page is much smaller.
    search_url = f"{GH_API}/search/code?q=extension:rtf+repo:{repo}&per_page=100"
    try:
        resp = SESSION.get(search_url, timeout=30)
        if resp.status_code != 200:
            # NOTE(review): GitHub documents code search as requiring
            # authentication and SESSION carries no token here, so this
            # branch may be the common case -- report it instead of
            # silently pretending there were no results.
            print(f" Warning: {warn_prefix}search returned HTTP {resp.status_code}")
            return []
        payload = resp.json()
    except (requests.RequestException, ValueError) as e:
        print(f" Warning: {warn_prefix}search failed: {e}")
        return []
    hits = payload.get("items", []) if isinstance(payload, dict) else []
    return _rtf_entries(hits)


def _fetch_entries(
    entries: list,
    repo: str,
    branch: str,
    dest: Path,
    seen: set,
    *,
    origin: str = "",
    announce_skip: bool = False,
    reject_note: str = " - not valid RTF",
) -> int:
    """Download *entries* into *dest* and return how many files were kept.

    A file is counted when it already exists in *dest* and passes
    ``validate_rtf``, or when it downloads successfully and passes both
    ``validate_rtf`` and ``check_file_size(min_bytes=10)``. A downloaded
    file that fails those checks is deleted again.

    Args:
        entries: GitHub API items carrying ``name`` and ``path``.
        repo: ``owner/name`` used to build the raw download URL.
        branch: Branch used to build the raw download URL.
        dest: Existing output directory.
        seen: Output file names already counted for this source. It is
            updated in place so a file reachable through several listings
            is counted exactly once.
        origin: Optional label; when set, the download message reads
            ``... from <origin>/<path>``.
        announce_skip: Print a ``[skip]`` line for files already present.
        reject_note: Suffix appended to the ``[reject]`` message.

    Returns:
        Number of distinct, valid RTF files newly accounted for.
    """
    count = 0
    for entry in entries:
        name = entry.get("name", "")
        path = entry.get("path", "")
        if not path:
            # Without a path no raw URL can be built.
            continue
        safe_name = sanitize_filename(name)
        if safe_name in seen:
            # Already counted through an earlier listing of this source.
            continue
        out = dest / safe_name

        if out.exists() and validate_rtf(out):
            if announce_skip:
                print(f" [skip] {safe_name} (exists)")
            seen.add(safe_name)
            count += 1
            continue

        source_note = f" from {origin}/{path}" if origin else ""
        print(f" Downloading {safe_name}{source_note}")
        raw_url = f"{GH_RAW}/{repo}/{branch}/{path}"
        if not download_file(raw_url, out, delay=1.5, session=SESSION):
            continue
        if validate_rtf(out) and check_file_size(out, min_bytes=10):
            seen.add(safe_name)
            count += 1
            print(f" [ok] {safe_name}")
        else:
            print(f" [reject] {safe_name}{reject_note}")
            out.unlink(missing_ok=True)
    return count


def gather_pandoc(dest: Path) -> int:
    """Gather RTF files from the Pandoc test suite.

    Candidates come from the ``test`` directory listing of ``jgm/pandoc``
    plus a repository-wide code search, de-duplicated by repository path.

    Args:
        dest: Output directory; created if missing.

    Returns:
        Number of distinct valid RTF files present in *dest* afterwards
        that originate from this source.
    """
    dest.mkdir(parents=True, exist_ok=True)

    entries = _list_rtf_in_directory("jgm/pandoc", "test")

    # One path set maintained incrementally, instead of rebuilding it for
    # every search hit.
    known_paths = {entry.get("path", "") for entry in entries}
    for hit in _search_rtf("jgm/pandoc"):
        hit_path = hit.get("path", "")
        if hit_path not in known_paths:
            known_paths.add(hit_path)
            entries.append(hit)

    count = _fetch_entries(
        entries,
        "jgm/pandoc",
        "main",
        dest,
        set(),
        origin="pandoc",
        announce_skip=True,
    )
    print(f" Pandoc: {count} RTF files")
    return count


def _gather_gh_directory(
    repo: str,
    api_path: str,
    branch: str,
    dest: Path,
    seen: "set | None" = None,
) -> int:
    """Gather .rtf files from a single GitHub directory listing.

    Args:
        repo: ``owner/name`` of the GitHub repository.
        api_path: Directory path inside the repository.
        branch: Branch used for the raw download URLs.
        dest: Existing output directory.
        seen: Optional set of output names already counted by the caller;
            pass the same set across calls that share *dest* so a file is
            not counted twice. Defaults to a fresh set per call.

    Returns:
        Number of distinct valid RTF files accounted for by this listing,
        ``0`` if the listing could not be retrieved.
    """
    entries = _list_rtf_in_directory(repo, api_path)
    # Explicit None test: an empty set passed by the caller is falsy but
    # must still be the one that gets updated.
    counted = set() if seen is None else seen
    return _fetch_entries(entries, repo, branch, dest, counted)


def gather_libreoffice(dest: Path) -> int:
    """Gather RTF files from LibreOffice Writer test data.

    Args:
        dest: Output directory; created if missing.

    Returns:
        Number of distinct valid RTF files gathered across all directories.
    """
    dest.mkdir(parents=True, exist_ok=True)
    count = 0

    # Multiple directories with RTF test files
    dirs = [
        "sw/qa/extras/rtfimport/data",
        "sw/qa/extras/rtfexport/data",
    ]

    # Both directories land in the same *dest*; a name that occurs in both
    # maps to one output file and is therefore counted once.
    seen = set()
    for d in dirs:
        n = _gather_gh_directory("LibreOffice/core", d, "master", dest, seen)
        count += n
        print(f" LibreOffice {d}: {n} files")

    print(f" LibreOffice total: {count} RTF files")
    return count


def gather_pyth(dest: Path) -> int:
    """Gather RTF files from pyth library tests.

    Tries a few candidate fixture directories and then a repository-wide
    code search; files found by both routes are counted once.

    Args:
        dest: Output directory; created if missing.

    Returns:
        Number of distinct valid RTF files gathered.
    """
    dest.mkdir(parents=True, exist_ok=True)
    count = 0

    # pyth has tests/ directory with RTF fixtures
    dirs_to_try = [
        "tests",
        "tests/rtf",
        "tests/fixtures",
    ]

    seen = set()
    for d in dirs_to_try:
        count += _gather_gh_directory("brendonh/pyth", d, "master", dest, seen)

    # Also search via code search
    hits = _search_rtf("brendonh/pyth", warn_prefix="pyth ")
    count += _fetch_entries(
        hits,
        "brendonh/pyth",
        "master",
        dest,
        seen,
        origin="pyth",
        reject_note="",
    )

    print(f" pyth: {count} RTF files")
    return count
ruby-rtf (MIT license) + ("clbustos/rtf", "master", ["spec/fixtures", "test/fixtures", "test", "spec"]), + # PHPWord test data (LGPL-3.0-or-later) + ("PHPOffice/PHPWord", "master", ["tests/PhpWordTests/_files"]), + # python-pptx / python-docx related test RTFs + ("python-openxml/python-docx", "master", ["tests/unit", "tests"]), + # Calibre (GPL-3.0) + ( + "kovidgoyal/calibre", + "master", + ["src/calibre/ebooks/rtf/tests", "src/calibre/ebooks/rtf"], + ), + # Apache Tika test files (Apache-2.0) + ( + "apache/tika", + "main", + [ + "tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents" + ], + ), + ] + + for repo, branch, dirs in sources: + repo_name = repo.split("/")[-1] + for d in dirs: + n = _gather_gh_directory(repo, d, branch, dest) + if n > 0: + print(f" {repo_name}/{d}: {n} files") + count += n + + print(f" Additional sources: {count} RTF files") + return count + + +def gather_unrtf(dest: Path) -> int: + """Gather RTF test files from unrtf project.""" + dest.mkdir(parents=True, exist_ok=True) + count = 0 + + # unrtf has test files (GPL-2.0+) + sources = [ + ("TomBZomwor662/unrtf", "master", ["tests", "tests/data", "test"]), + ("nesbox/unrtf", "master", ["tests", "tests/data", "test"]), + ] + + for repo, branch, dirs in sources: + for d in dirs: + n = _gather_gh_directory(repo, d, branch, dest) + count += n + + # Also try GitHub code search for unrtf .rtf files + rate_limit(1.5) + try: + search_url = f"{GH_API}/search/repositories?q=unrtf+language:c&sort=stars" + resp = SESSION.get(search_url, timeout=30) + if resp.status_code == 200: + repos = resp.json().get("items", [])[:3] + for repo_info in repos: + full_name = repo_info.get("full_name", "") + default_branch = repo_info.get("default_branch", "master") + if not full_name: + continue + rate_limit(1.5) + try: + search_code = ( + f"{GH_API}/search/code?q=extension:rtf+repo:{full_name}" + ) + code_resp = SESSION.get(search_code, 
timeout=30) + if code_resp.status_code == 200: + for item in code_resp.json().get("items", []): + name = item.get("name", "") + path = item.get("path", "") + if not name.lower().endswith(".rtf") or not path: + continue + safe_name = sanitize_filename(name) + out = dest / safe_name + if out.exists(): + continue + raw_url = f"{GH_RAW}/{full_name}/{default_branch}/{path}" + if download_file(raw_url, out, delay=1.5, session=SESSION): + if validate_rtf(out) and check_file_size( + out, min_bytes=10 + ): + count += 1 + else: + out.unlink(missing_ok=True) + except Exception: + pass + except Exception: + pass + + print(f" unrtf: {count} RTF files") + return count + + +def gather_github_search(dest: Path) -> int: + """Broad GitHub code search for RTF test fixtures in open-source repos.""" + dest.mkdir(parents=True, exist_ok=True) + count = 0 + + queries = [ + "extension:rtf+path:test", + "extension:rtf+path:fixture", + "extension:rtf+path:sample", + "extension:rtf+path:data", + ] + + seen_paths = set() + + for query in queries: + rate_limit(3.0) # Be gentle with search API + try: + url = f"{GH_API}/search/code?q={query}&per_page=30" + resp = SESSION.get(url, timeout=30) + if resp.status_code != 200: + print(f" Warning: search returned {resp.status_code}") + continue + data = resp.json() + for item in data.get("items", []): + name = item.get("name", "") + if not name.lower().endswith(".rtf"): + continue + + repo_info = item.get("repository", {}) + repo_name = repo_info.get("full_name", "") + path = item.get("path", "") + + # Skip if already seen + key = f"{repo_name}/{path}" + if key in seen_paths: + continue + seen_paths.add(key) + + # Check license (via repo info) + license_info = repo_info.get("license") or {} + spdx = license_info.get("spdx_id", "NOASSERTION") + if spdx in ("NOASSERTION", ""): + continue # Skip repos without clear license + + # Determine branch + default_branch = "main" # guess; raw URL will 302 if wrong + + safe_name = sanitize_filename(name) + # Prefix 
with repo to avoid collisions + repo_short = repo_name.replace("/", "-") + safe_name = f"{repo_short}-{safe_name}" + out = dest / safe_name + + if out.exists() and validate_rtf(out): + count += 1 + continue + + # Try main, then master + downloaded = False + for branch in ("main", "master"): + raw_url = f"{GH_RAW}/{repo_name}/{branch}/{path}" + if download_file( + raw_url, out, delay=1.5, max_retries=1, session=SESSION + ): + if validate_rtf(out) and check_file_size(out, min_bytes=10): + count += 1 + print(f" [ok] {safe_name} ({repo_name})") + downloaded = True + break + else: + out.unlink(missing_ok=True) + + if not downloaded: + out.unlink(missing_ok=True) + + except Exception as e: + print(f" Warning: search query failed: {e}") + + print(f" GitHub search: {count} RTF files") + return count + + +# ── Main ────────────────────────────────────────────────────────────────────── + + +def update_attribution(entries: list[dict]) -> None: + """Add entries to ATTRIBUTION.json if not already present.""" + attr_path = REPO_ROOT / "ATTRIBUTION.json" + existing = json.loads(attr_path.read_text()) if attr_path.exists() else [] + + existing_paths = {e.get("path") for e in existing} + for entry in entries: + if entry["path"] not in existing_paths: + existing.append(entry) + + attr_path.write_text(json.dumps(existing, indent=2, ensure_ascii=False) + "\n") + print(f"ATTRIBUTION.json: {len(existing)} entries total") + + +def main(): + print("=" * 60) + print("RTF Corpus Gatherer") + print("=" * 60) + + total = 0 + + # 1. Pandoc + print("\n[1/5] Pandoc test suite...") + n = gather_pandoc(DOCS_DIR / "pandoc") + total += n + + # 2. LibreOffice + print("\n[2/5] LibreOffice Writer tests...") + n = gather_libreoffice(DOCS_DIR / "libreoffice") + total += n + + # 3. pyth + print("\n[3/5] pyth RTF library tests...") + n = gather_pyth(DOCS_DIR / "pyth") + total += n + + # 4. 
Additional known sources + print("\n[4/5] Additional GitHub sources...") + n = gather_additional_gh_sources(DOCS_DIR / "github-misc") + total += n + + # 5. Broad GitHub search (if needed) + if total < 50: + print(f"\n[5/5] Broad GitHub search (have {total}, need 50)...") + n = gather_github_search(DOCS_DIR / "github-misc") + total += n + else: + print(f"\n[5/5] Skipping broad search (already have {total} files)") + + # Final count + rtf_count = sum(1 for _ in DOCS_DIR.rglob("*.rtf")) + print(f"\n{'=' * 60}") + print(f"Total RTF files: {rtf_count}") + print(f"{'=' * 60}") + + # Update attribution + attribution_entries = [ + { + "format": "rtf", + "path": "documents/rtf/pandoc/*.rtf", + "title": "Pandoc RTF Test Suite", + "author": "John MacFarlane and Pandoc contributors", + "license": "GPL-2.0-or-later", + "source": "https://github.com/jgm/pandoc", + "tags": ["pandoc", "rtf", "test-fixtures"], + "donated": "2026-04-01", + "notes": "RTF test fixtures from the Pandoc document converter test suite", + }, + { + "format": "rtf", + "path": "documents/rtf/libreoffice/*.rtf", + "title": "LibreOffice Writer RTF Test Data", + "author": "The Document Foundation and LibreOffice contributors", + "license": "MPL-2.0", + "source": "https://github.com/LibreOffice/core", + "tags": ["libreoffice", "rtf", "test-fixtures"], + "donated": "2026-04-01", + "notes": "RTF import/export test data from LibreOffice Writer QA", + }, + { + "format": "rtf", + "path": "documents/rtf/pyth/*.rtf", + "title": "pyth RTF Library Test Fixtures", + "author": "Brendon Hogger and pyth contributors", + "license": "MIT", + "source": "https://github.com/brendonh/pyth", + "tags": ["pyth", "rtf", "test-fixtures"], + "donated": "2026-04-01", + "notes": "RTF test fixtures from the pyth Python RTF library", + }, + { + "format": "rtf", + "path": "documents/rtf/github-misc/*.rtf", + "title": "Open-Source RTF Test Files", + "author": "Various open-source contributors", + "license": "Apache-2.0", + "source": 
"https://github.com/search?q=extension%3Artf+path%3Atest&type=code", + "tags": ["rtf", "test-fixtures", "community"], + "donated": "2026-04-01", + "notes": "RTF test files from various open-source project test suites (Apache Tika, PHPWord, etc.)", + }, + ] + + # Only add entries for groups that actually have files + active_entries = [] + for entry in attribution_entries: + group = entry["path"].split("/")[ + 2 + ] # e.g., "pandoc" from "documents/rtf/pandoc/*.rtf" + group_dir = DOCS_DIR / group + if group_dir.exists() and any(group_dir.glob("*.rtf")): + active_entries.append(entry) + + if active_entries: + update_attribution(active_entries) + + return rtf_count + + +if __name__ == "__main__": + count = main() + if count < 50: + print(f"\n⚠️ Only gathered {count} files (target: 50)") + print(" RTF test files are scarce — this may be the realistic maximum") + else: + print(f"\n✅ Target met: {count} ≥ 50 RTF files") From 5963745918d5963a754d409d772b2fba5e129146 Mon Sep 17 00:00:00 2001 From: Stephan Meijer Date: Wed, 1 Apr 2026 17:37:11 +0200 Subject: [PATCH 05/14] feat(doc): add legacy .doc (WW8) corpus from test suites --- ATTRIBUTION.json | 15 + documents/doc/apache-poi/47304.doc | 3 + documents/doc/apache-poi/47950-lower.doc | 3 + documents/doc/apache-poi/47950-normal.doc | 3 + documents/doc/apache-poi/47950-upper.doc | 3 + .../doc/apache-poi/51921-word-crash067.doc | 3 + documents/doc/apache-poi/52117.doc | 3 + documents/doc/apache-poi/52420.doc | 3 + documents/doc/apache-poi/53379.doc | 3 + documents/doc/apache-poi/53446.doc | 3 + documents/doc/apache-poi/56880.doc | 3 + .../doc/apache-poi/57603-seven-columns.doc | 3 + documents/doc/apache-poi/57843.doc | 3 + documents/doc/apache-poi/58804-1.doc | 3 + documents/doc/apache-poi/58804.doc | 3 + documents/doc/apache-poi/59322.doc | 3 + documents/doc/apache-poi/60279.doc | 3 + documents/doc/apache-poi/61490.doc | 3 + documents/doc/apache-poi/61586.doc | 3 + documents/doc/apache-poi/61911.doc | 3 + 
documents/doc/apache-poi/64132.doc | 3 + documents/doc/apache-poi/aioob-tap.doc | 3 + ...rnational-travel-approval-request-form.doc | 3 + documents/doc/apache-poi/bug28627.doc | 3 + documents/doc/apache-poi/bug33519.doc | 3 + documents/doc/apache-poi/bug34898.doc | 3 + documents/doc/apache-poi/bug41898.doc | 3 + documents/doc/apache-poi/bug44292.doc | 3 + documents/doc/apache-poi/bug44431.doc | 3 + documents/doc/apache-poi/bug44603.doc | 3 + documents/doc/apache-poi/bug45269.doc | 3 + documents/doc/apache-poi/bug45473.doc | 3 + documents/doc/apache-poi/bug45877.doc | 3 + documents/doc/apache-poi/bug46220.doc | 3 + documents/doc/apache-poi/bug46610-1.doc | 3 + documents/doc/apache-poi/bug46610-2.doc | 3 + documents/doc/apache-poi/bug46610-3.doc | 3 + documents/doc/apache-poi/bug46817.doc | 3 + documents/doc/apache-poi/bug47286.doc | 3 + documents/doc/apache-poi/bug47287.doc | 3 + documents/doc/apache-poi/bug47731.doc | 3 + documents/doc/apache-poi/bug47742.doc | 3 + documents/doc/apache-poi/bug47958.doc | 3 + documents/doc/apache-poi/bug48065.doc | 3 + documents/doc/apache-poi/bug48075.doc | 3 + documents/doc/apache-poi/bug49820.doc | 3 + documents/doc/apache-poi/bug49908.doc | 3 + documents/doc/apache-poi/bug49919.doc | 3 + documents/doc/apache-poi/bug49933.doc | 3 + documents/doc/apache-poi/bug50075.doc | 3 + documents/doc/apache-poi/bug50936-1.doc | 3 + documents/doc/apache-poi/bug50936-2.doc | 3 + documents/doc/apache-poi/bug50936-3.doc | 3 + documents/doc/apache-poi/bug50955.doc | 3 + documents/doc/apache-poi/bug51604.doc | 3 + documents/doc/apache-poi/bug51686.doc | 3 + documents/doc/apache-poi/bug51834.doc | 3 + documents/doc/apache-poi/bug51890.doc | 3 + documents/doc/apache-poi/bug51944.doc | 3 + documents/doc/apache-poi/bug52032-1.doc | 3 + documents/doc/apache-poi/bug52032-2.doc | 3 + documents/doc/apache-poi/bug52032-3.doc | 3 + documents/doc/apache-poi/bug52311.doc | 3 + documents/doc/apache-poi/bug52583.doc | 3 + documents/doc/apache-poi/bug53182.doc | 
3 + documents/doc/apache-poi/bug53380-1.doc | 3 + documents/doc/apache-poi/bug53380-2.doc | 3 + documents/doc/apache-poi/bug53380-3.doc | 3 + documents/doc/apache-poi/bug53380-4.doc | 3 + documents/doc/apache-poi/bug53453section.doc | 3 + documents/doc/apache-poi/bug60936.doc | 3 + documents/doc/apache-poi/bug60942.doc | 3 + documents/doc/apache-poi/bug60942b.doc | 3 + documents/doc/apache-poi/bug61268.doc | 3 + documents/doc/apache-poi/bug65255.doc | 3 + ...hool-concert-seat-booking-form-2011-12.doc | 3 + ...iosketch-facultyid-4009-name-m-maciver.doc | 3 + documents/doc/apache-poi/capitalized.doc | 3 + ...nimized-poihwpffuzzer-4892412469968896.doc | 3 + ...nimized-poihwpffuzzer-4947285593948160.doc | 3 + ...nimized-poihwpffuzzer-4951943183990784.doc | 3 + ...nimized-poihwpffuzzer-5050208641482752.doc | 3 + ...nimized-poihwpffuzzer-5074346559012864.doc | 3 + ...nimized-poihwpffuzzer-5195207308541952.doc | 3 + ...nimized-poihwpffuzzer-5418937293340672.doc | 3 + ...nimized-poihwpffuzzer-5440721166139392.doc | 3 + ...nimized-poihwpffuzzer-5832867957309440.doc | 3 + ...nimized-poihwpffuzzer-6610789829836800.doc | 3 + ...estcase-poihwpffuzzer-5696094627495936.doc | 3 + ...n-orthodox-www-divenbog-april-30-april.doc | 3 + .../doc/apache-poi/difffirstpageheadfoot.doc | 3 + .../doc/apache-poi/documentproperties.doc | 3 + documents/doc/apache-poi/empty.doc | 3 + documents/doc/apache-poi/endingnote.doc | 3 + documents/doc/apache-poi/equation.doc | 3 + documents/doc/apache-poi/fancyfoot.doc | 3 + documents/doc/apache-poi/floatingpictures.doc | 3 + documents/doc/apache-poi/footnote.doc | 3 + documents/doc/apache-poi/fuzzed.doc | 3 + documents/doc/apache-poi/gaiatest.doc | 3 + .../doc/apache-poi/header-footer-replace.doc | 3 + documents/doc/apache-poi/header-image.doc | 3 + .../apache-poi/headerfooterproblematic.doc | 3 + .../doc/apache-poi/headerfooterunicode.doc | 3 + documents/doc/apache-poi/headerwithmacros.doc | 3 + documents/doc/apache-poi/hyperlink.doc | 3 + 
documents/doc/apache-poi/innertable.doc | 3 + .../doc/apache-poi/listentrynolisttable.doc | 3 + documents/doc/apache-poi/lists-margins.doc | 3 + documents/doc/apache-poi/lists.doc | 3 + documents/doc/apache-poi/markauthorstable.doc | 3 + documents/doc/apache-poi/noheadfoot.doc | 3 + documents/doc/apache-poi/o-kurs.doc | 3 + documents/doc/apache-poi/ob-is.doc | 3 + .../doc/apache-poi/page-break-before.doc | 3 + documents/doc/apache-poi/page-break.doc | 3 + documents/doc/apache-poi/pageref.doc | 3 + .../doc/apache-poi/pagespecificheadfoot.doc | 3 + documents/doc/apache-poi/parentinvguid.doc | 3 + .../password-password-cryptoapi.doc | 3 + .../apache-poi/password-tika-binaryrc4.doc | 3 + .../doc/apache-poi/passwordprotected.doc | 3 + .../apache-poi/picture-alternative-text.doc | 3 + documents/doc/apache-poi/picture.doc | 3 + documents/doc/apache-poi/pictures-escher.doc | 3 + documents/doc/apache-poi/pngpicture.doc | 3 + .../doc/apache-poi/problemextracting.doc | 3 + documents/doc/apache-poi/rasp.doc | 3 + documents/doc/apache-poi/sampledoc.doc | 3 + documents/doc/apache-poi/saved-by-table.doc | 3 + documents/doc/apache-poi/simple-list.doc | 3 + documents/doc/apache-poi/simple-table.doc | 3 + documents/doc/apache-poi/simple-table2.doc | 3 + documents/doc/apache-poi/simple.doc | 3 + .../doc/apache-poi/simpleheadthreecolfoot.doc | 3 + documents/doc/apache-poi/simplemacro.doc | 3 + documents/doc/apache-poi/table-merges.doc | 3 + documents/doc/apache-poi/test-fields.doc | 3 + documents/doc/apache-poi/test.doc | 3 + documents/doc/apache-poi/test2.doc | 3 + .../doc/apache-poi/testcroppedpictures.doc | 3 + documents/doc/apache-poi/testpictures.doc | 3 + documents/doc/apache-poi/testrangedelete.doc | 3 + .../doc/apache-poi/testrangeinsertion.doc | 3 + .../doc/apache-poi/testrangereplacement.doc | 3 + documents/doc/apache-poi/threecolfoot.doc | 3 + documents/doc/apache-poi/threecolhead.doc | 3 + documents/doc/apache-poi/threecolheadfoot.doc | 3 + 
documents/doc/apache-poi/two-images.doc | 3 + documents/doc/apache-poi/vector-image.doc | 3 + documents/doc/apache-poi/watermark.doc | 3 + documents/doc/apache-poi/withartshapes.doc | 3 + .../apache-poi/word-with-embeded-ooxml.doc | 3 + .../doc/apache-poi/word-with-embeded.doc | 3 + documents/doc/apache-poi/word6-sections.doc | 3 + documents/doc/apache-poi/word6-sections2.doc | 3 + documents/doc/apache-poi/word6.doc | 3 + documents/doc/apache-poi/word95.doc | 3 + documents/doc/apache-poi/word95err.doc | 3 + scrape/doc_gather.py | 267 ++++++++++++++++++ 160 files changed, 756 insertions(+) create mode 100644 documents/doc/apache-poi/47304.doc create mode 100644 documents/doc/apache-poi/47950-lower.doc create mode 100644 documents/doc/apache-poi/47950-normal.doc create mode 100644 documents/doc/apache-poi/47950-upper.doc create mode 100644 documents/doc/apache-poi/51921-word-crash067.doc create mode 100644 documents/doc/apache-poi/52117.doc create mode 100644 documents/doc/apache-poi/52420.doc create mode 100644 documents/doc/apache-poi/53379.doc create mode 100644 documents/doc/apache-poi/53446.doc create mode 100644 documents/doc/apache-poi/56880.doc create mode 100644 documents/doc/apache-poi/57603-seven-columns.doc create mode 100644 documents/doc/apache-poi/57843.doc create mode 100644 documents/doc/apache-poi/58804-1.doc create mode 100644 documents/doc/apache-poi/58804.doc create mode 100644 documents/doc/apache-poi/59322.doc create mode 100644 documents/doc/apache-poi/60279.doc create mode 100644 documents/doc/apache-poi/61490.doc create mode 100644 documents/doc/apache-poi/61586.doc create mode 100644 documents/doc/apache-poi/61911.doc create mode 100644 documents/doc/apache-poi/64132.doc create mode 100644 documents/doc/apache-poi/aioob-tap.doc create mode 100644 documents/doc/apache-poi/au-edu-utas-www-data-assets-word-doc-0003-154335-international-travel-approval-request-form.doc create mode 100644 documents/doc/apache-poi/bug28627.doc create mode 100644 
documents/doc/apache-poi/bug33519.doc create mode 100644 documents/doc/apache-poi/bug34898.doc create mode 100644 documents/doc/apache-poi/bug41898.doc create mode 100644 documents/doc/apache-poi/bug44292.doc create mode 100644 documents/doc/apache-poi/bug44431.doc create mode 100644 documents/doc/apache-poi/bug44603.doc create mode 100644 documents/doc/apache-poi/bug45269.doc create mode 100644 documents/doc/apache-poi/bug45473.doc create mode 100644 documents/doc/apache-poi/bug45877.doc create mode 100644 documents/doc/apache-poi/bug46220.doc create mode 100644 documents/doc/apache-poi/bug46610-1.doc create mode 100644 documents/doc/apache-poi/bug46610-2.doc create mode 100644 documents/doc/apache-poi/bug46610-3.doc create mode 100644 documents/doc/apache-poi/bug46817.doc create mode 100644 documents/doc/apache-poi/bug47286.doc create mode 100644 documents/doc/apache-poi/bug47287.doc create mode 100644 documents/doc/apache-poi/bug47731.doc create mode 100644 documents/doc/apache-poi/bug47742.doc create mode 100644 documents/doc/apache-poi/bug47958.doc create mode 100644 documents/doc/apache-poi/bug48065.doc create mode 100644 documents/doc/apache-poi/bug48075.doc create mode 100644 documents/doc/apache-poi/bug49820.doc create mode 100644 documents/doc/apache-poi/bug49908.doc create mode 100644 documents/doc/apache-poi/bug49919.doc create mode 100644 documents/doc/apache-poi/bug49933.doc create mode 100644 documents/doc/apache-poi/bug50075.doc create mode 100644 documents/doc/apache-poi/bug50936-1.doc create mode 100644 documents/doc/apache-poi/bug50936-2.doc create mode 100644 documents/doc/apache-poi/bug50936-3.doc create mode 100644 documents/doc/apache-poi/bug50955.doc create mode 100644 documents/doc/apache-poi/bug51604.doc create mode 100644 documents/doc/apache-poi/bug51686.doc create mode 100644 documents/doc/apache-poi/bug51834.doc create mode 100644 documents/doc/apache-poi/bug51890.doc create mode 100644 documents/doc/apache-poi/bug51944.doc create mode 
100644 documents/doc/apache-poi/bug52032-1.doc create mode 100644 documents/doc/apache-poi/bug52032-2.doc create mode 100644 documents/doc/apache-poi/bug52032-3.doc create mode 100644 documents/doc/apache-poi/bug52311.doc create mode 100644 documents/doc/apache-poi/bug52583.doc create mode 100644 documents/doc/apache-poi/bug53182.doc create mode 100644 documents/doc/apache-poi/bug53380-1.doc create mode 100644 documents/doc/apache-poi/bug53380-2.doc create mode 100644 documents/doc/apache-poi/bug53380-3.doc create mode 100644 documents/doc/apache-poi/bug53380-4.doc create mode 100644 documents/doc/apache-poi/bug53453section.doc create mode 100644 documents/doc/apache-poi/bug60936.doc create mode 100644 documents/doc/apache-poi/bug60942.doc create mode 100644 documents/doc/apache-poi/bug60942b.doc create mode 100644 documents/doc/apache-poi/bug61268.doc create mode 100644 documents/doc/apache-poi/bug65255.doc create mode 100644 documents/doc/apache-poi/ca-kwsymphony-www-education-school-concert-seat-booking-form-2011-12.doc create mode 100644 documents/doc/apache-poi/cap-stanford-edu-profiles-viewbiosketch-facultyid-4009-name-m-maciver.doc create mode 100644 documents/doc/apache-poi/capitalized.doc create mode 100644 documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-4892412469968896.doc create mode 100644 documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-4947285593948160.doc create mode 100644 documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-4951943183990784.doc create mode 100644 documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5050208641482752.doc create mode 100644 documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5074346559012864.doc create mode 100644 documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5195207308541952.doc create mode 100644 documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5418937293340672.doc create mode 
100644 documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5440721166139392.doc create mode 100644 documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5832867957309440.doc create mode 100644 documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-6610789829836800.doc create mode 100644 documents/doc/apache-poi/clusterfuzz-testcase-poihwpffuzzer-5696094627495936.doc create mode 100644 documents/doc/apache-poi/cn-orthodox-www-divenbog-april-30-april.doc create mode 100644 documents/doc/apache-poi/difffirstpageheadfoot.doc create mode 100644 documents/doc/apache-poi/documentproperties.doc create mode 100644 documents/doc/apache-poi/empty.doc create mode 100644 documents/doc/apache-poi/endingnote.doc create mode 100644 documents/doc/apache-poi/equation.doc create mode 100644 documents/doc/apache-poi/fancyfoot.doc create mode 100644 documents/doc/apache-poi/floatingpictures.doc create mode 100644 documents/doc/apache-poi/footnote.doc create mode 100644 documents/doc/apache-poi/fuzzed.doc create mode 100644 documents/doc/apache-poi/gaiatest.doc create mode 100644 documents/doc/apache-poi/header-footer-replace.doc create mode 100644 documents/doc/apache-poi/header-image.doc create mode 100644 documents/doc/apache-poi/headerfooterproblematic.doc create mode 100644 documents/doc/apache-poi/headerfooterunicode.doc create mode 100644 documents/doc/apache-poi/headerwithmacros.doc create mode 100644 documents/doc/apache-poi/hyperlink.doc create mode 100644 documents/doc/apache-poi/innertable.doc create mode 100644 documents/doc/apache-poi/listentrynolisttable.doc create mode 100644 documents/doc/apache-poi/lists-margins.doc create mode 100644 documents/doc/apache-poi/lists.doc create mode 100644 documents/doc/apache-poi/markauthorstable.doc create mode 100644 documents/doc/apache-poi/noheadfoot.doc create mode 100644 documents/doc/apache-poi/o-kurs.doc create mode 100644 documents/doc/apache-poi/ob-is.doc create mode 100644 
documents/doc/apache-poi/page-break-before.doc create mode 100644 documents/doc/apache-poi/page-break.doc create mode 100644 documents/doc/apache-poi/pageref.doc create mode 100644 documents/doc/apache-poi/pagespecificheadfoot.doc create mode 100644 documents/doc/apache-poi/parentinvguid.doc create mode 100644 documents/doc/apache-poi/password-password-cryptoapi.doc create mode 100644 documents/doc/apache-poi/password-tika-binaryrc4.doc create mode 100644 documents/doc/apache-poi/passwordprotected.doc create mode 100644 documents/doc/apache-poi/picture-alternative-text.doc create mode 100644 documents/doc/apache-poi/picture.doc create mode 100644 documents/doc/apache-poi/pictures-escher.doc create mode 100644 documents/doc/apache-poi/pngpicture.doc create mode 100644 documents/doc/apache-poi/problemextracting.doc create mode 100644 documents/doc/apache-poi/rasp.doc create mode 100644 documents/doc/apache-poi/sampledoc.doc create mode 100644 documents/doc/apache-poi/saved-by-table.doc create mode 100644 documents/doc/apache-poi/simple-list.doc create mode 100644 documents/doc/apache-poi/simple-table.doc create mode 100644 documents/doc/apache-poi/simple-table2.doc create mode 100644 documents/doc/apache-poi/simple.doc create mode 100644 documents/doc/apache-poi/simpleheadthreecolfoot.doc create mode 100644 documents/doc/apache-poi/simplemacro.doc create mode 100644 documents/doc/apache-poi/table-merges.doc create mode 100644 documents/doc/apache-poi/test-fields.doc create mode 100644 documents/doc/apache-poi/test.doc create mode 100644 documents/doc/apache-poi/test2.doc create mode 100644 documents/doc/apache-poi/testcroppedpictures.doc create mode 100644 documents/doc/apache-poi/testpictures.doc create mode 100644 documents/doc/apache-poi/testrangedelete.doc create mode 100644 documents/doc/apache-poi/testrangeinsertion.doc create mode 100644 documents/doc/apache-poi/testrangereplacement.doc create mode 100644 documents/doc/apache-poi/threecolfoot.doc create mode 
100644 documents/doc/apache-poi/threecolhead.doc create mode 100644 documents/doc/apache-poi/threecolheadfoot.doc create mode 100644 documents/doc/apache-poi/two-images.doc create mode 100644 documents/doc/apache-poi/vector-image.doc create mode 100644 documents/doc/apache-poi/watermark.doc create mode 100644 documents/doc/apache-poi/withartshapes.doc create mode 100644 documents/doc/apache-poi/word-with-embeded-ooxml.doc create mode 100644 documents/doc/apache-poi/word-with-embeded.doc create mode 100644 documents/doc/apache-poi/word6-sections.doc create mode 100644 documents/doc/apache-poi/word6-sections2.doc create mode 100644 documents/doc/apache-poi/word6.doc create mode 100644 documents/doc/apache-poi/word95.doc create mode 100644 documents/doc/apache-poi/word95err.doc create mode 100644 scrape/doc_gather.py diff --git a/ATTRIBUTION.json b/ATTRIBUTION.json index 02dbe88..b552de5 100644 --- a/ATTRIBUTION.json +++ b/ATTRIBUTION.json @@ -81,5 +81,20 @@ ], "donated": "2026-04-01", "notes": "RTF import/export test data from LibreOffice Writer QA" + }, + { + "format": "doc", + "path": "documents/doc/apache-poi/*.doc", + "title": "Apache POI HWPF Test Documents", + "author": "Apache POI contributors", + "license": "Apache-2.0", + "source": "https://github.com/apache/poi", + "tags": [ + "apache-poi", + "hwpf", + "test-fixtures" + ], + "donated": "2026-04-01", + "notes": "Test fixtures for the Apache POI HWPF Word processor module" } ] diff --git a/documents/doc/apache-poi/47304.doc b/documents/doc/apache-poi/47304.doc new file mode 100644 index 0000000..9ee1ab4 --- /dev/null +++ b/documents/doc/apache-poi/47304.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39170c5a103bb0961b269be04da00ae2ae961da6a26f1624f462ac915adaf13e +size 22016 diff --git a/documents/doc/apache-poi/47950-lower.doc b/documents/doc/apache-poi/47950-lower.doc new file mode 100644 index 0000000..5d3b82a --- /dev/null +++ b/documents/doc/apache-poi/47950-lower.doc @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7efecd456de7ed610a333beb4b6870a9e3a4fe234e68d7bb611d189497f5e22 +size 27136 diff --git a/documents/doc/apache-poi/47950-normal.doc b/documents/doc/apache-poi/47950-normal.doc new file mode 100644 index 0000000..3ec9168 --- /dev/null +++ b/documents/doc/apache-poi/47950-normal.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62366d1126bb3fef1a9ed6589dd2512fbfd23d3b7f6d129f57ffba9a48e6cef5 +size 27136 diff --git a/documents/doc/apache-poi/47950-upper.doc b/documents/doc/apache-poi/47950-upper.doc new file mode 100644 index 0000000..c2ff1fa --- /dev/null +++ b/documents/doc/apache-poi/47950-upper.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ce5c7cf1a9718f6b7bb9393f2c4cb70e0bcc3eff308509173572aa9291d38e9 +size 27136 diff --git a/documents/doc/apache-poi/51921-word-crash067.doc b/documents/doc/apache-poi/51921-word-crash067.doc new file mode 100644 index 0000000..c7948b3 --- /dev/null +++ b/documents/doc/apache-poi/51921-word-crash067.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c21aa8cef00242c5257f848909c36132de2ebc46f712cc149e44a96597a9480 +size 56832 diff --git a/documents/doc/apache-poi/52117.doc b/documents/doc/apache-poi/52117.doc new file mode 100644 index 0000000..179e203 --- /dev/null +++ b/documents/doc/apache-poi/52117.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39dc33af3d370cb9ad5f1742298c5d657e8ed65d4c2251e789a38172e51735ff +size 7168 diff --git a/documents/doc/apache-poi/52420.doc b/documents/doc/apache-poi/52420.doc new file mode 100644 index 0000000..d465c12 --- /dev/null +++ b/documents/doc/apache-poi/52420.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82ea2df1e658f7b2d45b0f261ef8d837b7978e38b31ef4ffb4427f0cde46d5e7 +size 32768 diff --git a/documents/doc/apache-poi/53379.doc b/documents/doc/apache-poi/53379.doc new file mode 100644 index 
0000000..bfcde21 --- /dev/null +++ b/documents/doc/apache-poi/53379.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3744731bc3e7869b4fc968a78d46a3e82f3ee85fd7e6599430558aef4eab88b8 +size 248320 diff --git a/documents/doc/apache-poi/53446.doc b/documents/doc/apache-poi/53446.doc new file mode 100644 index 0000000..9e7877c --- /dev/null +++ b/documents/doc/apache-poi/53446.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3d3ae0680800703c15c2c709796288a3b1beaecaa805e5866a35659e495f22a +size 77312 diff --git a/documents/doc/apache-poi/56880.doc b/documents/doc/apache-poi/56880.doc new file mode 100644 index 0000000..f391d10 --- /dev/null +++ b/documents/doc/apache-poi/56880.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:447fb46ea0d0d374f8f10f99c4ec92d0c9f694427b5d17ae02574a4b17bce224 +size 19968 diff --git a/documents/doc/apache-poi/57603-seven-columns.doc b/documents/doc/apache-poi/57603-seven-columns.doc new file mode 100644 index 0000000..2385091 --- /dev/null +++ b/documents/doc/apache-poi/57603-seven-columns.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e37dac76f3690f88674b44558a73ff447946264250ed41ccb1a25e33b9988ef +size 42496 diff --git a/documents/doc/apache-poi/57843.doc b/documents/doc/apache-poi/57843.doc new file mode 100644 index 0000000..c9cc7f7 --- /dev/null +++ b/documents/doc/apache-poi/57843.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07fffee18c806f844f52a0a5ecd668097f180257900c91a6dc1a15e9165cbf83 +size 8192 diff --git a/documents/doc/apache-poi/58804-1.doc b/documents/doc/apache-poi/58804-1.doc new file mode 100644 index 0000000..dbc7e06 --- /dev/null +++ b/documents/doc/apache-poi/58804-1.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ed323f50ec9213bb0651f7c536754cfd787445e3797115d73e57db6fe55971b +size 61440 diff --git a/documents/doc/apache-poi/58804.doc 
b/documents/doc/apache-poi/58804.doc new file mode 100644 index 0000000..ed8d6f0 --- /dev/null +++ b/documents/doc/apache-poi/58804.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:957c803e1a16545b0b40d83f338757f763246ac836fd00e8587ad5537cd0dd85 +size 306688 diff --git a/documents/doc/apache-poi/59322.doc b/documents/doc/apache-poi/59322.doc new file mode 100644 index 0000000..295b1c8 --- /dev/null +++ b/documents/doc/apache-poi/59322.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c572f06b621a54a5e4499d67ef4b4dec36a66c41b5181606354b1dcdffde3e0d +size 39936 diff --git a/documents/doc/apache-poi/60279.doc b/documents/doc/apache-poi/60279.doc new file mode 100644 index 0000000..f81a303 --- /dev/null +++ b/documents/doc/apache-poi/60279.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd59256eb12abaa3c376e72d33601230edc19aaf343e16d714fe919e70224f9 +size 39424 diff --git a/documents/doc/apache-poi/61490.doc b/documents/doc/apache-poi/61490.doc new file mode 100644 index 0000000..da5c453 --- /dev/null +++ b/documents/doc/apache-poi/61490.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:391ef996860fb463bbdf727744d9461cefab9edaa7d2b08e2d52fecfd1e48e03 +size 31232 diff --git a/documents/doc/apache-poi/61586.doc b/documents/doc/apache-poi/61586.doc new file mode 100644 index 0000000..86fcd14 --- /dev/null +++ b/documents/doc/apache-poi/61586.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cac827159b7dd515a6d55a3a8c453f7fe0b3e722ef8a8fb7c5cf58c83f01db7c +size 27648 diff --git a/documents/doc/apache-poi/61911.doc b/documents/doc/apache-poi/61911.doc new file mode 100644 index 0000000..dc6031f --- /dev/null +++ b/documents/doc/apache-poi/61911.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea740f2a09d9ed122dda450a8873c0889bacb83cb79b80a9e1483b9bbee191a9 +size 28160 diff --git a/documents/doc/apache-poi/64132.doc 
b/documents/doc/apache-poi/64132.doc new file mode 100644 index 0000000..f697089 --- /dev/null +++ b/documents/doc/apache-poi/64132.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87ea7aebae96ca6bd458aa8ea532f8753489f78f22cd4b826378756b86e2d614 +size 237056 diff --git a/documents/doc/apache-poi/aioob-tap.doc b/documents/doc/apache-poi/aioob-tap.doc new file mode 100644 index 0000000..6e7c1fb --- /dev/null +++ b/documents/doc/apache-poi/aioob-tap.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17d6b1596f539c53f06f3c84402b63bb13c38dcea14c2f71d87b87da925d2a01 +size 57856 diff --git a/documents/doc/apache-poi/au-edu-utas-www-data-assets-word-doc-0003-154335-international-travel-approval-request-form.doc b/documents/doc/apache-poi/au-edu-utas-www-data-assets-word-doc-0003-154335-international-travel-approval-request-form.doc new file mode 100644 index 0000000..585d967 --- /dev/null +++ b/documents/doc/apache-poi/au-edu-utas-www-data-assets-word-doc-0003-154335-international-travel-approval-request-form.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b75b0afb94d942d583f5c0bd71dc85774e5477425bda576ec19b26a66d291112 +size 86528 diff --git a/documents/doc/apache-poi/bug28627.doc b/documents/doc/apache-poi/bug28627.doc new file mode 100644 index 0000000..ef88279 --- /dev/null +++ b/documents/doc/apache-poi/bug28627.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b428eb157ee2b29223ecc64473a810b7a3f2d065e9ff2fcec61af24da0d68b2 +size 19968 diff --git a/documents/doc/apache-poi/bug33519.doc b/documents/doc/apache-poi/bug33519.doc new file mode 100644 index 0000000..a3714bd --- /dev/null +++ b/documents/doc/apache-poi/bug33519.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dd9b35fdafd540c29c87ab480d4b019f9b69dea7499df4172a35bc45c40d1e2 +size 137216 diff --git a/documents/doc/apache-poi/bug34898.doc b/documents/doc/apache-poi/bug34898.doc 
new file mode 100644 index 0000000..ea8a79b --- /dev/null +++ b/documents/doc/apache-poi/bug34898.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:908e1b2c72cf37cfec5ccc904466801eee89a8dc84d579c746a7cf4d0cc7503d +size 19968 diff --git a/documents/doc/apache-poi/bug41898.doc b/documents/doc/apache-poi/bug41898.doc new file mode 100644 index 0000000..fce7579 --- /dev/null +++ b/documents/doc/apache-poi/bug41898.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fce917e2dce557dd6b6a52aa2224a0c37dc160c1a3294800db48ede01f2625c4 +size 24064 diff --git a/documents/doc/apache-poi/bug44292.doc b/documents/doc/apache-poi/bug44292.doc new file mode 100644 index 0000000..d4b1d69 --- /dev/null +++ b/documents/doc/apache-poi/bug44292.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:006fdde4db8e0fb4729b7fccf95b31d0e5d51410f50916473e36a915c4b2160f +size 24064 diff --git a/documents/doc/apache-poi/bug44431.doc b/documents/doc/apache-poi/bug44431.doc new file mode 100644 index 0000000..1bb9da7 --- /dev/null +++ b/documents/doc/apache-poi/bug44431.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edcd8f48cd97503ea57ae1b4e0addb20997656cb17d2bc329db4780257c9e998 +size 98304 diff --git a/documents/doc/apache-poi/bug44603.doc b/documents/doc/apache-poi/bug44603.doc new file mode 100644 index 0000000..b4ea886 --- /dev/null +++ b/documents/doc/apache-poi/bug44603.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b85a58c766c852de3b0e7bb33b4852e3465c3a901f492db2c908fc6bc27488f3 +size 30208 diff --git a/documents/doc/apache-poi/bug45269.doc b/documents/doc/apache-poi/bug45269.doc new file mode 100644 index 0000000..275d7a6 --- /dev/null +++ b/documents/doc/apache-poi/bug45269.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:660e6f947d7af78764cd0bb1dfc90e6e37a634e1ab0642a406b470b73e6770ed +size 64512 diff --git 
a/documents/doc/apache-poi/bug45473.doc b/documents/doc/apache-poi/bug45473.doc new file mode 100644 index 0000000..120552d --- /dev/null +++ b/documents/doc/apache-poi/bug45473.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d80630183e153a53082d2c87535271ae186f1a59b1ba1c9b4d434efde0fd98 +size 24576 diff --git a/documents/doc/apache-poi/bug45877.doc b/documents/doc/apache-poi/bug45877.doc new file mode 100644 index 0000000..00cacb6 --- /dev/null +++ b/documents/doc/apache-poi/bug45877.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ebea9c7d46c71fa2e97624b1406314e8e04b1f4d0c9a41ffb15cc93b1122cec +size 62464 diff --git a/documents/doc/apache-poi/bug46220.doc b/documents/doc/apache-poi/bug46220.doc new file mode 100644 index 0000000..3340386 --- /dev/null +++ b/documents/doc/apache-poi/bug46220.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fafe63cd093ae418abbb795c3efb45fa5dd56523179d4aacd46cf075b422338 +size 1140736 diff --git a/documents/doc/apache-poi/bug46610-1.doc b/documents/doc/apache-poi/bug46610-1.doc new file mode 100644 index 0000000..7b3856f --- /dev/null +++ b/documents/doc/apache-poi/bug46610-1.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6fe12b3795050657073aab76f0d34042db7fa2eb2b48e7bb4e905ab870ea7ce +size 27136 diff --git a/documents/doc/apache-poi/bug46610-2.doc b/documents/doc/apache-poi/bug46610-2.doc new file mode 100644 index 0000000..678ea1f --- /dev/null +++ b/documents/doc/apache-poi/bug46610-2.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65ebed4edf76cf63a60dc0051a084b19dc74668e20860e12265a9ab44bfa6aa2 +size 28672 diff --git a/documents/doc/apache-poi/bug46610-3.doc b/documents/doc/apache-poi/bug46610-3.doc new file mode 100644 index 0000000..1095537 --- /dev/null +++ b/documents/doc/apache-poi/bug46610-3.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4d757c526eeb1dd7b376a3ea5b35060d7e2fc2ea2f1b2d50a9804efb18345bc6 +size 53760 diff --git a/documents/doc/apache-poi/bug46817.doc b/documents/doc/apache-poi/bug46817.doc new file mode 100644 index 0000000..0d0e273 --- /dev/null +++ b/documents/doc/apache-poi/bug46817.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a68a9879503f3d65c90759d4b76719743aac048d56e7b1eb9affd63d17d78d8b +size 17920 diff --git a/documents/doc/apache-poi/bug47286.doc b/documents/doc/apache-poi/bug47286.doc new file mode 100644 index 0000000..b8fd9e2 --- /dev/null +++ b/documents/doc/apache-poi/bug47286.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daa43e747b73d3ee25f016c7106f0f726cad61dc5a9515edeff4eb35feaa9da5 +size 44544 diff --git a/documents/doc/apache-poi/bug47287.doc b/documents/doc/apache-poi/bug47287.doc new file mode 100644 index 0000000..b8fd9e2 --- /dev/null +++ b/documents/doc/apache-poi/bug47287.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daa43e747b73d3ee25f016c7106f0f726cad61dc5a9515edeff4eb35feaa9da5 +size 44544 diff --git a/documents/doc/apache-poi/bug47731.doc b/documents/doc/apache-poi/bug47731.doc new file mode 100644 index 0000000..6c1dbe8 --- /dev/null +++ b/documents/doc/apache-poi/bug47731.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eb22e5008c493cdbcf52e50fd09ca365463be6e4e69b152dbc92ff5f6045cac +size 898048 diff --git a/documents/doc/apache-poi/bug47742.doc b/documents/doc/apache-poi/bug47742.doc new file mode 100644 index 0000000..ffe84c7 --- /dev/null +++ b/documents/doc/apache-poi/bug47742.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a669e07ed942a41d36cb4249a96916673ed9aa64e7f594736588f76dd6128a4 +size 27136 diff --git a/documents/doc/apache-poi/bug47958.doc b/documents/doc/apache-poi/bug47958.doc new file mode 100644 index 0000000..1608248 --- /dev/null +++ b/documents/doc/apache-poi/bug47958.doc @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d4aca2632ca8cd66401229f6cadb7813f6859bccc43d9e948b374263ebbc867 +size 679936 diff --git a/documents/doc/apache-poi/bug48065.doc b/documents/doc/apache-poi/bug48065.doc new file mode 100644 index 0000000..aca8c01 --- /dev/null +++ b/documents/doc/apache-poi/bug48065.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:861d5cd862dca70d73467530a435b4d1d383917815b590bbd87b04bc602de460 +size 28160 diff --git a/documents/doc/apache-poi/bug48075.doc b/documents/doc/apache-poi/bug48075.doc new file mode 100644 index 0000000..f4e73a9 --- /dev/null +++ b/documents/doc/apache-poi/bug48075.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2cebaba40b42f5f1bf6164c5b2506798d435af132a6bac926c53fa1b0359056 +size 35840 diff --git a/documents/doc/apache-poi/bug49820.doc b/documents/doc/apache-poi/bug49820.doc new file mode 100644 index 0000000..70ab8f3 --- /dev/null +++ b/documents/doc/apache-poi/bug49820.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f9900a5689f1368f5dad718a433dd64b2ec57a47d4c1d9ca3ffc8771ceca040 +size 23552 diff --git a/documents/doc/apache-poi/bug49908.doc b/documents/doc/apache-poi/bug49908.doc new file mode 100644 index 0000000..1d53c38 --- /dev/null +++ b/documents/doc/apache-poi/bug49908.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41d0f1a7ec2898605b0539297cee3178514febce1d970b7574d66e75d8991c6a +size 23040 diff --git a/documents/doc/apache-poi/bug49919.doc b/documents/doc/apache-poi/bug49919.doc new file mode 100644 index 0000000..08bab47 --- /dev/null +++ b/documents/doc/apache-poi/bug49919.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:075b675d0fafb8cfe1f685a69333d7f8a918d0f5cd7d51b8fe1b59a58fd90096 +size 28672 diff --git a/documents/doc/apache-poi/bug49933.doc b/documents/doc/apache-poi/bug49933.doc new file mode 100644 index 0000000..4066c40 --- 
/dev/null +++ b/documents/doc/apache-poi/bug49933.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c2ffbdbbdc98d210640744cf70cc72836bb4df7a7c50f612c1a95360d51f6ed +size 33280 diff --git a/documents/doc/apache-poi/bug50075.doc b/documents/doc/apache-poi/bug50075.doc new file mode 100644 index 0000000..253a37b --- /dev/null +++ b/documents/doc/apache-poi/bug50075.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e162ef16d7d4b934a786260b835e3c0d9440b14c347618075ec6c073df6bfd4e +size 55296 diff --git a/documents/doc/apache-poi/bug50936-1.doc b/documents/doc/apache-poi/bug50936-1.doc new file mode 100644 index 0000000..929eb22 --- /dev/null +++ b/documents/doc/apache-poi/bug50936-1.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9169e7a5f5aa865579ceb803bd91bd840134e9292c8a56e81c35913b47e218c7 +size 118272 diff --git a/documents/doc/apache-poi/bug50936-2.doc b/documents/doc/apache-poi/bug50936-2.doc new file mode 100644 index 0000000..653a097 --- /dev/null +++ b/documents/doc/apache-poi/bug50936-2.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e678659cf4788ac324c71f7929e19d90ba91d8ac6d68cc7ac99c1f81d70a637 +size 93184 diff --git a/documents/doc/apache-poi/bug50936-3.doc b/documents/doc/apache-poi/bug50936-3.doc new file mode 100644 index 0000000..eb27063 --- /dev/null +++ b/documents/doc/apache-poi/bug50936-3.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b67898da842acf53824eb6fb025da7701d7a307ff478e6714ce5f7ccc546c39 +size 624640 diff --git a/documents/doc/apache-poi/bug50955.doc b/documents/doc/apache-poi/bug50955.doc new file mode 100644 index 0000000..6827e29 --- /dev/null +++ b/documents/doc/apache-poi/bug50955.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59e1c7117b8038e570ee12589abcf9b9a9652819ca201b95be7e0aeaf2a9d19f +size 603136 diff --git a/documents/doc/apache-poi/bug51604.doc 
b/documents/doc/apache-poi/bug51604.doc new file mode 100644 index 0000000..4ac347d --- /dev/null +++ b/documents/doc/apache-poi/bug51604.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c41616a8ab9252ad7635a18a95286fa1ae0573e063bba67339f8634fce58d4c0 +size 26624 diff --git a/documents/doc/apache-poi/bug51686.doc b/documents/doc/apache-poi/bug51686.doc new file mode 100644 index 0000000..84d07b0 --- /dev/null +++ b/documents/doc/apache-poi/bug51686.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ca19b67876f284a0e04ed06df44a004f850629a871fe522d014e1fdff912799 +size 32768 diff --git a/documents/doc/apache-poi/bug51834.doc b/documents/doc/apache-poi/bug51834.doc new file mode 100644 index 0000000..227e248 --- /dev/null +++ b/documents/doc/apache-poi/bug51834.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb570d42bc0bdea8df147549a8b662a013bfb61b953c9c058138caf65af64907 +size 47104 diff --git a/documents/doc/apache-poi/bug51890.doc b/documents/doc/apache-poi/bug51890.doc new file mode 100644 index 0000000..1f34b71 --- /dev/null +++ b/documents/doc/apache-poi/bug51890.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3117015717ef6a2817007c2f3e5a18d88feeda6899a858383ee7d3c353d26795 +size 44544 diff --git a/documents/doc/apache-poi/bug51944.doc b/documents/doc/apache-poi/bug51944.doc new file mode 100644 index 0000000..2d43a48 --- /dev/null +++ b/documents/doc/apache-poi/bug51944.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e9ba722afe708c936f20252525fea0817f6b1026c008ea9786a242f69a7ca19 +size 27648 diff --git a/documents/doc/apache-poi/bug52032-1.doc b/documents/doc/apache-poi/bug52032-1.doc new file mode 100644 index 0000000..f55608e --- /dev/null +++ b/documents/doc/apache-poi/bug52032-1.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff920493f14c128529003d424669ecb79c6250732bbe2ac43a5b82c87e274716 
+size 41472 diff --git a/documents/doc/apache-poi/bug52032-2.doc b/documents/doc/apache-poi/bug52032-2.doc new file mode 100644 index 0000000..e03f42f --- /dev/null +++ b/documents/doc/apache-poi/bug52032-2.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ac924d6960e59ff50cb510560ddc3364629f6173bb9783c4794cb004e360b0f +size 218112 diff --git a/documents/doc/apache-poi/bug52032-3.doc b/documents/doc/apache-poi/bug52032-3.doc new file mode 100644 index 0000000..fed9506 --- /dev/null +++ b/documents/doc/apache-poi/bug52032-3.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acee7641ed01067235bcfb03552d73cbde39557296b0ad71cedf81a5254fee29 +size 91136 diff --git a/documents/doc/apache-poi/bug52311.doc b/documents/doc/apache-poi/bug52311.doc new file mode 100644 index 0000000..2ef70ce --- /dev/null +++ b/documents/doc/apache-poi/bug52311.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9f6797e998c574b5acd84319bca9933f9a0a60d379741e167071e52f42ad15f +size 32768 diff --git a/documents/doc/apache-poi/bug52583.doc b/documents/doc/apache-poi/bug52583.doc new file mode 100644 index 0000000..3243e90 --- /dev/null +++ b/documents/doc/apache-poi/bug52583.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0b0fe4601c2ce10a440a75e398da93f9737ea39fc36a81d2f30311d264fb97c +size 26112 diff --git a/documents/doc/apache-poi/bug53182.doc b/documents/doc/apache-poi/bug53182.doc new file mode 100644 index 0000000..7521c19 --- /dev/null +++ b/documents/doc/apache-poi/bug53182.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac8149abe4574da269f5c8ee1ee54cb0d13396a6296f3732c351777501a10a45 +size 19968 diff --git a/documents/doc/apache-poi/bug53380-1.doc b/documents/doc/apache-poi/bug53380-1.doc new file mode 100644 index 0000000..ca630ab --- /dev/null +++ b/documents/doc/apache-poi/bug53380-1.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d7673aa3afa6ace77b4fed0a77d3437b53623bd56275d97c54c03a0f08653db1 +size 103424 diff --git a/documents/doc/apache-poi/bug53380-2.doc b/documents/doc/apache-poi/bug53380-2.doc new file mode 100644 index 0000000..33944c1 --- /dev/null +++ b/documents/doc/apache-poi/bug53380-2.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efe08c3e6981b023287f72fff38dc3035dad8e30f98cc9734e0b925be42a88a7 +size 31744 diff --git a/documents/doc/apache-poi/bug53380-3.doc b/documents/doc/apache-poi/bug53380-3.doc new file mode 100644 index 0000000..bfcde21 --- /dev/null +++ b/documents/doc/apache-poi/bug53380-3.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3744731bc3e7869b4fc968a78d46a3e82f3ee85fd7e6599430558aef4eab88b8 +size 248320 diff --git a/documents/doc/apache-poi/bug53380-4.doc b/documents/doc/apache-poi/bug53380-4.doc new file mode 100644 index 0000000..0ebcc43 --- /dev/null +++ b/documents/doc/apache-poi/bug53380-4.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59fe582f2b2c23530cab2ae4812f486ceff8ae4fbe15983caa92a5f5a4ead6f6 +size 34304 diff --git a/documents/doc/apache-poi/bug53453section.doc b/documents/doc/apache-poi/bug53453section.doc new file mode 100644 index 0000000..9063de4 --- /dev/null +++ b/documents/doc/apache-poi/bug53453section.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a827880e1a28cf0454f2e6ad9f317d6a0dd6a23580a45e14468a15b2eb4aebb3 +size 26112 diff --git a/documents/doc/apache-poi/bug60936.doc b/documents/doc/apache-poi/bug60936.doc new file mode 100644 index 0000000..05af1d0 --- /dev/null +++ b/documents/doc/apache-poi/bug60936.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c66189b6abfe51d605c55de7d9358d0f8d7f10477271b81a9a3ed1be270445e1 +size 6656 diff --git a/documents/doc/apache-poi/bug60942.doc b/documents/doc/apache-poi/bug60942.doc new file mode 100644 index 0000000..c671d3e --- /dev/null +++ 
b/documents/doc/apache-poi/bug60942.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6455da23aa1cdece9a7416b934a4d193290d028b3fcae62056dbb60cda3131da +size 20480 diff --git a/documents/doc/apache-poi/bug60942b.doc b/documents/doc/apache-poi/bug60942b.doc new file mode 100644 index 0000000..1b59a62 --- /dev/null +++ b/documents/doc/apache-poi/bug60942b.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:183379e7c855aeed2c496f1170e4e2bec16bfde9fddc49ae0fa14f765947430f +size 6144 diff --git a/documents/doc/apache-poi/bug61268.doc b/documents/doc/apache-poi/bug61268.doc new file mode 100644 index 0000000..a9b8915 --- /dev/null +++ b/documents/doc/apache-poi/bug61268.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24b47ad892871c80495c64b440c24200dbcaa16c367faf780aec43e3bf8ddae6 +size 6239232 diff --git a/documents/doc/apache-poi/bug65255.doc b/documents/doc/apache-poi/bug65255.doc new file mode 100644 index 0000000..396c81a --- /dev/null +++ b/documents/doc/apache-poi/bug65255.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b914c38658dcda716ae4a901f16a13f6893589934d435e085bbf67f408b07592 +size 27136 diff --git a/documents/doc/apache-poi/ca-kwsymphony-www-education-school-concert-seat-booking-form-2011-12.doc b/documents/doc/apache-poi/ca-kwsymphony-www-education-school-concert-seat-booking-form-2011-12.doc new file mode 100644 index 0000000..5b04b95 --- /dev/null +++ b/documents/doc/apache-poi/ca-kwsymphony-www-education-school-concert-seat-booking-form-2011-12.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8edfda1e7403795ea414d289d852ea324429e249aa65f9ec3b21ffbd1b4a802 +size 1619457 diff --git a/documents/doc/apache-poi/cap-stanford-edu-profiles-viewbiosketch-facultyid-4009-name-m-maciver.doc b/documents/doc/apache-poi/cap-stanford-edu-profiles-viewbiosketch-facultyid-4009-name-m-maciver.doc new file mode 100644 index 0000000..a7dab1a 
--- /dev/null +++ b/documents/doc/apache-poi/cap-stanford-edu-profiles-viewbiosketch-facultyid-4009-name-m-maciver.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e6e4ffeb1811f0fab5539709b17934d4ac2babe3cf1d670e30af50aef826d2d +size 47104 diff --git a/documents/doc/apache-poi/capitalized.doc b/documents/doc/apache-poi/capitalized.doc new file mode 100644 index 0000000..d7efe29 --- /dev/null +++ b/documents/doc/apache-poi/capitalized.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63483e58621ae1e3011a0255761e11afc8a73b573561b2bd47544d2b449aab97 +size 26624 diff --git a/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-4892412469968896.doc b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-4892412469968896.doc new file mode 100644 index 0000000..5d05442 --- /dev/null +++ b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-4892412469968896.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c10a91a1d8517c9a00c95c5c8ba14df365d2ed9346de4b849370fe468bc49b +size 335360 diff --git a/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-4947285593948160.doc b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-4947285593948160.doc new file mode 100644 index 0000000..aaad576 --- /dev/null +++ b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-4947285593948160.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:142e28fc030f0d7f251930c916267e611a729ca384be0454b2bb6e1cc523865e +size 56832 diff --git a/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-4951943183990784.doc b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-4951943183990784.doc new file mode 100644 index 0000000..93cd217 --- /dev/null +++ b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-4951943183990784.doc @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:a4baffc3fa9972ebb97d86d6e421d35de269af9861c88f4a8c1ce28a84d52966 +size 86528 diff --git a/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5050208641482752.doc b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5050208641482752.doc new file mode 100644 index 0000000..ee1b552 --- /dev/null +++ b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5050208641482752.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a565cf7167e942d09d29c60115b372a652d4e8c0047d28fcf65e0c680d8443c +size 56805 diff --git a/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5074346559012864.doc b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5074346559012864.doc new file mode 100644 index 0000000..837464b --- /dev/null +++ b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5074346559012864.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1952b23c38cd9793cb67a464f243c633c9370eb550b963abc84184d63b8382b +size 9729 diff --git a/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5195207308541952.doc b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5195207308541952.doc new file mode 100644 index 0000000..188327b --- /dev/null +++ b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5195207308541952.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f125b24c52722b5581d7b4efbd814dbb2229031a05c83af77af363ed151fdf4d +size 57344 diff --git a/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5418937293340672.doc b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5418937293340672.doc new file mode 100644 index 0000000..d811ebc --- /dev/null +++ b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5418937293340672.doc @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:eae552c7244ce5a67bd86184e624cbfe718de6f5d15ef107a0d2b4b597aab758 +size 9216 diff --git a/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5440721166139392.doc b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5440721166139392.doc new file mode 100644 index 0000000..b36e167 --- /dev/null +++ b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5440721166139392.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb16173735b0afafc2b1b6e64a65148aaa90a1be205f0e8d60d907c39d98ddce +size 17936 diff --git a/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5832867957309440.doc b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5832867957309440.doc new file mode 100644 index 0000000..ef518cc --- /dev/null +++ b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-5832867957309440.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aca848a143f1de8fa959f80ae7f4a5a4908db5d43beb3772dbc4da1a8b1d2de +size 4748 diff --git a/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-6610789829836800.doc b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-6610789829836800.doc new file mode 100644 index 0000000..c667dc4 --- /dev/null +++ b/documents/doc/apache-poi/clusterfuzz-testcase-minimized-poihwpffuzzer-6610789829836800.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98a86044f85c5bc18446f842ea1feb529728759e6a751ba91bb3aa8c6ea9d30c +size 39249 diff --git a/documents/doc/apache-poi/clusterfuzz-testcase-poihwpffuzzer-5696094627495936.doc b/documents/doc/apache-poi/clusterfuzz-testcase-poihwpffuzzer-5696094627495936.doc new file mode 100644 index 0000000..c77b6dc --- /dev/null +++ b/documents/doc/apache-poi/clusterfuzz-testcase-poihwpffuzzer-5696094627495936.doc @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:c5e44bbaf25d7553e3a352273c16be662b7e9eeee39f426c618472b750db4f79 +size 46603 diff --git a/documents/doc/apache-poi/cn-orthodox-www-divenbog-april-30-april.doc b/documents/doc/apache-poi/cn-orthodox-www-divenbog-april-30-april.doc new file mode 100644 index 0000000..a240a5e --- /dev/null +++ b/documents/doc/apache-poi/cn-orthodox-www-divenbog-april-30-april.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff42c756be1e4c3a1fd00f2f34264c5055e464c8382b372770f6f84d93059d73 +size 23041 diff --git a/documents/doc/apache-poi/difffirstpageheadfoot.doc b/documents/doc/apache-poi/difffirstpageheadfoot.doc new file mode 100644 index 0000000..5d4a942 --- /dev/null +++ b/documents/doc/apache-poi/difffirstpageheadfoot.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:674285298712213a7e0141182a2efc9d7f3ad6ebf4f5520b8405774ec3e5def0 +size 27648 diff --git a/documents/doc/apache-poi/documentproperties.doc b/documents/doc/apache-poi/documentproperties.doc new file mode 100644 index 0000000..678a9a8 --- /dev/null +++ b/documents/doc/apache-poi/documentproperties.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3958e5c16a9089f07c1d7602e2e15b6032b586f1d1eeed6fe048af73535e36cb +size 9728 diff --git a/documents/doc/apache-poi/empty.doc b/documents/doc/apache-poi/empty.doc new file mode 100644 index 0000000..1132337 --- /dev/null +++ b/documents/doc/apache-poi/empty.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f421a3970c70f12296073478380c0849a54c03fb2f4842392c7d3c5039bf1eb +size 19456 diff --git a/documents/doc/apache-poi/endingnote.doc b/documents/doc/apache-poi/endingnote.doc new file mode 100644 index 0000000..4451095 --- /dev/null +++ b/documents/doc/apache-poi/endingnote.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f170880d7ff9820a56c252f14286dc99c1c658d8e157bacf0e5f0166d5a714be +size 9728 diff 
--git a/documents/doc/apache-poi/equation.doc b/documents/doc/apache-poi/equation.doc new file mode 100644 index 0000000..30c1485 --- /dev/null +++ b/documents/doc/apache-poi/equation.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eccab1c32bf3ef27e36dfeb46b3ebfba4981666e0fe9364eef05fa55f6463980 +size 13824 diff --git a/documents/doc/apache-poi/fancyfoot.doc b/documents/doc/apache-poi/fancyfoot.doc new file mode 100644 index 0000000..76d11d5 --- /dev/null +++ b/documents/doc/apache-poi/fancyfoot.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:709fe8b695a9b12be35c7e81d93beb95afc8dc36422dfc0cbd605a3057d4ddc2 +size 27136 diff --git a/documents/doc/apache-poi/floatingpictures.doc b/documents/doc/apache-poi/floatingpictures.doc new file mode 100644 index 0000000..18ce22d --- /dev/null +++ b/documents/doc/apache-poi/floatingpictures.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41cc0cd8f2d7390266f844f5bddbcd29d08ac5338abc33832d9c9a6ed5d0f85d +size 335360 diff --git a/documents/doc/apache-poi/footnote.doc b/documents/doc/apache-poi/footnote.doc new file mode 100644 index 0000000..ffbf7d9 --- /dev/null +++ b/documents/doc/apache-poi/footnote.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81a18ac63010789ca08c28de107658a308c758a3cfc667a6edf11ffe888d3e13 +size 9728 diff --git a/documents/doc/apache-poi/fuzzed.doc b/documents/doc/apache-poi/fuzzed.doc new file mode 100644 index 0000000..4f676c8 --- /dev/null +++ b/documents/doc/apache-poi/fuzzed.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b65cbabab6985cf7d64b8f4dd868ab3c9f9500df6616c92286d267bc5b64482d +size 335360 diff --git a/documents/doc/apache-poi/gaiatest.doc b/documents/doc/apache-poi/gaiatest.doc new file mode 100644 index 0000000..3bc0b09 --- /dev/null +++ b/documents/doc/apache-poi/gaiatest.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:7f7085aa8442b765f8287d9876553c0aeb13338951ead59b59327132381d9456 +size 135680 diff --git a/documents/doc/apache-poi/header-footer-replace.doc b/documents/doc/apache-poi/header-footer-replace.doc new file mode 100644 index 0000000..6e1089f --- /dev/null +++ b/documents/doc/apache-poi/header-footer-replace.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:649f1e8d00eb0e91da67ddae880a74ce37baaa8909b98c2abdf5675912192b40 +size 28160 diff --git a/documents/doc/apache-poi/header-image.doc b/documents/doc/apache-poi/header-image.doc new file mode 100644 index 0000000..8cbecdf --- /dev/null +++ b/documents/doc/apache-poi/header-image.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:487df1d19dcff7019edb304146a279bae72b7be3f8804919cd4aad50e2497ae1 +size 39936 diff --git a/documents/doc/apache-poi/headerfooterproblematic.doc b/documents/doc/apache-poi/headerfooterproblematic.doc new file mode 100644 index 0000000..29d3062 --- /dev/null +++ b/documents/doc/apache-poi/headerfooterproblematic.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd28e07f2496a91c4dabf1bb9201beb1744b164266f868210c4c2c0d235dacd5 +size 61440 diff --git a/documents/doc/apache-poi/headerfooterunicode.doc b/documents/doc/apache-poi/headerfooterunicode.doc new file mode 100644 index 0000000..f722135 --- /dev/null +++ b/documents/doc/apache-poi/headerfooterunicode.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bd7fc9552bccb7b71a0dc34ffd801e7d3547d1317fd75d0c35fca3572f6fa4b +size 28672 diff --git a/documents/doc/apache-poi/headerwithmacros.doc b/documents/doc/apache-poi/headerwithmacros.doc new file mode 100644 index 0000000..cf0e375 --- /dev/null +++ b/documents/doc/apache-poi/headerwithmacros.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98b991a18dddaebeda8a3efd0951abeb2a4ce4b6065ed6a2f1c7aa13dbb526f8 +size 22528 diff --git a/documents/doc/apache-poi/hyperlink.doc 
b/documents/doc/apache-poi/hyperlink.doc new file mode 100644 index 0000000..c0f9f09 --- /dev/null +++ b/documents/doc/apache-poi/hyperlink.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:090c1e410cf91552c91e50585031900c48c659fbb9efe54bf12a35137b86b7e6 +size 9728 diff --git a/documents/doc/apache-poi/innertable.doc b/documents/doc/apache-poi/innertable.doc new file mode 100644 index 0000000..3e80561 --- /dev/null +++ b/documents/doc/apache-poi/innertable.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b085e66bad882c3c58ec49d7547b42f75141babd4a5e8efcf634b995e44bb3f +size 11776 diff --git a/documents/doc/apache-poi/listentrynolisttable.doc b/documents/doc/apache-poi/listentrynolisttable.doc new file mode 100644 index 0000000..d3dafd4 --- /dev/null +++ b/documents/doc/apache-poi/listentrynolisttable.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf4b76eeefad70e4ea104f20e6e38ded976a7a95595a650c05a8845def106dcc +size 19456 diff --git a/documents/doc/apache-poi/lists-margins.doc b/documents/doc/apache-poi/lists-margins.doc new file mode 100644 index 0000000..ab796e6 --- /dev/null +++ b/documents/doc/apache-poi/lists-margins.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:536c7cb6589a89c632734298b7ac65a103b7fa0ceb9108c2a975ef197a0c175e +size 10752 diff --git a/documents/doc/apache-poi/lists.doc b/documents/doc/apache-poi/lists.doc new file mode 100644 index 0000000..eb894b2 --- /dev/null +++ b/documents/doc/apache-poi/lists.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82b235ece31662a9b1b414f32a556d203261296e22689470a79007f0b29c37fa +size 27648 diff --git a/documents/doc/apache-poi/markauthorstable.doc b/documents/doc/apache-poi/markauthorstable.doc new file mode 100644 index 0000000..3475e1d --- /dev/null +++ b/documents/doc/apache-poi/markauthorstable.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:42491fab86a86a7f67f4673c2d777a4c1eb5d8817e75da49e303a8a719b07d8d +size 76800 diff --git a/documents/doc/apache-poi/noheadfoot.doc b/documents/doc/apache-poi/noheadfoot.doc new file mode 100644 index 0000000..6450601 --- /dev/null +++ b/documents/doc/apache-poi/noheadfoot.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45e5df073f34314da6f39d2dad119fb2ef23470878fd2df67f632864cd92ea48 +size 26112 diff --git a/documents/doc/apache-poi/o-kurs.doc b/documents/doc/apache-poi/o-kurs.doc new file mode 100644 index 0000000..7607709 --- /dev/null +++ b/documents/doc/apache-poi/o-kurs.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd87d25008e5daad8ede2e17976250b1f2dd519102f32135af0a29cbf0789685 +size 202240 diff --git a/documents/doc/apache-poi/ob-is.doc b/documents/doc/apache-poi/ob-is.doc new file mode 100644 index 0000000..16af275 --- /dev/null +++ b/documents/doc/apache-poi/ob-is.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:569aa2afad2dc72e932eb351f8b2250dc5646a3f4585c148f84d338dce38dc5c +size 167936 diff --git a/documents/doc/apache-poi/page-break-before.doc b/documents/doc/apache-poi/page-break-before.doc new file mode 100644 index 0000000..eb3446b --- /dev/null +++ b/documents/doc/apache-poi/page-break-before.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19ae2de07e16fce156316abcc30210415482e4ac20a21525ee6e53e3432ce1f5 +size 9216 diff --git a/documents/doc/apache-poi/page-break.doc b/documents/doc/apache-poi/page-break.doc new file mode 100644 index 0000000..2c90f75 --- /dev/null +++ b/documents/doc/apache-poi/page-break.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b92bea0571001061313625ff7ac6ad0fa59383381315f740e60c50637d982fef +size 9216 diff --git a/documents/doc/apache-poi/pageref.doc b/documents/doc/apache-poi/pageref.doc new file mode 100644 index 0000000..c2ccb68 --- /dev/null +++ 
b/documents/doc/apache-poi/pageref.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45a661b828bef9d3260768e869163a2db3e12084234ba0c860f75c4a18d3442d +size 9728 diff --git a/documents/doc/apache-poi/pagespecificheadfoot.doc b/documents/doc/apache-poi/pagespecificheadfoot.doc new file mode 100644 index 0000000..ac1e3f2 --- /dev/null +++ b/documents/doc/apache-poi/pagespecificheadfoot.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:081c946c701a3061c32c41bef5235f518465a6a62b1250f6834b01ac6bbebbfe +size 31232 diff --git a/documents/doc/apache-poi/parentinvguid.doc b/documents/doc/apache-poi/parentinvguid.doc new file mode 100644 index 0000000..d150071 --- /dev/null +++ b/documents/doc/apache-poi/parentinvguid.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1759d6fa68a1ab33b8d59d14e9a9ff040410b09867cb1bfaf298855ba01fcb4c +size 398848 diff --git a/documents/doc/apache-poi/password-password-cryptoapi.doc b/documents/doc/apache-poi/password-password-cryptoapi.doc new file mode 100644 index 0000000..58855ac --- /dev/null +++ b/documents/doc/apache-poi/password-password-cryptoapi.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2d0dc59ad7ec2356695ad5dc550057052a4017d5f1eb46e887297f5089896fb +size 27136 diff --git a/documents/doc/apache-poi/password-tika-binaryrc4.doc b/documents/doc/apache-poi/password-tika-binaryrc4.doc new file mode 100644 index 0000000..98fd172 --- /dev/null +++ b/documents/doc/apache-poi/password-tika-binaryrc4.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9231e724bb17a2e5f74815728d90b06e15684cf5fb2443a6fa24deebd33be952 +size 22016 diff --git a/documents/doc/apache-poi/passwordprotected.doc b/documents/doc/apache-poi/passwordprotected.doc new file mode 100644 index 0000000..74bfbff --- /dev/null +++ b/documents/doc/apache-poi/passwordprotected.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:863d2216a6f5bb4cec0ea4131a05ae0822dbce347e2940155ef638c6d64935fe +size 19968 diff --git a/documents/doc/apache-poi/picture-alternative-text.doc b/documents/doc/apache-poi/picture-alternative-text.doc new file mode 100644 index 0000000..defc8e2 --- /dev/null +++ b/documents/doc/apache-poi/picture-alternative-text.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33274ab285274ebda4accc9becf7a55a78b36e123aca4cf12882b4b625ba78a8 +size 26112 diff --git a/documents/doc/apache-poi/picture.doc b/documents/doc/apache-poi/picture.doc new file mode 100644 index 0000000..a4805c2 --- /dev/null +++ b/documents/doc/apache-poi/picture.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5a6b5edc2f4324b668cfea44fc71ac5488f13431405cf4fd90737fafed1d153 +size 1448448 diff --git a/documents/doc/apache-poi/pictures-escher.doc b/documents/doc/apache-poi/pictures-escher.doc new file mode 100644 index 0000000..4c330c0 --- /dev/null +++ b/documents/doc/apache-poi/pictures-escher.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84a68678fc525e763beabab7e79240e2bbf4cd6b7766eed3364681d38fed2457 +size 120320 diff --git a/documents/doc/apache-poi/pngpicture.doc b/documents/doc/apache-poi/pngpicture.doc new file mode 100644 index 0000000..959123b --- /dev/null +++ b/documents/doc/apache-poi/pngpicture.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:259aa01f88c17bb4ad364d08ce62424959f51717b37d8a5578e3045f9eaede6b +size 14336 diff --git a/documents/doc/apache-poi/problemextracting.doc b/documents/doc/apache-poi/problemextracting.doc new file mode 100644 index 0000000..ed27fcc --- /dev/null +++ b/documents/doc/apache-poi/problemextracting.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f00d247c8973f16808a73678f4e3bf941776b065b5dff0cd5e5a91d16f490818 +size 424448 diff --git a/documents/doc/apache-poi/rasp.doc b/documents/doc/apache-poi/rasp.doc new file mode 
100644 index 0000000..955c506 --- /dev/null +++ b/documents/doc/apache-poi/rasp.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c8301e6e9594514f63dcdcac3bb93a4614afdb5efa65b14843860350d5e48e8 +size 38912 diff --git a/documents/doc/apache-poi/sampledoc.doc b/documents/doc/apache-poi/sampledoc.doc new file mode 100644 index 0000000..1142860 --- /dev/null +++ b/documents/doc/apache-poi/sampledoc.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:717585c0a88f20862c86220163dd03da87cd743970354c6b29add32725f2e833 +size 27136 diff --git a/documents/doc/apache-poi/saved-by-table.doc b/documents/doc/apache-poi/saved-by-table.doc new file mode 100644 index 0000000..ceffffa --- /dev/null +++ b/documents/doc/apache-poi/saved-by-table.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3081ba08abc538c9bdab871fb1d7e2ef7930d12412c3b3b72bda233983c31351 +size 65024 diff --git a/documents/doc/apache-poi/simple-list.doc b/documents/doc/apache-poi/simple-list.doc new file mode 100644 index 0000000..a948414 --- /dev/null +++ b/documents/doc/apache-poi/simple-list.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:666f851727aeb9013d34586fe3ab5df687a36108ca3eb2ba193954cbd084be70 +size 19456 diff --git a/documents/doc/apache-poi/simple-table.doc b/documents/doc/apache-poi/simple-table.doc new file mode 100644 index 0000000..52a2bec --- /dev/null +++ b/documents/doc/apache-poi/simple-table.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67d7fe8e0555c16516354498a5935f24eeef90beb5efbf526f93a1d6c734da43 +size 19456 diff --git a/documents/doc/apache-poi/simple-table2.doc b/documents/doc/apache-poi/simple-table2.doc new file mode 100644 index 0000000..b6d99cf --- /dev/null +++ b/documents/doc/apache-poi/simple-table2.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aea59b70c19a150d657065f254c5e10db08a51518d1c7fce33fc8f75ab6c6cc +size 27136 
diff --git a/documents/doc/apache-poi/simple.doc b/documents/doc/apache-poi/simple.doc new file mode 100644 index 0000000..53fa5b6 --- /dev/null +++ b/documents/doc/apache-poi/simple.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4876e828e62d490fa188c7c4e47013e2e0dfdf02f5903dfd374bc0d2b2049ca1 +size 19456 diff --git a/documents/doc/apache-poi/simpleheadthreecolfoot.doc b/documents/doc/apache-poi/simpleheadthreecolfoot.doc new file mode 100644 index 0000000..5167cb4 --- /dev/null +++ b/documents/doc/apache-poi/simpleheadthreecolfoot.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de15f6088703ef0730ee715b90dd23a5126011f4cf87900f31d91ea09927244d +size 27136 diff --git a/documents/doc/apache-poi/simplemacro.doc b/documents/doc/apache-poi/simplemacro.doc new file mode 100644 index 0000000..e70b268 --- /dev/null +++ b/documents/doc/apache-poi/simplemacro.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e9608c711d38f298ed20a5bfae12fcdaed6f4be2be4d26db670475216ca393 +size 36864 diff --git a/documents/doc/apache-poi/table-merges.doc b/documents/doc/apache-poi/table-merges.doc new file mode 100644 index 0000000..4a47464 --- /dev/null +++ b/documents/doc/apache-poi/table-merges.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bceea684bbf492fae57da88a133c3fd7ea9abd8355a89c6b9f31b9b0e2d68d3e +size 11264 diff --git a/documents/doc/apache-poi/test-fields.doc b/documents/doc/apache-poi/test-fields.doc new file mode 100644 index 0000000..78b94e6 --- /dev/null +++ b/documents/doc/apache-poi/test-fields.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28bdbdddc080402133bc5267133962ff99415bf060bd966b7b3cf8a847633ca3 +size 30720 diff --git a/documents/doc/apache-poi/test.doc b/documents/doc/apache-poi/test.doc new file mode 100644 index 0000000..f4b1d49 --- /dev/null +++ b/documents/doc/apache-poi/test.doc @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:fcaa1af5e3e90a7a09d4106d2aedc7fea397dec554f2cdb1238891607a2ef21f +size 32768 diff --git a/documents/doc/apache-poi/test2.doc b/documents/doc/apache-poi/test2.doc new file mode 100644 index 0000000..5e96de5 --- /dev/null +++ b/documents/doc/apache-poi/test2.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:773a1db3c6cba870ccad43a4149c6446554a8ef54e6210d6d6987500d1d95ec3 +size 19968 diff --git a/documents/doc/apache-poi/testcroppedpictures.doc b/documents/doc/apache-poi/testcroppedpictures.doc new file mode 100644 index 0000000..fc9fb94 --- /dev/null +++ b/documents/doc/apache-poi/testcroppedpictures.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb121b28b3f1d89c18e173a89e84c25259b48055cad9f2f817414ca7d9aeac99 +size 201728 diff --git a/documents/doc/apache-poi/testpictures.doc b/documents/doc/apache-poi/testpictures.doc new file mode 100644 index 0000000..5e851c5 --- /dev/null +++ b/documents/doc/apache-poi/testpictures.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f5224140f6381e7f7af069badcf8970a01ecb5630a2bbd30f155d164c767f66 +size 315904 diff --git a/documents/doc/apache-poi/testrangedelete.doc b/documents/doc/apache-poi/testrangedelete.doc new file mode 100644 index 0000000..4a36b31 --- /dev/null +++ b/documents/doc/apache-poi/testrangedelete.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84e6db3eb90bd26af159144eedcf389b71e7a7b6d00b85707b3ce4336ae95220 +size 104448 diff --git a/documents/doc/apache-poi/testrangeinsertion.doc b/documents/doc/apache-poi/testrangeinsertion.doc new file mode 100644 index 0000000..8a6bcee --- /dev/null +++ b/documents/doc/apache-poi/testrangeinsertion.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be9a5ab286194386de8f9d1c9a555d2a67e2d59d1ba45fbbe3b9f2f49645c6aa +size 104448 diff --git a/documents/doc/apache-poi/testrangereplacement.doc 
b/documents/doc/apache-poi/testrangereplacement.doc new file mode 100644 index 0000000..93cc20c --- /dev/null +++ b/documents/doc/apache-poi/testrangereplacement.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed13c3d8d404b5d357887565f4f15f20d2460b367e4585179eceacb7829844a9 +size 104448 diff --git a/documents/doc/apache-poi/threecolfoot.doc b/documents/doc/apache-poi/threecolfoot.doc new file mode 100644 index 0000000..db63e21 --- /dev/null +++ b/documents/doc/apache-poi/threecolfoot.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38a9da0842c25c62f64c5fadb16cb6ff9e2fe5cdea5312925020aa238aec24fc +size 27136 diff --git a/documents/doc/apache-poi/threecolhead.doc b/documents/doc/apache-poi/threecolhead.doc new file mode 100644 index 0000000..b596ba3 --- /dev/null +++ b/documents/doc/apache-poi/threecolhead.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:199dcc99cbf6f7ca5ed5d57b5072e124c7776895f0669d7fac4227f4208a8ad6 +size 27136 diff --git a/documents/doc/apache-poi/threecolheadfoot.doc b/documents/doc/apache-poi/threecolheadfoot.doc new file mode 100644 index 0000000..a168535 --- /dev/null +++ b/documents/doc/apache-poi/threecolheadfoot.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8c260238b10e20d89f3965f2eb2ee7e024a106833acc79e666c49a1782a637b +size 27136 diff --git a/documents/doc/apache-poi/two-images.doc b/documents/doc/apache-poi/two-images.doc new file mode 100644 index 0000000..ba7cd04 --- /dev/null +++ b/documents/doc/apache-poi/two-images.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2c2f835d2ab986ae367a787fcad04c2fa24c5135ae477bf934e7c074895312e +size 24064 diff --git a/documents/doc/apache-poi/vector-image.doc b/documents/doc/apache-poi/vector-image.doc new file mode 100644 index 0000000..e3bf1d8 --- /dev/null +++ b/documents/doc/apache-poi/vector-image.doc @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:747eb4c44a9ff7ac646f267fad6d4006b596a8beaec211fe27cb67f018800850 +size 24064 diff --git a/documents/doc/apache-poi/watermark.doc b/documents/doc/apache-poi/watermark.doc new file mode 100644 index 0000000..f323ddc --- /dev/null +++ b/documents/doc/apache-poi/watermark.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6e74961fc41a00744b0f945639b4194d72028bb05589b069f56a09befd970be +size 20480 diff --git a/documents/doc/apache-poi/withartshapes.doc b/documents/doc/apache-poi/withartshapes.doc new file mode 100644 index 0000000..3c749c7 --- /dev/null +++ b/documents/doc/apache-poi/withartshapes.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea17063facd200d340f2d076727e819f557d307f506c640896e968d9bcab5c0 +size 53760 diff --git a/documents/doc/apache-poi/word-with-embeded-ooxml.doc b/documents/doc/apache-poi/word-with-embeded-ooxml.doc new file mode 100644 index 0000000..c7b7cbe --- /dev/null +++ b/documents/doc/apache-poi/word-with-embeded-ooxml.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:032d225f3437e8c3f126b45869056b603292cbb9d077ead55b312293e8e829c5 +size 319488 diff --git a/documents/doc/apache-poi/word-with-embeded.doc b/documents/doc/apache-poi/word-with-embeded.doc new file mode 100644 index 0000000..67ca7e2 --- /dev/null +++ b/documents/doc/apache-poi/word-with-embeded.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:206acf1f7eb7b7d7d2075e04015d005ae9bb56f025560f2583ad3878257bc4c7 +size 117248 diff --git a/documents/doc/apache-poi/word6-sections.doc b/documents/doc/apache-poi/word6-sections.doc new file mode 100644 index 0000000..e0c3ac9 --- /dev/null +++ b/documents/doc/apache-poi/word6-sections.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2786192392dbd409db0ad80a955960e762f98d939608c282106cf8b8ec951a55 +size 6656 diff --git 
a/documents/doc/apache-poi/word6-sections2.doc b/documents/doc/apache-poi/word6-sections2.doc new file mode 100644 index 0000000..ee7fa00 --- /dev/null +++ b/documents/doc/apache-poi/word6-sections2.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1fa9699291b09ede96267581629125417db2048ddf78361b5d5c9641de7aa7a +size 15360 diff --git a/documents/doc/apache-poi/word6.doc b/documents/doc/apache-poi/word6.doc new file mode 100644 index 0000000..938afa7 --- /dev/null +++ b/documents/doc/apache-poi/word6.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58ea3347378e85e7c8d19ad64e74ad533b2f51b30f047594b434f380712f8b50 +size 6656 diff --git a/documents/doc/apache-poi/word95.doc b/documents/doc/apache-poi/word95.doc new file mode 100644 index 0000000..c9b2026 --- /dev/null +++ b/documents/doc/apache-poi/word95.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7b9557e917daf29cfee6baaa90226fbfa7900ffa05cc35bdc03aafdefef5789 +size 102400 diff --git a/documents/doc/apache-poi/word95err.doc b/documents/doc/apache-poi/word95err.doc new file mode 100644 index 0000000..4066c40 --- /dev/null +++ b/documents/doc/apache-poi/word95err.doc @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c2ffbdbbdc98d210640744cf70cc72836bb4df7a7c50f612c1a95360d51f6ed +size 33280 diff --git a/scrape/doc_gather.py b/scrape/doc_gather.py new file mode 100644 index 0000000..35047fc --- /dev/null +++ b/scrape/doc_gather.py @@ -0,0 +1,267 @@ +#!/usr/bin/env python3 +"""Gather legacy .doc (WW8/OLE2) test files from open-source test suites. 
+ +Sources: + - Apache POI HWPF test data (Apache-2.0) + - LibreOffice WW8 import test data (MPL-2.0) +""" + +import json +import sys +import time +from pathlib import Path + +import requests + +# Allow running as `python scrape/doc_gather.py` from repo root +sys.path.insert(0, str(Path(__file__).resolve().parent)) +from utils import ( + MAGIC_OLE2, + check_file_size, + download_file, + rate_limit, + sanitize_filename, + validate_file_format, +) + +REPO_ROOT = Path(__file__).resolve().parent.parent +DOCS_DIR = REPO_ROOT / "documents" / "doc" +ATTRIBUTION_PATH = REPO_ROOT / "ATTRIBUTION.json" + +SESSION = requests.Session() +SESSION.headers["User-Agent"] = ( + "DocSpec-Corpus-Scraper/1.0 (https://github.com/docspec/documents)" +) +# GitHub token if available (for higher rate limits) +import os + +_gh_token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") +if _gh_token: + SESSION.headers["Authorization"] = f"token {_gh_token}" + + +# ── GitHub helpers ──────────────────────────────────────────────────────────── + + +def github_list_files(api_url: str, extension: str = ".doc") -> list[dict]: + """List files from a GitHub contents API endpoint. 
Returns list of {name, download_url}.""" + rate_limit(1.5) + try: + resp = SESSION.get(api_url, timeout=30) + if resp.status_code == 403: + remaining = resp.headers.get("X-RateLimit-Remaining", "?") + print(f" GitHub API rate limited (remaining={remaining}), waiting 60s…") + time.sleep(60) + resp = SESSION.get(api_url, timeout=30) + resp.raise_for_status() + items = resp.json() + if not isinstance(items, list): + print(f" Unexpected response from {api_url}: not a list") + return [] + return [ + {"name": item["name"], "download_url": item["download_url"]} + for item in items + if item.get("name", "").lower().endswith(extension) + and item.get("type") == "file" + and item.get("download_url") + ] + except Exception as e: + print(f" Error listing {api_url}: {e}") + return [] + + +def download_and_validate(url: str, dest: Path, min_bytes: int = 512) -> bool: + """Download a file and validate it's genuine OLE2 format.""" + if dest.exists(): + if validate_file_format(dest, MAGIC_OLE2) and check_file_size(dest, min_bytes): + print(f" ✓ Already exists: {dest.name}") + return True + + ok = download_file(url, dest, delay=1.0, session=SESSION) + if not ok: + print(f" ✗ Download failed: {dest.name}") + return False + + if not validate_file_format(dest, MAGIC_OLE2): + print(f" ✗ Not OLE2 format: {dest.name} — removing") + dest.unlink(missing_ok=True) + return False + + if not check_file_size(dest, min_bytes): + print(f" ✗ Too small (<{min_bytes}B): {dest.name} — removing") + dest.unlink(missing_ok=True) + return False + + print(f" ✓ Downloaded: {dest.name}") + return True + + +# ── Source: Apache POI ──────────────────────────────────────────────────────── + + +def gather_apache_poi() -> int: + """Download .doc files from Apache POI test data.""" + group_dir = DOCS_DIR / "apache-poi" + group_dir.mkdir(parents=True, exist_ok=True) + + print("\n📦 Apache POI HWPF test data") + print("=" * 50) + + # Try multiple known paths in the POI repo + api_urls = [ + 
"https://api.github.com/repos/apache/poi/contents/test-data/document", + "https://api.github.com/repos/apache/poi/contents/poi-scratchpad/src/test/resources/org/apache/poi/hwpf/data", + ] + + all_files: list[dict] = [] + seen_names: set[str] = set() + + for api_url in api_urls: + print(f"\n Trying: {api_url}") + files = github_list_files(api_url, ".doc") + for f in files: + if f["name"].lower() not in seen_names: + seen_names.add(f["name"].lower()) + all_files.append(f) + print(f" Found {len(files)} .doc files") + + count = 0 + for finfo in all_files: + safe_name = sanitize_filename(finfo["name"]) + dest = group_dir / safe_name + if download_and_validate(finfo["download_url"], dest): + count += 1 + + print(f"\n Total Apache POI: {count} valid .doc files") + return count + + +# ── Source: LibreOffice ─────────────────────────────────────────────────────── + + +def gather_libreoffice() -> int: + """Download .doc files from LibreOffice WW8 test data.""" + group_dir = DOCS_DIR / "libreoffice" + group_dir.mkdir(parents=True, exist_ok=True) + + print("\n📦 LibreOffice WW8 test data") + print("=" * 50) + + # Multiple directories with WW8 test docs + api_urls = [ + "https://api.github.com/repos/LibreOffice/core/contents/sw/qa/extras/ww8import/data", + "https://api.github.com/repos/LibreOffice/core/contents/sw/qa/extras/ww8export/data", + "https://api.github.com/repos/LibreOffice/core/contents/sw/qa/core/doc/data", + ] + + all_files: list[dict] = [] + seen_names: set[str] = set() + + for api_url in api_urls: + print(f"\n Trying: {api_url}") + files = github_list_files(api_url, ".doc") + for f in files: + if f["name"].lower() not in seen_names: + seen_names.add(f["name"].lower()) + all_files.append(f) + print(f" Found {len(files)} .doc files (cumulative unique: {len(all_files)})") + + count = 0 + for finfo in all_files: + safe_name = sanitize_filename(finfo["name"]) + dest = group_dir / safe_name + if download_and_validate(finfo["download_url"], dest): + count += 1 + + 
print(f"\n Total LibreOffice: {count} valid .doc files") + return count + + +# ── ATTRIBUTION.json update ─────────────────────────────────────────────────── + +ATTRIBUTION_ENTRIES = [ + { + "format": "doc", + "path": "documents/doc/apache-poi/*.doc", + "title": "Apache POI HWPF Test Documents", + "author": "Apache POI contributors", + "license": "Apache-2.0", + "source": "https://github.com/apache/poi", + "tags": ["apache-poi", "hwpf", "test-fixtures"], + "donated": "2026-04-01", + "notes": "Test fixtures for the Apache POI HWPF Word processor module", + }, + { + "format": "doc", + "path": "documents/doc/libreoffice/*.doc", + "title": "LibreOffice WW8 Import/Export Test Documents", + "author": "LibreOffice contributors", + "license": "MPL-2.0", + "source": "https://github.com/LibreOffice/core", + "tags": ["libreoffice", "ww8", "test-fixtures"], + "donated": "2026-04-01", + "notes": "Test fixtures for the LibreOffice WW8 import/export filters", + }, +] + + +def update_attribution() -> None: + """Add doc entries to ATTRIBUTION.json if not already present.""" + print("\n📝 Updating ATTRIBUTION.json") + + # Re-read fresh (other tasks may have modified it) + data = json.loads(ATTRIBUTION_PATH.read_text()) + + existing_paths = {entry["path"] for entry in data} + added = 0 + for entry in ATTRIBUTION_ENTRIES: + if entry["path"] not in existing_paths: + data.append(entry) + added += 1 + print(f" + Added: {entry['path']}") + else: + print(f" ≡ Already present: {entry['path']}") + + if added: + ATTRIBUTION_PATH.write_text( + json.dumps(data, indent=2, ensure_ascii=False) + "\n" + ) + print(f" Wrote {added} new entries") + else: + print(" No changes needed") + + +# ── Main ────────────────────────────────────────────────────────────────────── + + +def main() -> None: + print("🔧 DocSpec .doc (WW8) corpus gatherer") + print("=" * 60) + + total = 0 + total += gather_apache_poi() + total += gather_libreoffice() + + print(f"\n{'=' * 60}") + print(f"📊 Total valid .doc files: 
{total}") + + if total < 50: + print(f"⚠️ Only {total} files — below target of 50") + print(" Consider running with GITHUB_TOKEN for higher API limits") + else: + print(f"✅ Target met: {total} ≥ 50") + + update_attribution() + + # Summary + print(f"\n{'=' * 60}") + doc_files = list(DOCS_DIR.rglob("*.doc")) + print(f"Files on disk: {len(doc_files)}") + for group_dir in sorted(DOCS_DIR.iterdir()): + if group_dir.is_dir(): + count = len(list(group_dir.glob("*.doc"))) + print(f" {group_dir.name}: {count}") + + +if __name__ == "__main__": + main() From bcbeb628c3caeb2a807237f21b84744e9faeac74 Mon Sep 17 00:00:00 2001 From: Stephan Meijer Date: Wed, 1 Apr 2026 17:46:36 +0200 Subject: [PATCH 06/14] feat(epub): add EPUB corpus from Standard Ebooks --- ATTRIBUTION.json | 16 ++++ .../a-e-w-mason_at-the-villa-rose.epub | 3 + .../agatha-christie_giants-bread.epub | 3 + ...a-christie_the-murder-at-the-vicarage.epub | 3 + ...gide_the-counterfeiters_dorothy-bussy.epub | 3 + .../anna-katharine-green_lost-mans-lane.epub | 3 + ...e-ratcliffe-woodward_harold-mattingly.epub | 3 + ...awain-and-the-green-knight_s-o-andrew.epub | 3 + ...-lazarillo-de-tormes_clements-markham.epub | 3 + ...ony-berkeley_the-layton-court-mystery.epub | 3 + ...arnold-bennett_anna-of-the-five-towns.epub | 3 + .../arthur-ransome_swallows-and-amazons.epub | 3 + ...d_marcus-dods_george-wilson_j-j-smith.epub | 3 + .../c-s-forester_payment-deferred.epub | 3 + .../carolyn-keene_the-bungalow-mystery.epub | 3 + .../carolyn-keene_the-hidden-staircase.epub | 3 + ...arolyn-keene_the-mystery-at-lilac-inn.epub | 3 + ...lyn-keene_the-secret-of-the-old-clock.epub | 3 + .../charles-a-lindbergh_we.epub | 3 + .../charles-dickens_our-mutual-friend.epub | 3 + .../clark-ashton-smith_short-fiction.epub | 3 + .../claude-mckay_home-to-harlem.epub | 3 + .../daphne-du-maurier_short-fiction.epub | 3 + ...shiell-hammett_continental-op-stories.epub | 3 + .../dashiell-hammett_the-maltese-falcon.epub | 3 + 
...ert-eustace_the-documents-in-the-case.epub | 3 + .../dorothy-l-sayers_strong-poison.epub | 3 + .../standard-ebooks/e-h-young_miss-mole.epub | 3 + .../e-r-eddison_styrbiorn-the-strong.epub | 3 + ...gar-rice-burroughs_the-outlaw-of-torn.epub | 3 + .../standard-ebooks/edna-ferber_cimarron.epub | 3 + .../ellis-parker-butler_jibby-jones.epub | 3 + .../evelyn-waugh_vile-bodies.epub | 3 + .../ford-madox-ford_privy-seal.epub | 3 + ...afka_the-castle_willa-muir_edwin-muir.epub | 3 + ...eeman-wills-crofts_the-cheyne-mystery.epub | 3 + ...aking-of-the-storm_s-e-a-h-stephenson.epub | 3 + .../g-a-henty_beric-the-briton.epub | 3 + .../g-d-h-cole_the-brooklyn-murders.epub | 3 + .../gene-stratton-porter_freckles.epub | 3 + .../geoffrey-dennis_the-end-of-the-world.epub | 3 + ...eorge-bernard-shaw_back-to-methuselah.epub | 3 + .../george-bernard-shaw_the-apple-cart.epub | 3 + .../george-macdonald_poetry.epub | 3 + .../george-macdonald_short-fiction.epub | 3 + .../george-macdonald_the-portent.epub | 3 + ...rge-macdonald_the-princess-and-curdie.epub | 3 + .../georgette-heyer_the-black-moth.epub | 3 + .../h-g-wells_in-the-days-of-the-comet.epub | 3 + .../h-m-tomlinson_gallions-reach.epub | 3 + ...issimus_alfred-thomas-scrope-goodrick.epub | 3 + .../harry-harrison_planet-of-the-damned.epub | 3 + .../henry-van-dyke-jr_poetry.epub | 3 + ...hilaire-belloc_the-cruise-of-the-nona.epub | 3 + ...-lofting_the-story-of-doctor-dolittle.epub | 3 + ...turgenev_on-the-eve_constance-garnett.epub | 3 + ...nnington_the-case-with-nine-solutions.epub | 3 + ...-j-connington_tragedy-at-ravensthorpe.epub | 3 + .../jessie-redmon-fauset_plum-bun.epub | 3 + .../john-buchan_the-powerhouse.epub | 3 + ...n-t-mcintyre_ashton-kirk-investigator.epub | 3 + .../jorge-isaacs_maria_rollo-ogden.epub | 3 + .../standard-ebooks/joseph-conrad_chance.epub | 3 + ...joseph-conrad_ford-madox-ford_romance.epub | 3 + ..._father-hensons-story-of-his-own-life.epub | 3 + .../langston-hughes_not-without-laughter.epub | 3 + 
.../louis-joseph-vance_the-lone-wolf.epub | 3 + ...e-camoes_the-lusiads_richard-f-burton.epub | 3 + .../m-e-braddon_the-trail-of-the-serpent.epub | 3 + .../m-e-braddon_the-venetians.epub | 3 + .../margaret-ayer-barnes_years-of-grace.epub | 3 + ...margaret-oliphant_the-ladies-lindores.epub | 3 + .../mary-butts_armed-with-madness.epub | 3 + .../mary-de-morgan_on-a-pincushion.epub | 3 + ...n_the-necklace-of-princess-fiorimonde.epub | 3 + .../mary-de-morgan_the-windfairies.epub | 3 + .../mary-shelley_short-fiction.epub | 3 + .../may-sinclair_mary-olivier-a-life.epub | 3 + .../metta-victor_the-dead-letter.epub | 3 + ...non-g-eberhart_the-patient-in-room-18.epub | 3 + ..._the-inspector-general_thomas-seltzer.epub | 3 + ...-club-and-how-it-won-the-championship.epub | 3 + .../olaf-stapledon_last-and-first-men.epub | 3 + ...tin-freeman_the-mystery-of-31-new-inn.epub | 3 + ...richard-hughes_a-high-wind-in-jamaica.epub | 3 + .../ring-lardner_jack-keefe-stories.epub | 3 + .../ring-lardner_my-four-weeks-in-france.epub | 3 + .../robert-e-howard_conan-stories.epub | 3 + .../rufus-king_murder-by-the-clock.epub | 3 + .../s-fowler-wright_the-world-below.epub | 3 + .../sigrid-undset_jenny_w-emme.epub | 3 + .../stella-benson_the-faraway-bride.epub | 3 + ...ion_the-journal-of-a-disappointed-man.epub | 3 + .../w-somerset-maugham_cakes-and-ale.epub | 3 + .../wilkie-collins_the-haunted-hotel.epub | 3 + .../wilkie-collins_the-law-and-the-lady.epub | 3 + .../william-faulkner_as-i-lay-dying.epub | 3 + .../william-gerhardie_futility.epub | 3 + ...the-varieties-of-religious-experience.epub | 3 + ...iam-morris_the-roots-of-the-mountains.epub | 3 + .../william-morris_the-sundering-flood.epub | 3 + ...mi-motokiyo_plays_various-translators.epub | 3 + scrape/epub_standard_ebooks.py | 91 +++++++++++++++++++ 103 files changed, 410 insertions(+) create mode 100644 documents/epub/standard-ebooks/a-e-w-mason_at-the-villa-rose.epub create mode 100644 
documents/epub/standard-ebooks/agatha-christie_giants-bread.epub create mode 100644 documents/epub/standard-ebooks/agatha-christie_the-murder-at-the-vicarage.epub create mode 100644 documents/epub/standard-ebooks/andre-gide_the-counterfeiters_dorothy-bussy.epub create mode 100644 documents/epub/standard-ebooks/anna-katharine-green_lost-mans-lane.epub create mode 100644 documents/epub/standard-ebooks/anonymous_barlaam-and-ioasaph_george-ratcliffe-woodward_harold-mattingly.epub create mode 100644 documents/epub/standard-ebooks/anonymous_sir-gawain-and-the-green-knight_s-o-andrew.epub create mode 100644 documents/epub/standard-ebooks/anonymous_the-life-of-lazarillo-de-tormes_clements-markham.epub create mode 100644 documents/epub/standard-ebooks/anthony-berkeley_the-layton-court-mystery.epub create mode 100644 documents/epub/standard-ebooks/arnold-bennett_anna-of-the-five-towns.epub create mode 100644 documents/epub/standard-ebooks/arthur-ransome_swallows-and-amazons.epub create mode 100644 documents/epub/standard-ebooks/augustine-of-hippo_the-city-of-god_marcus-dods_george-wilson_j-j-smith.epub create mode 100644 documents/epub/standard-ebooks/c-s-forester_payment-deferred.epub create mode 100644 documents/epub/standard-ebooks/carolyn-keene_the-bungalow-mystery.epub create mode 100644 documents/epub/standard-ebooks/carolyn-keene_the-hidden-staircase.epub create mode 100644 documents/epub/standard-ebooks/carolyn-keene_the-mystery-at-lilac-inn.epub create mode 100644 documents/epub/standard-ebooks/carolyn-keene_the-secret-of-the-old-clock.epub create mode 100644 documents/epub/standard-ebooks/charles-a-lindbergh_we.epub create mode 100644 documents/epub/standard-ebooks/charles-dickens_our-mutual-friend.epub create mode 100644 documents/epub/standard-ebooks/clark-ashton-smith_short-fiction.epub create mode 100644 documents/epub/standard-ebooks/claude-mckay_home-to-harlem.epub create mode 100644 documents/epub/standard-ebooks/daphne-du-maurier_short-fiction.epub create 
mode 100644 documents/epub/standard-ebooks/dashiell-hammett_continental-op-stories.epub create mode 100644 documents/epub/standard-ebooks/dashiell-hammett_the-maltese-falcon.epub create mode 100644 documents/epub/standard-ebooks/dorothy-l-sayers_robert-eustace_the-documents-in-the-case.epub create mode 100644 documents/epub/standard-ebooks/dorothy-l-sayers_strong-poison.epub create mode 100644 documents/epub/standard-ebooks/e-h-young_miss-mole.epub create mode 100644 documents/epub/standard-ebooks/e-r-eddison_styrbiorn-the-strong.epub create mode 100644 documents/epub/standard-ebooks/edgar-rice-burroughs_the-outlaw-of-torn.epub create mode 100644 documents/epub/standard-ebooks/edna-ferber_cimarron.epub create mode 100644 documents/epub/standard-ebooks/ellis-parker-butler_jibby-jones.epub create mode 100644 documents/epub/standard-ebooks/evelyn-waugh_vile-bodies.epub create mode 100644 documents/epub/standard-ebooks/ford-madox-ford_privy-seal.epub create mode 100644 documents/epub/standard-ebooks/franz-kafka_the-castle_willa-muir_edwin-muir.epub create mode 100644 documents/epub/standard-ebooks/freeman-wills-crofts_the-cheyne-mystery.epub create mode 100644 documents/epub/standard-ebooks/friedrich-spielhagen_the-breaking-of-the-storm_s-e-a-h-stephenson.epub create mode 100644 documents/epub/standard-ebooks/g-a-henty_beric-the-briton.epub create mode 100644 documents/epub/standard-ebooks/g-d-h-cole_the-brooklyn-murders.epub create mode 100644 documents/epub/standard-ebooks/gene-stratton-porter_freckles.epub create mode 100644 documents/epub/standard-ebooks/geoffrey-dennis_the-end-of-the-world.epub create mode 100644 documents/epub/standard-ebooks/george-bernard-shaw_back-to-methuselah.epub create mode 100644 documents/epub/standard-ebooks/george-bernard-shaw_the-apple-cart.epub create mode 100644 documents/epub/standard-ebooks/george-macdonald_poetry.epub create mode 100644 documents/epub/standard-ebooks/george-macdonald_short-fiction.epub create mode 100644 
documents/epub/standard-ebooks/george-macdonald_the-portent.epub create mode 100644 documents/epub/standard-ebooks/george-macdonald_the-princess-and-curdie.epub create mode 100644 documents/epub/standard-ebooks/georgette-heyer_the-black-moth.epub create mode 100644 documents/epub/standard-ebooks/h-g-wells_in-the-days-of-the-comet.epub create mode 100644 documents/epub/standard-ebooks/h-m-tomlinson_gallions-reach.epub create mode 100644 documents/epub/standard-ebooks/hans-jakob-christoffel-von-grimmelshausen_the-adventurous-simplicissimus_alfred-thomas-scrope-goodrick.epub create mode 100644 documents/epub/standard-ebooks/harry-harrison_planet-of-the-damned.epub create mode 100644 documents/epub/standard-ebooks/henry-van-dyke-jr_poetry.epub create mode 100644 documents/epub/standard-ebooks/hilaire-belloc_the-cruise-of-the-nona.epub create mode 100644 documents/epub/standard-ebooks/hugh-lofting_the-story-of-doctor-dolittle.epub create mode 100644 documents/epub/standard-ebooks/ivan-turgenev_on-the-eve_constance-garnett.epub create mode 100644 documents/epub/standard-ebooks/j-j-connington_the-case-with-nine-solutions.epub create mode 100644 documents/epub/standard-ebooks/j-j-connington_tragedy-at-ravensthorpe.epub create mode 100644 documents/epub/standard-ebooks/jessie-redmon-fauset_plum-bun.epub create mode 100644 documents/epub/standard-ebooks/john-buchan_the-powerhouse.epub create mode 100644 documents/epub/standard-ebooks/john-t-mcintyre_ashton-kirk-investigator.epub create mode 100644 documents/epub/standard-ebooks/jorge-isaacs_maria_rollo-ogden.epub create mode 100644 documents/epub/standard-ebooks/joseph-conrad_chance.epub create mode 100644 documents/epub/standard-ebooks/joseph-conrad_ford-madox-ford_romance.epub create mode 100644 documents/epub/standard-ebooks/josiah-henson_father-hensons-story-of-his-own-life.epub create mode 100644 documents/epub/standard-ebooks/langston-hughes_not-without-laughter.epub create mode 100644 
documents/epub/standard-ebooks/louis-joseph-vance_the-lone-wolf.epub create mode 100644 documents/epub/standard-ebooks/luis-de-camoes_the-lusiads_richard-f-burton.epub create mode 100644 documents/epub/standard-ebooks/m-e-braddon_the-trail-of-the-serpent.epub create mode 100644 documents/epub/standard-ebooks/m-e-braddon_the-venetians.epub create mode 100644 documents/epub/standard-ebooks/margaret-ayer-barnes_years-of-grace.epub create mode 100644 documents/epub/standard-ebooks/margaret-oliphant_the-ladies-lindores.epub create mode 100644 documents/epub/standard-ebooks/mary-butts_armed-with-madness.epub create mode 100644 documents/epub/standard-ebooks/mary-de-morgan_on-a-pincushion.epub create mode 100644 documents/epub/standard-ebooks/mary-de-morgan_the-necklace-of-princess-fiorimonde.epub create mode 100644 documents/epub/standard-ebooks/mary-de-morgan_the-windfairies.epub create mode 100644 documents/epub/standard-ebooks/mary-shelley_short-fiction.epub create mode 100644 documents/epub/standard-ebooks/may-sinclair_mary-olivier-a-life.epub create mode 100644 documents/epub/standard-ebooks/metta-victor_the-dead-letter.epub create mode 100644 documents/epub/standard-ebooks/mignon-g-eberhart_the-patient-in-room-18.epub create mode 100644 documents/epub/standard-ebooks/nikolai-gogol_the-inspector-general_thomas-seltzer.epub create mode 100644 documents/epub/standard-ebooks/noah-brooks_our-baseball-club-and-how-it-won-the-championship.epub create mode 100644 documents/epub/standard-ebooks/olaf-stapledon_last-and-first-men.epub create mode 100644 documents/epub/standard-ebooks/r-austin-freeman_the-mystery-of-31-new-inn.epub create mode 100644 documents/epub/standard-ebooks/richard-hughes_a-high-wind-in-jamaica.epub create mode 100644 documents/epub/standard-ebooks/ring-lardner_jack-keefe-stories.epub create mode 100644 documents/epub/standard-ebooks/ring-lardner_my-four-weeks-in-france.epub create mode 100644 
documents/epub/standard-ebooks/robert-e-howard_conan-stories.epub create mode 100644 documents/epub/standard-ebooks/rufus-king_murder-by-the-clock.epub create mode 100644 documents/epub/standard-ebooks/s-fowler-wright_the-world-below.epub create mode 100644 documents/epub/standard-ebooks/sigrid-undset_jenny_w-emme.epub create mode 100644 documents/epub/standard-ebooks/stella-benson_the-faraway-bride.epub create mode 100644 documents/epub/standard-ebooks/w-n-p-barbellion_the-journal-of-a-disappointed-man.epub create mode 100644 documents/epub/standard-ebooks/w-somerset-maugham_cakes-and-ale.epub create mode 100644 documents/epub/standard-ebooks/wilkie-collins_the-haunted-hotel.epub create mode 100644 documents/epub/standard-ebooks/wilkie-collins_the-law-and-the-lady.epub create mode 100644 documents/epub/standard-ebooks/william-faulkner_as-i-lay-dying.epub create mode 100644 documents/epub/standard-ebooks/william-gerhardie_futility.epub create mode 100644 documents/epub/standard-ebooks/william-james_the-varieties-of-religious-experience.epub create mode 100644 documents/epub/standard-ebooks/william-morris_the-roots-of-the-mountains.epub create mode 100644 documents/epub/standard-ebooks/william-morris_the-sundering-flood.epub create mode 100644 documents/epub/standard-ebooks/zeami-motokiyo_plays_various-translators.epub create mode 100644 scrape/epub_standard_ebooks.py diff --git a/ATTRIBUTION.json b/ATTRIBUTION.json index b552de5..234da84 100644 --- a/ATTRIBUTION.json +++ b/ATTRIBUTION.json @@ -96,5 +96,21 @@ ], "donated": "2026-04-01", "notes": "Test fixtures for the Apache POI HWPF Word processor module" + }, + { + "format": "epub", + "path": "documents/epub/standard-ebooks/*.epub", + "title": "Standard Ebooks Collection", + "author": "Various (original works in public domain; ebook production by Standard Ebooks contributors)", + "license": "CC0-1.0", + "source": "https://standardebooks.org", + "tags": [ + "standard-ebooks", + "epub", + "public-domain", + 
"literature" + ], + "donated": "2026-04-01", + "notes": "High-quality EPUB ebooks from Standard Ebooks (CC0-1.0 production, public domain source texts)" } ] diff --git a/documents/epub/standard-ebooks/a-e-w-mason_at-the-villa-rose.epub b/documents/epub/standard-ebooks/a-e-w-mason_at-the-villa-rose.epub new file mode 100644 index 0000000..83773a5 --- /dev/null +++ b/documents/epub/standard-ebooks/a-e-w-mason_at-the-villa-rose.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:906b33e2b46823e76edc1b19ec779ae1b549d11a2333f1f6ae55522666a02cc8 +size 435576 diff --git a/documents/epub/standard-ebooks/agatha-christie_giants-bread.epub b/documents/epub/standard-ebooks/agatha-christie_giants-bread.epub new file mode 100644 index 0000000..8936def --- /dev/null +++ b/documents/epub/standard-ebooks/agatha-christie_giants-bread.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59858b86b46f6b247ac4eaa418b8fe48b8a857511b7fdddf907fbd941a2c2a50 +size 639416 diff --git a/documents/epub/standard-ebooks/agatha-christie_the-murder-at-the-vicarage.epub b/documents/epub/standard-ebooks/agatha-christie_the-murder-at-the-vicarage.epub new file mode 100644 index 0000000..f67f571 --- /dev/null +++ b/documents/epub/standard-ebooks/agatha-christie_the-murder-at-the-vicarage.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d45901bcdcb7656eabcde6322944faf17c01c6058615a17c6164e0f27f2c67b4 +size 735434 diff --git a/documents/epub/standard-ebooks/andre-gide_the-counterfeiters_dorothy-bussy.epub b/documents/epub/standard-ebooks/andre-gide_the-counterfeiters_dorothy-bussy.epub new file mode 100644 index 0000000..42450aa --- /dev/null +++ b/documents/epub/standard-ebooks/andre-gide_the-counterfeiters_dorothy-bussy.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a621f885fcf51c8d9f009f88c88901063efd6ede5b7e34c0ce04d2497dd87bd +size 789306 diff --git 
a/documents/epub/standard-ebooks/anna-katharine-green_lost-mans-lane.epub b/documents/epub/standard-ebooks/anna-katharine-green_lost-mans-lane.epub new file mode 100644 index 0000000..7e90fe5 --- /dev/null +++ b/documents/epub/standard-ebooks/anna-katharine-green_lost-mans-lane.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6fa912dce6204f95d0b52849d252bfbd2488c09e6840f700a0c8f69ed79528e +size 625138 diff --git a/documents/epub/standard-ebooks/anonymous_barlaam-and-ioasaph_george-ratcliffe-woodward_harold-mattingly.epub b/documents/epub/standard-ebooks/anonymous_barlaam-and-ioasaph_george-ratcliffe-woodward_harold-mattingly.epub new file mode 100644 index 0000000..badcf73 --- /dev/null +++ b/documents/epub/standard-ebooks/anonymous_barlaam-and-ioasaph_george-ratcliffe-woodward_harold-mattingly.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83f378dff5acf15444b15721b78cbe38ecbb50a019db7a9b50441d9e720134bc +size 605023 diff --git a/documents/epub/standard-ebooks/anonymous_sir-gawain-and-the-green-knight_s-o-andrew.epub b/documents/epub/standard-ebooks/anonymous_sir-gawain-and-the-green-knight_s-o-andrew.epub new file mode 100644 index 0000000..b7a238f --- /dev/null +++ b/documents/epub/standard-ebooks/anonymous_sir-gawain-and-the-green-knight_s-o-andrew.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49d2c7a6d7e5074c9107f7822f79e0e441ede0393f5bcf2e0b49bb5bf8527130 +size 475105 diff --git a/documents/epub/standard-ebooks/anonymous_the-life-of-lazarillo-de-tormes_clements-markham.epub b/documents/epub/standard-ebooks/anonymous_the-life-of-lazarillo-de-tormes_clements-markham.epub new file mode 100644 index 0000000..1819626 --- /dev/null +++ b/documents/epub/standard-ebooks/anonymous_the-life-of-lazarillo-de-tormes_clements-markham.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e1a6aed9d5caab13077657817d3ca1febcfe4731a61d2f9fffeb21bc149471d +size 712993 
diff --git a/documents/epub/standard-ebooks/anthony-berkeley_the-layton-court-mystery.epub b/documents/epub/standard-ebooks/anthony-berkeley_the-layton-court-mystery.epub new file mode 100644 index 0000000..45b6469 --- /dev/null +++ b/documents/epub/standard-ebooks/anthony-berkeley_the-layton-court-mystery.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:766c1c1317bdb97fd0723598264674eb75b4ac6822cb1c69f19bd585e61dce32 +size 520543 diff --git a/documents/epub/standard-ebooks/arnold-bennett_anna-of-the-five-towns.epub b/documents/epub/standard-ebooks/arnold-bennett_anna-of-the-five-towns.epub new file mode 100644 index 0000000..064d300 --- /dev/null +++ b/documents/epub/standard-ebooks/arnold-bennett_anna-of-the-five-towns.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f521d904016e1c9d062ed74658a534ee9535a502421585658d1714f3fa5ee63f +size 561425 diff --git a/documents/epub/standard-ebooks/arthur-ransome_swallows-and-amazons.epub b/documents/epub/standard-ebooks/arthur-ransome_swallows-and-amazons.epub new file mode 100644 index 0000000..89bf6f3 --- /dev/null +++ b/documents/epub/standard-ebooks/arthur-ransome_swallows-and-amazons.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5284da068fd42c53be50cdb9c720674497bfba553f94dff89a359447cd35e3f7 +size 876631 diff --git a/documents/epub/standard-ebooks/augustine-of-hippo_the-city-of-god_marcus-dods_george-wilson_j-j-smith.epub b/documents/epub/standard-ebooks/augustine-of-hippo_the-city-of-god_marcus-dods_george-wilson_j-j-smith.epub new file mode 100644 index 0000000..c5fcd72 --- /dev/null +++ b/documents/epub/standard-ebooks/augustine-of-hippo_the-city-of-god_marcus-dods_george-wilson_j-j-smith.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a307f91acbfaee5cddc9a8bea0288f7dcd4b34492fd06b43e7b7d74f2e4d6d1c +size 1498639 diff --git a/documents/epub/standard-ebooks/c-s-forester_payment-deferred.epub 
b/documents/epub/standard-ebooks/c-s-forester_payment-deferred.epub new file mode 100644 index 0000000..e0ac12c --- /dev/null +++ b/documents/epub/standard-ebooks/c-s-forester_payment-deferred.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce65ad910bd482011b2e9b020742f3212c2e33fdde333ea3e48bc141c4476e0d +size 677521 diff --git a/documents/epub/standard-ebooks/carolyn-keene_the-bungalow-mystery.epub b/documents/epub/standard-ebooks/carolyn-keene_the-bungalow-mystery.epub new file mode 100644 index 0000000..b256a9b --- /dev/null +++ b/documents/epub/standard-ebooks/carolyn-keene_the-bungalow-mystery.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eca8779c46c33b50561e0563048bf9ce19c667cab009151e6546223f2970de6 +size 550246 diff --git a/documents/epub/standard-ebooks/carolyn-keene_the-hidden-staircase.epub b/documents/epub/standard-ebooks/carolyn-keene_the-hidden-staircase.epub new file mode 100644 index 0000000..ed2590f --- /dev/null +++ b/documents/epub/standard-ebooks/carolyn-keene_the-hidden-staircase.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:389ddd28857a41790f02b100bee6667223692884d66fe5817fbd145041bb78ec +size 617338 diff --git a/documents/epub/standard-ebooks/carolyn-keene_the-mystery-at-lilac-inn.epub b/documents/epub/standard-ebooks/carolyn-keene_the-mystery-at-lilac-inn.epub new file mode 100644 index 0000000..2ca628e --- /dev/null +++ b/documents/epub/standard-ebooks/carolyn-keene_the-mystery-at-lilac-inn.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67ed122f52c0c6898d19e979b95ae96a0bba5f7e22adfafb20c73dc0f64e7362 +size 619797 diff --git a/documents/epub/standard-ebooks/carolyn-keene_the-secret-of-the-old-clock.epub b/documents/epub/standard-ebooks/carolyn-keene_the-secret-of-the-old-clock.epub new file mode 100644 index 0000000..66169bf --- /dev/null +++ b/documents/epub/standard-ebooks/carolyn-keene_the-secret-of-the-old-clock.epub 
@@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8affa79221bc7fc3e1639db5e30fa0b9d31904d909aa31f2ab1007a0f0670479 +size 409524 diff --git a/documents/epub/standard-ebooks/charles-a-lindbergh_we.epub b/documents/epub/standard-ebooks/charles-a-lindbergh_we.epub new file mode 100644 index 0000000..9752113 --- /dev/null +++ b/documents/epub/standard-ebooks/charles-a-lindbergh_we.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60046d79669e48437967f694ac02c61902937b8cee5129b8b3ee0cdd3c4d79c7 +size 3446124 diff --git a/documents/epub/standard-ebooks/charles-dickens_our-mutual-friend.epub b/documents/epub/standard-ebooks/charles-dickens_our-mutual-friend.epub new file mode 100644 index 0000000..9f04109 --- /dev/null +++ b/documents/epub/standard-ebooks/charles-dickens_our-mutual-friend.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd18b4d8816bfc3dc8b1662d3050c6597b2b7e7a535306b46cc0213feb8ec9fd +size 1256185 diff --git a/documents/epub/standard-ebooks/clark-ashton-smith_short-fiction.epub b/documents/epub/standard-ebooks/clark-ashton-smith_short-fiction.epub new file mode 100644 index 0000000..ecd6c9a --- /dev/null +++ b/documents/epub/standard-ebooks/clark-ashton-smith_short-fiction.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebedc21cee9ab65a89f4a29851dfd1674e9810dcd7fc00689d6cc93ef85c5f1b +size 645885 diff --git a/documents/epub/standard-ebooks/claude-mckay_home-to-harlem.epub b/documents/epub/standard-ebooks/claude-mckay_home-to-harlem.epub new file mode 100644 index 0000000..5243bc9 --- /dev/null +++ b/documents/epub/standard-ebooks/claude-mckay_home-to-harlem.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e522058ca9b24747ed35a6d72501f73c088c0ec553f762471a9ff0396be58279 +size 306797 diff --git a/documents/epub/standard-ebooks/daphne-du-maurier_short-fiction.epub 
b/documents/epub/standard-ebooks/daphne-du-maurier_short-fiction.epub new file mode 100644 index 0000000..6a11e3c --- /dev/null +++ b/documents/epub/standard-ebooks/daphne-du-maurier_short-fiction.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5b02eccd6c9586332af43a487c5d1fd7f3674fa76e2df2d7ef48e7d04d27bba +size 254410 diff --git a/documents/epub/standard-ebooks/dashiell-hammett_continental-op-stories.epub b/documents/epub/standard-ebooks/dashiell-hammett_continental-op-stories.epub new file mode 100644 index 0000000..55939a3 --- /dev/null +++ b/documents/epub/standard-ebooks/dashiell-hammett_continental-op-stories.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ff7c8f9bea0e2e3c19289a89b1898a6b7f4df76201e6dff9a19829317cac6c1 +size 929019 diff --git a/documents/epub/standard-ebooks/dashiell-hammett_the-maltese-falcon.epub b/documents/epub/standard-ebooks/dashiell-hammett_the-maltese-falcon.epub new file mode 100644 index 0000000..fcdd444 --- /dev/null +++ b/documents/epub/standard-ebooks/dashiell-hammett_the-maltese-falcon.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8123e7919d41a1b77e54eec0b4772ea9777984b479fa1d04207ba017d585d78e +size 976564 diff --git a/documents/epub/standard-ebooks/dorothy-l-sayers_robert-eustace_the-documents-in-the-case.epub b/documents/epub/standard-ebooks/dorothy-l-sayers_robert-eustace_the-documents-in-the-case.epub new file mode 100644 index 0000000..7cb9fb9 --- /dev/null +++ b/documents/epub/standard-ebooks/dorothy-l-sayers_robert-eustace_the-documents-in-the-case.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e46756babe41f415a74d8cac29e1fb77c481fccf00e32c8c8aab2052e6ced8 +size 574228 diff --git a/documents/epub/standard-ebooks/dorothy-l-sayers_strong-poison.epub b/documents/epub/standard-ebooks/dorothy-l-sayers_strong-poison.epub new file mode 100644 index 0000000..9a8795d --- /dev/null +++ 
b/documents/epub/standard-ebooks/dorothy-l-sayers_strong-poison.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4090575369670d39363b519fba77f43742366349f43c1cb60dd261225c7a919 +size 605593 diff --git a/documents/epub/standard-ebooks/e-h-young_miss-mole.epub b/documents/epub/standard-ebooks/e-h-young_miss-mole.epub new file mode 100644 index 0000000..ae2a100 --- /dev/null +++ b/documents/epub/standard-ebooks/e-h-young_miss-mole.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8466697b0fe7e2db08ba66009beecdabb3b86d0977b326f332666c0dfb1ee809 +size 1166593 diff --git a/documents/epub/standard-ebooks/e-r-eddison_styrbiorn-the-strong.epub b/documents/epub/standard-ebooks/e-r-eddison_styrbiorn-the-strong.epub new file mode 100644 index 0000000..004d78d --- /dev/null +++ b/documents/epub/standard-ebooks/e-r-eddison_styrbiorn-the-strong.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76a9f4995363b2ee53098590b718a4f383176f934077390ca55f96dd323ccbdb +size 361318 diff --git a/documents/epub/standard-ebooks/edgar-rice-burroughs_the-outlaw-of-torn.epub b/documents/epub/standard-ebooks/edgar-rice-burroughs_the-outlaw-of-torn.epub new file mode 100644 index 0000000..fde10b1 --- /dev/null +++ b/documents/epub/standard-ebooks/edgar-rice-burroughs_the-outlaw-of-torn.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f8f34110f7f2046aaeda1e139a9e0b9e782c362d37cb43389691edc15cc23b +size 638132 diff --git a/documents/epub/standard-ebooks/edna-ferber_cimarron.epub b/documents/epub/standard-ebooks/edna-ferber_cimarron.epub new file mode 100644 index 0000000..5524ae9 --- /dev/null +++ b/documents/epub/standard-ebooks/edna-ferber_cimarron.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:413dd24bcdaef519d68a41f5448b3df42085eb27bd8d4d65ecd343e830c2d949 +size 678149 diff --git a/documents/epub/standard-ebooks/ellis-parker-butler_jibby-jones.epub 
b/documents/epub/standard-ebooks/ellis-parker-butler_jibby-jones.epub new file mode 100644 index 0000000..ab13869 --- /dev/null +++ b/documents/epub/standard-ebooks/ellis-parker-butler_jibby-jones.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efe36496c9d659a7b6f0f93ac43cea69c73d3ee9a13b4d14f0211c9d8e111e41 +size 637421 diff --git a/documents/epub/standard-ebooks/evelyn-waugh_vile-bodies.epub b/documents/epub/standard-ebooks/evelyn-waugh_vile-bodies.epub new file mode 100644 index 0000000..abdd305 --- /dev/null +++ b/documents/epub/standard-ebooks/evelyn-waugh_vile-bodies.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f742dadc090d912bd85e3f2789318153d64209e64ff2948dd54ff3913746788 +size 685405 diff --git a/documents/epub/standard-ebooks/ford-madox-ford_privy-seal.epub b/documents/epub/standard-ebooks/ford-madox-ford_privy-seal.epub new file mode 100644 index 0000000..572111f --- /dev/null +++ b/documents/epub/standard-ebooks/ford-madox-ford_privy-seal.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2671988429d6c6d93a45cbce288e75f6bd1e2a0ae4585e5eeb0002417d3cb1e5 +size 565737 diff --git a/documents/epub/standard-ebooks/franz-kafka_the-castle_willa-muir_edwin-muir.epub b/documents/epub/standard-ebooks/franz-kafka_the-castle_willa-muir_edwin-muir.epub new file mode 100644 index 0000000..fc01dff --- /dev/null +++ b/documents/epub/standard-ebooks/franz-kafka_the-castle_willa-muir_edwin-muir.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3d347e94542709276689a7556d3ebc0dec86850c279148053020b1ae196aa57 +size 892865 diff --git a/documents/epub/standard-ebooks/freeman-wills-crofts_the-cheyne-mystery.epub b/documents/epub/standard-ebooks/freeman-wills-crofts_the-cheyne-mystery.epub new file mode 100644 index 0000000..5787a94 --- /dev/null +++ b/documents/epub/standard-ebooks/freeman-wills-crofts_the-cheyne-mystery.epub @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:ce400fa6d464b9a90fbd6adbed545bf03f88928eec12d14e3592d4edc13d4898 +size 588273 diff --git a/documents/epub/standard-ebooks/friedrich-spielhagen_the-breaking-of-the-storm_s-e-a-h-stephenson.epub b/documents/epub/standard-ebooks/friedrich-spielhagen_the-breaking-of-the-storm_s-e-a-h-stephenson.epub new file mode 100644 index 0000000..331971c --- /dev/null +++ b/documents/epub/standard-ebooks/friedrich-spielhagen_the-breaking-of-the-storm_s-e-a-h-stephenson.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d22ba0783bd5244050d0feab54d99c3e815caa58601c36fcc008a47bd3fabf6 +size 990316 diff --git a/documents/epub/standard-ebooks/g-a-henty_beric-the-briton.epub b/documents/epub/standard-ebooks/g-a-henty_beric-the-briton.epub new file mode 100644 index 0000000..a7a07f5 --- /dev/null +++ b/documents/epub/standard-ebooks/g-a-henty_beric-the-briton.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f23c0bccaaa74be799fced201573c2071ba05bb243dd33a6a979404d719beadf +size 678990 diff --git a/documents/epub/standard-ebooks/g-d-h-cole_the-brooklyn-murders.epub b/documents/epub/standard-ebooks/g-d-h-cole_the-brooklyn-murders.epub new file mode 100644 index 0000000..19414b6 --- /dev/null +++ b/documents/epub/standard-ebooks/g-d-h-cole_the-brooklyn-murders.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1206ca9b7f65eca1e07cec85c0082fa8c5a8a770ed1b46a39cf8ca1470653cf1 +size 472603 diff --git a/documents/epub/standard-ebooks/gene-stratton-porter_freckles.epub b/documents/epub/standard-ebooks/gene-stratton-porter_freckles.epub new file mode 100644 index 0000000..6b2fe92 --- /dev/null +++ b/documents/epub/standard-ebooks/gene-stratton-porter_freckles.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfafe213f37f00f50600e2ae447ecd357db378b11eb1bb590fe8870ab8dd8bcd +size 692728 diff --git 
a/documents/epub/standard-ebooks/geoffrey-dennis_the-end-of-the-world.epub b/documents/epub/standard-ebooks/geoffrey-dennis_the-end-of-the-world.epub new file mode 100644 index 0000000..5969d3b --- /dev/null +++ b/documents/epub/standard-ebooks/geoffrey-dennis_the-end-of-the-world.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92f41db2a9aa70d14cfdbf3ffbf6c86205a4db41599552571da495a4851f801f +size 499804 diff --git a/documents/epub/standard-ebooks/george-bernard-shaw_back-to-methuselah.epub b/documents/epub/standard-ebooks/george-bernard-shaw_back-to-methuselah.epub new file mode 100644 index 0000000..f370824 --- /dev/null +++ b/documents/epub/standard-ebooks/george-bernard-shaw_back-to-methuselah.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:277ef243caf900464285a1d8cacc489b235ce96a6239c4b00de3176e2b508291 +size 706231 diff --git a/documents/epub/standard-ebooks/george-bernard-shaw_the-apple-cart.epub b/documents/epub/standard-ebooks/george-bernard-shaw_the-apple-cart.epub new file mode 100644 index 0000000..8f3f235 --- /dev/null +++ b/documents/epub/standard-ebooks/george-bernard-shaw_the-apple-cart.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6808f70e92bde9dc1279f8d90da4880eca1cee9b8cee6473d39e7757865d2608 +size 503446 diff --git a/documents/epub/standard-ebooks/george-macdonald_poetry.epub b/documents/epub/standard-ebooks/george-macdonald_poetry.epub new file mode 100644 index 0000000..a1414e2 --- /dev/null +++ b/documents/epub/standard-ebooks/george-macdonald_poetry.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c804832429ff4be0906eddea45ded6d4a9968205b108f44f57e1d5eb5a32e3d +size 789183 diff --git a/documents/epub/standard-ebooks/george-macdonald_short-fiction.epub b/documents/epub/standard-ebooks/george-macdonald_short-fiction.epub new file mode 100644 index 0000000..b54ac13 --- /dev/null +++ 
b/documents/epub/standard-ebooks/george-macdonald_short-fiction.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd96068a7a1d31a49171e0bf524212680771630c721a4444f551e3aff36c03b0 +size 1031003 diff --git a/documents/epub/standard-ebooks/george-macdonald_the-portent.epub b/documents/epub/standard-ebooks/george-macdonald_the-portent.epub new file mode 100644 index 0000000..ecb6094 --- /dev/null +++ b/documents/epub/standard-ebooks/george-macdonald_the-portent.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20cb94d4c54b8e14c8d755e23be85b42397c207d347b914e7af5b2791676d1dc +size 589898 diff --git a/documents/epub/standard-ebooks/george-macdonald_the-princess-and-curdie.epub b/documents/epub/standard-ebooks/george-macdonald_the-princess-and-curdie.epub new file mode 100644 index 0000000..5a88489 --- /dev/null +++ b/documents/epub/standard-ebooks/george-macdonald_the-princess-and-curdie.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d12edd975ea3992eb78a2b17fd4bc2a070812b31939ff9e092d29298c7da185 +size 635639 diff --git a/documents/epub/standard-ebooks/georgette-heyer_the-black-moth.epub b/documents/epub/standard-ebooks/georgette-heyer_the-black-moth.epub new file mode 100644 index 0000000..ea5caf7 --- /dev/null +++ b/documents/epub/standard-ebooks/georgette-heyer_the-black-moth.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c33b2b162043d31bf3d0312508ed6870ca87be4e8e889dd294d3134958148e43 +size 699826 diff --git a/documents/epub/standard-ebooks/h-g-wells_in-the-days-of-the-comet.epub b/documents/epub/standard-ebooks/h-g-wells_in-the-days-of-the-comet.epub new file mode 100644 index 0000000..eefd418 --- /dev/null +++ b/documents/epub/standard-ebooks/h-g-wells_in-the-days-of-the-comet.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bae05cecb9d88d5b313a25fbeaf03c459cc36a5e5f0f0f40556956083e9f5139 +size 556184 diff --git 
a/documents/epub/standard-ebooks/h-m-tomlinson_gallions-reach.epub b/documents/epub/standard-ebooks/h-m-tomlinson_gallions-reach.epub new file mode 100644 index 0000000..27a85c5 --- /dev/null +++ b/documents/epub/standard-ebooks/h-m-tomlinson_gallions-reach.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:256df6b9d4512a1c96d8378b1e37bf87ffca007aac41853cda60d09de966f925 +size 604144 diff --git a/documents/epub/standard-ebooks/hans-jakob-christoffel-von-grimmelshausen_the-adventurous-simplicissimus_alfred-thomas-scrope-goodrick.epub b/documents/epub/standard-ebooks/hans-jakob-christoffel-von-grimmelshausen_the-adventurous-simplicissimus_alfred-thomas-scrope-goodrick.epub new file mode 100644 index 0000000..d64f1fd --- /dev/null +++ b/documents/epub/standard-ebooks/hans-jakob-christoffel-von-grimmelshausen_the-adventurous-simplicissimus_alfred-thomas-scrope-goodrick.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a31ef417c3c8255fd630ca1eb27258e070732721113d00a5ddc1e758386e6c6 +size 1332708 diff --git a/documents/epub/standard-ebooks/harry-harrison_planet-of-the-damned.epub b/documents/epub/standard-ebooks/harry-harrison_planet-of-the-damned.epub new file mode 100644 index 0000000..44726cc --- /dev/null +++ b/documents/epub/standard-ebooks/harry-harrison_planet-of-the-damned.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3658235f78661f639b7630b7d212567f6aacf0bd6298572bc2506afea0ac4ca1 +size 764586 diff --git a/documents/epub/standard-ebooks/henry-van-dyke-jr_poetry.epub b/documents/epub/standard-ebooks/henry-van-dyke-jr_poetry.epub new file mode 100644 index 0000000..b8ce1ff --- /dev/null +++ b/documents/epub/standard-ebooks/henry-van-dyke-jr_poetry.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2945a302f92a64323f45d20728d972719f9052bd7e132be09df63f8152aabfd +size 480747 diff --git 
a/documents/epub/standard-ebooks/hilaire-belloc_the-cruise-of-the-nona.epub b/documents/epub/standard-ebooks/hilaire-belloc_the-cruise-of-the-nona.epub new file mode 100644 index 0000000..7d1ec35 --- /dev/null +++ b/documents/epub/standard-ebooks/hilaire-belloc_the-cruise-of-the-nona.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd79392fc714f915ba1b12762ebdb51fe034dadbb0642368d0d79c1db637bc0e +size 655799 diff --git a/documents/epub/standard-ebooks/hugh-lofting_the-story-of-doctor-dolittle.epub b/documents/epub/standard-ebooks/hugh-lofting_the-story-of-doctor-dolittle.epub new file mode 100644 index 0000000..cf225ac --- /dev/null +++ b/documents/epub/standard-ebooks/hugh-lofting_the-story-of-doctor-dolittle.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9538ad3419aadd172b8527ad5cebf664e54fb9b2fb5421c49ae7ac615a34d9ba +size 444449 diff --git a/documents/epub/standard-ebooks/ivan-turgenev_on-the-eve_constance-garnett.epub b/documents/epub/standard-ebooks/ivan-turgenev_on-the-eve_constance-garnett.epub new file mode 100644 index 0000000..c36b11f --- /dev/null +++ b/documents/epub/standard-ebooks/ivan-turgenev_on-the-eve_constance-garnett.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:312df0f0019c48da25a91dc7a4dced5544c35453b31622bf782669f7eb62281c +size 374294 diff --git a/documents/epub/standard-ebooks/j-j-connington_the-case-with-nine-solutions.epub b/documents/epub/standard-ebooks/j-j-connington_the-case-with-nine-solutions.epub new file mode 100644 index 0000000..388b83d --- /dev/null +++ b/documents/epub/standard-ebooks/j-j-connington_the-case-with-nine-solutions.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b027597bf2e8aff70ffe42954ea2cd4d822fc9d30a5f5ce58648f2771da36f34 +size 771667 diff --git a/documents/epub/standard-ebooks/j-j-connington_tragedy-at-ravensthorpe.epub 
b/documents/epub/standard-ebooks/j-j-connington_tragedy-at-ravensthorpe.epub new file mode 100644 index 0000000..8f5b7f1 --- /dev/null +++ b/documents/epub/standard-ebooks/j-j-connington_tragedy-at-ravensthorpe.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec53d91c7c52bc902fed1180e6090a514e551103c5b863a95e80666bc29da2a +size 296836 diff --git a/documents/epub/standard-ebooks/jessie-redmon-fauset_plum-bun.epub b/documents/epub/standard-ebooks/jessie-redmon-fauset_plum-bun.epub new file mode 100644 index 0000000..892dfca --- /dev/null +++ b/documents/epub/standard-ebooks/jessie-redmon-fauset_plum-bun.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12b2d314aa634ce6db559bc1719aa47684614fcb6705cba3dbe59a42e7ba8cf8 +size 565347 diff --git a/documents/epub/standard-ebooks/john-buchan_the-powerhouse.epub b/documents/epub/standard-ebooks/john-buchan_the-powerhouse.epub new file mode 100644 index 0000000..eca3cc5 --- /dev/null +++ b/documents/epub/standard-ebooks/john-buchan_the-powerhouse.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c756605f4f8662b12517ce6f313d72d9349445ccfcb6ab5104de444def85625a +size 596181 diff --git a/documents/epub/standard-ebooks/john-t-mcintyre_ashton-kirk-investigator.epub b/documents/epub/standard-ebooks/john-t-mcintyre_ashton-kirk-investigator.epub new file mode 100644 index 0000000..7558b05 --- /dev/null +++ b/documents/epub/standard-ebooks/john-t-mcintyre_ashton-kirk-investigator.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:296e7051448f4373160873dc8bf7e8fb5f695b04e89b9174c275b49b774484a7 +size 570208 diff --git a/documents/epub/standard-ebooks/jorge-isaacs_maria_rollo-ogden.epub b/documents/epub/standard-ebooks/jorge-isaacs_maria_rollo-ogden.epub new file mode 100644 index 0000000..36d530e --- /dev/null +++ b/documents/epub/standard-ebooks/jorge-isaacs_maria_rollo-ogden.epub @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:cd835538268e54428ac73bb4f8e91be0a1cbd829876756a29b0aa57e1558d85a +size 705350 diff --git a/documents/epub/standard-ebooks/joseph-conrad_chance.epub b/documents/epub/standard-ebooks/joseph-conrad_chance.epub new file mode 100644 index 0000000..ab097de --- /dev/null +++ b/documents/epub/standard-ebooks/joseph-conrad_chance.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:484b0c17bd4da39d8a39b75471316ce7b86c4b1493fc5c82a94b7d57348b449b +size 675610 diff --git a/documents/epub/standard-ebooks/joseph-conrad_ford-madox-ford_romance.epub b/documents/epub/standard-ebooks/joseph-conrad_ford-madox-ford_romance.epub new file mode 100644 index 0000000..0010fca --- /dev/null +++ b/documents/epub/standard-ebooks/joseph-conrad_ford-madox-ford_romance.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec4103c8fd80b004011941e2f1a76988535b717853deeddbf9dfbe356cfb72bc +size 689878 diff --git a/documents/epub/standard-ebooks/josiah-henson_father-hensons-story-of-his-own-life.epub b/documents/epub/standard-ebooks/josiah-henson_father-hensons-story-of-his-own-life.epub new file mode 100644 index 0000000..0aa9440 --- /dev/null +++ b/documents/epub/standard-ebooks/josiah-henson_father-hensons-story-of-his-own-life.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a82e72f4e34c9c1ae8839056390fb254838cf0704596457b16f84d978315bdf4 +size 540068 diff --git a/documents/epub/standard-ebooks/langston-hughes_not-without-laughter.epub b/documents/epub/standard-ebooks/langston-hughes_not-without-laughter.epub new file mode 100644 index 0000000..953dba2 --- /dev/null +++ b/documents/epub/standard-ebooks/langston-hughes_not-without-laughter.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07decd3c08f7f229533f265862f2d76cb96c28ab9882ea02c4996fc7b054f7b6 +size 406221 diff --git a/documents/epub/standard-ebooks/louis-joseph-vance_the-lone-wolf.epub 
b/documents/epub/standard-ebooks/louis-joseph-vance_the-lone-wolf.epub new file mode 100644 index 0000000..c1cbade --- /dev/null +++ b/documents/epub/standard-ebooks/louis-joseph-vance_the-lone-wolf.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78f26f437a57523610ead04625ad1599114ec92c8971b5923f8b7797808ca34f +size 498182 diff --git a/documents/epub/standard-ebooks/luis-de-camoes_the-lusiads_richard-f-burton.epub b/documents/epub/standard-ebooks/luis-de-camoes_the-lusiads_richard-f-burton.epub new file mode 100644 index 0000000..50de6fa --- /dev/null +++ b/documents/epub/standard-ebooks/luis-de-camoes_the-lusiads_richard-f-burton.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:969985c77ca94337ee4b3852f6f41f272c1170832b0760e575c7513901f4f48e +size 793349 diff --git a/documents/epub/standard-ebooks/m-e-braddon_the-trail-of-the-serpent.epub b/documents/epub/standard-ebooks/m-e-braddon_the-trail-of-the-serpent.epub new file mode 100644 index 0000000..f5021f7 --- /dev/null +++ b/documents/epub/standard-ebooks/m-e-braddon_the-trail-of-the-serpent.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb4f2a75f09fc687be0ad53b0147e8d9e7c70f619acf1e5468da3f1806d03a14 +size 801946 diff --git a/documents/epub/standard-ebooks/m-e-braddon_the-venetians.epub b/documents/epub/standard-ebooks/m-e-braddon_the-venetians.epub new file mode 100644 index 0000000..d235a71 --- /dev/null +++ b/documents/epub/standard-ebooks/m-e-braddon_the-venetians.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c773a6bbc7b870e396d6b26ac62c4c176477ed5154349cc89fbc6f2bcb3ef0b +size 863428 diff --git a/documents/epub/standard-ebooks/margaret-ayer-barnes_years-of-grace.epub b/documents/epub/standard-ebooks/margaret-ayer-barnes_years-of-grace.epub new file mode 100644 index 0000000..bce5eeb --- /dev/null +++ b/documents/epub/standard-ebooks/margaret-ayer-barnes_years-of-grace.epub @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:26d54cc8eb171aade81ace507ad1df27c7b7e6da2ce9aa98f6125d0e38731c1b +size 1060196 diff --git a/documents/epub/standard-ebooks/margaret-oliphant_the-ladies-lindores.epub b/documents/epub/standard-ebooks/margaret-oliphant_the-ladies-lindores.epub new file mode 100644 index 0000000..b1efbed --- /dev/null +++ b/documents/epub/standard-ebooks/margaret-oliphant_the-ladies-lindores.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f98935895f3df50689db205932849dbc8fe507e2d3413ea6de94eee61102ec3 +size 768954 diff --git a/documents/epub/standard-ebooks/mary-butts_armed-with-madness.epub b/documents/epub/standard-ebooks/mary-butts_armed-with-madness.epub new file mode 100644 index 0000000..a37421d --- /dev/null +++ b/documents/epub/standard-ebooks/mary-butts_armed-with-madness.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e499379434c1a08614595e577d416bcceed0d13e3f6a3f6479c5a8c3fd205ce +size 395354 diff --git a/documents/epub/standard-ebooks/mary-de-morgan_on-a-pincushion.epub b/documents/epub/standard-ebooks/mary-de-morgan_on-a-pincushion.epub new file mode 100644 index 0000000..45d8691 --- /dev/null +++ b/documents/epub/standard-ebooks/mary-de-morgan_on-a-pincushion.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74f087ade4bafcff262cb394aa76c2dc05b533c50c56bc0a68ca43dcabb4998d +size 434610 diff --git a/documents/epub/standard-ebooks/mary-de-morgan_the-necklace-of-princess-fiorimonde.epub b/documents/epub/standard-ebooks/mary-de-morgan_the-necklace-of-princess-fiorimonde.epub new file mode 100644 index 0000000..c278144 --- /dev/null +++ b/documents/epub/standard-ebooks/mary-de-morgan_the-necklace-of-princess-fiorimonde.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba31b55483439b73b938152100666f4ac00bbec42645a8fc83657d4cfdb5bcf1 +size 453412 diff --git 
a/documents/epub/standard-ebooks/mary-de-morgan_the-windfairies.epub b/documents/epub/standard-ebooks/mary-de-morgan_the-windfairies.epub new file mode 100644 index 0000000..73d2dbe --- /dev/null +++ b/documents/epub/standard-ebooks/mary-de-morgan_the-windfairies.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1218600f540e2b84f2953c60e56722071142900172601a256c2df8601a9808bf +size 463077 diff --git a/documents/epub/standard-ebooks/mary-shelley_short-fiction.epub b/documents/epub/standard-ebooks/mary-shelley_short-fiction.epub new file mode 100644 index 0000000..4a4bc77 --- /dev/null +++ b/documents/epub/standard-ebooks/mary-shelley_short-fiction.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:961fddac2330e5a2030d2602a58ea8940f12910950cb6cf7d2976d9320a91f34 +size 641012 diff --git a/documents/epub/standard-ebooks/may-sinclair_mary-olivier-a-life.epub b/documents/epub/standard-ebooks/may-sinclair_mary-olivier-a-life.epub new file mode 100644 index 0000000..dfa5435 --- /dev/null +++ b/documents/epub/standard-ebooks/may-sinclair_mary-olivier-a-life.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cc345c979d5676996fa8788c59af1e47cb5cbd5f0cce6b792102882c1ecbb1 +size 515005 diff --git a/documents/epub/standard-ebooks/metta-victor_the-dead-letter.epub b/documents/epub/standard-ebooks/metta-victor_the-dead-letter.epub new file mode 100644 index 0000000..80f3076 --- /dev/null +++ b/documents/epub/standard-ebooks/metta-victor_the-dead-letter.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24d7b61b1549a3dbb45c8b038a7a752810f52be11ac475e9893e07def22f0263 +size 892443 diff --git a/documents/epub/standard-ebooks/mignon-g-eberhart_the-patient-in-room-18.epub b/documents/epub/standard-ebooks/mignon-g-eberhart_the-patient-in-room-18.epub new file mode 100644 index 0000000..19243b3 --- /dev/null +++ 
b/documents/epub/standard-ebooks/mignon-g-eberhart_the-patient-in-room-18.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3653e76cfa3c3dcca7dd76aea44b607a8c4124b4a24dae13010e8958ff9c875f +size 620863 diff --git a/documents/epub/standard-ebooks/nikolai-gogol_the-inspector-general_thomas-seltzer.epub b/documents/epub/standard-ebooks/nikolai-gogol_the-inspector-general_thomas-seltzer.epub new file mode 100644 index 0000000..88deaf2 --- /dev/null +++ b/documents/epub/standard-ebooks/nikolai-gogol_the-inspector-general_thomas-seltzer.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3b916d7b57b43d9d570b756b854f943183a2bda8da797adfd88cb15e7f9ad59 +size 419606 diff --git a/documents/epub/standard-ebooks/noah-brooks_our-baseball-club-and-how-it-won-the-championship.epub b/documents/epub/standard-ebooks/noah-brooks_our-baseball-club-and-how-it-won-the-championship.epub new file mode 100644 index 0000000..c3d2dd7 --- /dev/null +++ b/documents/epub/standard-ebooks/noah-brooks_our-baseball-club-and-how-it-won-the-championship.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f86e5b880f52a3e8c592e6db633dd99d654e0ddee041f4c5f0cb0bbad9cc8767 +size 863518 diff --git a/documents/epub/standard-ebooks/olaf-stapledon_last-and-first-men.epub b/documents/epub/standard-ebooks/olaf-stapledon_last-and-first-men.epub new file mode 100644 index 0000000..4286d54 --- /dev/null +++ b/documents/epub/standard-ebooks/olaf-stapledon_last-and-first-men.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:122937d7c9d58ed216382140f56dc041fd80290efaad89e4168b4b58ff1ba75e +size 636454 diff --git a/documents/epub/standard-ebooks/r-austin-freeman_the-mystery-of-31-new-inn.epub b/documents/epub/standard-ebooks/r-austin-freeman_the-mystery-of-31-new-inn.epub new file mode 100644 index 0000000..81be4ad --- /dev/null +++ b/documents/epub/standard-ebooks/r-austin-freeman_the-mystery-of-31-new-inn.epub @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3965542a731240ba61f90587723fae2aae3f155503e8e969561a24203ff6872 +size 577404 diff --git a/documents/epub/standard-ebooks/richard-hughes_a-high-wind-in-jamaica.epub b/documents/epub/standard-ebooks/richard-hughes_a-high-wind-in-jamaica.epub new file mode 100644 index 0000000..e9f8f07 --- /dev/null +++ b/documents/epub/standard-ebooks/richard-hughes_a-high-wind-in-jamaica.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd975568ac1b682e4a7974799c70f280d275cc8aff6402a4d9f1d95ad164ce27 +size 466016 diff --git a/documents/epub/standard-ebooks/ring-lardner_jack-keefe-stories.epub b/documents/epub/standard-ebooks/ring-lardner_jack-keefe-stories.epub new file mode 100644 index 0000000..9199b68 --- /dev/null +++ b/documents/epub/standard-ebooks/ring-lardner_jack-keefe-stories.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29a46cc4f966a230ea42db6d08addf37012fff8036037f8378968abb3ca298d2 +size 992018 diff --git a/documents/epub/standard-ebooks/ring-lardner_my-four-weeks-in-france.epub b/documents/epub/standard-ebooks/ring-lardner_my-four-weeks-in-france.epub new file mode 100644 index 0000000..0af2732 --- /dev/null +++ b/documents/epub/standard-ebooks/ring-lardner_my-four-weeks-in-france.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2147cd3e9c2f278898b918cb8092e67c80d63c1800377734af272ee82e765c0b +size 580556 diff --git a/documents/epub/standard-ebooks/robert-e-howard_conan-stories.epub b/documents/epub/standard-ebooks/robert-e-howard_conan-stories.epub new file mode 100644 index 0000000..458cca0 --- /dev/null +++ b/documents/epub/standard-ebooks/robert-e-howard_conan-stories.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39579da6bad5c84ee4061d5c4b2ad777fdf1e648115507b6ba1ba0bf4ca52e83 +size 859409 diff --git a/documents/epub/standard-ebooks/rufus-king_murder-by-the-clock.epub 
b/documents/epub/standard-ebooks/rufus-king_murder-by-the-clock.epub new file mode 100644 index 0000000..bac23bb --- /dev/null +++ b/documents/epub/standard-ebooks/rufus-king_murder-by-the-clock.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d6d5eaf59ccccfc854859cf29faf9ea6215372e555cae9e0c2c84ce0a22985c +size 305781 diff --git a/documents/epub/standard-ebooks/s-fowler-wright_the-world-below.epub b/documents/epub/standard-ebooks/s-fowler-wright_the-world-below.epub new file mode 100644 index 0000000..f48524b --- /dev/null +++ b/documents/epub/standard-ebooks/s-fowler-wright_the-world-below.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a672dd2f6726b092fbcaf7380804e5946d5e58d3d739bbef538f46ec97be4ddc +size 432515 diff --git a/documents/epub/standard-ebooks/sigrid-undset_jenny_w-emme.epub b/documents/epub/standard-ebooks/sigrid-undset_jenny_w-emme.epub new file mode 100644 index 0000000..dfb4aa7 --- /dev/null +++ b/documents/epub/standard-ebooks/sigrid-undset_jenny_w-emme.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67b869912ef6c9ed2ab2e0bd2687b4c7efafeaa7814436b96943d912f375c955 +size 603402 diff --git a/documents/epub/standard-ebooks/stella-benson_the-faraway-bride.epub b/documents/epub/standard-ebooks/stella-benson_the-faraway-bride.epub new file mode 100644 index 0000000..300c395 --- /dev/null +++ b/documents/epub/standard-ebooks/stella-benson_the-faraway-bride.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d9f9a19238608d3e69ae6986d09e647d001050e524229e924b914590a88d255 +size 641225 diff --git a/documents/epub/standard-ebooks/w-n-p-barbellion_the-journal-of-a-disappointed-man.epub b/documents/epub/standard-ebooks/w-n-p-barbellion_the-journal-of-a-disappointed-man.epub new file mode 100644 index 0000000..d886bf6 --- /dev/null +++ b/documents/epub/standard-ebooks/w-n-p-barbellion_the-journal-of-a-disappointed-man.epub @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:983c21777d47b54ba25339cc434fadf213d19dab2d785bd6ab2e2e6e69d3c671 +size 764600 diff --git a/documents/epub/standard-ebooks/w-somerset-maugham_cakes-and-ale.epub b/documents/epub/standard-ebooks/w-somerset-maugham_cakes-and-ale.epub new file mode 100644 index 0000000..76e5173 --- /dev/null +++ b/documents/epub/standard-ebooks/w-somerset-maugham_cakes-and-ale.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9055332d5b8dda1d1d13ab256a753b0c018d490ff2e84593880f0a034e3599c9 +size 549871 diff --git a/documents/epub/standard-ebooks/wilkie-collins_the-haunted-hotel.epub b/documents/epub/standard-ebooks/wilkie-collins_the-haunted-hotel.epub new file mode 100644 index 0000000..ca02a09 --- /dev/null +++ b/documents/epub/standard-ebooks/wilkie-collins_the-haunted-hotel.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53d170b06fa44f67ab4daeb9ffede5bcb712650346781bfceb99b248fa18f447 +size 697684 diff --git a/documents/epub/standard-ebooks/wilkie-collins_the-law-and-the-lady.epub b/documents/epub/standard-ebooks/wilkie-collins_the-law-and-the-lady.epub new file mode 100644 index 0000000..bcc4b76 --- /dev/null +++ b/documents/epub/standard-ebooks/wilkie-collins_the-law-and-the-lady.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35db48b67cb3666935137045205fb041cd241870062d4f0841b5927ba1dfb5b3 +size 856325 diff --git a/documents/epub/standard-ebooks/william-faulkner_as-i-lay-dying.epub b/documents/epub/standard-ebooks/william-faulkner_as-i-lay-dying.epub new file mode 100644 index 0000000..5af05db --- /dev/null +++ b/documents/epub/standard-ebooks/william-faulkner_as-i-lay-dying.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f4779ada81cce64525feced88898147725807481f849a657793cb3e5ba2a60b +size 508497 diff --git a/documents/epub/standard-ebooks/william-gerhardie_futility.epub 
b/documents/epub/standard-ebooks/william-gerhardie_futility.epub new file mode 100644 index 0000000..14c3c5f --- /dev/null +++ b/documents/epub/standard-ebooks/william-gerhardie_futility.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4da24bd4fbfc9c142f6e4cd1aca96f50bdad4122f160f190714508b91eb708ad +size 625483 diff --git a/documents/epub/standard-ebooks/william-james_the-varieties-of-religious-experience.epub b/documents/epub/standard-ebooks/william-james_the-varieties-of-religious-experience.epub new file mode 100644 index 0000000..dab25c4 --- /dev/null +++ b/documents/epub/standard-ebooks/william-james_the-varieties-of-religious-experience.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86791e1720cc77717ab491affc07bb5bacb0a92696b9e085fd047fd274fc6469 +size 876986 diff --git a/documents/epub/standard-ebooks/william-morris_the-roots-of-the-mountains.epub b/documents/epub/standard-ebooks/william-morris_the-roots-of-the-mountains.epub new file mode 100644 index 0000000..3db2cc9 --- /dev/null +++ b/documents/epub/standard-ebooks/william-morris_the-roots-of-the-mountains.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08e5df41717ba4c2e69641f2d1991bc2e3e14e1f0a911f08a7c52d38d4359224 +size 827856 diff --git a/documents/epub/standard-ebooks/william-morris_the-sundering-flood.epub b/documents/epub/standard-ebooks/william-morris_the-sundering-flood.epub new file mode 100644 index 0000000..94f1128 --- /dev/null +++ b/documents/epub/standard-ebooks/william-morris_the-sundering-flood.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab61145d0414b0f7ce4534c7d1a2356c7a686ab00acb3a9d222c657c5c9defa6 +size 657237 diff --git a/documents/epub/standard-ebooks/zeami-motokiyo_plays_various-translators.epub b/documents/epub/standard-ebooks/zeami-motokiyo_plays_various-translators.epub new file mode 100644 index 0000000..30c5c25 --- /dev/null +++ 
b/documents/epub/standard-ebooks/zeami-motokiyo_plays_various-translators.epub @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899ec1be1fc52ddc59c3f3adf15202a62d56928c0947fb569e05cd2566471de9 +size 901005 diff --git a/scrape/epub_standard_ebooks.py b/scrape/epub_standard_ebooks.py new file mode 100644 index 0000000..626bca8 --- /dev/null +++ b/scrape/epub_standard_ebooks.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +import re +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent)) +from scrape.utils import ( + sanitize_filename, + download_file, + validate_file_format, + check_file_size, + rate_limit, + MAGIC_EPUB, +) +import requests + +DEST = Path("documents/epub/standard-ebooks") +DEST.mkdir(parents=True, exist_ok=True) +TARGET = 110 +BASE = "https://standardebooks.org" + +session = requests.Session() +session.headers["User-Agent"] = ( + "DocSpec-Corpus-Scraper/1.0 (https://github.com/docspec/documents)" +) + +ebook_paths: list[str] = [] +page = 1 +while len(ebook_paths) < TARGET: + print(f"Fetching listing page {page}...") + rate_limit(2.0) + resp = session.get(f"{BASE}/ebooks?page={page}", timeout=30) + if resp.status_code == 404: + break + resp.raise_for_status() + found = sorted(set(re.findall(r'href="(/ebooks/[a-z0-9][^"]+)"', resp.text))) + new = [ + p + for p in found + if not p.endswith("/") and "/downloads/" not in p and p.count("/") >= 2 + ] + ebook_paths.extend(new) + if not new: + break + page += 1 + +ebook_paths = list(dict.fromkeys(ebook_paths)) +print(f"Found {len(ebook_paths)} unique ebook paths") + +downloaded = 0 +skipped = 0 +failed = 0 + +for ebook_path in ebook_paths: + if downloaded >= TARGET: + break + + slug = ebook_path.removeprefix("/ebooks/") + filename_stem = slug.replace("/", "_") + fname = f"{filename_stem}.epub" + dest = DEST / fname + epub_url = f"{BASE}{ebook_path}/downloads/{filename_stem}.epub" + + if dest.exists(): + if validate_file_format(dest, MAGIC_EPUB) and 
check_file_size(dest, 1000): + downloaded += 1 + skipped += 1 + print(f" Already exists: {fname} ({downloaded}/{TARGET})") + continue + else: + print(f" Removing invalid file: {fname}") + dest.unlink() + + print(f"Downloading: {fname}") + ok = download_file(epub_url, dest, delay=2.0, session=session) + if ok and validate_file_format(dest, MAGIC_EPUB) and check_file_size(dest, 1000): + downloaded += 1 + print(f" OK ({downloaded}/{TARGET})") + else: + failed += 1 + print(f" FAILED or invalid") + if dest.exists(): + dest.unlink() + +print(f"\nSummary:") +print(f" Downloaded: {downloaded} EPUBs") +print(f" Already existed (skipped): {skipped}") +print(f" Failed: {failed}") + +total = len(list(DEST.glob("*.epub"))) +print(f" Total EPUBs in {DEST}: {total}") From 2ef289cba48dfeff61e7a061dfb84372842f2b95 Mon Sep 17 00:00:00 2001 From: Stephan Meijer Date: Wed, 1 Apr 2026 17:49:54 +0200 Subject: [PATCH 07/14] feat(html): add HTML corpus from open sources - 80 HTML examples from MDN Learning Area (CC-BY-SA-2.5) - 30 HTML test fixtures from W3C Web Platform Tests (W3C-20150513) - 3 HTML5 Boilerplate templates (MIT) - 113 total standalone HTML files in documents/html/ - Add ATTRIBUTION.json entries for all 3 sources - Remove 3 pre-existing duplicate .doc files to fix check-duplicates.sh --- ATTRIBUTION.json | 49 +++ documents/doc/apache-poi/bug47287.doc | 3 - documents/doc/apache-poi/bug53380-3.doc | 3 - documents/doc/apache-poi/word95err.doc | 3 - .../html5-boilerplate/h5bp-main-index.html | 33 ++ .../html/html5-boilerplate/h5bp-v6-index.html | 40 +++ .../html/html5-boilerplate/h5bp-v7-index.html | 41 +++ ...phics-to-the-web-vector-versus-raster.html | 14 + .../advanced-items-sold-headers.html | 77 +++++ .../advanced-items-sold-scope.html | 77 +++++ .../advanced-items-sold.html | 77 +++++ ...advanced-text-advanced-text1-download.html | 50 +++ ...advanced-text-advanced-text1-finished.html | 55 ++++ .../advanced-text-advanced-text1.html | 80 +++++ 
...advanced-text-advanced-text2-download.html | 47 +++ ...advanced-text-advanced-text2-finished.html | 64 ++++ .../advanced-text-advanced-text2.html | 80 +++++ ...nced-text-formatting-description-list.html | 20 ++ ...anced-text-formatting-other-semantics.html | 56 ++++ .../advanced-text-formatting-quotations.html | 19 ++ .../basic-text-basic-text1-download.html | 56 ++++ .../basic-text-basic-text1-finished.html | 44 +++ .../basic-text-basic-text1.html | 70 +++++ .../basic-text-basic-text2-download.html | 64 ++++ .../basic-text-basic-text2-finished.html | 54 ++++ .../basic-text-basic-text2.html | 86 ++++++ .../basic-text-basic-text3-download.html | 46 +++ .../basic-text-basic-text3-finished.html | 35 +++ .../basic-text-basic-text3.html | 54 ++++ .../creating-hyperlinks-contacts.html | 23 ++ .../creating-hyperlinks-index.html | 18 ++ .../debugging-html-debug-example-fixed.html | 24 ++ .../debugging-html-debug-example.html | 24 ++ .../document-and-website-structure-index.html | 108 +++++++ .../getting-started-index.html | 11 + .../html-text-formatting-text-complete.html | 47 +++ .../html-text-formatting-text-start.html | 44 +++ .../images-images1-download.html | 30 ++ .../mdn-learning-area/images-images1.html | 45 +++ .../images-images2-download.html | 33 ++ .../mdn-learning-area/images-images2.html | 50 +++ .../images-images3-download.html | 30 ++ .../mdn-learning-area/images-images3.html | 48 +++ .../images-in-html-index.html | 17 ++ .../links-links1-download.html | 43 +++ .../html/mdn-learning-area/links-links1.html | 52 ++++ .../links-links2-download.html | 49 +++ .../html/mdn-learning-area/links-links2.html | 92 ++++++ .../links-links3-download.html | 35 +++ .../html/mdn-learning-area/links-links3.html | 50 +++ .../mdn-learning-area/links-narwhals.html | 40 +++ .../html/mdn-learning-area/links-whales.html | 40 +++ .../marking-up-a-letter-finished-index.html | 108 +++++++ .../mdn-splash-page-finished-index.html | 154 ++++++++++ .../mdn-splash-page-start-index.html 
| 141 +++++++++ .../media-embed-mediaembed1-download.html | 33 ++ .../media-embed-mediaembed1.html | 48 +++ .../media-embed-mediaembed2-download.html | 32 ++ .../media-embed-mediaembed2.html | 48 +++ .../media-embed-mediaembed3-download.html | 32 ++ .../media-embed-mediaembed3.html | 50 +++ .../navigation-menu-marked-up-index.html | 23 ++ .../navigation-menu-marked-up-pictures.html | 23 ++ .../navigation-menu-marked-up-projects.html | 23 ++ .../navigation-menu-marked-up-social.html | 23 ++ .../navigation-menu-start-index.html | 16 + .../navigation-menu-start-pictures.html | 16 + .../navigation-menu-start-projects.html | 16 + .../navigation-menu-start-social.html | 16 + ...-embedding-technologies-iframe-detail.html | 15 + ...embedding-technologies-iframe-youtube.html | 13 + ...r-embedding-technologies-object-image.html | 17 ++ ...her-embedding-technologies-object-pdf.html | 17 ++ .../mdn-learning-area/projects-index.html | 14 + .../responsive-images-not-responsive.html | 61 ++++ .../responsive-images-responsive.html | 68 +++++ .../responsive-images-srcset-resolutions.html | 37 +++ ...ring-a-page-of-content-finished-index.html | 57 ++++ ...cturing-a-page-of-content-start-index.html | 46 +++ .../the-html-head-css-and-js.html | 27 ++ .../the-html-head-meta-example.html | 22 ++ .../the-html-head-title-example.html | 11 + ...nd-audio-content-extra-video-features.html | 24 ++ ...-audio-content-multiple-audio-formats.html | 17 ++ ...nt-multiple-video-formats-no-controls.html | 17 ++ ...-audio-content-multiple-video-formats.html | 17 ++ .../video-and-audio-content-simple-video.html | 15 + .../wpt/text-level-semantics-historical.html | 29 ++ ...dler-with-null-browsing-context-crash.html | 21 ++ .../the-a-element-a-download-click-404.html | 25 ++ ...download-click-redirect-to-javascript.html | 29 ++ .../wpt/the-a-element-a-download-click.html | 33 ++ .../html/wpt/the-a-element-a-stringifier.html | 16 + .../wpt/the-a-element-a-text-getter-01.html | 34 +++ 
.../wpt/the-a-element-a-text-setter-01.html | 41 +++ .../wpt/the-a-element-a-type-historical.html | 22 ++ .../wpt/the-b-element-b-usage-notref.html | 6 + documents/html/wpt/the-b-element-b-usage.html | 8 + ...-bdi-element-bdi-auto-dir-default-ref.html | 36 +++ .../the-bdi-element-bdi-auto-dir-default.html | 46 +++ ...i-element-bdi-neutral-missing-pdf-ref.html | 44 +++ ...e-bdi-element-bdi-neutral-missing-pdf.html | 56 ++++ ...he-bdi-element-bdi-neutral-nested-ref.html | 44 +++ .../the-bdi-element-bdi-neutral-nested.html | 52 ++++ ...he-bdi-element-bdi-neutral-number-ref.html | 44 +++ .../the-bdi-element-bdi-neutral-number.html | 53 ++++ ...-bdi-element-bdi-neutral-separate-ref.html | 36 +++ .../the-bdi-element-bdi-neutral-separate.html | 47 +++ ...ment-bdi-neutral-to-another-bdi-1-ref.html | 47 +++ ...-element-bdi-neutral-to-another-bdi-1.html | 58 ++++ ...ment-bdi-neutral-to-another-bdi-2-ref.html | 47 +++ ...-element-bdi-neutral-to-another-bdi-2.html | 59 ++++ ...bdi-neutral-to-letter-following-1-ref.html | 45 +++ ...ent-bdi-neutral-to-letter-following-1.html | 54 ++++ ...bdi-neutral-to-letter-following-2-ref.html | 45 +++ ...ent-bdi-neutral-to-letter-following-2.html | 54 ++++ ...bdi-neutral-to-letter-preceding-1-ref.html | 45 +++ scrape/html_open_sources.py | 288 ++++++++++++++++++ 118 files changed, 5102 insertions(+), 9 deletions(-) delete mode 100644 documents/doc/apache-poi/bug47287.doc delete mode 100644 documents/doc/apache-poi/bug53380-3.doc delete mode 100644 documents/doc/apache-poi/word95err.doc create mode 100644 documents/html/html5-boilerplate/h5bp-main-index.html create mode 100644 documents/html/html5-boilerplate/h5bp-v6-index.html create mode 100644 documents/html/html5-boilerplate/h5bp-v7-index.html create mode 100644 documents/html/mdn-learning-area/adding-vector-graphics-to-the-web-vector-versus-raster.html create mode 100644 documents/html/mdn-learning-area/advanced-items-sold-headers.html create mode 100644 
documents/html/mdn-learning-area/advanced-items-sold-scope.html create mode 100644 documents/html/mdn-learning-area/advanced-items-sold.html create mode 100644 documents/html/mdn-learning-area/advanced-text-advanced-text1-download.html create mode 100644 documents/html/mdn-learning-area/advanced-text-advanced-text1-finished.html create mode 100644 documents/html/mdn-learning-area/advanced-text-advanced-text1.html create mode 100644 documents/html/mdn-learning-area/advanced-text-advanced-text2-download.html create mode 100644 documents/html/mdn-learning-area/advanced-text-advanced-text2-finished.html create mode 100644 documents/html/mdn-learning-area/advanced-text-advanced-text2.html create mode 100644 documents/html/mdn-learning-area/advanced-text-formatting-description-list.html create mode 100644 documents/html/mdn-learning-area/advanced-text-formatting-other-semantics.html create mode 100644 documents/html/mdn-learning-area/advanced-text-formatting-quotations.html create mode 100644 documents/html/mdn-learning-area/basic-text-basic-text1-download.html create mode 100644 documents/html/mdn-learning-area/basic-text-basic-text1-finished.html create mode 100644 documents/html/mdn-learning-area/basic-text-basic-text1.html create mode 100644 documents/html/mdn-learning-area/basic-text-basic-text2-download.html create mode 100644 documents/html/mdn-learning-area/basic-text-basic-text2-finished.html create mode 100644 documents/html/mdn-learning-area/basic-text-basic-text2.html create mode 100644 documents/html/mdn-learning-area/basic-text-basic-text3-download.html create mode 100644 documents/html/mdn-learning-area/basic-text-basic-text3-finished.html create mode 100644 documents/html/mdn-learning-area/basic-text-basic-text3.html create mode 100644 documents/html/mdn-learning-area/creating-hyperlinks-contacts.html create mode 100644 documents/html/mdn-learning-area/creating-hyperlinks-index.html create mode 100644 
documents/html/mdn-learning-area/debugging-html-debug-example-fixed.html create mode 100644 documents/html/mdn-learning-area/debugging-html-debug-example.html create mode 100644 documents/html/mdn-learning-area/document-and-website-structure-index.html create mode 100644 documents/html/mdn-learning-area/getting-started-index.html create mode 100644 documents/html/mdn-learning-area/html-text-formatting-text-complete.html create mode 100644 documents/html/mdn-learning-area/html-text-formatting-text-start.html create mode 100644 documents/html/mdn-learning-area/images-images1-download.html create mode 100644 documents/html/mdn-learning-area/images-images1.html create mode 100644 documents/html/mdn-learning-area/images-images2-download.html create mode 100644 documents/html/mdn-learning-area/images-images2.html create mode 100644 documents/html/mdn-learning-area/images-images3-download.html create mode 100644 documents/html/mdn-learning-area/images-images3.html create mode 100644 documents/html/mdn-learning-area/images-in-html-index.html create mode 100644 documents/html/mdn-learning-area/links-links1-download.html create mode 100644 documents/html/mdn-learning-area/links-links1.html create mode 100644 documents/html/mdn-learning-area/links-links2-download.html create mode 100644 documents/html/mdn-learning-area/links-links2.html create mode 100644 documents/html/mdn-learning-area/links-links3-download.html create mode 100644 documents/html/mdn-learning-area/links-links3.html create mode 100644 documents/html/mdn-learning-area/links-narwhals.html create mode 100644 documents/html/mdn-learning-area/links-whales.html create mode 100644 documents/html/mdn-learning-area/marking-up-a-letter-finished-index.html create mode 100644 documents/html/mdn-learning-area/mdn-splash-page-finished-index.html create mode 100644 documents/html/mdn-learning-area/mdn-splash-page-start-index.html create mode 100644 documents/html/mdn-learning-area/media-embed-mediaembed1-download.html 
create mode 100644 documents/html/mdn-learning-area/media-embed-mediaembed1.html create mode 100644 documents/html/mdn-learning-area/media-embed-mediaembed2-download.html create mode 100644 documents/html/mdn-learning-area/media-embed-mediaembed2.html create mode 100644 documents/html/mdn-learning-area/media-embed-mediaembed3-download.html create mode 100644 documents/html/mdn-learning-area/media-embed-mediaembed3.html create mode 100644 documents/html/mdn-learning-area/navigation-menu-marked-up-index.html create mode 100644 documents/html/mdn-learning-area/navigation-menu-marked-up-pictures.html create mode 100644 documents/html/mdn-learning-area/navigation-menu-marked-up-projects.html create mode 100644 documents/html/mdn-learning-area/navigation-menu-marked-up-social.html create mode 100644 documents/html/mdn-learning-area/navigation-menu-start-index.html create mode 100644 documents/html/mdn-learning-area/navigation-menu-start-pictures.html create mode 100644 documents/html/mdn-learning-area/navigation-menu-start-projects.html create mode 100644 documents/html/mdn-learning-area/navigation-menu-start-social.html create mode 100644 documents/html/mdn-learning-area/other-embedding-technologies-iframe-detail.html create mode 100644 documents/html/mdn-learning-area/other-embedding-technologies-iframe-youtube.html create mode 100644 documents/html/mdn-learning-area/other-embedding-technologies-object-image.html create mode 100644 documents/html/mdn-learning-area/other-embedding-technologies-object-pdf.html create mode 100644 documents/html/mdn-learning-area/projects-index.html create mode 100644 documents/html/mdn-learning-area/responsive-images-not-responsive.html create mode 100644 documents/html/mdn-learning-area/responsive-images-responsive.html create mode 100644 documents/html/mdn-learning-area/responsive-images-srcset-resolutions.html create mode 100644 documents/html/mdn-learning-area/structuring-a-page-of-content-finished-index.html create mode 100644 
documents/html/mdn-learning-area/structuring-a-page-of-content-start-index.html create mode 100644 documents/html/mdn-learning-area/the-html-head-css-and-js.html create mode 100644 documents/html/mdn-learning-area/the-html-head-meta-example.html create mode 100644 documents/html/mdn-learning-area/the-html-head-title-example.html create mode 100644 documents/html/mdn-learning-area/video-and-audio-content-extra-video-features.html create mode 100644 documents/html/mdn-learning-area/video-and-audio-content-multiple-audio-formats.html create mode 100644 documents/html/mdn-learning-area/video-and-audio-content-multiple-video-formats-no-controls.html create mode 100644 documents/html/mdn-learning-area/video-and-audio-content-multiple-video-formats.html create mode 100644 documents/html/mdn-learning-area/video-and-audio-content-simple-video.html create mode 100644 documents/html/wpt/text-level-semantics-historical.html create mode 100644 documents/html/wpt/the-a-element-a-click-handler-with-null-browsing-context-crash.html create mode 100644 documents/html/wpt/the-a-element-a-download-click-404.html create mode 100644 documents/html/wpt/the-a-element-a-download-click-redirect-to-javascript.html create mode 100644 documents/html/wpt/the-a-element-a-download-click.html create mode 100644 documents/html/wpt/the-a-element-a-stringifier.html create mode 100644 documents/html/wpt/the-a-element-a-text-getter-01.html create mode 100644 documents/html/wpt/the-a-element-a-text-setter-01.html create mode 100644 documents/html/wpt/the-a-element-a-type-historical.html create mode 100644 documents/html/wpt/the-b-element-b-usage-notref.html create mode 100644 documents/html/wpt/the-b-element-b-usage.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-auto-dir-default-ref.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-auto-dir-default.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-neutral-missing-pdf-ref.html create mode 100644 
documents/html/wpt/the-bdi-element-bdi-neutral-missing-pdf.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-neutral-nested-ref.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-neutral-nested.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-neutral-number-ref.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-neutral-number.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-neutral-separate-ref.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-neutral-separate.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-neutral-to-another-bdi-1-ref.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-neutral-to-another-bdi-1.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-neutral-to-another-bdi-2-ref.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-neutral-to-another-bdi-2.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-following-1-ref.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-following-1.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-following-2-ref.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-following-2.html create mode 100644 documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-preceding-1-ref.html create mode 100644 scrape/html_open_sources.py diff --git a/ATTRIBUTION.json b/ATTRIBUTION.json index 234da84..b0f84f7 100644 --- a/ATTRIBUTION.json +++ b/ATTRIBUTION.json @@ -112,5 +112,54 @@ ], "donated": "2026-04-01", "notes": "High-quality EPUB ebooks from Standard Ebooks (CC0-1.0 production, public domain source texts)" + }, + { + "format": "html", + "path": "documents/html/mdn-learning-area/*.html", + "title": "MDN Learning Area HTML Examples", + "author": "Mozilla Developer Network contributors", + "license": "CC-BY-SA-2.5", + "source": "https://github.com/mdn/learning-area", + "tags": [ + "mdn", + 
"html", + "examples", + "educational" + ], + "donated": "2026-04-01", + "notes": "Standalone HTML examples from MDN Web Docs learning area (introduction-to-html, multimedia-and-embedding, tables, forms)" + }, + { + "format": "html", + "path": "documents/html/wpt/*.html", + "title": "W3C Web Platform Tests HTML Fixtures", + "author": "W3C Web Platform Tests contributors", + "license": "W3C-20150513", + "source": "https://github.com/web-platform-tests/wpt", + "tags": [ + "w3c", + "wpt", + "html", + "test-fixtures", + "semantics" + ], + "donated": "2026-04-01", + "notes": "HTML test fixtures from the W3C Web Platform Tests project (html/semantics subdirectories)" + }, + { + "format": "html", + "path": "documents/html/html5-boilerplate/*.html", + "title": "HTML5 Boilerplate Templates", + "author": "HTML5 Boilerplate contributors", + "license": "MIT", + "source": "https://github.com/h5bp/html5-boilerplate", + "tags": [ + "h5bp", + "html5", + "boilerplate", + "template" + ], + "donated": "2026-04-01", + "notes": "Starter HTML templates from the HTML5 Boilerplate project (v6, v7, main)" } ] diff --git a/documents/doc/apache-poi/bug47287.doc b/documents/doc/apache-poi/bug47287.doc deleted file mode 100644 index b8fd9e2..0000000 --- a/documents/doc/apache-poi/bug47287.doc +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:daa43e747b73d3ee25f016c7106f0f726cad61dc5a9515edeff4eb35feaa9da5 -size 44544 diff --git a/documents/doc/apache-poi/bug53380-3.doc b/documents/doc/apache-poi/bug53380-3.doc deleted file mode 100644 index bfcde21..0000000 --- a/documents/doc/apache-poi/bug53380-3.doc +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3744731bc3e7869b4fc968a78d46a3e82f3ee85fd7e6599430558aef4eab88b8 -size 248320 diff --git a/documents/doc/apache-poi/word95err.doc b/documents/doc/apache-poi/word95err.doc deleted file mode 100644 index 4066c40..0000000 --- a/documents/doc/apache-poi/word95err.doc +++ /dev/null 
@@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9c2ffbdbbdc98d210640744cf70cc72836bb4df7a7c50f612c1a95360d51f6ed -size 33280 diff --git a/documents/html/html5-boilerplate/h5bp-main-index.html b/documents/html/html5-boilerplate/h5bp-main-index.html new file mode 100644 index 0000000..f3add49 --- /dev/null +++ b/documents/html/html5-boilerplate/h5bp-main-index.html @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + +

Hello world! This is HTML5 Boilerplate.

+ + + + + diff --git a/documents/html/html5-boilerplate/h5bp-v6-index.html b/documents/html/html5-boilerplate/h5bp-v6-index.html new file mode 100644 index 0000000..e669915 --- /dev/null +++ b/documents/html/html5-boilerplate/h5bp-v6-index.html @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + +

Hello world! This is HTML5 Boilerplate.

+ + + + + + + + + + + + diff --git a/documents/html/html5-boilerplate/h5bp-v7-index.html b/documents/html/html5-boilerplate/h5bp-v7-index.html new file mode 100644 index 0000000..cbbcb03 --- /dev/null +++ b/documents/html/html5-boilerplate/h5bp-v7-index.html @@ -0,0 +1,41 @@ + + + + + + + + + + + + + + + + + + + + + + + +

Hello world! This is HTML5 Boilerplate.

+ + + + + + + + + + + + diff --git a/documents/html/mdn-learning-area/adding-vector-graphics-to-the-web-vector-versus-raster.html b/documents/html/mdn-learning-area/adding-vector-graphics-to-the-web-vector-versus-raster.html new file mode 100644 index 0000000..d18d887 --- /dev/null +++ b/documents/html/mdn-learning-area/adding-vector-graphics-to-the-web-vector-versus-raster.html @@ -0,0 +1,14 @@ + + + + + + Vector versus raster + + +

Vector versus raster

+ + A raster star + A vector star + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/advanced-items-sold-headers.html b/documents/html/mdn-learning-area/advanced-items-sold-headers.html new file mode 100644 index 0000000..a2d75f0 --- /dev/null +++ b/documents/html/mdn-learning-area/advanced-items-sold-headers.html @@ -0,0 +1,77 @@ + + + + + + Items sold summary + + + + +

Items sold summary

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Items Sold August 2016
ClothesAccessories
TrousersSkirtsDressesBraceletsRings
BelgiumAntwerp5622437223
Ghent4618506115
Brussels5127386928
The NetherlandsAmsterdam8934698538
Utrecht8012433619
+ + + + diff --git a/documents/html/mdn-learning-area/advanced-items-sold-scope.html b/documents/html/mdn-learning-area/advanced-items-sold-scope.html new file mode 100644 index 0000000..caf68a4 --- /dev/null +++ b/documents/html/mdn-learning-area/advanced-items-sold-scope.html @@ -0,0 +1,77 @@ + + + + + + Items sold summary + + + + +

Items sold summary

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Items Sold August 2016
ClothesAccessories
TrousersSkirtsDressesBraceletsRings
BelgiumAntwerp5622437223
Ghent4618506115
Brussels5127386928
The NetherlandsAmsterdam8934698538
Utrecht8012433619
+ + + + diff --git a/documents/html/mdn-learning-area/advanced-items-sold.html b/documents/html/mdn-learning-area/advanced-items-sold.html new file mode 100644 index 0000000..f78ec03 --- /dev/null +++ b/documents/html/mdn-learning-area/advanced-items-sold.html @@ -0,0 +1,77 @@ + + + + + + Items sold summary + + + + +

Items sold summary

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Items Sold August 2016
ClothesAccessories
TrousersSkirtsDressesBraceletsRings
BelgiumAntwerp5622437223
Ghent4618506115
Brussels5127386928
The NetherlandsAmsterdam8934698538
Utrecht8012433619
+ + + + diff --git a/documents/html/mdn-learning-area/advanced-text-advanced-text1-download.html b/documents/html/mdn-learning-area/advanced-text-advanced-text1-download.html new file mode 100644 index 0000000..d717604 --- /dev/null +++ b/documents/html/mdn-learning-area/advanced-text-advanced-text1-download.html @@ -0,0 +1,50 @@ + + + + + Advanced HTML text: Task 1 + + + + + +

Advanced HTML Animals

+ + Llama + Tall, woolly quadruped, pointy ears. Sometimes rideable, but grumpy and spits a lot. Big fan of list items. + Anaconda + A very large constrictor snake; travels rapidly by way of anchors to sneak up on his prey. + Hippopotamus + His description is bottomless. + + + + diff --git a/documents/html/mdn-learning-area/advanced-text-advanced-text1-finished.html b/documents/html/mdn-learning-area/advanced-text-advanced-text1-finished.html new file mode 100644 index 0000000..874f7d9 --- /dev/null +++ b/documents/html/mdn-learning-area/advanced-text-advanced-text1-finished.html @@ -0,0 +1,55 @@ + + + + + Advanced HTML text: Task 1 + + + + +

Advanced HTML Animals

+ +
+
Llama
+
+ Tall, woolly quadruped, pointy ears. Sometimes rideable, but grumpy and + spits a lot. Big fan of list items. +
+
Anaconda
+
+ A very large constrictor snake; travels rapidly by way of anchors to + sneak up on his prey. +
+
Hippopotamus
+
His description is bottomless.
+
+ + diff --git a/documents/html/mdn-learning-area/advanced-text-advanced-text1.html b/documents/html/mdn-learning-area/advanced-text-advanced-text1.html new file mode 100644 index 0000000..bcc3572 --- /dev/null +++ b/documents/html/mdn-learning-area/advanced-text-advanced-text1.html @@ -0,0 +1,80 @@ + + + + + Advanced HTML text: Task 1 + + + + + + + +
+ +

Advanced HTML Animals

+ + Llama + Tall, woolly quadruped, pointy ears. Sometimes rideable, but grumpy and spits a lot. Big fan of list items. + Anaconda + A very large constrictor snake; travels rapidly by way of anchors to sneak up on his prey. + Hippopotamus + His description is bottomless. + +
+ + + + + +
+ +
+ + + diff --git a/documents/html/mdn-learning-area/advanced-text-advanced-text2-download.html b/documents/html/mdn-learning-area/advanced-text-advanced-text2-download.html new file mode 100644 index 0000000..b08fdc4 --- /dev/null +++ b/documents/html/mdn-learning-area/advanced-text-advanced-text2-download.html @@ -0,0 +1,47 @@ + + + + + Advanced HTML text: Task 2 + + + + + +

Advanced text semantics

+ +

Let's start with a quote:

+ +

HTML, Hypertext Markup Language is by default accessible, if used correctly.

+ +

CSS can also be used to make web pages more, or less, accessible.

+ +

Chemical Formulae: H2O (Water), C2H6O (Ethanol).

+ +

Dates: December 25th 2019 (Christmas Day), November 2nd 2019 (Día de los Muertos).

+ + + + diff --git a/documents/html/mdn-learning-area/advanced-text-advanced-text2-finished.html b/documents/html/mdn-learning-area/advanced-text-advanced-text2-finished.html new file mode 100644 index 0000000..4429467 --- /dev/null +++ b/documents/html/mdn-learning-area/advanced-text-advanced-text2-finished.html @@ -0,0 +1,64 @@ + + + + + Advanced HTML text: Task 2 + + + + +

Advanced text semantics

+ +

Let's start with a quote:

+ +
+

+ HTML, Hypertext Markup Language is by default accessible, + if used correctly. +

+
+ +

+ CSS, Cascading Style Sheets, can also be used to make web + pages more, or less, accessible. +

+ +

+ Chemical Formulae: H2O (Water), C2H6O + (Ethanol). +

+ +

+ Dates: + + (Christmas Day), + (Día de + los Muertos). +

+ + diff --git a/documents/html/mdn-learning-area/advanced-text-advanced-text2.html b/documents/html/mdn-learning-area/advanced-text-advanced-text2.html new file mode 100644 index 0000000..d456270 --- /dev/null +++ b/documents/html/mdn-learning-area/advanced-text-advanced-text2.html @@ -0,0 +1,80 @@ + + + + + Advanced HTML text: Task 2 + + + + + + + +
+

Advanced text semantics

+ +

Let's start with a quote:

+ +

HTML, Hypertext Markup Language is by default accessible, if used correctly.

+ +

CSS can also be used to make web pages more, or less, accessible.

+ +

Chemical Formulae: H2O (Water), C2H6O (Ethanol).

+ +

Dates: December 25th 2019 (Christmas Day), November 2nd 2019 (Día de los Muertos).

+
+ + + + + +
+ +
+ + + diff --git a/documents/html/mdn-learning-area/advanced-text-formatting-description-list.html b/documents/html/mdn-learning-area/advanced-text-formatting-description-list.html new file mode 100644 index 0000000..1f790ba --- /dev/null +++ b/documents/html/mdn-learning-area/advanced-text-formatting-description-list.html @@ -0,0 +1,20 @@ + + + + + + Description list example + + +
+
soliloquy
+
In drama, where a character speaks to themselves, representing their inner thoughts or feelings and in the process relaying them to the audience (but not to other characters.)
+
monologue
+
In drama, where a character speaks their thoughts out loud to share them with the audience and any other characters present.
+
aside
+
In drama, where a character shares a comment only with the audience for humorous or dramatic effect. This is usually a feeling, thought or piece of additional background information.
+
+ + + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/advanced-text-formatting-other-semantics.html b/documents/html/mdn-learning-area/advanced-text-formatting-other-semantics.html new file mode 100644 index 0000000..cdf9477 --- /dev/null +++ b/documents/html/mdn-learning-area/advanced-text-formatting-other-semantics.html @@ -0,0 +1,56 @@ + + + + + + Other semantics examples + + +

+ We use HTML, Hypertext Markup Language, to structure our web + documents. +

+ +

+ I think Rev. Green did it in the kitchen + with the chainsaw. +

+ +
+

Chris Mills, Manchester, The Grim North, UK

+
+ +

My birthday is on the 25th of May 2001.

+ +

+ Caffeine's chemical formula is + C8H10N4O2. +

+ +

If x2 is 9, x must equal 3.

+ +
const para = document.querySelector('p');
+
+para.onclick = function() {
+  alert('Owww, stop poking me!');
+}
+ +

+ You shouldn't use presentational elements like + <font> and <center>. +

+ +

+ In the above JavaScript example, para represents a paragraph + element. +

+ +

+ Select all the text with Ctrl/Cmd + A. +

+ +
$ ping mozilla.org
+PING mozilla.org (63.245.215.20): 56 data bytes
+64 bytes from 63.245.215.20: icmp_seq=0 ttl=40 time=158.233 ms
+ + diff --git a/documents/html/mdn-learning-area/advanced-text-formatting-quotations.html b/documents/html/mdn-learning-area/advanced-text-formatting-quotations.html new file mode 100644 index 0000000..5621ed1 --- /dev/null +++ b/documents/html/mdn-learning-area/advanced-text-formatting-quotations.html @@ -0,0 +1,19 @@ + + + + + + Quote examples + + +

According to the MDN blockquote page:

+ +
+

The HTML <blockquote> Element (or HTML Block Quotation Element) indicates that the enclosed text is an extended quotation.

+
+ +

The quote element — <q> — is intended for short quotations that don't require paragraph breaks. -- MDN q page.

+ + + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/basic-text-basic-text1-download.html b/documents/html/mdn-learning-area/basic-text-basic-text1-download.html new file mode 100644 index 0000000..e8d79ba --- /dev/null +++ b/documents/html/mdn-learning-area/basic-text-basic-text1-download.html @@ -0,0 +1,56 @@ + + + + + Basic HTML text: Task 1 + + + + + + Basic HTML Animals + + This is the first paragraph in our page. It introduces our animals. + + The Llama + + Our Llama is a big fan of list items. When she spies a patch of them on a web page, she will eat them like sweets, licking her lips as she goes. + + The Anaconda + + The crafty anaconda likes to slither around the page, travelling rapidly by way of anchors to sneak up on his prey. + + + + diff --git a/documents/html/mdn-learning-area/basic-text-basic-text1-finished.html b/documents/html/mdn-learning-area/basic-text-basic-text1-finished.html new file mode 100644 index 0000000..f80fca0 --- /dev/null +++ b/documents/html/mdn-learning-area/basic-text-basic-text1-finished.html @@ -0,0 +1,44 @@ + + + + + Basic HTML text finished example: Task 1 + + + + + +
+

Basic HTML Animals

+ +

This is the first paragraph in our page. It introduces our animals.

+ +

The Llama

+ +

+ Our Llama is a big fan of list items. When she spies a patch of them on + a web page, she will eat them like sweets, licking her lips as she goes. +

+ +

The Anaconda

+ +

+ The crafty anaconda likes to slither around the page, travelling rapidly + by way of anchors to sneak up on his prey. +

+
+ + diff --git a/documents/html/mdn-learning-area/basic-text-basic-text1.html b/documents/html/mdn-learning-area/basic-text-basic-text1.html new file mode 100644 index 0000000..728cdcd --- /dev/null +++ b/documents/html/mdn-learning-area/basic-text-basic-text1.html @@ -0,0 +1,70 @@ + + + + + Basic HTML text: Task 1 + + + + + + + +
+ + Basic HTML Animals + + This is the first paragraph in our page. It introduces our animals. + + The Llama + + Our Llama is a big fan of list items. When she spies a patch of them on a web page, she will eat them like sweets, licking her lips as she goes. + + The Anaconda + + The crafty anaconda likes to slither around the page, travelling rapidly by way of anchors to sneak up on his prey. + +
+ + + + + +
+ +
+ + + diff --git a/documents/html/mdn-learning-area/basic-text-basic-text2-download.html b/documents/html/mdn-learning-area/basic-text-basic-text2-download.html new file mode 100644 index 0000000..b51e726 --- /dev/null +++ b/documents/html/mdn-learning-area/basic-text-basic-text2-download.html @@ -0,0 +1,64 @@ + + + + + Basic HTML text: Task 2 + + + + + +

Looking at lists

+ +

Turn the following list of my favorite vegetables into an unordered list.

+ + Cucumber + Broccoli + Asparagus + Pepper + +

Turn the following directions into an ordered list.

+ + First knock on the door + When prompted, say the magic word + Wait for at least 5 seconds + Turn the handle and push + + + + diff --git a/documents/html/mdn-learning-area/basic-text-basic-text2-finished.html b/documents/html/mdn-learning-area/basic-text-basic-text2-finished.html new file mode 100644 index 0000000..0b5eb68 --- /dev/null +++ b/documents/html/mdn-learning-area/basic-text-basic-text2-finished.html @@ -0,0 +1,54 @@ + + + + + Basic HTML text finished example: Task 2 + + + + + +
+

Looking at lists

+ +

+ Turn the following list of my favorite vegetables into an unordered + list. +

+ +
    +
  • Cucumber
  • +
  • Broccoli
  • +
  • Asparagus
  • +
  • Pepper
  • +
+ +

Turn the following directions into an ordered list.

+ +
    +
  1. First knock on the door
  2. +
  3. When prompted, say the magic word
  4. +
  5. Wait for at least 5 seconds
  6. +
  7. Turn the handle and push
  8. +
+
+ + diff --git a/documents/html/mdn-learning-area/basic-text-basic-text2.html b/documents/html/mdn-learning-area/basic-text-basic-text2.html new file mode 100644 index 0000000..bd6bff8 --- /dev/null +++ b/documents/html/mdn-learning-area/basic-text-basic-text2.html @@ -0,0 +1,86 @@ + + + + + Basic HTML text: Task 2 + + + + + + + +
+

Looking at lists

+ +

Turn the following list of my favorite vegetables into an unordered list.

+ + Cucumber + Broccoli + Asparagus + Pepper + +

Turn the following directions into an ordered list.

+ + First knock on the door + When prompted, say the magic word + Wait for at least 5 seconds + Turn the handle and push +
+ + + + + +
+ +
+ + + diff --git a/documents/html/mdn-learning-area/basic-text-basic-text3-download.html b/documents/html/mdn-learning-area/basic-text-basic-text3-download.html new file mode 100644 index 0000000..6d57f5e --- /dev/null +++ b/documents/html/mdn-learning-area/basic-text-basic-text3-download.html @@ -0,0 +1,46 @@ + + + + + Basic HTML text: Task 3 + + + + + +

Emphasis and importance

+ +

There are two things I care about — music and friends. Someday I might be able to get my friends interested in each other, and my music!

+ + + + diff --git a/documents/html/mdn-learning-area/basic-text-basic-text3-finished.html b/documents/html/mdn-learning-area/basic-text-basic-text3-finished.html new file mode 100644 index 0000000..5c2c27f --- /dev/null +++ b/documents/html/mdn-learning-area/basic-text-basic-text3-finished.html @@ -0,0 +1,35 @@ + + + + + Basic HTML text finished example: Task 3 + + + + + +
+

Emphasis and importance

+ +

+ There are two things I care about — + music and friends. Someday I + might be able to get my friends interested in each other, + and my music! +

+
+ + diff --git a/documents/html/mdn-learning-area/basic-text-basic-text3.html b/documents/html/mdn-learning-area/basic-text-basic-text3.html new file mode 100644 index 0000000..3cf68a9 --- /dev/null +++ b/documents/html/mdn-learning-area/basic-text-basic-text3.html @@ -0,0 +1,54 @@ + + + + + Basic HTML text: Task 3 + + + + + + + +
+ +

Emphasis and importance

+ +

There are two things I care about — music and friends. Someday I might be able to get my friends interested in each other, and my music!

+ +
+ + + + + +
+ +
+ + + diff --git a/documents/html/mdn-learning-area/creating-hyperlinks-contacts.html b/documents/html/mdn-learning-area/creating-hyperlinks-contacts.html new file mode 100644 index 0000000..06fd76c --- /dev/null +++ b/documents/html/mdn-learning-area/creating-hyperlinks-contacts.html @@ -0,0 +1,23 @@ + + + + + + My contacts page + + + +

This is my contacts page

+ +

The company mailing address can be found at the bottom of this page.

+ +

Chris is the only guy that works here, and he doesn't have a phone.

+ +

+ +
52 Business street
+ Very important town
+ Commerce city
+ CA, 999654
+ + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/creating-hyperlinks-index.html b/documents/html/mdn-learning-area/creating-hyperlinks-index.html new file mode 100644 index 0000000..bc2c330 --- /dev/null +++ b/documents/html/mdn-learning-area/creating-hyperlinks-index.html @@ -0,0 +1,18 @@ + + + + + + My sample homepage + + + +

This is my sample homepage

+ +

Visit my project homepage.

+ +

Want to contact a specific staff member? Find details on our contacts page.

+ +

Want to write us a letter? Use our mailing address.

+ + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/debugging-html-debug-example-fixed.html b/documents/html/mdn-learning-area/debugging-html-debug-example-fixed.html new file mode 100644 index 0000000..a8a7e60 --- /dev/null +++ b/documents/html/mdn-learning-area/debugging-html-debug-example-fixed.html @@ -0,0 +1,24 @@ + + + + + + HTML debugging examples + + + + + +

HTML debugging examples

+ +

What causes errors in HTML?

+ +
    +
  • Unclosed elements: If an element is not closed properly, then its effect can spread to areas you didn't intend
  • + +
  • Badly nested elements: Nesting elements properly is also very important for code behaving correctly. strong strong emphasized? what is this?
  • + +
  • Unclosed attributes: Another common source of HTML problems. Let's look at an example: link to Mozilla homepage
  • +
+ + diff --git a/documents/html/mdn-learning-area/debugging-html-debug-example.html b/documents/html/mdn-learning-area/debugging-html-debug-example.html new file mode 100644 index 0000000..bfd4a11 --- /dev/null +++ b/documents/html/mdn-learning-area/debugging-html-debug-example.html @@ -0,0 +1,24 @@ + + + + + + HTML debugging examples + + + + + +

HTML debugging examples

+ +

What causes errors in HTML? + +

    +
  • Unclosed elements: If an element is not closed properly, then its effect can spread to areas you didn't intend + +
  • Badly nested elements: Nesting elements properly is also very important for code behaving correctly. strong strong emphasised? what is this? + +
  • Unclosed attributes: Another common source of HTML problems. Let's look at an example: + + + + + My page title + + + + + + + +
    +

    Header

    +
    + +
    + + +
    + +
    +

    Article heading

    + +

    + Lorem ipsum dolor sit amet, consectetur adipisicing elit. Donec a diam + lectus. Set sit amet ipsum mauris. Maecenas congue ligula as quam + viverra nec consectetur ant hendrerit. Donec et mollis dolor. Praesent + et diam eget libero egestas mattis sit amet vitae augue. Nam tincidunt + congue enim, ut porta lorem lacinia consectetur. +

    + +
    +

    Subsection

    + +

    + Donec ut librero sed accu vehicula ultricies a non tortor. Lorem + ipsum dolor sit amet, consectetur adipisicing elit. Aenean ut + gravida lorem. Ut turpis felis, pulvinar a semper sed, adipiscing id + dolor. +

    + +

    + Pelientesque auctor nisi id magna consequat sagittis. Curabitur + dapibus, enim sit amet elit pharetra tincidunt feugiat nist + imperdiet. Ut convallis libero in urna ultrices accumsan. Donec sed + odio eros. +

    +
    + +
    +

    Another subsection

    + +

    + Donec viverra mi quis quam pulvinar at malesuada arcu rhoncus. Cum + soclis natoque penatibus et manis dis parturient montes, nascetur + ridiculus mus. In rutrum accumsan ultricies. Mauris vitae nisi at + sem facilisis semper ac in est. +

    + +

    + Vivamus fermentum semper porta. Nunc diam velit, adipscing ut + tristique vitae sagittis vel odio. Maecenas convallis ullamcorper + ultricied. Curabitur ornare, ligula semper consectetur sagittis, + nisi diam iaculis velit, is fringille sem nunc vet mi. +

    +
    +
    + + + +
    + + + +
    +

    ©Copyright 2050 by nobody. All rights reversed.

    +
    + + diff --git a/documents/html/mdn-learning-area/getting-started-index.html b/documents/html/mdn-learning-area/getting-started-index.html new file mode 100644 index 0000000..e84b7d0 --- /dev/null +++ b/documents/html/mdn-learning-area/getting-started-index.html @@ -0,0 +1,11 @@ + + + + + + My test page + + +

    This is my page

    + + diff --git a/documents/html/mdn-learning-area/html-text-formatting-text-complete.html b/documents/html/mdn-learning-area/html-text-formatting-text-complete.html new file mode 100644 index 0000000..34d319d --- /dev/null +++ b/documents/html/mdn-learning-area/html-text-formatting-text-complete.html @@ -0,0 +1,47 @@ + + + + + + Quick hummus recipe + + +

    Quick hummus recipe

    + +

    This recipe makes quick, tasty hummus, with no messing. It has been adapted from a number of different recipes that I have read over the years.

    + +

    hummus is a delicious thick paste used heavily in Greek and Middle Eastern dishes. It is very tasty with salad, grilled meats and pitta breads.

    + +

    Ingredients

    + +
      +
    • 1 can (400g) of chick peas (garbanzo beans)
    • +
    • 175g of tahini
    • +
    • 6 sundried tomatoes
    • +
    • Half a red pepper
    • +
    • A pinch of cayenne pepper
    • +
    • 1 clove of garlic
    • +
    • A dash of olive oil
    • +
    + +

    Instructions

    + +
      +
    1. Remove the skin from the garlic, and chop coarsely.
    2. +
    3. Remove all the seeds and stalk from the pepper, and chop coarsely.
    4. +
    5. Add all the ingredients into a food processor.
    6. +
    7. Process all the ingredients into a paste.
    8. +
    9. If you want a coarse "chunky" hummus, process it for a short time.
    10. +
    11. If you want a smooth hummus, process it for a longer time.
    12. +
    + +

    For a different flavour, you could try blending in a small measure of lemon and coriander, chili pepper, lime and chipotle, harissa and mint, or spinach and feta cheese. Experiment and see what works for you.

    + +

    Storage

    + +

    Refrigerate the finished hummus in a sealed container. You should be able to use it for about a week after you've made it. If it starts to become fizzy, you should definitely discard it.

    + +

    hummus is suitable for freezing; you should thaw it and use it within a couple of months.

    + + + diff --git a/documents/html/mdn-learning-area/html-text-formatting-text-start.html b/documents/html/mdn-learning-area/html-text-formatting-text-start.html new file mode 100644 index 0000000..df8b3cb --- /dev/null +++ b/documents/html/mdn-learning-area/html-text-formatting-text-start.html @@ -0,0 +1,44 @@ + + + + + + Quick hummus recipe + + + Quick hummus recipe + + This recipe makes quick, tasty hummus, with no messing. It has been adapted from a number of different recipes that I have read over the years. + + Hummus is a delicious thick paste used heavily in Greek and Middle Eastern dishes. It is very tasty with salad, grilled meats and pitta breads. + + Ingredients + + 1 can (400g) of chick peas (garbanzo beans) + 175g of tahini + 6 sundried tomatoes + Half a red pepper + A pinch of cayenne pepper + 1 clove of garlic + A dash of olive oil + + Instructions + + Remove the skin from the garlic, and chop coarsely + Remove all the seeds and stalk from the pepper, and chop coarsely + Add all the ingredients into a food processor + Process all the ingredients into a paste. + If you want a coarse "chunky" hummus, process it for a short time + If you want a smooth hummus, process it for a longer time + + For a different flavour, you could try blending in a small measure of lemon and coriander, chili pepper, lime and chipotle, harissa and mint, or spinach and feta cheese. Experiment and see what works for you. + + Storage + + Refrigerate the finished hummus in a sealed container. You should be able to use it for about a week after you've made it. If it starts to become fizzy, you should definitely discard it. + + Hummus is suitable for freezing; you should thaw it and use it within a couple of months. 
+ + + + diff --git a/documents/html/mdn-learning-area/images-images1-download.html b/documents/html/mdn-learning-area/images-images1-download.html new file mode 100644 index 0000000..7d7e443 --- /dev/null +++ b/documents/html/mdn-learning-area/images-images1-download.html @@ -0,0 +1,30 @@ + + + + + HTML images: Task 1 + + + + +

    Basic image embed

    + + + + diff --git a/documents/html/mdn-learning-area/images-images1.html b/documents/html/mdn-learning-area/images-images1.html new file mode 100644 index 0000000..78149d5 --- /dev/null +++ b/documents/html/mdn-learning-area/images-images1.html @@ -0,0 +1,45 @@ + + + + + HTML images: Task 1 + + + + + + + +
    + +

    Basic image embed

    + + + +
    + + + + + +
    + +
    + + + diff --git a/documents/html/mdn-learning-area/images-images2-download.html b/documents/html/mdn-learning-area/images-images2-download.html new file mode 100644 index 0000000..d2f00c9 --- /dev/null +++ b/documents/html/mdn-learning-area/images-images2-download.html @@ -0,0 +1,33 @@ + + + + + HTML images: Task 2 + + + + +

    Basic image title

    + + Several tall evergreen trees called larches + + + diff --git a/documents/html/mdn-learning-area/images-images2.html b/documents/html/mdn-learning-area/images-images2.html new file mode 100644 index 0000000..6743d1e --- /dev/null +++ b/documents/html/mdn-learning-area/images-images2.html @@ -0,0 +1,50 @@ + + + + + HTML images: Task 2 + + + + + + + +
    + +

    Basic image title

    + + Several tall evergreen trees called larches + +
    + + + + + +
    + +
    + + + diff --git a/documents/html/mdn-learning-area/images-images3-download.html b/documents/html/mdn-learning-area/images-images3-download.html new file mode 100644 index 0000000..c46201e --- /dev/null +++ b/documents/html/mdn-learning-area/images-images3-download.html @@ -0,0 +1,30 @@ + + + + + HTML images: Task 3 + + + + +

    Image and caption

    + + An abstract flaming fox wrapping around a blue sphere + The Firefox logo, newly abstracted for 2019! + + + diff --git a/documents/html/mdn-learning-area/images-images3.html b/documents/html/mdn-learning-area/images-images3.html new file mode 100644 index 0000000..fe2152f --- /dev/null +++ b/documents/html/mdn-learning-area/images-images3.html @@ -0,0 +1,48 @@ + + + + + HTML images: Task 3 + + + + + + + +
    + +

    Image and caption

    + + An abstract flaming fox wrapping around a blue sphere + The Firefox logo, newly abstracted for 2019! + +
    + + + + + +
    + +
    + + + diff --git a/documents/html/mdn-learning-area/images-in-html-index.html b/documents/html/mdn-learning-area/images-in-html-index.html new file mode 100644 index 0000000..6db8bdd --- /dev/null +++ b/documents/html/mdn-learning-area/images-in-html-index.html @@ -0,0 +1,17 @@ + + + + + + Images in HTML + + +

    Images in HTML

    + + The head and torso of a dinosaur skeleton; it has a large head with long sharp teeth + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/links-links1-download.html b/documents/html/mdn-learning-area/links-links1-download.html new file mode 100644 index 0000000..333f3b7 --- /dev/null +++ b/documents/html/mdn-learning-area/links-links1-download.html @@ -0,0 +1,43 @@ + + + + + Links: Task 1 + + + + + +

    Information on Whales

    + +

    For more information on our conservation activities and which Whales we study, + see our Whales page.

    + +

    If you want to ask our team more questions, feel free to email us.

    + + + + diff --git a/documents/html/mdn-learning-area/links-links1.html b/documents/html/mdn-learning-area/links-links1.html new file mode 100644 index 0000000..c5f0332 --- /dev/null +++ b/documents/html/mdn-learning-area/links-links1.html @@ -0,0 +1,52 @@ + + + + + Links: Task 1 + + + + + + + +
    + +

    Information on Whales

    + +

    For more information on our conservation activities and which Whales we study, + see our Whales page.

    + +

    If you want to ask our team more questions, feel free to email us.

    + +
    + + + + + +
    + +
    + + + diff --git a/documents/html/mdn-learning-area/links-links2-download.html b/documents/html/mdn-learning-area/links-links2-download.html new file mode 100644 index 0000000..f89ca6d --- /dev/null +++ b/documents/html/mdn-learning-area/links-links2-download.html @@ -0,0 +1,49 @@ + + + + + Links: Task 2 + + + + + +

    List path tests

    + + + +
    + +

    The bottom of the page!

    + + + + diff --git a/documents/html/mdn-learning-area/links-links2.html b/documents/html/mdn-learning-area/links-links2.html new file mode 100644 index 0000000..4b2d36d --- /dev/null +++ b/documents/html/mdn-learning-area/links-links2.html @@ -0,0 +1,92 @@ + + + + + Links: Task 2 + + + + + + + +
    +

    List path tests

    + + + +
    + +

    The bottom of the page!

    +
    + + + + + +
    + +
    + + + diff --git a/documents/html/mdn-learning-area/links-links3-download.html b/documents/html/mdn-learning-area/links-links3-download.html new file mode 100644 index 0000000..06fc124 --- /dev/null +++ b/documents/html/mdn-learning-area/links-links3-download.html @@ -0,0 +1,35 @@ + + + + + Links: Task 3 + + + + + +

    We do lots of work with Narwhals. To find out more about this work, click here.

    + +

    You can email our support team if you have any more questions — click here to do so.

    + +

    You can also click here to download our factfile, which contains lots more information, including an FAQ.

    + + + diff --git a/documents/html/mdn-learning-area/links-links3.html b/documents/html/mdn-learning-area/links-links3.html new file mode 100644 index 0000000..c55ad77 --- /dev/null +++ b/documents/html/mdn-learning-area/links-links3.html @@ -0,0 +1,50 @@ + + + + + Links: Task 3 + + + + + + + +
    +

    We do lots of work with Narwhals. To find out more about this work, click here.

    + +

    You can email our support team if you have any more questions — click here to do so.

    + +

    You can also click here to download our factfile, which contains lots more information, including an FAQ.

    +
    + + + + + +
    + +
    + + + diff --git a/documents/html/mdn-learning-area/links-narwhals.html b/documents/html/mdn-learning-area/links-narwhals.html new file mode 100644 index 0000000..33e5ef6 --- /dev/null +++ b/documents/html/mdn-learning-area/links-narwhals.html @@ -0,0 +1,40 @@ + + + + + Narwhals! + + + + + +

    Whales

    + +

    You found our Narwhals page — well done!

    + + + + diff --git a/documents/html/mdn-learning-area/links-whales.html b/documents/html/mdn-learning-area/links-whales.html new file mode 100644 index 0000000..9076f6d --- /dev/null +++ b/documents/html/mdn-learning-area/links-whales.html @@ -0,0 +1,40 @@ + + + + + Whales! + + + + + +

    Whales

    + +

    You found our Whales page — well done!

    + + + + diff --git a/documents/html/mdn-learning-area/marking-up-a-letter-finished-index.html b/documents/html/mdn-learning-area/marking-up-a-letter-finished-index.html new file mode 100644 index 0000000..93e895c --- /dev/null +++ b/documents/html/mdn-learning-area/marking-up-a-letter-finished-index.html @@ -0,0 +1,108 @@ + + + + + + Awesome science application correspondence + + + +
    + Dr. Eleanor Gaye
    + Awesome Science faculty
    + University of Awesome
    + Bobtown, CA 99999,
    + USA
    + Tel: 123-456-7890
    + Email: no_reply@example.com +
    + +

    + +
    + Miss Eileen Dover
    + 4321 Cliff Top Edge
    + Dover, CT9 XXX
    + UK +
    + +

    Re: Eileen Dover university application

    + +

    Dear Eileen,

    + +

    Thank you for your recent application to join us at the University of Awesome's science faculty to study as part of your PhD (Doctor of Philosophy) next year. I will answer your questions one by one, in the following sections.

    + +

    Starting dates

    + +

    We are happy to accommodate you starting your study with us at any time, however it would suit us better if you could start at the beginning of a semester; the start dates for each one are as follows:

    + +
      +
    • First semester:
    • +
    • Second semester:
    • +
    • Third semester:
    • +
    + +

    Please let me know if this is ok, and if so which start date you would prefer.

    + +

    You can find more information about important university dates on our website.

    + +

    Subjects of study

    + +

    At the Awesome Science Faculty, we have a pretty open-minded research facility — as long as the subjects fall somewhere in the realm of science and technology. You seem like an intelligent, dedicated researcher, and just the kind of person we'd like to have on our team. Saying that, of the ideas you submitted we were most intrigued by are as follows, in order of priority:

    + +
      +
    1. Turning H2O into wine, and the health benefits of Resveratrol (C14H12O3.)
    2. +
    3. Measuring the effect on performance of funk bassplayers at temperatures exceeding 30°C (86°F), when the audience size exponentially increases (effect of 3 × 103 increasing to 3 × 104.)
    4. +
    5. HTML, Hypertext Markup Language, and CSS, Cascading Style Sheets, constructs for representing musical scores.
    6. +
    + +

    So please can you provide more information on each of these subjects, including how long you'd expect the research to take, required staff and other resources, and anything else you think we'd need to know? Thanks.

    + +

    Exotic dance moves

    + +

    Yes, you are right! As part of my post-doctorate work, I did study exotic tribal dances. To answer your question, my favourite dances are as follows, with definitions:

    + +
    +
    Polynesian chicken dance
    +
    A little known but very influential dance dating back as far as 300 BCE, a whole village would dance around in a circle like chickens, to encourage their livestock to be "fruitful".
    +
    Icelandic brownian shuffle
    +
    Before the Icelanders developed fire as a means of getting warm, they used to practice this dance, which involved huddling close together in a circle on the floor, and shuffling their bodies around in imperceptibly tiny, very rapid movements. One of my fellow students used to say that he thought this dance inspired modern styles such as Twerking.
    +
    Arctic robot dance
    +
    An interesting example of historic misinformation, English explorers in the 1960s believed to have discovered a new dance style characterised by "robotic", stilted movements, being practiced by inhabitants of Northern Alaska and Canada. Later on however it was discovered that they were just moving like this because they were really cold.
    +
    + +

    For more of my research, see my exotic dance research page.

    + +

    Yours sincerely,

    + +

    Dr Eleanor Gaye

    + + +

    University of Awesome motto: Be awesome to each other. -- The memoirs of Bill S Preston, Esq.

    + + diff --git a/documents/html/mdn-learning-area/mdn-splash-page-finished-index.html b/documents/html/mdn-learning-area/mdn-splash-page-finished-index.html new file mode 100644 index 0000000..d0847f4 --- /dev/null +++ b/documents/html/mdn-learning-area/mdn-splash-page-finished-index.html @@ -0,0 +1,154 @@ + + + + + + Mozilla splash page + + + + +
    +

    Mozilla

    + Firefox logo +
    + +
    +
    + +

    Rocking the free web

    + +

    Mozilla are a global community of technologists, thinkers, and builders, working together to keep the Internet alive and accessible, so people worldwide can be informed contributors and creators of the Web. We believe this act of human collaboration across an open platform is essential to individual growth and our collective future.

    + +

    Click on the images below to find more information about the cool stuff Mozilla does. Red panda picture by Mathias Appel.

    +
    + + + +
    + + + + + a red panda + +
    + +
    + + diff --git a/documents/html/mdn-learning-area/mdn-splash-page-start-index.html b/documents/html/mdn-learning-area/mdn-splash-page-start-index.html new file mode 100644 index 0000000..ddc6b12 --- /dev/null +++ b/documents/html/mdn-learning-area/mdn-splash-page-start-index.html @@ -0,0 +1,141 @@ + + + + + + Mozilla splash page + + + + +
    +

    Mozilla

    + + +
    + +
    +
    + + +

    Rocking the free web

    + +

    Mozilla are a global community of technologists, thinkers, and builders, working together to keep the Internet alive and accessible, so people worldwide can be informed contributors and creators of the Web. We believe this act of human collaboration across an open platform is essential to individual growth and our collective future.

    + +

    Click on the images below to find more information about the cool stuff Mozilla does. Red panda picture by Mathias Appel.

    +
    + + + +
    + +
    + +
    + + diff --git a/documents/html/mdn-learning-area/media-embed-mediaembed1-download.html b/documents/html/mdn-learning-area/media-embed-mediaembed1-download.html new file mode 100644 index 0000000..f25f815 --- /dev/null +++ b/documents/html/mdn-learning-area/media-embed-mediaembed1-download.html @@ -0,0 +1,33 @@ + + + + + Media/embedding: Task 1 + + + + +

    Basic audio embed

    + + + + + diff --git a/documents/html/mdn-learning-area/media-embed-mediaembed1.html b/documents/html/mdn-learning-area/media-embed-mediaembed1.html new file mode 100644 index 0000000..6e7c133 --- /dev/null +++ b/documents/html/mdn-learning-area/media-embed-mediaembed1.html @@ -0,0 +1,48 @@ + + + + + Media/embedding: Task 1 + + + + + + + +
    + +

    Basic audio embed

    + + + +
    + + + + + +
    + +
    + + + diff --git a/documents/html/mdn-learning-area/media-embed-mediaembed2-download.html b/documents/html/mdn-learning-area/media-embed-mediaembed2-download.html new file mode 100644 index 0000000..555fce6 --- /dev/null +++ b/documents/html/mdn-learning-area/media-embed-mediaembed2-download.html @@ -0,0 +1,32 @@ + + + + + Media/embedding: Task 2 + + + + +

    Video embed

    + + + + + diff --git a/documents/html/mdn-learning-area/media-embed-mediaembed2.html b/documents/html/mdn-learning-area/media-embed-mediaembed2.html new file mode 100644 index 0000000..78688fd --- /dev/null +++ b/documents/html/mdn-learning-area/media-embed-mediaembed2.html @@ -0,0 +1,48 @@ + + + + + Media/embedding: Task 2 + + + + + + + +
    + +

    Video embed

    + + + +
    + + + + + +
    + +
    + + + diff --git a/documents/html/mdn-learning-area/media-embed-mediaembed3-download.html b/documents/html/mdn-learning-area/media-embed-mediaembed3-download.html new file mode 100644 index 0000000..7a46d80 --- /dev/null +++ b/documents/html/mdn-learning-area/media-embed-mediaembed3-download.html @@ -0,0 +1,32 @@ + + + + + Media/embedding: Task 3 + + + + +

    Embedding

    + + + +
    + + + + + + diff --git a/documents/html/mdn-learning-area/media-embed-mediaembed3.html b/documents/html/mdn-learning-area/media-embed-mediaembed3.html new file mode 100644 index 0000000..41edb73 --- /dev/null +++ b/documents/html/mdn-learning-area/media-embed-mediaembed3.html @@ -0,0 +1,50 @@ + + + + + Media/embedding: Task 3 + + + + + + + +
    + +

    Embedding

    + + + +
    + + + +
    + + + + + +
    + +
    + + + diff --git a/documents/html/mdn-learning-area/navigation-menu-marked-up-index.html b/documents/html/mdn-learning-area/navigation-menu-marked-up-index.html new file mode 100644 index 0000000..431e445 --- /dev/null +++ b/documents/html/mdn-learning-area/navigation-menu-marked-up-index.html @@ -0,0 +1,23 @@ + + + + + + Homepage + + + + + + + +

    Homepage

    + +

    Welcome to my exciting homepage

    + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/navigation-menu-marked-up-pictures.html b/documents/html/mdn-learning-area/navigation-menu-marked-up-pictures.html new file mode 100644 index 0000000..efad3e1 --- /dev/null +++ b/documents/html/mdn-learning-area/navigation-menu-marked-up-pictures.html @@ -0,0 +1,23 @@ + + + + + + Pictures + + + + + + + +

    Pictures

    + +

    My pictures will go here, when I start taking some.

    + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/navigation-menu-marked-up-projects.html b/documents/html/mdn-learning-area/navigation-menu-marked-up-projects.html new file mode 100644 index 0000000..65030b1 --- /dev/null +++ b/documents/html/mdn-learning-area/navigation-menu-marked-up-projects.html @@ -0,0 +1,23 @@ + + + + + + Projects + + + + + + + +

    My projects

    + +

    Welcome to my project page, showing what exciting things I am currently doing.

    + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/navigation-menu-marked-up-social.html b/documents/html/mdn-learning-area/navigation-menu-marked-up-social.html new file mode 100644 index 0000000..44cf405 --- /dev/null +++ b/documents/html/mdn-learning-area/navigation-menu-marked-up-social.html @@ -0,0 +1,23 @@ + + + + + + Social + + + + + + + +

    Social media

    + +

    Welcome to my social media page. I am currently antisocial, but will start putting my social media widgets on here when the time is right.

    + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/navigation-menu-start-index.html b/documents/html/mdn-learning-area/navigation-menu-start-index.html new file mode 100644 index 0000000..dc5a6b2 --- /dev/null +++ b/documents/html/mdn-learning-area/navigation-menu-start-index.html @@ -0,0 +1,16 @@ + + + + + + Homepage + + + + + +

    Homepage

    + +

    Welcome to my exciting homepage

    + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/navigation-menu-start-pictures.html b/documents/html/mdn-learning-area/navigation-menu-start-pictures.html new file mode 100644 index 0000000..ec2420e --- /dev/null +++ b/documents/html/mdn-learning-area/navigation-menu-start-pictures.html @@ -0,0 +1,16 @@ + + + + + + Pictures + + + + + +

    Pictures

    + +

    My pictures will go here, when I start taking some.

    + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/navigation-menu-start-projects.html b/documents/html/mdn-learning-area/navigation-menu-start-projects.html new file mode 100644 index 0000000..70e6a0e --- /dev/null +++ b/documents/html/mdn-learning-area/navigation-menu-start-projects.html @@ -0,0 +1,16 @@ + + + + + + Projects + + + + + +

    My projects

    + +

    Welcome to my project page, showing what exciting things I am currently doing.

    + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/navigation-menu-start-social.html b/documents/html/mdn-learning-area/navigation-menu-start-social.html new file mode 100644 index 0000000..9d45d6b --- /dev/null +++ b/documents/html/mdn-learning-area/navigation-menu-start-social.html @@ -0,0 +1,16 @@ + + + + + + Social + + + + + +

    Social media

    + +

    Welcome to my social media page. I am currently antisocial, but will start putting my social media widgets on here when the time is right.

    + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/other-embedding-technologies-iframe-detail.html b/documents/html/mdn-learning-area/other-embedding-technologies-iframe-detail.html new file mode 100644 index 0000000..7e237dd --- /dev/null +++ b/documents/html/mdn-learning-area/other-embedding-technologies-iframe-detail.html @@ -0,0 +1,15 @@ + + + + + + Iframe with basic details + + +

    Iframe with basic details

    + + + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/other-embedding-technologies-iframe-youtube.html b/documents/html/mdn-learning-area/other-embedding-technologies-iframe-youtube.html new file mode 100644 index 0000000..ba0bc86 --- /dev/null +++ b/documents/html/mdn-learning-area/other-embedding-technologies-iframe-youtube.html @@ -0,0 +1,13 @@ + + + + + + Iframe youtube example + + +

    Everybody loves Nyancat

    + + + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/other-embedding-technologies-object-image.html b/documents/html/mdn-learning-area/other-embedding-technologies-object-image.html new file mode 100644 index 0000000..e6ce3f3 --- /dev/null +++ b/documents/html/mdn-learning-area/other-embedding-technologies-object-image.html @@ -0,0 +1,17 @@ + + + + + + Object element example + + +

    Object element example

    + + + +

    Why oh why didn't we just use the image element?

    +
    + + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/other-embedding-technologies-object-pdf.html b/documents/html/mdn-learning-area/other-embedding-technologies-object-pdf.html new file mode 100644 index 0000000..45b1bc5 --- /dev/null +++ b/documents/html/mdn-learning-area/other-embedding-technologies-object-pdf.html @@ -0,0 +1,17 @@ + + + + + + Object element example + + +

    Object element example

    + + + +

    You don't have a PDF plugin, but you can download the PDF file.

    +
    + + + diff --git a/documents/html/mdn-learning-area/projects-index.html b/documents/html/mdn-learning-area/projects-index.html new file mode 100644 index 0000000..d48a822 --- /dev/null +++ b/documents/html/mdn-learning-area/projects-index.html @@ -0,0 +1,14 @@ + + + + + + My project page + + + +

    This is my project page

    + +

    A link to my project brief.

    + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/responsive-images-not-responsive.html b/documents/html/mdn-learning-area/responsive-images-not-responsive.html new file mode 100644 index 0000000..f64421b --- /dev/null +++ b/documents/html/mdn-learning-area/responsive-images-not-responsive.html @@ -0,0 +1,61 @@ + + + + + + Not responsive demo + + + +
    + +
    + +
    +
    +

    My website

    + +

    Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris eget venenatis ligula. Ut lacinia at dolor vitae pulvinar. Aliquam pretium dignissim eros. Integer faucibus, dui non aliquet bibendum, lectus orci lobortis odio, ornare porttitor est tellus eget velit. Nulla eros elit, malesuada id neque vel, viverra vehicula neque. Nullam auctor turpis non leo iaculis finibus. Quisque blandit arcu venenatis libero tempor, ac pulvinar ligula dapibus.

    + + Chris standing up holding his daughter Elva + +

    Suspendisse potenti. Ut in luctus eros. Mauris pulvinar vehicula aliquet. Etiam imperdiet eleifend luctus. Duis ut justo nec eros ornare consectetur. Vestibulum convallis condimentum varius. Maecenas rutrum porta varius. Phasellus volutpat sem id sagittis luctus. Morbi vitae quam vitae nisi iaculis dignissim.

    + + Elva dressed as a fairy + +

    Header image originally by Miwok.

    +
    +
    + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/responsive-images-responsive.html b/documents/html/mdn-learning-area/responsive-images-responsive.html new file mode 100644 index 0000000..5620c78 --- /dev/null +++ b/documents/html/mdn-learning-area/responsive-images-responsive.html @@ -0,0 +1,68 @@ + + + + + + Responsive HTML images demo + + + +
    + +
    + +
    +
    +

    My website

    + +

    Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris eget venenatis ligula. Ut lacinia at dolor vitae pulvinar. Aliquam pretium dignissim eros. Integer faucibus, dui non aliquet bibendum, lectus orci lobortis odio, ornare porttitor est tellus eget velit. Nulla eros elit, malesuada id neque vel, viverra vehicula neque. Nullam auctor turpis non leo iaculis finibus. Quisque blandit arcu venenatis libero tempor, ac pulvinar ligula dapibus.

    + + + + + Chris standing up holding his daughter Elva + + +

    Suspendisse potenti. Ut in luctus eros. Mauris pulvinar vehicula aliquet. Etiam imperdiet eleifend luctus. Duis ut justo nec eros ornare consectetur. Vestibulum convallis condimentum varius. Maecenas rutrum porta varius. Phasellus volutpat sem id sagittis luctus. Morbi vitae quam vitae nisi iaculis dignissim.

    + + Elva dressed as a fairy + +

    Header image originally by Miwok.

    +
    +
    + + diff --git a/documents/html/mdn-learning-area/responsive-images-srcset-resolutions.html b/documents/html/mdn-learning-area/responsive-images-srcset-resolutions.html new file mode 100644 index 0000000..280ae42 --- /dev/null +++ b/documents/html/mdn-learning-area/responsive-images-srcset-resolutions.html @@ -0,0 +1,37 @@ + + + + + + Responsive HTML images demo + + + + Elva dressed as a fairy + + + diff --git a/documents/html/mdn-learning-area/structuring-a-page-of-content-finished-index.html b/documents/html/mdn-learning-area/structuring-a-page-of-content-finished-index.html new file mode 100644 index 0000000..acdb109 --- /dev/null +++ b/documents/html/mdn-learning-area/structuring-a-page-of-content-finished-index.html @@ -0,0 +1,57 @@ + + + + + + Birdwatching + + + + + + + +
    +

    Birdwatching

    + a simple dove logo + + +
    + +
    + +
    +

    Welcome

    + +

    Welcome to our fake birdwatching site. If this were a real site, it would be the ideal place to come to learn more about birdwatching, whether you are a beginner looking to learn how to get into birding, or an expert wanting to share ideas, tips, and photos with other like-minded people.

    + +

    So don't waste time! Get what you need, then turn off that computer and get out into the great outdoors!

    +
    + + +
    + +
    + +

    This fake website example is CC0 — any part of this code may be reused in any way you wish. Original example written by Chris Mills, 2016.

    + +

    Dove icon by Lorc.

    +
    + + + diff --git a/documents/html/mdn-learning-area/structuring-a-page-of-content-start-index.html b/documents/html/mdn-learning-area/structuring-a-page-of-content-start-index.html new file mode 100644 index 0000000..6a62930 --- /dev/null +++ b/documents/html/mdn-learning-area/structuring-a-page-of-content-start-index.html @@ -0,0 +1,46 @@ + + + + + + Birdwatching + + + + + + + +

    Birdwatching

    + a simple dove logo + + + + + +

    Welcome

    + +

    Welcome to our fake birdwatching site. If this were a real site, it would be the ideal place to come to learn more about birdwatching, whether you are a beginner looking to learn how to get into birding, or an expert wanting to share ideas, tips, and photos with other like-minded people.

    + +

    So don't waste time! Get what you need, then turn off that computer and get out into the great outdoors!

    + +

    Favourite photos

    + + Small black bird, black claws, long black slender beak, links to larger version of the image + Top half of a pretty bird with bright blue plumage on neck, light colored beak, blue headdress, links to larger version of the image + Top half of a large bird with white plumage, very long curved narrow light colored break, links to larger version of the image + Large bird, mostly white plumage with black plumage on back and rear, long straight white beak, links to larger version of the image + + +

    This fake website example is CC0 — any part of this code may be reused in any way you wish. Original example written by Chris Mills, 2016.

    + +

    Dove icon by Lorc.

    + + + diff --git a/documents/html/mdn-learning-area/the-html-head-css-and-js.html b/documents/html/mdn-learning-area/the-html-head-css-and-js.html new file mode 100644 index 0000000..7d30afd --- /dev/null +++ b/documents/html/mdn-learning-area/the-html-head-css-and-js.html @@ -0,0 +1,27 @@ + + + + + + Meta examples + + + + + + + + + + + + + + +

    Meta examples

    + +

    Japanese example: ご飯が熱い。

    + + + + diff --git a/documents/html/mdn-learning-area/the-html-head-meta-example.html b/documents/html/mdn-learning-area/the-html-head-meta-example.html new file mode 100644 index 0000000..daf4331 --- /dev/null +++ b/documents/html/mdn-learning-area/the-html-head-meta-example.html @@ -0,0 +1,22 @@ + + + + + + Meta examples + + + + + + + + + + + +

    Meta examples

    + +

    Japanese example: ご飯が熱い。

    + + diff --git a/documents/html/mdn-learning-area/the-html-head-title-example.html b/documents/html/mdn-learning-area/the-html-head-title-example.html new file mode 100644 index 0000000..3355707 --- /dev/null +++ b/documents/html/mdn-learning-area/the-html-head-title-example.html @@ -0,0 +1,11 @@ + + + + + + <title> element + + +

    <h1> element

    + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/video-and-audio-content-extra-video-features.html b/documents/html/mdn-learning-area/video-and-audio-content-extra-video-features.html new file mode 100644 index 0000000..1070d52 --- /dev/null +++ b/documents/html/mdn-learning-area/video-and-audio-content-extra-video-features.html @@ -0,0 +1,24 @@ + + + + + + Extra video features example + + + +

    Extra video features example

    + + + + diff --git a/documents/html/mdn-learning-area/video-and-audio-content-multiple-audio-formats.html b/documents/html/mdn-learning-area/video-and-audio-content-multiple-audio-formats.html new file mode 100644 index 0000000..da77e1c --- /dev/null +++ b/documents/html/mdn-learning-area/video-and-audio-content-multiple-audio-formats.html @@ -0,0 +1,17 @@ + + + + + + Multiple format audio example + + +

    Below is an audio player that will play in all modern browsers

    + + + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/video-and-audio-content-multiple-video-formats-no-controls.html b/documents/html/mdn-learning-area/video-and-audio-content-multiple-video-formats-no-controls.html new file mode 100644 index 0000000..d254812 --- /dev/null +++ b/documents/html/mdn-learning-area/video-and-audio-content-multiple-video-formats-no-controls.html @@ -0,0 +1,17 @@ + + + + + + Multiple format video example + + +

    Below is a video that will play in all modern browsers

    + + + + diff --git a/documents/html/mdn-learning-area/video-and-audio-content-multiple-video-formats.html b/documents/html/mdn-learning-area/video-and-audio-content-multiple-video-formats.html new file mode 100644 index 0000000..2d838ef --- /dev/null +++ b/documents/html/mdn-learning-area/video-and-audio-content-multiple-video-formats.html @@ -0,0 +1,17 @@ + + + + + + Multiple format video example + + +

    Below is a video that will play in all modern browsers

    + + + + \ No newline at end of file diff --git a/documents/html/mdn-learning-area/video-and-audio-content-simple-video.html b/documents/html/mdn-learning-area/video-and-audio-content-simple-video.html new file mode 100644 index 0000000..07cde8b --- /dev/null +++ b/documents/html/mdn-learning-area/video-and-audio-content-simple-video.html @@ -0,0 +1,15 @@ + + + + + + Simple video example + + +

    Below is a simple video example

    + + + + \ No newline at end of file diff --git a/documents/html/wpt/text-level-semantics-historical.html b/documents/html/wpt/text-level-semantics-historical.html new file mode 100644 index 0000000..7fe83a9 --- /dev/null +++ b/documents/html/wpt/text-level-semantics-historical.html @@ -0,0 +1,29 @@ + +Historical text-level element features should not be supported + + +
    + diff --git a/documents/html/wpt/the-a-element-a-click-handler-with-null-browsing-context-crash.html b/documents/html/wpt/the-a-element-a-click-handler-with-null-browsing-context-crash.html new file mode 100644 index 0000000..976dc3a --- /dev/null +++ b/documents/html/wpt/the-a-element-a-click-handler-with-null-browsing-context-crash.html @@ -0,0 +1,21 @@ + +HTMLAnchorElement.onclick with null browsing context + + + + + + + + diff --git a/documents/html/wpt/the-a-element-a-download-click-404.html b/documents/html/wpt/the-a-element-a-download-click-404.html new file mode 100644 index 0000000..3c8adc0 --- /dev/null +++ b/documents/html/wpt/the-a-element-a-download-click-404.html @@ -0,0 +1,25 @@ + + +Clicking on an <a> element with a download attribute and href that leads to 404 should not navigate + + + + + + + diff --git a/documents/html/wpt/the-a-element-a-download-click-redirect-to-javascript.html b/documents/html/wpt/the-a-element-a-download-click-redirect-to-javascript.html new file mode 100644 index 0000000..09f63b6 --- /dev/null +++ b/documents/html/wpt/the-a-element-a-download-click-redirect-to-javascript.html @@ -0,0 +1,29 @@ + + +Clicking on an <a> element with a download attribute and href that redirects to 'javascript:' should not navigate or execute + + + + + + + diff --git a/documents/html/wpt/the-a-element-a-download-click.html b/documents/html/wpt/the-a-element-a-download-click.html new file mode 100644 index 0000000..22d329f --- /dev/null +++ b/documents/html/wpt/the-a-element-a-download-click.html @@ -0,0 +1,33 @@ + + +Clicking on an <a> element with a download attribute must not throw an exception + + + + + + + + + diff --git a/documents/html/wpt/the-a-element-a-stringifier.html b/documents/html/wpt/the-a-element-a-stringifier.html new file mode 100644 index 0000000..1085a74 --- /dev/null +++ b/documents/html/wpt/the-a-element-a-stringifier.html @@ -0,0 +1,16 @@ + +HTMLAnchorElement stringifier + + + + + +
    + diff --git a/documents/html/wpt/the-a-element-a-text-getter-01.html b/documents/html/wpt/the-a-element-a-text-getter-01.html new file mode 100644 index 0000000..e0bb73b --- /dev/null +++ b/documents/html/wpt/the-a-element-a-text-getter-01.html @@ -0,0 +1,34 @@ + +HTMLAnchorElement.text getting + + + + +
    + +
    +a b c +a b c +a b c +a c + +
    + diff --git a/documents/html/wpt/the-a-element-a-text-setter-01.html b/documents/html/wpt/the-a-element-a-text-setter-01.html new file mode 100644 index 0000000..879a9e3 --- /dev/null +++ b/documents/html/wpt/the-a-element-a-text-setter-01.html @@ -0,0 +1,41 @@ + +HTMLAnchorElement.text setting + + + + +
    +
    +a b c +a c +a b c + +
    + diff --git a/documents/html/wpt/the-a-element-a-type-historical.html b/documents/html/wpt/the-a-element-a-type-historical.html new file mode 100644 index 0000000..a6ded3b --- /dev/null +++ b/documents/html/wpt/the-a-element-a-type-historical.html @@ -0,0 +1,22 @@ + +The type attribute is purely advisory + + + + + + +click me + + diff --git a/documents/html/wpt/the-b-element-b-usage-notref.html b/documents/html/wpt/the-b-element-b-usage-notref.html new file mode 100644 index 0000000..3d3c46a --- /dev/null +++ b/documents/html/wpt/the-b-element-b-usage-notref.html @@ -0,0 +1,6 @@ + + +HTML Reference File + + +

    You enter a small room. Your sword glows brighter. A rat scurries past the corner wall.

    diff --git a/documents/html/wpt/the-b-element-b-usage.html b/documents/html/wpt/the-b-element-b-usage.html new file mode 100644 index 0000000..ff2105d --- /dev/null +++ b/documents/html/wpt/the-b-element-b-usage.html @@ -0,0 +1,8 @@ + + +HTML test: b - highlight keywords + + + + +

    You enter a small room. Your sword glows brighter. A rat scurries past the corner wall.

    diff --git a/documents/html/wpt/the-bdi-element-bdi-auto-dir-default-ref.html b/documents/html/wpt/the-bdi-element-bdi-auto-dir-default-ref.html new file mode 100644 index 0000000..eff61bb --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-auto-dir-default-ref.html @@ -0,0 +1,36 @@ + + + + + + + + + +

    Test passes if the two boxes below look exactly the same.

    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. +
    +
    +
    ‭[:)], [+- a ב], [d ג 1]...‬
    +
    ‭...[d ג 1] ,[+- a ב] ,[:)]‬
    +
    +
    +
    ‭[:)], [+- a ב], [d ג 1]...‬
    +
    ‭...[d ג 1] ,[+- a ב] ,[:)]‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-auto-dir-default.html b/documents/html/wpt/the-bdi-element-bdi-auto-dir-default.html new file mode 100644 index 0000000..e658500 --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-auto-dir-default.html @@ -0,0 +1,46 @@ + + + + + HTML Test: BDI: has dir=auto by default + + + + + + + + +

    Test passes if the two boxes below look exactly the same.

    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. + In each DIV of the test: + - the first BDI, having no characters with strong direction, should be LTR by default; + - the second BDI, having an LTR character first, should be LTR by default; + - the third BDI, having an RTL character first, should be RTL by default. +
    +
    +
    [:)], [+- a ב], [1 ג d]...
    +
    [:)], [+- a ב], [1 ג d]...
    +
    +
    +
    ‭[:)], [+- a ב], [d ג 1]...‬
    +
    ‭...[d ג 1] ,[+- a ב] ,[:)]‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-missing-pdf-ref.html b/documents/html/wpt/the-bdi-element-bdi-neutral-missing-pdf-ref.html new file mode 100644 index 0000000..b4d44c5 --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-missing-pdf-ref.html @@ -0,0 +1,44 @@ + + + + + + + + + +

    Test passes if the two boxes below look exactly the same.

    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. +
    +
    +
    ‭cbאde...‬
    +
    ‭cbאde...‬
    +
    ‭cbאde...‬
    +
    ‭...הדaבג‬
    +
    ‭...הדaבג‬
    +
    ‭...הדaבג‬
    +
    +
    +
    ‭cbאde...‬
    +
    ‭cbאde...‬
    +
    ‭cbאde...‬
    +
    ‭...הדaבג‬
    +
    ‭...הדaבג‬
    +
    ‭...הדaבג‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-missing-pdf.html b/documents/html/wpt/the-bdi-element-bdi-neutral-missing-pdf.html new file mode 100644 index 0000000..1ce9da6 --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-missing-pdf.html @@ -0,0 +1,56 @@ + + + + + HTML Test: BDI: neutral when contains LRO or RLO without PDF + + + + + + + + +

    Test passes if the two boxes below look exactly the same.

    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‮ - the RLO (right-to-left-override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO and RLO. + If the BDI in the test's first DIV were a SPAN, the RLO it contains, not being closed by a + PDF, would visually reorder the de into ed. +
    +
    +
    א‮bcde...
    +
    א‮bcde...
    +
    א‮bcde...
    +
    a‭בגדה...
    +
    a‭בגדה...
    +
    a‭בגדה...
    +
    +
    +
    ‭cbאde...‬
    +
    ‭cbאde...‬
    +
    ‭cbאde...‬
    +
    ‭...הדaבג‬
    +
    ‭...הדaבג‬
    +
    ‭...הדaבג‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-nested-ref.html b/documents/html/wpt/the-bdi-element-bdi-neutral-nested-ref.html new file mode 100644 index 0000000..d5d7674 --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-nested-ref.html @@ -0,0 +1,44 @@ + + + + + + + + + +

    Test passes if the two boxes below look exactly the same.

    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. +
    +
    +
    ‭1 + [a + [3 + [b + 4] + ב] + 2] + א‬
    +
    ‭1 + [a + [3 + [b + 4] + ב] + 2] + א‬
    +
    ‭1 + [a + [3 + [b + 4] + ב] + 2] + א‬
    +
    ‭a + [1 + [b + [3 + ב] + 2] + א] + 0‬
    +
    ‭a + [1 + [b + [3 + ב] + 2] + א] + 0‬
    +
    ‭a + [1 + [b + [3 + ב] + 2] + א] + 0‬
    +
    +
    +
    ‭1 + [a + [3 + [b + 4] + ב] + 2] + א‬
    +
    ‭1 + [a + [3 + [b + 4] + ב] + 2] + א‬
    +
    ‭1 + [a + [3 + [b + 4] + ב] + 2] + א‬
    +
    ‭a + [1 + [b + [3 + ב] + 2] + א] + 0‬
    +
    ‭a + [1 + [b + [3 + ב] + 2] + א] + 0‬
    +
    ‭a + [1 + [b + [3 + ב] + 2] + א] + 0‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-nested.html b/documents/html/wpt/the-bdi-element-bdi-neutral-nested.html new file mode 100644 index 0000000..1585768 --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-nested.html @@ -0,0 +1,52 @@ + + + + + HTML Test: BDI: neutral when nested + + + + + + + + +

    Test passes if the two boxes below look exactly the same.

    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. +
    +
    +
    א + [a + [ב + [b + 4] + 3] + 2] + 1
    +
    א + [a + [ב + [b + 4] + 3] + 2] + 1
    +
    א + [a + [ב + [b + 4] + 3] + 2] + 1
    +
    a + [א + [b + [ב + 3] + 2] + 1] + 0
    +
    a + [א + [b + [ב + 3] + 2] + 1] + 0
    +
    a + [א + [b + [ב + 3] + 2] + 1] + 0
    +
    +
    +
    ‭1 + [a + [3 + [b + 4] + ב] + 2] + א‬
    +
    ‭1 + [a + [3 + [b + 4] + ב] + 2] + א‬
    +
    ‭1 + [a + [3 + [b + 4] + ב] + 2] + א‬
    +
    ‭a + [1 + [b + [3 + ב] + 2] + א] + 0‬
    +
    ‭a + [1 + [b + [3 + ב] + 2] + א] + 0‬
    +
    ‭a + [1 + [b + [3 + ב] + 2] + א] + 0‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-number-ref.html b/documents/html/wpt/the-bdi-element-bdi-neutral-number-ref.html new file mode 100644 index 0000000..df7af77 --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-number-ref.html @@ -0,0 +1,44 @@ + + + + + + + + + +

    Test passes if the two boxes below look exactly the same.

    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. +
    +
    +
    ‭א - [1]...‬
    +
    ‭א - [1]...‬
    +
    ‭א - [1]...‬
    +
    ‭...[1] - a‬
    +
    ‭...[1] - a‬
    +
    ‭...[1] - a‬
    +
    +
    +
    ‭א - [1]...‬
    +
    ‭א - [1]...‬
    +
    ‭א - [1]...‬
    +
    ‭...[1] - a‬
    +
    ‭...[1] - a‬
    +
    ‭...[1] - a‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-number.html b/documents/html/wpt/the-bdi-element-bdi-neutral-number.html new file mode 100644 index 0000000..37e467c --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-number.html @@ -0,0 +1,53 @@ + + + + + HTML Test: BDI: neutral when number + + + + + + + + +

    Test passes if the two boxes below look exactly the same.

    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. + If the BDI in the test's first DIV were a SPAN, the 1 inside it would be visually ordered + to the left of the א. +
    +
    +
    א - [1]...
    +
    א - [1]...
    +
    א - [1]...
    +
    a - [1]...
    +
    a - [1]...
    +
    a - [1]...
    +
    +
    +
    ‭א - [1]...‬
    +
    ‭א - [1]...‬
    +
    ‭א - [1]...‬
    +
    ‭...[1] - a‬
    +
    ‭...[1] - a‬
    +
    ‭...[1] - a‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-separate-ref.html b/documents/html/wpt/the-bdi-element-bdi-neutral-separate-ref.html new file mode 100644 index 0000000..ec8e346 --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-separate-ref.html @@ -0,0 +1,36 @@ + + + + + + + + + +

    Test passes if the two boxes below look exactly the same.

    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. +
    +
    +
    ‭א [1 b] c [d ה] ו...‬
    +
    ‭...f [e ד] ג [ב 1] a‬
    +
    +
    +
    ‭א [1 b] c [d ה] ו...‬
    +
    ‭...f [e ד] ג [ב 1] a‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-separate.html b/documents/html/wpt/the-bdi-element-bdi-neutral-separate.html new file mode 100644 index 0000000..7bb8a20 --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-separate.html @@ -0,0 +1,47 @@ + + + + + HTML Test: BDI: paragraph-level container + + + + + + + + +

    Test passes if the two boxes below look exactly the same.

    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. + If the BDIs in the test's first DIV were just SPANs, the א would appear between the 1 + and the b, and the ו between the d and the ה. +
    +
    +
    א [1 b] c [d ה] ו...
    +
    a [1 ב] ג [ד e] f...
    +
    +
    +
    ‭א [1 b] c [d ה] ו...‬
    +
    ‭...f [e ד] ג [ב 1] a‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-to-another-bdi-1-ref.html b/documents/html/wpt/the-bdi-element-bdi-neutral-to-another-bdi-1-ref.html new file mode 100644 index 0000000..c0f323e --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-to-another-bdi-1-ref.html @@ -0,0 +1,47 @@ + + + + + + + + + + +
    Test passes if the two boxes below look exactly the same.
    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. +
    +
    +
    ‭[א] > [ב]...‬
    +
    ‭[א] > [ב]...‬
    +
    ‭[א] > [ב]...‬
    +
    ‭...[b] < [a]‬
    +
    ‭...[b] < [a]‬
    +
    ‭...[b] < [a]‬
    +
    +
    +
    ‭[א] > [ב]...‬
    +
    ‭[א] > [ב]...‬
    +
    ‭[א] > [ב]...‬
    +
    ‭...[b] < [a]‬
    +
    ‭...[b] < [a]‬
    +
    ‭...[b] < [a]‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-to-another-bdi-1.html b/documents/html/wpt/the-bdi-element-bdi-neutral-to-another-bdi-1.html new file mode 100644 index 0000000..8221207 --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-to-another-bdi-1.html @@ -0,0 +1,58 @@ + + + + + HTML Test: BDI: neutral to another BDI + + + + + + + + + +
    Test passes if the two boxes below look exactly the same.
    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. + If the BDIs in the test's first DIV were SPANs, the ב would be rendered to the left + of the א. +
    +
    +
    [א] > [ב]...
    +
    [א] > [ב]...
    +
    [א] > [ב]...
    +
    [a] > [b]...
    +
    [a] > [b]...
    +
    [a] > [b]...
    +
    +
    +
    ‭[א] > [ב]...‬
    +
    ‭[א] > [ב]...‬
    +
    ‭[א] > [ב]...‬
    +
    ‭...[b] < [a]‬
    +
    ‭...[b] < [a]‬
    +
    ‭...[b] < [a]‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-to-another-bdi-2-ref.html b/documents/html/wpt/the-bdi-element-bdi-neutral-to-another-bdi-2-ref.html new file mode 100644 index 0000000..9aef97c --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-to-another-bdi-2-ref.html @@ -0,0 +1,47 @@ + + + + + + + + + + +
    Test passes if the two boxes below look exactly the same.
    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. +
    +
    +
    ‭אב...‬
    +
    ‭אב...‬
    +
    ‭אב...‬
    +
    ‭...ba‬
    +
    ‭...ba‬
    +
    ‭...ba‬
    +
    +
    +
    ‭אב...‬
    +
    ‭אב...‬
    +
    ‭אב...‬
    +
    ‭...ba‬
    +
    ‭...ba‬
    +
    ‭...ba‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-to-another-bdi-2.html b/documents/html/wpt/the-bdi-element-bdi-neutral-to-another-bdi-2.html new file mode 100644 index 0000000..85aec46 --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-to-another-bdi-2.html @@ -0,0 +1,59 @@ + + + + + HTML Test: BDI: neutral to another immediately following BDI + + + + + + + + + +
    Test passes if the two boxes below look exactly the same.
    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. + If the BDIs in the test's first DIV were SPANs, the ב would be rendered to the left of + the א. +
    +
    +
    אב...
    +
    אב...
    +
    אב...
    +
    ab...
    +
    ab...
    +
    ab...
    +
    +
    +
    ‭אב...‬
    +
    ‭אב...‬
    +
    ‭אב...‬
    +
    ‭...ba‬
    +
    ‭...ba‬
    +
    ‭...ba‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-following-1-ref.html b/documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-following-1-ref.html new file mode 100644 index 0000000..a34d09b --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-following-1-ref.html @@ -0,0 +1,45 @@ + + + + + + + + + + +

    Test passes if the two boxes below look exactly the same.

    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. +
    +
    +
    ‭[א] > ב...‬
    +
    ‭[א] > ב...‬
    +
    ‭[א] > ב...‬
    +
    ‭...b < [a]‬
    +
    ‭...b < [a]‬
    +
    ‭...b < [a]‬
    +
    +
    +
    ‭[א] > ב...‬
    +
    ‭[א] > ב...‬
    +
    ‭[א] > ב...‬
    +
    ‭...b < [a]‬
    +
    ‭...b < [a]‬
    +
    ‭...b < [a]‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-following-1.html b/documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-following-1.html new file mode 100644 index 0000000..76da57c --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-following-1.html @@ -0,0 +1,54 @@ + + + + + HTML Test: BDI: neutral to following letter + + + + + + + + + +

    Test passes if the two boxes below look exactly the same.

    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. + If the BDI in the test's first DIV were a SPAN, the ב would be rendered to the left + of the א +
    +
    +
    [א] > ב...
    +
    [א] > ב...
    +
    [א] > ב...
    +
    [a] > b...
    +
    [a] > b...
    +
    [a] > b...
    +
    +
    +
    ‭[א] > ב...‬
    +
    ‭[א] > ב...‬
    +
    ‭[א] > ב...‬
    +
    ‭...b < [a]‬
    +
    ‭...b < [a]‬
    +
    ‭...b < [a]‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-following-2-ref.html b/documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-following-2-ref.html new file mode 100644 index 0000000..80f3618 --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-following-2-ref.html @@ -0,0 +1,45 @@ + + + + + + + + + + +

    Test passes if the two boxes below look exactly the same.

    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. +
    +
    +
    ‭אב...‬
    +
    ‭אב...‬
    +
    ‭אב...‬
    +
    ‭...ba‬
    +
    ‭...ba‬
    +
    ‭...ba‬
    +
    +
    +
    ‭אב...‬
    +
    ‭אב...‬
    +
    ‭אב...‬
    +
    ‭...ba‬
    +
    ‭...ba‬
    +
    ‭...ba‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-following-2.html b/documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-following-2.html new file mode 100644 index 0000000..ce41983 --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-following-2.html @@ -0,0 +1,54 @@ + + + + + HTML Test: BDI: neutral to immediately following letter + + + + + + + + + +

    Test passes if the two boxes below look exactly the same.

    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. + If the BDI in the following DIV were a SPAN, the ב would be rendered to the left + of the א +
    +
    +
    אב...
    +
    אב...
    +
    אב...
    +
    ab...
    +
    ab...
    +
    ab...
    +
    +
    +
    ‭אב...‬
    +
    ‭אב...‬
    +
    ‭אב...‬
    +
    ‭...ba‬
    +
    ‭...ba‬
    +
    ‭...ba‬
    +
    + + diff --git a/documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-preceding-1-ref.html b/documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-preceding-1-ref.html new file mode 100644 index 0000000..5e39eab --- /dev/null +++ b/documents/html/wpt/the-bdi-element-bdi-neutral-to-letter-preceding-1-ref.html @@ -0,0 +1,45 @@ + + + + + + + + + + +

    Test passes if the two boxes below look exactly the same.

    +
    + Key to entities used below: + א ... ו - The first six Hebrew letters (strongly RTL). + ‭ - The LRO (left-to-right override) formatting character. + ‬ - The PDF (pop directional formatting) formatting character; closes LRO. +
    +
    +
    ‭א > [ב]...‬
    +
    ‭א > [ב]...‬
    +
    ‭א > [ב]...‬
    +
    ‭...[b] < a‬
    +
    ‭...[b] < a‬
    +
    ‭...[b] < a‬
    +
    +
    +
    ‭א > [ב]...‬
    +
    ‭א > [ב]...‬
    +
    ‭א > [ב]...‬
    +
    ‭...[b] < a‬
    +
    ‭...[b] < a‬
    +
    ‭...[b] < a‬
    +
    + + diff --git a/scrape/html_open_sources.py b/scrape/html_open_sources.py new file mode 100644 index 0000000..2cc2a5e --- /dev/null +++ b/scrape/html_open_sources.py @@ -0,0 +1,288 @@ +#!/usr/bin/env python3 +""" +Scrape standalone HTML files from openly-licensed repositories for the +DocSpec test corpus. + +Sources: + 1. MDN Learning Area — CC-BY-SA-2.5 (github.com/mdn/learning-area) + 2. W3C Web Platform Tests — W3C-20150513 (github.com/web-platform-tests/wpt) + 3. HTML5 Boilerplate — MIT (github.com/h5bp/html5-boilerplate) + +Strategy: sparse-clone via git (bypasses GitHub API rate limits), then copy +and validate the HTML files into documents/html//. + +Usage: + scrape/venv/bin/python scrape/html_open_sources.py +""" + +import shutil +import subprocess +import sys +import time +from pathlib import Path +import requests + +sys.path.insert(0, str(Path(__file__).parent)) +from utils import sanitize_filename # noqa: E402 + +REPO_ROOT = Path(__file__).parent.parent +DOCUMENTS_DIR = REPO_ROOT / "documents" / "html" +CLONE_ROOT = Path("/tmp/docspec-html-clones") +MIN_HTML_BYTES = 200 + +SESSION = requests.Session() +SESSION.headers["User-Agent"] = ( + "DocSpec-Corpus-Scraper/1.0 (https://github.com/docspec/documents)" +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def is_valid_html(content: bytes) -> bool: + text = content[:512].decode("utf-8", errors="replace").lstrip() + lower = text.lower() + return lower.startswith(" bool: + dest.parent.mkdir(parents=True, exist_ok=True) + if (dest / ".git").exists(): + print(f" Already cloned: {dest}") + return True + print(f" git clone {url} → {dest}") + r = subprocess.run( + [ + "git", + "clone", + "--depth", + "1", + "--filter=blob:none", + "--sparse", + url, + str(dest), + ], + capture_output=True, + timeout=120, + env={ + **__import__("os").environ, + "GIT_TERMINAL_PROMPT": "0", + 
"GCM_INTERACTIVE": "never", + }, + ) + if r.returncode != 0: + print(f" Clone failed: {r.stderr.decode()[:200]}") + return False + r = subprocess.run( + ["git", "sparse-checkout", "add", *subdirs], + capture_output=True, + timeout=120, + cwd=dest, + env={ + **__import__("os").environ, + "GIT_TERMINAL_PROMPT": "0", + "GCM_INTERACTIVE": "never", + }, + ) + if r.returncode != 0: + print(f" Sparse-checkout failed: {r.stderr.decode()[:200]}") + return False + return True + + +def unique_dest(out_dir: Path, rel_path: str, seen: set[str]) -> Path: + parts = [p for p in rel_path.replace("\\", "/").split("/") if p] + base = ( + sanitize_filename("-".join(parts[-2:])) + if len(parts) >= 2 + else sanitize_filename(parts[-1]) + ) + stem, _, ext = base.rpartition(".") + ext = ext or "html" + candidate = f"{stem}.{ext}" + n = 0 + while candidate in seen: + n += 1 + candidate = f"{stem}-{n}.{ext}" + seen.add(candidate) + return out_dir / candidate + + +def copy_html_files( + src_dir: Path, rel_prefix: str, out_dir: Path, seen: set[str], limit: int +) -> int: + out_dir.mkdir(parents=True, exist_ok=True) + search_dir = src_dir / rel_prefix if rel_prefix else src_dir + if not search_dir.exists(): + print(f" Source dir not found: {search_dir}") + return 0 + count = 0 + for src in sorted(search_dir.rglob("*.html")): + if count >= limit: + break + content = src.read_bytes() + if len(content) < MIN_HTML_BYTES or not is_valid_html(content): + continue + rel = str(src.relative_to(src_dir)) + dest = unique_dest(out_dir, rel, seen) + shutil.copy2(src, dest) + count += 1 + return count + + +# --------------------------------------------------------------------------- +# Source 1 — MDN Learning Area (CC-BY-SA-2.5) +# --------------------------------------------------------------------------- + +MDN_HTML_SUBDIRS = [ + "html/introduction-to-html", + "html/multimedia-and-embedding", + "html/tables", + "html/advanced-text-formatting", + "html/forms", + "html/css-and-js", +] + + +def 
scrape_mdn(target: int = 80) -> int: + print("\n── MDN Learning Area (CC-BY-SA-2.5) ──────────────────────────────") + clone_dir = CLONE_ROOT / "mdn-learning-area" + if not git_sparse_clone( + "https://github.com/mdn/learning-area.git", + clone_dir, + MDN_HTML_SUBDIRS, + ): + return 0 + + out_dir = DOCUMENTS_DIR / "mdn-learning-area" + out_dir.mkdir(parents=True, exist_ok=True) + seen: set[str] = {p.name for p in out_dir.glob("*.html")} + + count = 0 + for subdir in MDN_HTML_SUBDIRS: + if count >= target: + break + n = copy_html_files(clone_dir, subdir, out_dir, seen, limit=target - count) + print(f" {subdir}: {n} files") + count += n + + print(f" MDN total: {count}") + return count + + +# --------------------------------------------------------------------------- +# Source 2 — W3C Web Platform Tests (W3C-20150513) +# --------------------------------------------------------------------------- + +WPT_HTML_SUBDIRS = [ + "html/semantics/text-level-semantics", + "html/semantics/grouping-content", + "html/semantics/sections", +] + + +def scrape_wpt(target: int = 30) -> int: + print("\n── W3C Web Platform Tests (W3C-20150513) ─────────────────────────") + clone_dir = CLONE_ROOT / "wpt" + if not git_sparse_clone( + "https://github.com/web-platform-tests/wpt.git", + clone_dir, + WPT_HTML_SUBDIRS, + ): + return 0 + + out_dir = DOCUMENTS_DIR / "wpt" + out_dir.mkdir(parents=True, exist_ok=True) + seen: set[str] = {p.name for p in out_dir.glob("*.html")} + + count = 0 + for subdir in WPT_HTML_SUBDIRS: + if count >= target: + break + n = copy_html_files(clone_dir, subdir, out_dir, seen, limit=target - count) + print(f" {subdir}: {n} files") + count += n + + print(f" WPT total: {count}") + return count + + +# --------------------------------------------------------------------------- +# Source 3 — HTML5 Boilerplate (MIT) — direct raw downloads +# --------------------------------------------------------------------------- + +H5BP_URLS: list[tuple[str, str]] = [ + ( + 
"https://raw.githubusercontent.com/h5bp/html5-boilerplate/main/src/index.html", + "h5bp-main-index.html", + ), + ( + "https://raw.githubusercontent.com/h5bp/html5-boilerplate/v7.3.0/dist/index.html", + "h5bp-v7-index.html", + ), + ( + "https://raw.githubusercontent.com/h5bp/html5-boilerplate/v6.1.0/dist/index.html", + "h5bp-v6-index.html", + ), +] + + +def scrape_h5bp() -> int: + print("\n── HTML5 Boilerplate (MIT) ────────────────────────────────────────") + out_dir = DOCUMENTS_DIR / "html5-boilerplate" + out_dir.mkdir(parents=True, exist_ok=True) + count = 0 + for raw_url, filename in H5BP_URLS: + dest = out_dir / filename + if dest.exists(): + count += 1 + continue + print(f" GET {filename}") + try: + time.sleep(0.5) + resp = SESSION.get(raw_url, timeout=30) + resp.raise_for_status() + content = resp.content + if len(content) >= MIN_HTML_BYTES and is_valid_html(content): + dest.write_bytes(content) + count += 1 + else: + print(f" Rejected: {filename}") + except requests.RequestException as exc: + print(f" Error: {exc}") + print(f" H5BP total: {count}") + return count + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + + +def count_html() -> int: + return len(list(DOCUMENTS_DIR.rglob("*.html"))) + + +def main() -> None: + print("=" * 65) + print(" DocSpec HTML Corpus Scraper") + print("=" * 65) + CLONE_ROOT.mkdir(parents=True, exist_ok=True) + + scrape_mdn(target=80) + scrape_wpt(target=30) + scrape_h5bp() + + on_disk = count_html() + print(f"\n{'=' * 65}") + print(f"Total HTML files: {on_disk}") + if on_disk < 100: + print(f"⚠️ Only {on_disk} files — check network connectivity and retry") + sys.exit(1) + print(f"✅ {on_disk} ≥ 100 files") + + +if __name__ == "__main__": + main() From 4c2472bb3e4cb57b4f51ab9a3504209222a9f2bc Mon Sep 17 00:00:00 2001 From: Stephan Meijer Date: Wed, 1 Apr 2026 17:52:08 +0200 Subject: [PATCH 08/14] feat(markdown): 
add Markdown corpus from open-source repositories --- ATTRIBUTION.json | 33 + .../commonmark/about-this-document.md | 37 + documents/markdown/commonmark/atx-headings.md | 19 + documents/markdown/commonmark/autolinks.md | 188 +++ .../markdown/commonmark/backslash-escapes.md | 139 ++ documents/markdown/commonmark/blank-lines.md | 45 + documents/markdown/commonmark/block-quotes.md | 430 ++++++ .../commonmark/characters-and-lines.md | 52 + documents/markdown/commonmark/code-spans.md | 234 +++ .../container-blocks-and-leaf-blocks.md | 13 + .../emphasis-and-strong-emphasis.md | 1365 +++++++++++++++++ ...entity-and-numeric-character-references.md | 212 +++ .../markdown/commonmark/fenced-code-blocks.md | 427 ++++++ documents/markdown/commonmark/foo.md | 34 + .../markdown/commonmark/hard-line-breaks.md | 151 ++ documents/markdown/commonmark/html-blocks.md | 822 ++++++++++ documents/markdown/commonmark/images.md | 228 +++ .../commonmark/indented-code-blocks.md | 201 +++ .../commonmark/insecure-characters.md | 7 + .../commonmark/link-reference-definitions.md | 356 +++++ documents/markdown/commonmark/links.md | 1071 +++++++++++++ documents/markdown/commonmark/list-items.md | 1120 ++++++++++++++ documents/markdown/commonmark/lists.md | 650 ++++++++ documents/markdown/commonmark/overview.md | 39 + documents/markdown/commonmark/paragraphs.md | 111 ++ .../commonmark/phase-1-block-structure.md | 143 ++ .../commonmark/phase-2-inline-structure.md | 169 ++ documents/markdown/commonmark/precedence.md | 27 + documents/markdown/commonmark/raw-html.md | 277 ++++ .../markdown/commonmark/setext-headings.md | 417 +++++ .../markdown/commonmark/soft-line-breaks.md | 36 + documents/markdown/commonmark/tabs.md | 137 ++ .../markdown/commonmark/textual-content.md | 36 + .../markdown/commonmark/thematic-breaks.md | 225 +++ .../markdown/commonmark/what-is-markdown.md | 93 ++ .../commonmark/why-is-a-spec-needed.md | 154 ++ .../markdown/docusaurus/configuration.md | 294 ++++ 
.../markdown/docusaurus/creating-pages.md | 140 ++ documents/markdown/docusaurus/deployment.md | 942 ++++++++++++ .../markdown/docusaurus/docs-create-doc.md | 202 +++ .../markdown/docusaurus/docs-introduction.md | 120 ++ documents/markdown/docusaurus/installation.md | 194 +++ .../markdown-features-admonitions.md | 420 +++++ .../docusaurus/markdown-features-assets.md | 235 +++ .../markdown-features-code-blocks.md | 848 ++++++++++ .../docusaurus/markdown-features-intro.md | 235 +++ .../docusaurus/markdown-features-links.md | 56 + .../docusaurus/markdown-features-react.md | 373 +++++ .../docusaurus/markdown-features-tabs.md | 378 +++++ documents/markdown/rust-book/appendix-00.md | 4 + .../rust-book/appendix-01-keywords.md | 140 ++ .../rust-book/appendix-02-operators.md | 206 +++ .../rust-book/appendix-03-derivable-traits.md | 185 +++ .../appendix-04-useful-development-tools.md | 169 ++ .../rust-book/appendix-05-editions.md | 59 + .../rust-book/appendix-06-translation.md | 32 + .../rust-book/appendix-07-nightly-rust.md | 206 +++ .../rust-book/ch00-00-introduction.md | 201 +++ .../rust-book/ch01-00-getting-started.md | 8 + .../rust-book/ch01-01-installation.md | 177 +++ .../markdown/rust-book/ch01-02-hello-world.md | 214 +++ .../markdown/rust-book/ch01-03-hello-cargo.md | 261 ++++ .../ch02-00-guessing-game-tutorial.md | 951 ++++++++++++ .../ch03-00-common-programming-concepts.md | 23 + .../ch03-01-variables-and-mutability.md | 192 +++ .../markdown/rust-book/ch03-02-data-types.md | 385 +++++ .../rust-book/ch03-03-how-functions-work.md | 253 +++ .../markdown/rust-book/ch03-04-comments.md | 45 + .../rust-book/ch03-05-control-flow.md | 397 +++++ .../ch04-00-understanding-ownership.md | 7 + .../rust-book/ch04-01-what-is-ownership.md | 522 +++++++ .../ch04-02-references-and-borrowing.md | 263 ++++ .../markdown/rust-book/ch04-03-slices.md | 334 ++++ .../markdown/rust-book/ch05-00-structs.md | 14 + .../rust-book/ch05-01-defining-structs.md | 313 ++++ 
.../rust-book/ch05-02-example-structs.md | 252 +++ .../rust-book/ch05-03-method-syntax.md | 260 ++++ documents/markdown/rust-book/ch06-00-enums.md | 11 + .../rust-book/ch06-01-defining-an-enum.md | 330 ++++ documents/markdown/rust-book/ch06-02-match.md | 265 ++++ .../markdown/rust-book/ch06-03-if-let.md | 147 ++ ...ojects-with-packages-crates-and-modules.md | 52 + .../rust-book/ch07-01-packages-and-crates.md | 72 + ...ng-modules-to-control-scope-and-privacy.md | 180 +++ ...referring-to-an-item-in-the-module-tree.md | 293 ++++ ...g-paths-into-scope-with-the-use-keyword.md | 314 ++++ ...separating-modules-into-different-files.md | 129 ++ .../rust-book/ch08-00-common-collections.md | 25 + .../markdown/rust-book/ch08-01-vectors.md | 260 ++++ .../markdown/rust-book/ch08-02-strings.md | 447 ++++++ .../markdown/rust-book/ch08-03-hash-maps.md | 252 +++ .../rust-book/ch09-00-error-handling.md | 24 + ...ch09-01-unrecoverable-errors-with-panic.md | 170 ++ .../ch09-02-recoverable-errors-with-result.md | 546 +++++++ .../ch09-03-to-panic-or-not-to-panic.md | 236 +++ .../markdown/rust-book/ch10-00-generics.md | 115 ++ .../markdown/rust-book/ch10-01-syntax.md | 323 ++++ .../markdown/rust-book/ch10-02-traits.md | 404 +++++ .../rust-book/ch10-03-lifetime-syntax.md | 641 ++++++++ .../markdown/rust-book/ch11-00-testing.md | 34 + .../rust-book/ch11-01-writing-tests.md | 557 +++++++ .../rust-book/ch11-02-running-tests.md | 188 +++ .../rust-book/ch11-03-test-organization.md | 266 ++++ .../rust-book/ch12-00-an-io-project.md | 48 + ...h12-01-accepting-command-line-arguments.md | 133 ++ .../rust-book/ch12-02-reading-a-file.md | 56 + ...improving-error-handling-and-modularity.md | 513 +++++++ ...2-04-testing-the-librarys-functionality.md | 218 +++ ...2-05-working-with-environment-variables.md | 204 +++ ...-06-writing-to-stderr-instead-of-stdout.md | 112 ++ .../rust-book/ch13-00-functional-features.md | 24 + .../markdown/rust-book/ch13-01-closures.md | 426 +++++ 
.../markdown/rust-book/ch13-02-iterators.md | 229 +++ .../ch13-03-improving-our-io-project.md | 193 +++ .../markdown/rust-book/ch13-04-performance.md | 57 + .../rust-book/ch14-00-more-about-cargo.md | 14 + .../rust-book/ch14-01-release-profiles.md | 75 + .../ch14-02-publishing-to-crates-io.md | 481 ++++++ .../rust-book/ch14-03-cargo-workspaces.md | 389 +++++ .../rust-book/ch14-04-installing-binaries.md | 48 + .../rust-book/ch14-05-extending-cargo.md | 17 + .../rust-book/ch15-00-smart-pointers.md | 46 + documents/markdown/rust-book/ch15-01-box.md | 263 ++++ documents/markdown/rust-book/ch15-02-deref.md | 312 ++++ documents/markdown/rust-book/ch15-03-drop.md | 146 ++ documents/markdown/rust-book/ch15-04-rc.md | 176 +++ .../rust-book/ch15-05-interior-mutability.md | 361 +++++ .../rust-book/ch15-06-reference-cycles.md | 323 ++++ .../markdown/rust-book/ch16-00-concurrency.md | 49 + .../markdown/rust-book/ch16-01-threads.md | 284 ++++ .../rust-book/ch16-02-message-passing.md | 267 ++++ .../rust-book/ch16-03-shared-state.md | 255 +++ ...04-extensible-concurrency-sync-and-send.md | 101 ++ .../markdown/rust-book/ch17-00-async-await.md | 167 ++ .../rust-book/ch17-01-futures-and-syntax.md | 405 +++++ .../ch17-02-concurrency-with-async.md | 421 +++++ .../rust-book/ch17-03-more-futures.md | 249 +++ .../markdown/rust-book/ch17-04-streams.md | 112 ++ .../rust-book/ch17-05-traits-for-async.md | 542 +++++++ .../ch17-06-futures-tasks-threads.md | 105 ++ documents/markdown/rust-book/ch18-00-oop.md | 17 + .../markdown/rust-book/ch18-01-what-is-oo.md | 149 ++ .../rust-book/ch18-02-trait-objects.md | 255 +++ .../rust-book/ch18-03-oo-design-patterns.md | 546 +++++++ .../markdown/rust-book/ch19-00-patterns.md | 29 + .../ch19-01-all-the-places-for-patterns.md | 266 ++++ .../rust-book/ch19-02-refutability.md | 93 ++ .../rust-book/ch19-03-pattern-syntax.md | 639 ++++++++ .../rust-book/ch20-00-advanced-features.md | 22 + .../markdown/rust-book/ch20-01-unsafe-rust.md | 572 +++++++ 
.../rust-book/ch20-02-advanced-traits.md | 482 ++++++ .../rust-book/ch20-03-advanced-types.md | 303 ++++ ...ch20-04-advanced-functions-and-closures.md | 178 +++ .../markdown/rust-book/ch20-05-macros.md | 530 +++++++ .../ch21-00-final-project-a-web-server.md | 41 + .../rust-book/ch21-01-single-threaded.md | 473 ++++++ .../rust-book/ch21-02-multithreaded.md | 728 +++++++++ .../ch21-03-graceful-shutdown-and-cleanup.md | 238 +++ documents/markdown/rust-book/foreword.md | 47 + documents/markdown/rust-book/title-page.md | 30 + scrape/markdown_github.py | 309 ++++ 161 files changed, 40257 insertions(+) create mode 100644 documents/markdown/commonmark/about-this-document.md create mode 100644 documents/markdown/commonmark/atx-headings.md create mode 100644 documents/markdown/commonmark/autolinks.md create mode 100644 documents/markdown/commonmark/backslash-escapes.md create mode 100644 documents/markdown/commonmark/blank-lines.md create mode 100644 documents/markdown/commonmark/block-quotes.md create mode 100644 documents/markdown/commonmark/characters-and-lines.md create mode 100644 documents/markdown/commonmark/code-spans.md create mode 100644 documents/markdown/commonmark/container-blocks-and-leaf-blocks.md create mode 100644 documents/markdown/commonmark/emphasis-and-strong-emphasis.md create mode 100644 documents/markdown/commonmark/entity-and-numeric-character-references.md create mode 100644 documents/markdown/commonmark/fenced-code-blocks.md create mode 100644 documents/markdown/commonmark/foo.md create mode 100644 documents/markdown/commonmark/hard-line-breaks.md create mode 100644 documents/markdown/commonmark/html-blocks.md create mode 100644 documents/markdown/commonmark/images.md create mode 100644 documents/markdown/commonmark/indented-code-blocks.md create mode 100644 documents/markdown/commonmark/insecure-characters.md create mode 100644 documents/markdown/commonmark/link-reference-definitions.md create mode 100644 documents/markdown/commonmark/links.md 
create mode 100644 documents/markdown/commonmark/list-items.md create mode 100644 documents/markdown/commonmark/lists.md create mode 100644 documents/markdown/commonmark/overview.md create mode 100644 documents/markdown/commonmark/paragraphs.md create mode 100644 documents/markdown/commonmark/phase-1-block-structure.md create mode 100644 documents/markdown/commonmark/phase-2-inline-structure.md create mode 100644 documents/markdown/commonmark/precedence.md create mode 100644 documents/markdown/commonmark/raw-html.md create mode 100644 documents/markdown/commonmark/setext-headings.md create mode 100644 documents/markdown/commonmark/soft-line-breaks.md create mode 100644 documents/markdown/commonmark/tabs.md create mode 100644 documents/markdown/commonmark/textual-content.md create mode 100644 documents/markdown/commonmark/thematic-breaks.md create mode 100644 documents/markdown/commonmark/what-is-markdown.md create mode 100644 documents/markdown/commonmark/why-is-a-spec-needed.md create mode 100644 documents/markdown/docusaurus/configuration.md create mode 100644 documents/markdown/docusaurus/creating-pages.md create mode 100644 documents/markdown/docusaurus/deployment.md create mode 100644 documents/markdown/docusaurus/docs-create-doc.md create mode 100644 documents/markdown/docusaurus/docs-introduction.md create mode 100644 documents/markdown/docusaurus/installation.md create mode 100644 documents/markdown/docusaurus/markdown-features-admonitions.md create mode 100644 documents/markdown/docusaurus/markdown-features-assets.md create mode 100644 documents/markdown/docusaurus/markdown-features-code-blocks.md create mode 100644 documents/markdown/docusaurus/markdown-features-intro.md create mode 100644 documents/markdown/docusaurus/markdown-features-links.md create mode 100644 documents/markdown/docusaurus/markdown-features-react.md create mode 100644 documents/markdown/docusaurus/markdown-features-tabs.md create mode 100644 documents/markdown/rust-book/appendix-00.md 
create mode 100644 documents/markdown/rust-book/appendix-01-keywords.md create mode 100644 documents/markdown/rust-book/appendix-02-operators.md create mode 100644 documents/markdown/rust-book/appendix-03-derivable-traits.md create mode 100644 documents/markdown/rust-book/appendix-04-useful-development-tools.md create mode 100644 documents/markdown/rust-book/appendix-05-editions.md create mode 100644 documents/markdown/rust-book/appendix-06-translation.md create mode 100644 documents/markdown/rust-book/appendix-07-nightly-rust.md create mode 100644 documents/markdown/rust-book/ch00-00-introduction.md create mode 100644 documents/markdown/rust-book/ch01-00-getting-started.md create mode 100644 documents/markdown/rust-book/ch01-01-installation.md create mode 100644 documents/markdown/rust-book/ch01-02-hello-world.md create mode 100644 documents/markdown/rust-book/ch01-03-hello-cargo.md create mode 100644 documents/markdown/rust-book/ch02-00-guessing-game-tutorial.md create mode 100644 documents/markdown/rust-book/ch03-00-common-programming-concepts.md create mode 100644 documents/markdown/rust-book/ch03-01-variables-and-mutability.md create mode 100644 documents/markdown/rust-book/ch03-02-data-types.md create mode 100644 documents/markdown/rust-book/ch03-03-how-functions-work.md create mode 100644 documents/markdown/rust-book/ch03-04-comments.md create mode 100644 documents/markdown/rust-book/ch03-05-control-flow.md create mode 100644 documents/markdown/rust-book/ch04-00-understanding-ownership.md create mode 100644 documents/markdown/rust-book/ch04-01-what-is-ownership.md create mode 100644 documents/markdown/rust-book/ch04-02-references-and-borrowing.md create mode 100644 documents/markdown/rust-book/ch04-03-slices.md create mode 100644 documents/markdown/rust-book/ch05-00-structs.md create mode 100644 documents/markdown/rust-book/ch05-01-defining-structs.md create mode 100644 documents/markdown/rust-book/ch05-02-example-structs.md create mode 100644 
documents/markdown/rust-book/ch05-03-method-syntax.md create mode 100644 documents/markdown/rust-book/ch06-00-enums.md create mode 100644 documents/markdown/rust-book/ch06-01-defining-an-enum.md create mode 100644 documents/markdown/rust-book/ch06-02-match.md create mode 100644 documents/markdown/rust-book/ch06-03-if-let.md create mode 100644 documents/markdown/rust-book/ch07-00-managing-growing-projects-with-packages-crates-and-modules.md create mode 100644 documents/markdown/rust-book/ch07-01-packages-and-crates.md create mode 100644 documents/markdown/rust-book/ch07-02-defining-modules-to-control-scope-and-privacy.md create mode 100644 documents/markdown/rust-book/ch07-03-paths-for-referring-to-an-item-in-the-module-tree.md create mode 100644 documents/markdown/rust-book/ch07-04-bringing-paths-into-scope-with-the-use-keyword.md create mode 100644 documents/markdown/rust-book/ch07-05-separating-modules-into-different-files.md create mode 100644 documents/markdown/rust-book/ch08-00-common-collections.md create mode 100644 documents/markdown/rust-book/ch08-01-vectors.md create mode 100644 documents/markdown/rust-book/ch08-02-strings.md create mode 100644 documents/markdown/rust-book/ch08-03-hash-maps.md create mode 100644 documents/markdown/rust-book/ch09-00-error-handling.md create mode 100644 documents/markdown/rust-book/ch09-01-unrecoverable-errors-with-panic.md create mode 100644 documents/markdown/rust-book/ch09-02-recoverable-errors-with-result.md create mode 100644 documents/markdown/rust-book/ch09-03-to-panic-or-not-to-panic.md create mode 100644 documents/markdown/rust-book/ch10-00-generics.md create mode 100644 documents/markdown/rust-book/ch10-01-syntax.md create mode 100644 documents/markdown/rust-book/ch10-02-traits.md create mode 100644 documents/markdown/rust-book/ch10-03-lifetime-syntax.md create mode 100644 documents/markdown/rust-book/ch11-00-testing.md create mode 100644 documents/markdown/rust-book/ch11-01-writing-tests.md create mode 100644 
documents/markdown/rust-book/ch11-02-running-tests.md create mode 100644 documents/markdown/rust-book/ch11-03-test-organization.md create mode 100644 documents/markdown/rust-book/ch12-00-an-io-project.md create mode 100644 documents/markdown/rust-book/ch12-01-accepting-command-line-arguments.md create mode 100644 documents/markdown/rust-book/ch12-02-reading-a-file.md create mode 100644 documents/markdown/rust-book/ch12-03-improving-error-handling-and-modularity.md create mode 100644 documents/markdown/rust-book/ch12-04-testing-the-librarys-functionality.md create mode 100644 documents/markdown/rust-book/ch12-05-working-with-environment-variables.md create mode 100644 documents/markdown/rust-book/ch12-06-writing-to-stderr-instead-of-stdout.md create mode 100644 documents/markdown/rust-book/ch13-00-functional-features.md create mode 100644 documents/markdown/rust-book/ch13-01-closures.md create mode 100644 documents/markdown/rust-book/ch13-02-iterators.md create mode 100644 documents/markdown/rust-book/ch13-03-improving-our-io-project.md create mode 100644 documents/markdown/rust-book/ch13-04-performance.md create mode 100644 documents/markdown/rust-book/ch14-00-more-about-cargo.md create mode 100644 documents/markdown/rust-book/ch14-01-release-profiles.md create mode 100644 documents/markdown/rust-book/ch14-02-publishing-to-crates-io.md create mode 100644 documents/markdown/rust-book/ch14-03-cargo-workspaces.md create mode 100644 documents/markdown/rust-book/ch14-04-installing-binaries.md create mode 100644 documents/markdown/rust-book/ch14-05-extending-cargo.md create mode 100644 documents/markdown/rust-book/ch15-00-smart-pointers.md create mode 100644 documents/markdown/rust-book/ch15-01-box.md create mode 100644 documents/markdown/rust-book/ch15-02-deref.md create mode 100644 documents/markdown/rust-book/ch15-03-drop.md create mode 100644 documents/markdown/rust-book/ch15-04-rc.md create mode 100644 documents/markdown/rust-book/ch15-05-interior-mutability.md 
create mode 100644 documents/markdown/rust-book/ch15-06-reference-cycles.md create mode 100644 documents/markdown/rust-book/ch16-00-concurrency.md create mode 100644 documents/markdown/rust-book/ch16-01-threads.md create mode 100644 documents/markdown/rust-book/ch16-02-message-passing.md create mode 100644 documents/markdown/rust-book/ch16-03-shared-state.md create mode 100644 documents/markdown/rust-book/ch16-04-extensible-concurrency-sync-and-send.md create mode 100644 documents/markdown/rust-book/ch17-00-async-await.md create mode 100644 documents/markdown/rust-book/ch17-01-futures-and-syntax.md create mode 100644 documents/markdown/rust-book/ch17-02-concurrency-with-async.md create mode 100644 documents/markdown/rust-book/ch17-03-more-futures.md create mode 100644 documents/markdown/rust-book/ch17-04-streams.md create mode 100644 documents/markdown/rust-book/ch17-05-traits-for-async.md create mode 100644 documents/markdown/rust-book/ch17-06-futures-tasks-threads.md create mode 100644 documents/markdown/rust-book/ch18-00-oop.md create mode 100644 documents/markdown/rust-book/ch18-01-what-is-oo.md create mode 100644 documents/markdown/rust-book/ch18-02-trait-objects.md create mode 100644 documents/markdown/rust-book/ch18-03-oo-design-patterns.md create mode 100644 documents/markdown/rust-book/ch19-00-patterns.md create mode 100644 documents/markdown/rust-book/ch19-01-all-the-places-for-patterns.md create mode 100644 documents/markdown/rust-book/ch19-02-refutability.md create mode 100644 documents/markdown/rust-book/ch19-03-pattern-syntax.md create mode 100644 documents/markdown/rust-book/ch20-00-advanced-features.md create mode 100644 documents/markdown/rust-book/ch20-01-unsafe-rust.md create mode 100644 documents/markdown/rust-book/ch20-02-advanced-traits.md create mode 100644 documents/markdown/rust-book/ch20-03-advanced-types.md create mode 100644 documents/markdown/rust-book/ch20-04-advanced-functions-and-closures.md create mode 100644 
documents/markdown/rust-book/ch20-05-macros.md create mode 100644 documents/markdown/rust-book/ch21-00-final-project-a-web-server.md create mode 100644 documents/markdown/rust-book/ch21-01-single-threaded.md create mode 100644 documents/markdown/rust-book/ch21-02-multithreaded.md create mode 100644 documents/markdown/rust-book/ch21-03-graceful-shutdown-and-cleanup.md create mode 100644 documents/markdown/rust-book/foreword.md create mode 100644 documents/markdown/rust-book/title-page.md create mode 100644 scrape/markdown_github.py diff --git a/ATTRIBUTION.json b/ATTRIBUTION.json index b0f84f7..31a179e 100644 --- a/ATTRIBUTION.json +++ b/ATTRIBUTION.json @@ -161,5 +161,38 @@ ], "donated": "2026-04-01", "notes": "Starter HTML templates from the HTML5 Boilerplate project (v6, v7, main)" + }, + { + "format": "markdown", + "path": "documents/markdown/commonmark/*.md", + "title": "CommonMark Spec Sections", + "author": "John MacFarlane and CommonMark contributors", + "license": "CC-BY-SA-4.0", + "source": "https://github.com/commonmark/commonmark-spec", + "tags": ["commonmark", "spec", "markdown"], + "donated": "2026-04-01", + "notes": "Sections extracted from the CommonMark specification (spec.txt)" + }, + { + "format": "markdown", + "path": "documents/markdown/rust-book/*.md", + "title": "The Rust Programming Language Book", + "author": "Steve Klabnik, Carol Nichols, and the Rust community", + "license": "CC-BY-4.0", + "source": "https://github.com/rust-lang/book", + "tags": ["rust", "book", "programming"], + "donated": "2026-04-01", + "notes": "Chapter files from the official Rust programming language book" + }, + { + "format": "markdown", + "path": "documents/markdown/docusaurus/*.md", + "title": "Docusaurus Documentation", + "author": "Meta Platforms, Inc. 
and Docusaurus contributors", + "license": "MIT", + "source": "https://github.com/facebook/docusaurus", + "tags": ["docusaurus", "documentation", "static-site"], + "donated": "2026-04-01", + "notes": "Documentation pages from the Docusaurus website docs (MDX saved as .md)" } ] diff --git a/documents/markdown/commonmark/about-this-document.md b/documents/markdown/commonmark/about-this-document.md new file mode 100644 index 0000000..b7e82c2 --- /dev/null +++ b/documents/markdown/commonmark/about-this-document.md @@ -0,0 +1,37 @@ +## About this document + + +This document attempts to specify Markdown syntax unambiguously. +It contains many examples with side-by-side Markdown and +HTML. These are intended to double as conformance tests. An +accompanying script `spec_tests.py` can be used to run the tests +against any Markdown program: + + python test/spec_tests.py --spec spec.txt --program PROGRAM + +Since this document describes how Markdown is to be parsed into +an abstract syntax tree, it would have made sense to use an abstract +representation of the syntax tree instead of HTML. But HTML is capable +of representing the structural distinctions we need to make, and the +choice of HTML for the tests makes it possible to run the tests against +an implementation without writing an abstract syntax tree renderer. + +Note that not every feature of the HTML samples is mandated by +the spec. For example, the spec says what counts as a link +destination, but it doesn't mandate that non-ASCII characters in +the URL be percent-encoded. To use the automatic tests, +implementers will need to provide a renderer that conforms to +the expectations of the spec examples (percent-encoding +non-ASCII characters in URLs). But a conforming implementation +can use a different renderer and may choose not to +percent-encode non-ASCII characters in URLs. + +This document is generated from a text file, `spec.txt`, written +in Markdown with a small extension for the side-by-side tests. 
+The script `tools/makespec.py` can be used to convert `spec.txt` into +HTML or CommonMark (which can then be converted into other formats). + +In the examples, the `→` character is used to represent tabs. + +# Preliminaries + diff --git a/documents/markdown/commonmark/atx-headings.md b/documents/markdown/commonmark/atx-headings.md new file mode 100644 index 0000000..bb78b3f --- /dev/null +++ b/documents/markdown/commonmark/atx-headings.md @@ -0,0 +1,19 @@ +## ATX headings + + +An [ATX heading](@) +consists of a string of characters, parsed as inline content, between an +opening sequence of 1--6 unescaped `#` characters and an optional +closing sequence of any number of unescaped `#` characters. +The opening sequence of `#` characters must be followed by spaces or tabs, or +by the end of line. The optional closing sequence of `#`s must be preceded by +spaces or tabs and may be followed by spaces or tabs only. The opening +`#` character may be preceded by up to three spaces of indentation. The raw +contents of the heading are stripped of leading and trailing space or tabs +before being parsed as inline content. The heading level is equal to the number +of `#` characters in the opening sequence. + +Simple headings: + +```````````````````````````````` example +# foo diff --git a/documents/markdown/commonmark/autolinks.md b/documents/markdown/commonmark/autolinks.md new file mode 100644 index 0000000..a215e0c --- /dev/null +++ b/documents/markdown/commonmark/autolinks.md @@ -0,0 +1,188 @@ +## Autolinks + + +[Autolink](@)s are absolute URIs and email addresses inside +`<` and `>`. They are parsed as links, with the URL or email address +as the link label. + +A [URI autolink](@) consists of `<`, followed by an +[absolute URI] followed by `>`. It is parsed as +a link to the URI, with the URI as the link's label. 
+ +An [absolute URI](@), +for these purposes, consists of a [scheme] followed by a colon (`:`) +followed by zero or more characters other than [ASCII control +characters][ASCII control character], [space], `<`, and `>`. +If the URI includes these characters, they must be percent-encoded +(e.g. `%20` for a space). + +For purposes of this spec, a [scheme](@) is any sequence +of 2--32 characters beginning with an ASCII letter and followed +by any combination of ASCII letters, digits, or the symbols plus +("+"), period ("."), or hyphen ("-"). + +Here are some valid autolinks: + +```````````````````````````````` example + +. +

    http://foo.bar.baz

    +```````````````````````````````` + + +```````````````````````````````` example + +. +

    https://foo.bar.baz/test?q=hello&id=22&boolean

    +```````````````````````````````` + + +```````````````````````````````` example + +. +

    irc://foo.bar:2233/baz

    +```````````````````````````````` + + +Uppercase is also fine: + +```````````````````````````````` example + +. +

    MAILTO:FOO@BAR.BAZ

    +```````````````````````````````` + + +Note that many strings that count as [absolute URIs] for +purposes of this spec are not valid URIs, because their +schemes are not registered or because of other problems +with their syntax: + +```````````````````````````````` example + +. +

    a+b+c:d

    +```````````````````````````````` + + +```````````````````````````````` example + +. +

    made-up-scheme://foo,bar

    +```````````````````````````````` + + +```````````````````````````````` example + +. +

    https://../

    +```````````````````````````````` + + +```````````````````````````````` example + +. +

    localhost:5001/foo

    +```````````````````````````````` + + +Spaces are not allowed in autolinks: + +```````````````````````````````` example + +. +

    <https://foo.bar/baz bim>

    +```````````````````````````````` + + +Backslash-escapes do not work inside autolinks: + +```````````````````````````````` example + +. +

    https://example.com/\[\

    +```````````````````````````````` + + +An [email autolink](@) +consists of `<`, followed by an [email address], +followed by `>`. The link's label is the email address, +and the URL is `mailto:` followed by the email address. + +An [email address](@), +for these purposes, is anything that matches +the [non-normative regex from the HTML5 +spec](https://html.spec.whatwg.org/multipage/forms.html#e-mail-state-(type=email)): + + /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? + (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/ + +Examples of email autolinks: + +```````````````````````````````` example + +. +

    foo@bar.example.com

    +```````````````````````````````` + + +```````````````````````````````` example + +. +

    foo+special@Bar.baz-bar0.com

    +```````````````````````````````` + + +Backslash-escapes do not work inside email autolinks: + +```````````````````````````````` example + +. +

    <foo+@bar.example.com>

    +```````````````````````````````` + + +These are not autolinks: + +```````````````````````````````` example +<> +. +

    <>

    +```````````````````````````````` + + +```````````````````````````````` example +< https://foo.bar > +. +

    < https://foo.bar >

    +```````````````````````````````` + + +```````````````````````````````` example + +. +

    <m:abc>

    +```````````````````````````````` + + +```````````````````````````````` example + +. +

    <foo.bar.baz>

    +```````````````````````````````` + + +```````````````````````````````` example +https://example.com +. +

    https://example.com

    +```````````````````````````````` + + +```````````````````````````````` example +foo@bar.example.com +. +

    foo@bar.example.com

    +```````````````````````````````` + + diff --git a/documents/markdown/commonmark/backslash-escapes.md b/documents/markdown/commonmark/backslash-escapes.md new file mode 100644 index 0000000..b9bbf34 --- /dev/null +++ b/documents/markdown/commonmark/backslash-escapes.md @@ -0,0 +1,139 @@ +## Backslash escapes + + +Any ASCII punctuation character may be backslash-escaped: + +```````````````````````````````` example +\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ +. +

    !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

    +```````````````````````````````` + + +Backslashes before other characters are treated as literal +backslashes: + +```````````````````````````````` example +\→\A\a\ \3\φ\« +. +

    \→\A\a\ \3\φ\«

    +```````````````````````````````` + + +Escaped characters are treated as regular characters and do +not have their usual Markdown meanings: + +```````````````````````````````` example +\*not emphasized* +\
    not a tag +\[not a link](/foo) +\`not code` +1\. not a list +\* not a list +\# not a heading +\[foo]: /url "not a reference" +\ö not a character entity +. +

    *not emphasized* +<br/> not a tag +[not a link](/foo) +`not code` +1. not a list +* not a list +# not a heading +[foo]: /url "not a reference" +&ouml; not a character entity

    +```````````````````````````````` + + +If a backslash is itself escaped, the following character is not: + +```````````````````````````````` example +\\*emphasis* +. +

    \emphasis

    +```````````````````````````````` + + +A backslash at the end of the line is a [hard line break]: + +```````````````````````````````` example +foo\ +bar +. +

    foo
    +bar

    +```````````````````````````````` + + +Backslash escapes do not work in code blocks, code spans, autolinks, or +raw HTML: + +```````````````````````````````` example +`` \[\` `` +. +

    \[\`

    +```````````````````````````````` + + +```````````````````````````````` example + \[\] +. +
    \[\]
    +
    +```````````````````````````````` + + +```````````````````````````````` example +~~~ +\[\] +~~~ +. +
    \[\]
    +
    +```````````````````````````````` + + +```````````````````````````````` example + +. +

    https://example.com?find=\*

    +```````````````````````````````` + + +```````````````````````````````` example + +. + +```````````````````````````````` + + +But they work in all other contexts, including URLs and link titles, +link references, and [info strings] in [fenced code blocks]: + +```````````````````````````````` example +[foo](/bar\* "ti\*tle") +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +[foo] + +[foo]: /bar\* "ti\*tle" +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +``` foo\+bar +foo +``` +. +
    foo
    +
    +```````````````````````````````` + + diff --git a/documents/markdown/commonmark/blank-lines.md b/documents/markdown/commonmark/blank-lines.md new file mode 100644 index 0000000..2980d6b --- /dev/null +++ b/documents/markdown/commonmark/blank-lines.md @@ -0,0 +1,45 @@ +## Blank lines + + +[Blank lines] between block-level elements are ignored, +except for the role they play in determining whether a [list] +is [tight] or [loose]. + +Blank lines at the beginning and end of the document are also ignored. + +```````````````````````````````` example + + +aaa + + +# aaa + + +. +

    aaa

    +

    aaa

    +```````````````````````````````` + + + +# Container blocks + +A [container block](#container-blocks) is a block that has other +blocks as its contents. There are two basic kinds of container blocks: +[block quotes] and [list items]. +[Lists] are meta-containers for [list items]. + +We define the syntax for container blocks recursively. The general +form of the definition is: + +> If X is a sequence of blocks, then the result of +> transforming X in such-and-such a way is a container of type Y +> with these blocks as its content. + +So, we explain what counts as a block quote or list item by explaining +how these can be *generated* from their contents. This should suffice +to define the syntax, although it does not give a recipe for *parsing* +these constructions. (A recipe is provided below in the section entitled +[A parsing strategy](#appendix-a-parsing-strategy).) + diff --git a/documents/markdown/commonmark/block-quotes.md b/documents/markdown/commonmark/block-quotes.md new file mode 100644 index 0000000..5b03243 --- /dev/null +++ b/documents/markdown/commonmark/block-quotes.md @@ -0,0 +1,430 @@ +## Block quotes + + +A [block quote marker](@), +optionally preceded by up to three spaces of indentation, +consists of (a) the character `>` together with a following space of +indentation, or (b) a single character `>` not followed by a space of +indentation. + +The following rules define [block quotes]: + +1. **Basic case.** If a string of lines *Ls* constitute a sequence + of blocks *Bs*, then the result of prepending a [block quote + marker] to the beginning of each line in *Ls* + is a [block quote](#block-quotes) containing *Bs*. + +2. 
**Laziness.** If a string of lines *Ls* constitute a [block + quote](#block-quotes) with contents *Bs*, then the result of deleting + the initial [block quote marker] from one or + more lines in which the next character other than a space or tab after the + [block quote marker] is [paragraph continuation + text] is a block quote with *Bs* as its content. + [Paragraph continuation text](@) is text + that will be parsed as part of the content of a paragraph, but does + not occur at the beginning of the paragraph. + +3. **Consecutiveness.** A document cannot contain two [block + quotes] in a row unless there is a [blank line] between them. + +Nothing else counts as a [block quote](#block-quotes). + +Here is a simple example: + +```````````````````````````````` example +> # Foo +> bar +> baz +. +
    +

    Foo

    +

    bar +baz

    +
    +```````````````````````````````` + + +The space or tab after the `>` characters can be omitted: + +```````````````````````````````` example +># Foo +>bar +> baz +. +
    +

    Foo

    +

    bar +baz

    +
    +```````````````````````````````` + + +The `>` characters can be preceded by up to three spaces of indentation: + +```````````````````````````````` example + > # Foo + > bar + > baz +. +
    +

    Foo

    +

    bar +baz

    +
    +```````````````````````````````` + + +Four spaces of indentation is too many: + +```````````````````````````````` example + > # Foo + > bar + > baz +. +
    > # Foo
    +> bar
    +> baz
    +
    +```````````````````````````````` + + +The Laziness clause allows us to omit the `>` before +[paragraph continuation text]: + +```````````````````````````````` example +> # Foo +> bar +baz +. +
    +

    Foo

    +

    bar +baz

    +
    +```````````````````````````````` + + +A block quote can contain some lazy and some non-lazy +continuation lines: + +```````````````````````````````` example +> bar +baz +> foo +. +
    +

    bar +baz +foo

    +
    +```````````````````````````````` + + +Laziness only applies to lines that would have been continuations of +paragraphs had they been prepended with [block quote markers]. +For example, the `> ` cannot be omitted in the second line of + +``` markdown +> foo +> --- +``` + +without changing the meaning: + +```````````````````````````````` example +> foo +--- +. +
    +

    foo

    +
    +
    +```````````````````````````````` + + +Similarly, if we omit the `> ` in the second line of + +``` markdown +> - foo +> - bar +``` + +then the block quote ends after the first line: + +```````````````````````````````` example +> - foo +- bar +. +
    +
      +
    • foo
    • +
    +
    +
      +
    • bar
    • +
    +```````````````````````````````` + + +For the same reason, we can't omit the `> ` in front of +subsequent lines of an indented or fenced code block: + +```````````````````````````````` example +> foo + bar +. +
    +
    foo
    +
    +
    +
    bar
    +
    +```````````````````````````````` + + +```````````````````````````````` example +> ``` +foo +``` +. +
    +
    +
    +

    foo

    +
    +```````````````````````````````` + + +Note that in the following case, we have a [lazy +continuation line]: + +```````````````````````````````` example +> foo + - bar +. +
    +

    foo +- bar

    +
    +```````````````````````````````` + + +To see why, note that in + +```markdown +> foo +> - bar +``` + +the `- bar` is indented too far to start a list, and can't +be an indented code block because indented code blocks cannot +interrupt paragraphs, so it is [paragraph continuation text]. + +A block quote can be empty: + +```````````````````````````````` example +> +. +
    +
    +```````````````````````````````` + + +```````````````````````````````` example +> +> +> +. +
    +
    +```````````````````````````````` + + +A block quote can have initial or final blank lines: + +```````````````````````````````` example +> +> foo +> +. +
    +

    foo

    +
    +```````````````````````````````` + + +A blank line always separates block quotes: + +```````````````````````````````` example +> foo + +> bar +. +
    +

    foo

    +
    +
    +

    bar

    +
    +```````````````````````````````` + + +(Most current Markdown implementations, including John Gruber's +original `Markdown.pl`, will parse this example as a single block quote +with two paragraphs. But it seems better to allow the author to decide +whether two block quotes or one are wanted.) + +Consecutiveness means that if we put these block quotes together, +we get a single block quote: + +```````````````````````````````` example +> foo +> bar +. +
    +

    foo +bar

    +
    +```````````````````````````````` + + +To get a block quote with two paragraphs, use: + +```````````````````````````````` example +> foo +> +> bar +. +
    +

    foo

    +

    bar

    +
    +```````````````````````````````` + + +Block quotes can interrupt paragraphs: + +```````````````````````````````` example +foo +> bar +. +

    foo

    +
    +

    bar

    +
    +```````````````````````````````` + + +In general, blank lines are not needed before or after block +quotes: + +```````````````````````````````` example +> aaa +*** +> bbb +. +
    +

    aaa

    +
    +
    +
    +

    bbb

    +
    +```````````````````````````````` + + +However, because of laziness, a blank line is needed between +a block quote and a following paragraph: + +```````````````````````````````` example +> bar +baz +. +
    +

    bar +baz

    +
    +```````````````````````````````` + + +```````````````````````````````` example +> bar + +baz +. +
    +

    bar

    +
    +

    baz

    +```````````````````````````````` + + +```````````````````````````````` example +> bar +> +baz +. +
    +

    bar

    +
    +

    baz

    +```````````````````````````````` + + +It is a consequence of the Laziness rule that any number +of initial `>`s may be omitted on a continuation line of a +nested block quote: + +```````````````````````````````` example +> > > foo +bar +. +
    +
    +
    +

    foo +bar

    +
    +
    +
    +```````````````````````````````` + + +```````````````````````````````` example +>>> foo +> bar +>>baz +. +
    +
    +
    +

    foo +bar +baz

    +
    +
    +
    +```````````````````````````````` + + +When including an indented code block in a block quote, +remember that the [block quote marker] includes +both the `>` and a following space of indentation. So *five spaces* are needed +after the `>`: + +```````````````````````````````` example +> code + +> not code +. +
    +
    code
    +
    +
    +
    +

    not code

    +
    +```````````````````````````````` + + + diff --git a/documents/markdown/commonmark/characters-and-lines.md b/documents/markdown/commonmark/characters-and-lines.md new file mode 100644 index 0000000..e32ea0d --- /dev/null +++ b/documents/markdown/commonmark/characters-and-lines.md @@ -0,0 +1,52 @@ +## Characters and lines + + +Any sequence of [characters] is a valid CommonMark +document. + +A [character](@) is a Unicode code point. Although some +code points (for example, combining accents) do not correspond to +characters in an intuitive sense, all code points count as characters +for purposes of this spec. + +This spec does not specify an encoding; it thinks of lines as composed +of [characters] rather than bytes. A conforming parser may be limited +to a certain encoding. + +A [line](@) is a sequence of zero or more [characters] +other than line feed (`U+000A`) or carriage return (`U+000D`), +followed by a [line ending] or by the end of file. + +A [line ending](@) is a line feed (`U+000A`), a carriage return +(`U+000D`) not followed by a line feed, or a carriage return and a +following line feed. + +A line containing no characters, or a line containing only spaces +(`U+0020`) or tabs (`U+0009`), is called a [blank line](@). + +The following definitions of character classes will be used in this spec: + +A [Unicode whitespace character](@) is a character in the Unicode `Zs` general +category, or a tab (`U+0009`), line feed (`U+000A`), form feed (`U+000C`), or +carriage return (`U+000D`). + +[Unicode whitespace](@) is a sequence of one or more +[Unicode whitespace characters]. + +A [tab](@) is `U+0009`. + +A [space](@) is `U+0020`. + +An [ASCII control character](@) is a character between `U+0000–1F` (both +including) or `U+007F`. 
+ +An [ASCII punctuation character](@) +is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, +`*`, `+`, `,`, `-`, `.`, `/` (U+0021–2F), +`:`, `;`, `<`, `=`, `>`, `?`, `@` (U+003A–0040), +`[`, `\`, `]`, `^`, `_`, `` ` `` (U+005B–0060), +`{`, `|`, `}`, or `~` (U+007B–007E). + +A [Unicode punctuation character](@) is a character in the Unicode `P` +(punctuation) or `S` (symbol) general categories. + diff --git a/documents/markdown/commonmark/code-spans.md b/documents/markdown/commonmark/code-spans.md new file mode 100644 index 0000000..cbee4d5 --- /dev/null +++ b/documents/markdown/commonmark/code-spans.md @@ -0,0 +1,234 @@ +## Code spans + + +A [backtick string](@) +is a string of one or more backtick characters (`` ` ``) that is neither +preceded nor followed by a backtick. + +A [code span](@) begins with a backtick string and ends with +a backtick string of equal length. The contents of the code span are +the characters between these two backtick strings, normalized in the +following ways: + +- First, [line endings] are converted to [spaces]. +- If the resulting string both begins *and* ends with a [space] + character, but does not consist entirely of [space] + characters, a single [space] character is removed from the + front and back. This allows you to include code that begins + or ends with backtick characters, which must be separated by + whitespace from the opening or closing backtick strings. + +This is a simple code span: + +```````````````````````````````` example +`foo` +. +

    foo

    +```````````````````````````````` + + +Here two backticks are used, because the code contains a backtick. +This example also illustrates stripping of a single leading and +trailing space: + +```````````````````````````````` example +`` foo ` bar `` +. +

    foo ` bar

    +```````````````````````````````` + + +This example shows the motivation for stripping leading and trailing +spaces: + +```````````````````````````````` example +` `` ` +. +

    ``

    +```````````````````````````````` + +Note that only *one* space is stripped: + +```````````````````````````````` example +` `` ` +. +

    ``

    +```````````````````````````````` + +The stripping only happens if the space is on both +sides of the string: + +```````````````````````````````` example +` a` +. +

    a

    +```````````````````````````````` + +Only [spaces], and not [unicode whitespace] in general, are +stripped in this way: + +```````````````````````````````` example +` b ` +. +

     b 

    +```````````````````````````````` + +No stripping occurs if the code span contains only spaces: + +```````````````````````````````` example +` ` +` ` +. +

      +

    +```````````````````````````````` + + +[Line endings] are treated like spaces: + +```````````````````````````````` example +`` +foo +bar +baz +`` +. +

    foo bar baz

    +```````````````````````````````` + +```````````````````````````````` example +`` +foo +`` +. +

    foo

    +```````````````````````````````` + + +Interior spaces are not collapsed: + +```````````````````````````````` example +`foo bar +baz` +. +

    foo bar baz

    +```````````````````````````````` + +Note that browsers will typically collapse consecutive spaces +when rendering `<code>` elements, so it is recommended that +the following CSS be used: + + code{white-space: pre-wrap;} + + +Note that backslash escapes do not work in code spans. All backslashes +are treated literally: + +```````````````````````````````` example +`foo\`bar` +. +

    foo\bar`

    +```````````````````````````````` + + +Backslash escapes are never needed, because one can always choose a +string of *n* backtick characters as delimiters, where the code does +not contain any strings of exactly *n* backtick characters. + +```````````````````````````````` example +``foo`bar`` +. +

    foo`bar

    +```````````````````````````````` + +```````````````````````````````` example +` foo `` bar ` +. +

    foo `` bar

    +```````````````````````````````` + + +Code span backticks have higher precedence than any other inline +constructs except HTML tags and autolinks. Thus, for example, this is +not parsed as emphasized text, since the second `*` is part of a code +span: + +```````````````````````````````` example +*foo`*` +. +

    *foo*

    +```````````````````````````````` + + +And this is not parsed as a link: + +```````````````````````````````` example +[not a `link](/foo`) +. +

    [not a link](/foo)

    +```````````````````````````````` + + +Code spans, HTML tags, and autolinks have the same precedence. +Thus, this is code: + +```````````````````````````````` example +`` +. +

    <a href="">`

    +```````````````````````````````` + + +But this is an HTML tag: + +```````````````````````````````` example +
    ` +. +

    `

    +```````````````````````````````` + + +And this is code: + +```````````````````````````````` example +`` +. +

    <https://foo.bar.baz>`

    +```````````````````````````````` + + +But this is an autolink: + +```````````````````````````````` example +` +. +

    https://foo.bar.`baz`

    +```````````````````````````````` + + +When a backtick string is not closed by a matching backtick string, +we just have literal backticks: + +```````````````````````````````` example +```foo`` +. +

    ```foo``

    +```````````````````````````````` + + +```````````````````````````````` example +`foo +. +

    `foo

    +```````````````````````````````` + +The following case also illustrates the need for opening and +closing backtick strings to be equal in length: + +```````````````````````````````` example +`foo``bar`` +. +

    `foobar

    +```````````````````````````````` + + diff --git a/documents/markdown/commonmark/container-blocks-and-leaf-blocks.md b/documents/markdown/commonmark/container-blocks-and-leaf-blocks.md new file mode 100644 index 0000000..91712b9 --- /dev/null +++ b/documents/markdown/commonmark/container-blocks-and-leaf-blocks.md @@ -0,0 +1,13 @@ +## Container blocks and leaf blocks + + +We can divide blocks into two types: +[container blocks](#container-blocks), +which can contain other blocks, and [leaf blocks](#leaf-blocks), +which cannot. + +# Leaf blocks + +This section describes the different kinds of leaf block that make up a +Markdown document. + diff --git a/documents/markdown/commonmark/emphasis-and-strong-emphasis.md b/documents/markdown/commonmark/emphasis-and-strong-emphasis.md new file mode 100644 index 0000000..a6a45a2 --- /dev/null +++ b/documents/markdown/commonmark/emphasis-and-strong-emphasis.md @@ -0,0 +1,1365 @@ +## Emphasis and strong emphasis + + +John Gruber's original [Markdown syntax +description](https://daringfireball.net/projects/markdown/syntax#em) says: + +> Markdown treats asterisks (`*`) and underscores (`_`) as indicators of +> emphasis. Text wrapped with one `*` or `_` will be wrapped with an HTML +> `<em>` tag; double `*`'s or `_`'s will be wrapped with an HTML `<strong>` +> tag. + +This is enough for most users, but these rules leave much undecided, +especially when it comes to nested emphasis. 
The original +`Markdown.pl` test suite makes it clear that triple `***` and +`___` delimiters can be used for strong emphasis, and most +implementations have also allowed the following patterns: + +``` markdown +***strong emph*** +***strong** in emph* +***emph* in strong** +**in strong *emph*** +*in emph **strong*** +``` + +The following patterns are less widely supported, but the intent +is clear and they are useful (especially in contexts like bibliography +entries): + +``` markdown +*emph *with emph* in it* +**strong **with strong** in it** +``` + +Many implementations have also restricted intraword emphasis to +the `*` forms, to avoid unwanted emphasis in words containing +internal underscores. (It is best practice to put these in code +spans, but users often do not.) + +``` markdown +internal emphasis: foo*bar*baz +no emphasis: foo_bar_baz +``` + +The rules given below capture all of these patterns, while allowing +for efficient parsing strategies that do not backtrack. + +First, some definitions. A [delimiter run](@) is either +a sequence of one or more `*` characters that is not preceded or +followed by a non-backslash-escaped `*` character, or a sequence +of one or more `_` characters that is not preceded or followed by +a non-backslash-escaped `_` character. + +A [left-flanking delimiter run](@) is +a [delimiter run] that is (1) not followed by [Unicode whitespace], +and either (2a) not followed by a [Unicode punctuation character], or +(2b) followed by a [Unicode punctuation character] and +preceded by [Unicode whitespace] or a [Unicode punctuation character]. +For purposes of this definition, the beginning and the end of +the line count as Unicode whitespace. 
+ +A [right-flanking delimiter run](@) is +a [delimiter run] that is (1) not preceded by [Unicode whitespace], +and either (2a) not preceded by a [Unicode punctuation character], or +(2b) preceded by a [Unicode punctuation character] and +followed by [Unicode whitespace] or a [Unicode punctuation character]. +For purposes of this definition, the beginning and the end of +the line count as Unicode whitespace. + +Here are some examples of delimiter runs. + + - left-flanking but not right-flanking: + + ``` + ***abc + _abc + **"abc" + _"abc" + ``` + + - right-flanking but not left-flanking: + + ``` + abc*** + abc_ + "abc"** + "abc"_ + ``` + + - Both left and right-flanking: + + ``` + abc***def + "abc"_"def" + ``` + + - Neither left nor right-flanking: + + ``` + abc *** def + a _ b + ``` + +(The idea of distinguishing left-flanking and right-flanking +delimiter runs based on the character before and the character +after comes from Roopesh Chander's +[vfmd](https://web.archive.org/web/20220608143320/http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags). +vfmd uses the terminology "emphasis indicator string" instead of "delimiter +run," and its rules for distinguishing left- and right-flanking runs +are a bit more complex than the ones given here.) + +The following rules define emphasis and strong emphasis: + +1. A single `*` character [can open emphasis](@) + iff (if and only if) it is part of a [left-flanking delimiter run]. + +2. A single `_` character [can open emphasis] iff + it is part of a [left-flanking delimiter run] + and either (a) not part of a [right-flanking delimiter run] + or (b) part of a [right-flanking delimiter run] + preceded by a [Unicode punctuation character]. + +3. A single `*` character [can close emphasis](@) + iff it is part of a [right-flanking delimiter run]. + +4. 
A single `_` character [can close emphasis] iff + it is part of a [right-flanking delimiter run] + and either (a) not part of a [left-flanking delimiter run] + or (b) part of a [left-flanking delimiter run] + followed by a [Unicode punctuation character]. + +5. A double `**` [can open strong emphasis](@) + iff it is part of a [left-flanking delimiter run]. + +6. A double `__` [can open strong emphasis] iff + it is part of a [left-flanking delimiter run] + and either (a) not part of a [right-flanking delimiter run] + or (b) part of a [right-flanking delimiter run] + preceded by a [Unicode punctuation character]. + +7. A double `**` [can close strong emphasis](@) + iff it is part of a [right-flanking delimiter run]. + +8. A double `__` [can close strong emphasis] iff + it is part of a [right-flanking delimiter run] + and either (a) not part of a [left-flanking delimiter run] + or (b) part of a [left-flanking delimiter run] + followed by a [Unicode punctuation character]. + +9. Emphasis begins with a delimiter that [can open emphasis] and ends + with a delimiter that [can close emphasis], and that uses the same + character (`_` or `*`) as the opening delimiter. The + opening and closing delimiters must belong to separate + [delimiter runs]. If one of the delimiters can both + open and close emphasis, then the sum of the lengths of the + delimiter runs containing the opening and closing delimiters + must not be a multiple of 3 unless both lengths are + multiples of 3. + +10. Strong emphasis begins with a delimiter that + [can open strong emphasis] and ends with a delimiter that + [can close strong emphasis], and that uses the same character + (`_` or `*`) as the opening delimiter. The + opening and closing delimiters must belong to separate + [delimiter runs]. 
If one of the delimiters can both open + and close strong emphasis, then the sum of the lengths of + the delimiter runs containing the opening and closing + delimiters must not be a multiple of 3 unless both lengths + are multiples of 3. + +11. A literal `*` character cannot occur at the beginning or end of + `*`-delimited emphasis or `**`-delimited strong emphasis, unless it + is backslash-escaped. + +12. A literal `_` character cannot occur at the beginning or end of + `_`-delimited emphasis or `__`-delimited strong emphasis, unless it + is backslash-escaped. + +Where rules 1--12 above are compatible with multiple parsings, +the following principles resolve ambiguity: + +13. The number of nestings should be minimized. Thus, for example, + an interpretation `<strong>...</strong>` is always preferred to + `<em><em>...</em></em>`. + +14. An interpretation `<em><strong>...</strong></em>` is always + preferred to `<strong><em>...</em></strong>`. + +15. When two potential emphasis or strong emphasis spans overlap, + so that the second begins before the first ends and ends after + the first ends, the first takes precedence. Thus, for example, + `*foo _bar* baz_` is parsed as `<em>foo _bar</em> baz_` rather + than `*foo <em>bar* baz</em>`. + +16. When there are two potential emphasis or strong emphasis spans + with the same closing delimiter, the shorter one (the one that + opens later) takes precedence. Thus, for example, + `**foo **bar baz**` is parsed as `**foo <strong>bar baz</strong>` + rather than `<strong>foo **bar baz</strong>`. + +17. Inline code spans, links, images, and HTML tags group more tightly + than emphasis. So, when there is a choice between an interpretation + that contains one of these elements and one that does not, the + former always wins. Thus, for example, `*[foo*](bar)` is + parsed as `*<a href="bar">foo*</a>` rather than as + `<em>[foo</em>](bar)`. + +These rules can be illustrated through a series of examples. + +Rule 1: + +```````````````````````````````` example +*foo bar* +. +

    foo bar

    +```````````````````````````````` + + +This is not emphasis, because the opening `*` is followed by +whitespace, and hence not part of a [left-flanking delimiter run]: + +```````````````````````````````` example +a * foo bar* +. +

    a * foo bar*

    +```````````````````````````````` + + +This is not emphasis, because the opening `*` is preceded +by an alphanumeric and followed by punctuation, and hence +not part of a [left-flanking delimiter run]: + +```````````````````````````````` example +a*"foo"* +. +

    a*"foo"*

    +```````````````````````````````` + + +Unicode nonbreaking spaces count as whitespace, too: + +```````````````````````````````` example +* a * +. +

    * a *

    +```````````````````````````````` + + +Unicode symbols count as punctuation, too: + +```````````````````````````````` example +*$*alpha. + +*£*bravo. + +*€*charlie. + +*𞋿*delta. +. +

    *$*alpha.

    +

    *£*bravo.

    +

    *€*charlie.

    +

    *𞋿*delta.

    +```````````````````````````````` + + +Intraword emphasis with `*` is permitted: + +```````````````````````````````` example +foo*bar* +. +

    foobar

    +```````````````````````````````` + + +```````````````````````````````` example +5*6*78 +. +

    5678

    +```````````````````````````````` + + +Rule 2: + +```````````````````````````````` example +_foo bar_ +. +

    foo bar

    +```````````````````````````````` + + +This is not emphasis, because the opening `_` is followed by +whitespace: + +```````````````````````````````` example +_ foo bar_ +. +

    _ foo bar_

    +```````````````````````````````` + + +This is not emphasis, because the opening `_` is preceded +by an alphanumeric and followed by punctuation: + +```````````````````````````````` example +a_"foo"_ +. +

    a_"foo"_

    +```````````````````````````````` + + +Emphasis with `_` is not allowed inside words: + +```````````````````````````````` example +foo_bar_ +. +

    foo_bar_

    +```````````````````````````````` + + +```````````````````````````````` example +5_6_78 +. +

    5_6_78

    +```````````````````````````````` + + +```````````````````````````````` example +пристаням_стремятся_ +. +

    пристаням_стремятся_

    +```````````````````````````````` + + +Here `_` does not generate emphasis, because the first delimiter run +is right-flanking and the second left-flanking: + +```````````````````````````````` example +aa_"bb"_cc +. +

    aa_"bb"_cc

    +```````````````````````````````` + + +This is emphasis, even though the opening delimiter is +both left- and right-flanking, because it is preceded by +punctuation: + +```````````````````````````````` example +foo-_(bar)_ +. +

    foo-(bar)

    +```````````````````````````````` + + +Rule 3: + +This is not emphasis, because the closing delimiter does +not match the opening delimiter: + +```````````````````````````````` example +_foo* +. +

    _foo*

    +```````````````````````````````` + + +This is not emphasis, because the closing `*` is preceded by +whitespace: + +```````````````````````````````` example +*foo bar * +. +

    *foo bar *

    +```````````````````````````````` + + +A line ending also counts as whitespace: + +```````````````````````````````` example +*foo bar +* +. +

    *foo bar +*

    +```````````````````````````````` + + +This is not emphasis, because the second `*` is +preceded by punctuation and followed by an alphanumeric +(hence it is not part of a [right-flanking delimiter run]): + +```````````````````````````````` example +*(*foo) +. +

    *(*foo)

    +```````````````````````````````` + + +The point of this restriction is more easily appreciated +with this example: + +```````````````````````````````` example +*(*foo*)* +. +

    (foo)

    +```````````````````````````````` + + +Intraword emphasis with `*` is allowed: + +```````````````````````````````` example +*foo*bar +. +

    foobar

    +```````````````````````````````` + + + +Rule 4: + +This is not emphasis, because the closing `_` is preceded by +whitespace: + +```````````````````````````````` example +_foo bar _ +. +

    _foo bar _

    +```````````````````````````````` + + +This is not emphasis, because the second `_` is +preceded by punctuation and followed by an alphanumeric: + +```````````````````````````````` example +_(_foo) +. +

    _(_foo)

    +```````````````````````````````` + + +This is emphasis within emphasis: + +```````````````````````````````` example +_(_foo_)_ +. +

    (foo)

    +```````````````````````````````` + + +Intraword emphasis is disallowed for `_`: + +```````````````````````````````` example +_foo_bar +. +

    _foo_bar

    +```````````````````````````````` + + +```````````````````````````````` example +_пристаням_стремятся +. +

    _пристаням_стремятся

    +```````````````````````````````` + + +```````````````````````````````` example +_foo_bar_baz_ +. +

    foo_bar_baz

    +```````````````````````````````` + + +This is emphasis, even though the closing delimiter is +both left- and right-flanking, because it is followed by +punctuation: + +```````````````````````````````` example +_(bar)_. +. +

    (bar).

    +```````````````````````````````` + + +Rule 5: + +```````````````````````````````` example +**foo bar** +. +

    foo bar

    +```````````````````````````````` + + +This is not strong emphasis, because the opening delimiter is +followed by whitespace: + +```````````````````````````````` example +** foo bar** +. +

    ** foo bar**

    +```````````````````````````````` + + +This is not strong emphasis, because the opening `**` is preceded +by an alphanumeric and followed by punctuation, and hence +not part of a [left-flanking delimiter run]: + +```````````````````````````````` example +a**"foo"** +. +

    a**"foo"**

    +```````````````````````````````` + + +Intraword strong emphasis with `**` is permitted: + +```````````````````````````````` example +foo**bar** +. +

    foobar

    +```````````````````````````````` + + +Rule 6: + +```````````````````````````````` example +__foo bar__ +. +

    foo bar

    +```````````````````````````````` + + +This is not strong emphasis, because the opening delimiter is +followed by whitespace: + +```````````````````````````````` example +__ foo bar__ +. +

    __ foo bar__

    +```````````````````````````````` + + +A line ending counts as whitespace: +```````````````````````````````` example +__ +foo bar__ +. +

    __ +foo bar__

    +```````````````````````````````` + + +This is not strong emphasis, because the opening `__` is preceded +by an alphanumeric and followed by punctuation: + +```````````````````````````````` example +a__"foo"__ +. +

    a__"foo"__

    +```````````````````````````````` + + +Intraword strong emphasis is forbidden with `__`: + +```````````````````````````````` example +foo__bar__ +. +

    foo__bar__

    +```````````````````````````````` + + +```````````````````````````````` example +5__6__78 +. +

    5__6__78

    +```````````````````````````````` + + +```````````````````````````````` example +пристаням__стремятся__ +. +

    пристаням__стремятся__

    +```````````````````````````````` + + +```````````````````````````````` example +__foo, __bar__, baz__ +. +

    foo, bar, baz

    +```````````````````````````````` + + +This is strong emphasis, even though the opening delimiter is +both left- and right-flanking, because it is preceded by +punctuation: + +```````````````````````````````` example +foo-__(bar)__ +. +

    foo-(bar)

    +```````````````````````````````` + + + +Rule 7: + +This is not strong emphasis, because the closing delimiter is preceded +by whitespace: + +```````````````````````````````` example +**foo bar ** +. +

    **foo bar **

    +```````````````````````````````` + + +(Nor can it be interpreted as an emphasized `*foo bar *`, because of +Rule 11.) + +This is not strong emphasis, because the second `**` is +preceded by punctuation and followed by an alphanumeric: + +```````````````````````````````` example +**(**foo) +. +

    **(**foo)

    +```````````````````````````````` + + +The point of this restriction is more easily appreciated +with these examples: + +```````````````````````````````` example +*(**foo**)* +. +

    (foo)

    +```````````````````````````````` + + +```````````````````````````````` example +**Gomphocarpus (*Gomphocarpus physocarpus*, syn. +*Asclepias physocarpa*)** +. +

    Gomphocarpus (Gomphocarpus physocarpus, syn. +Asclepias physocarpa)

    +```````````````````````````````` + + +```````````````````````````````` example +**foo "*bar*" foo** +. +

    foo "bar" foo

    +```````````````````````````````` + + +Intraword emphasis: + +```````````````````````````````` example +**foo**bar +. +

    foobar

    +```````````````````````````````` + + +Rule 8: + +This is not strong emphasis, because the closing delimiter is +preceded by whitespace: + +```````````````````````````````` example +__foo bar __ +. +

    __foo bar __

    +```````````````````````````````` + + +This is not strong emphasis, because the second `__` is +preceded by punctuation and followed by an alphanumeric: + +```````````````````````````````` example +__(__foo) +. +

    __(__foo)

    +```````````````````````````````` + + +The point of this restriction is more easily appreciated +with this example: + +```````````````````````````````` example +_(__foo__)_ +. +

    (foo)

    +```````````````````````````````` + + +Intraword strong emphasis is forbidden with `__`: + +```````````````````````````````` example +__foo__bar +. +

    __foo__bar

    +```````````````````````````````` + + +```````````````````````````````` example +__пристаням__стремятся +. +

    __пристаням__стремятся

    +```````````````````````````````` + + +```````````````````````````````` example +__foo__bar__baz__ +. +

    foo__bar__baz

    +```````````````````````````````` + + +This is strong emphasis, even though the closing delimiter is +both left- and right-flanking, because it is followed by +punctuation: + +```````````````````````````````` example +__(bar)__. +. +

    (bar).

    +```````````````````````````````` + + +Rule 9: + +Any nonempty sequence of inline elements can be the contents of an +emphasized span. + +```````````````````````````````` example +*foo [bar](/url)* +. +

    foo bar

    +```````````````````````````````` + + +```````````````````````````````` example +*foo +bar* +. +

    foo +bar

    +```````````````````````````````` + + +In particular, emphasis and strong emphasis can be nested +inside emphasis: + +```````````````````````````````` example +_foo __bar__ baz_ +. +

    foo bar baz

    +```````````````````````````````` + + +```````````````````````````````` example +_foo _bar_ baz_ +. +

    foo bar baz

    +```````````````````````````````` + + +```````````````````````````````` example +__foo_ bar_ +. +

    foo bar

    +```````````````````````````````` + + +```````````````````````````````` example +*foo *bar** +. +

    foo bar

    +```````````````````````````````` + + +```````````````````````````````` example +*foo **bar** baz* +. +

    foo bar baz

    +```````````````````````````````` + +```````````````````````````````` example +*foo**bar**baz* +. +

    foobarbaz

    +```````````````````````````````` + +Note that in the preceding case, the interpretation + +``` markdown +

    foobarbaz

    +``` + + +is precluded by the condition that a delimiter that +can both open and close (like the `*` after `foo`) +cannot form emphasis if the sum of the lengths of +the delimiter runs containing the opening and +closing delimiters is a multiple of 3 unless +both lengths are multiples of 3. + + +For the same reason, we don't get two consecutive +emphasis sections in this example: + +```````````````````````````````` example +*foo**bar* +. +

    foo**bar

    +```````````````````````````````` + + +The same condition ensures that the following +cases are all strong emphasis nested inside +emphasis, even when the interior whitespace is +omitted: + + +```````````````````````````````` example +***foo** bar* +. +

    foo bar

    +```````````````````````````````` + + +```````````````````````````````` example +*foo **bar*** +. +

    foo bar

    +```````````````````````````````` + + +```````````````````````````````` example +*foo**bar*** +. +

    foobar

    +```````````````````````````````` + + +When the lengths of the interior closing and opening +delimiter runs are *both* multiples of 3, though, +they can match to create emphasis: + +```````````````````````````````` example +foo***bar***baz +. +

    foobarbaz

    +```````````````````````````````` + +```````````````````````````````` example +foo******bar*********baz +. +

    foobar***baz

    +```````````````````````````````` + + +Indefinite levels of nesting are possible: + +```````````````````````````````` example +*foo **bar *baz* bim** bop* +. +

    foo bar baz bim bop

    +```````````````````````````````` + + +```````````````````````````````` example +*foo [*bar*](/url)* +. +

    foo bar

    +```````````````````````````````` + + +There can be no empty emphasis or strong emphasis: + +```````````````````````````````` example +** is not an empty emphasis +. +

    ** is not an empty emphasis

    +```````````````````````````````` + + +```````````````````````````````` example +**** is not an empty strong emphasis +. +

    **** is not an empty strong emphasis

    +```````````````````````````````` + + + +Rule 10: + +Any nonempty sequence of inline elements can be the contents of a +strongly emphasized span. + +```````````````````````````````` example +**foo [bar](/url)** +. +

    foo bar

    +```````````````````````````````` + + +```````````````````````````````` example +**foo +bar** +. +

    foo +bar

    +```````````````````````````````` + + +In particular, emphasis and strong emphasis can be nested +inside strong emphasis: + +```````````````````````````````` example +__foo _bar_ baz__ +. +

    foo bar baz

    +```````````````````````````````` + + +```````````````````````````````` example +__foo __bar__ baz__ +. +

    foo bar baz

    +```````````````````````````````` + + +```````````````````````````````` example +____foo__ bar__ +. +

    foo bar

    +```````````````````````````````` + + +```````````````````````````````` example +**foo **bar**** +. +

    foo bar

    +```````````````````````````````` + + +```````````````````````````````` example +**foo *bar* baz** +. +

    foo bar baz

    +```````````````````````````````` + + +```````````````````````````````` example +**foo*bar*baz** +. +

    foobarbaz

    +```````````````````````````````` + + +```````````````````````````````` example +***foo* bar** +. +

    foo bar

    +```````````````````````````````` + + +```````````````````````````````` example +**foo *bar*** +. +

    foo bar

    +```````````````````````````````` + + +Indefinite levels of nesting are possible: + +```````````````````````````````` example +**foo *bar **baz** +bim* bop** +. +

    foo bar baz +bim bop

    +```````````````````````````````` + + +```````````````````````````````` example +**foo [*bar*](/url)** +. +

    foo bar

    +```````````````````````````````` + + +There can be no empty emphasis or strong emphasis: + +```````````````````````````````` example +__ is not an empty emphasis +. +

    __ is not an empty emphasis

    +```````````````````````````````` + + +```````````````````````````````` example +____ is not an empty strong emphasis +. +

    ____ is not an empty strong emphasis

    +```````````````````````````````` + + + +Rule 11: + +```````````````````````````````` example +foo *** +. +

    foo ***

    +```````````````````````````````` + + +```````````````````````````````` example +foo *\** +. +

    foo *

    +```````````````````````````````` + + +```````````````````````````````` example +foo *_* +. +

    foo _

    +```````````````````````````````` + + +```````````````````````````````` example +foo ***** +. +

    foo *****

    +```````````````````````````````` + + +```````````````````````````````` example +foo **\*** +. +

    foo *

    +```````````````````````````````` + + +```````````````````````````````` example +foo **_** +. +

    foo _

    +```````````````````````````````` + + +Note that when delimiters do not match evenly, Rule 11 determines +that the excess literal `*` characters will appear outside of the +emphasis, rather than inside it: + +```````````````````````````````` example +**foo* +. +

    *foo

    +```````````````````````````````` + + +```````````````````````````````` example +*foo** +. +

    foo*

    +```````````````````````````````` + + +```````````````````````````````` example +***foo** +. +

    *foo

    +```````````````````````````````` + + +```````````````````````````````` example +****foo* +. +

    ***foo

    +```````````````````````````````` + + +```````````````````````````````` example +**foo*** +. +

    foo*

    +```````````````````````````````` + + +```````````````````````````````` example +*foo**** +. +

    foo***

    +```````````````````````````````` + + + +Rule 12: + +```````````````````````````````` example +foo ___ +. +

    foo ___

    +```````````````````````````````` + + +```````````````````````````````` example +foo _\__ +. +

    foo _

    +```````````````````````````````` + + +```````````````````````````````` example +foo _*_ +. +

    foo *

    +```````````````````````````````` + + +```````````````````````````````` example +foo _____ +. +

    foo _____

    +```````````````````````````````` + + +```````````````````````````````` example +foo __\___ +. +

    foo _

    +```````````````````````````````` + + +```````````````````````````````` example +foo __*__ +. +

    foo *

    +```````````````````````````````` + + +```````````````````````````````` example +__foo_ +. +

    _foo

    +```````````````````````````````` + + +Note that when delimiters do not match evenly, Rule 12 determines +that the excess literal `_` characters will appear outside of the +emphasis, rather than inside it: + +```````````````````````````````` example +_foo__ +. +

    foo_

    +```````````````````````````````` + + +```````````````````````````````` example +___foo__ +. +

    _foo

    +```````````````````````````````` + + +```````````````````````````````` example +____foo_ +. +

    ___foo

    +```````````````````````````````` + + +```````````````````````````````` example +__foo___ +. +

    foo_

    +```````````````````````````````` + + +```````````````````````````````` example +_foo____ +. +

    foo___

    +```````````````````````````````` + + +Rule 13 implies that if you want emphasis nested directly inside +emphasis, you must use different delimiters: + +```````````````````````````````` example +**foo** +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +*_foo_* +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +__foo__ +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +_*foo*_ +. +

    foo

    +```````````````````````````````` + + +However, strong emphasis within strong emphasis is possible without +switching delimiters: + +```````````````````````````````` example +****foo**** +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +____foo____ +. +

    foo

    +```````````````````````````````` + + + +Rule 13 can be applied to arbitrarily long sequences of +delimiters: + +```````````````````````````````` example +******foo****** +. +

    foo

    +```````````````````````````````` + + +Rule 14: + +```````````````````````````````` example +***foo*** +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +_____foo_____ +. +

    foo

    +```````````````````````````````` + + +Rule 15: + +```````````````````````````````` example +*foo _bar* baz_ +. +

    foo _bar baz_

    +```````````````````````````````` + + +```````````````````````````````` example +*foo __bar *baz bim__ bam* +. +

    foo bar *baz bim bam

    +```````````````````````````````` + + +Rule 16: + +```````````````````````````````` example +**foo **bar baz** +. +

    **foo bar baz

    +```````````````````````````````` + + +```````````````````````````````` example +*foo *bar baz* +. +

    *foo bar baz

    +```````````````````````````````` + + +Rule 17: + +```````````````````````````````` example +*[bar*](/url) +. +

    *bar*

    +```````````````````````````````` + + +```````````````````````````````` example +_foo [bar_](/url) +. +

    _foo bar_

    +```````````````````````````````` + + +```````````````````````````````` example +* +. +

    *

    +```````````````````````````````` + + +```````````````````````````````` example +** +. +

    **

    +```````````````````````````````` + + +```````````````````````````````` example +__ +. +

    __

    +```````````````````````````````` + + +```````````````````````````````` example +*a `*`* +. +

    a *

    +```````````````````````````````` + + +```````````````````````````````` example +_a `_`_ +. +

    a _

    +```````````````````````````````` + + +```````````````````````````````` example +**a +. +

    **ahttps://foo.bar/?q=**

    +```````````````````````````````` + + +```````````````````````````````` example +__a +. +

    __ahttps://foo.bar/?q=__

    +```````````````````````````````` + + + diff --git a/documents/markdown/commonmark/entity-and-numeric-character-references.md b/documents/markdown/commonmark/entity-and-numeric-character-references.md new file mode 100644 index 0000000..ce3eece --- /dev/null +++ b/documents/markdown/commonmark/entity-and-numeric-character-references.md @@ -0,0 +1,212 @@ +## Entity and numeric character references + + +Valid HTML entity references and numeric character references +can be used in place of the corresponding Unicode character, +with the following exceptions: + +- Entity and character references are not recognized in code + blocks and code spans. + +- Entity and character references cannot stand in place of + special characters that define structural elements in + CommonMark. For example, although `&#42;` can be used + in place of a literal `*` character, `&#42;` cannot replace + `*` in emphasis delimiters, bullet list markers, or thematic + breaks. + +Conforming CommonMark parsers need not store information about +whether a particular character was represented in the source +using a Unicode character or an entity reference. + +[Entity references](@) consist of `&` + any of the valid +HTML5 entity names + `;`. The +document <https://html.spec.whatwg.org/entities.json> +is used as an authoritative source for the valid entity +references and their corresponding code points. + +```````````````````````````````` example +&nbsp; &amp; &copy; &AElig; &Dcaron; +&frac34; &HilbertSpace; &DifferentialD; +&ClockwiseContourIntegral; &ngE; +. +

      & © Æ Ď +¾ ℋ ⅆ +∲ ≧̸

    +```````````````````````````````` + + +[Decimal numeric character +references](@) +consist of `&#` + a string of 1--7 arabic digits + `;`. A +numeric character reference is parsed as the corresponding +Unicode character. Invalid Unicode code points will be replaced by +the REPLACEMENT CHARACTER (`U+FFFD`). For security reasons, +the code point `U+0000` will also be replaced by `U+FFFD`. + +```````````````````````````````` example +# Ӓ Ϡ � +. +

    # Ӓ Ϡ �

    +```````````````````````````````` + + +[Hexadecimal numeric character +references](@) consist of `&#` + +either `X` or `x` + a string of 1-6 hexadecimal digits + `;`. +They too are parsed as the corresponding Unicode character (this +time specified with a hexadecimal numeral instead of decimal). + +```````````````````````````````` example +" ആ ಫ +. +

    " ആ ಫ

    +```````````````````````````````` + + +Here are some nonentities: + +```````````````````````````````` example +  &x; &#; &#x; +� +&#abcdef0; +&ThisIsNotDefined; &hi?; +. +

    &nbsp &x; &#; &#x; +&#87654321; +&#abcdef0; +&ThisIsNotDefined; &hi?;

    +```````````````````````````````` + + +Although HTML5 does accept some entity references +without a trailing semicolon (such as `©`), these are not +recognized here, because it makes the grammar too ambiguous: + +```````````````````````````````` example +© +. +

    &copy

    +```````````````````````````````` + + +Strings that are not on the list of HTML5 named entities are not +recognized as entity references either: + +```````````````````````````````` example +&MadeUpEntity; +. +

    &MadeUpEntity;

    +```````````````````````````````` + + +Entity and numeric character references are recognized in any +context besides code spans or code blocks, including +URLs, [link titles], and [fenced code block][] [info strings]: + +```````````````````````````````` example + +. + +```````````````````````````````` + + +```````````````````````````````` example +[foo](/föö "föö") +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +[foo] + +[foo]: /föö "föö" +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +``` föö +foo +``` +. +
    foo
    +
    +```````````````````````````````` + + +Entity and numeric character references are treated as literal +text in code spans and code blocks: + +```````````````````````````````` example +`föö` +. +

    f&ouml;&ouml;

    +```````````````````````````````` + + +```````````````````````````````` example + föfö +. +
    f&ouml;f&ouml;
    +
    +```````````````````````````````` + + +Entity and numeric character references cannot be used +in place of symbols indicating structure in CommonMark +documents. + +```````````````````````````````` example +*foo* +*foo* +. +

    *foo* +foo

    +```````````````````````````````` + +```````````````````````````````` example +* foo + +* foo +. +

    * foo

    +
      +
    • foo
    • +
    +```````````````````````````````` + +```````````````````````````````` example +foo bar +. +

    foo + +bar

    +```````````````````````````````` + +```````````````````````````````` example + foo +. +

    →foo

    +```````````````````````````````` + + +```````````````````````````````` example +[a](url "tit") +. +

    [a](url "tit")

    +```````````````````````````````` + + + +# Blocks and inlines + +We can think of a document as a sequence of +[blocks](@)---structural elements like paragraphs, block +quotations, lists, headings, rules, and code blocks. Some blocks (like +block quotes and list items) contain other blocks; others (like +headings and paragraphs) contain [inline](@) content---text, +links, emphasized text, images, code spans, and so on. + diff --git a/documents/markdown/commonmark/fenced-code-blocks.md b/documents/markdown/commonmark/fenced-code-blocks.md new file mode 100644 index 0000000..9746fa4 --- /dev/null +++ b/documents/markdown/commonmark/fenced-code-blocks.md @@ -0,0 +1,427 @@ +## Fenced code blocks + + +A [code fence](@) is a sequence +of at least three consecutive backtick characters (`` ` ``) or +tildes (`~`). (Tildes and backticks cannot be mixed.) +A [fenced code block](@) +begins with a code fence, preceded by up to three spaces of indentation. + +The line with the opening code fence may optionally contain some text +following the code fence; this is trimmed of leading and trailing +spaces or tabs and called the [info string](@). If the [info string] comes +after a backtick fence, it must not contain any backtick +characters. (The reason for this restriction is that otherwise +some inline code would be incorrectly interpreted as the +beginning of a fenced code block.) + +The content of the code block consists of all subsequent lines, until +a closing [code fence] of the same type as the code block +began with (backticks or tildes), and with at least as many backticks +or tildes as the opening code fence. If the leading code fence is +preceded by N spaces of indentation, then up to N spaces of indentation are +removed from each line of the content (if present). (If a content line is not +indented, it is preserved unchanged. If it is indented N spaces or less, all +of the indentation is removed.) 
+ +The closing code fence may be preceded by up to three spaces of indentation, and +may be followed only by spaces or tabs, which are ignored. If the end of the +containing block (or document) is reached and no closing code fence +has been found, the code block contains all of the lines after the +opening code fence until the end of the containing block (or +document). (An alternative spec would require backtracking in the +event that a closing code fence is not found. But this makes parsing +much less efficient, and there seems to be no real downside to the +behavior described here.) + +A fenced code block may interrupt a paragraph, and does not require +a blank line either before or after. + +The content of a code fence is treated as literal text, not parsed +as inlines. The first word of the [info string] is typically used to +specify the language of the code sample, and rendered in the `class` +attribute of the `code` tag. However, this spec does not mandate any +particular treatment of the [info string]. + +Here is a simple example with backticks: + +```````````````````````````````` example +``` +< + > +``` +. +
    <
    + >
    +
    +```````````````````````````````` + + +With tildes: + +```````````````````````````````` example +~~~ +< + > +~~~ +. +
    <
    + >
    +
    +```````````````````````````````` + +Fewer than three backticks is not enough: + +```````````````````````````````` example +`` +foo +`` +. +

    foo

    +```````````````````````````````` + +The closing code fence must use the same character as the opening +fence: + +```````````````````````````````` example +``` +aaa +~~~ +``` +. +
    aaa
    +~~~
    +
    +```````````````````````````````` + + +```````````````````````````````` example +~~~ +aaa +``` +~~~ +. +
    aaa
    +```
    +
    +```````````````````````````````` + + +The closing code fence must be at least as long as the opening fence: + +```````````````````````````````` example +```` +aaa +``` +`````` +. +
    aaa
    +```
    +
    +```````````````````````````````` + + +```````````````````````````````` example +~~~~ +aaa +~~~ +~~~~ +. +
    aaa
    +~~~
    +
    +```````````````````````````````` + + +Unclosed code blocks are closed by the end of the document +(or the enclosing [block quote][block quotes] or [list item][list items]): + +```````````````````````````````` example +``` +. +
    +```````````````````````````````` + + +```````````````````````````````` example +````` + +``` +aaa +. +
    
    +```
    +aaa
    +
    +```````````````````````````````` + + +```````````````````````````````` example +> ``` +> aaa + +bbb +. +
    +
    aaa
    +
    +
    +

    bbb

    +```````````````````````````````` + + +A code block can have all empty lines as its content: + +```````````````````````````````` example +``` + + +``` +. +
    
    +  
    +
    +```````````````````````````````` + + +A code block can be empty: + +```````````````````````````````` example +``` +``` +. +
    +```````````````````````````````` + + +Fences can be indented. If the opening fence is indented, +content lines will have equivalent opening indentation removed, +if present: + +```````````````````````````````` example + ``` + aaa +aaa +``` +. +
    aaa
    +aaa
    +
    +```````````````````````````````` + + +```````````````````````````````` example + ``` +aaa + aaa +aaa + ``` +. +
    aaa
    +aaa
    +aaa
    +
    +```````````````````````````````` + + +```````````````````````````````` example + ``` + aaa + aaa + aaa + ``` +. +
    aaa
    + aaa
    +aaa
    +
    +```````````````````````````````` + + +Four spaces of indentation is too many: + +```````````````````````````````` example + ``` + aaa + ``` +. +
    ```
    +aaa
    +```
    +
    +```````````````````````````````` + + +Closing fences may be preceded by up to three spaces of indentation, and their +indentation need not match that of the opening fence: + +```````````````````````````````` example +``` +aaa + ``` +. +
    aaa
    +
    +```````````````````````````````` + + +```````````````````````````````` example + ``` +aaa + ``` +. +
    aaa
    +
    +```````````````````````````````` + + +This is not a closing fence, because it is indented 4 spaces: + +```````````````````````````````` example +``` +aaa + ``` +. +
    aaa
    +    ```
    +
    +```````````````````````````````` + + + +Code fences (opening and closing) cannot contain internal spaces or tabs: + +```````````````````````````````` example +``` ``` +aaa +. +

    +aaa

    +```````````````````````````````` + + +```````````````````````````````` example +~~~~~~ +aaa +~~~ ~~ +. +
    aaa
    +~~~ ~~
    +
    +```````````````````````````````` + + +Fenced code blocks can interrupt paragraphs, and can be followed +directly by paragraphs, without a blank line between: + +```````````````````````````````` example +foo +``` +bar +``` +baz +. +

    foo

    +
    bar
    +
    +

    baz

    +```````````````````````````````` + + +Other blocks can also occur before and after fenced code blocks +without an intervening blank line: + +```````````````````````````````` example +foo +--- +~~~ +bar +~~~ +# baz +. +

    foo

    +
    bar
    +
    +

    baz

    +```````````````````````````````` + + +An [info string] can be provided after the opening code fence. +Although this spec doesn't mandate any particular treatment of +the info string, the first word is typically used to specify +the language of the code block. In HTML output, the language is +normally indicated by adding a class to the `code` element consisting +of `language-` followed by the language name. + +```````````````````````````````` example +```ruby +def foo(x) + return 3 +end +``` +. +
    def foo(x)
    +  return 3
    +end
    +
    +```````````````````````````````` + + +```````````````````````````````` example +~~~~ ruby startline=3 $%@#$ +def foo(x) + return 3 +end +~~~~~~~ +. +
    def foo(x)
    +  return 3
    +end
    +
    +```````````````````````````````` + + +```````````````````````````````` example +````; +```` +. +
    +```````````````````````````````` + + +[Info strings] for backtick code blocks cannot contain backticks: + +```````````````````````````````` example +``` aa ``` +foo +. +

    aa +foo

    +```````````````````````````````` + + +[Info strings] for tilde code blocks can contain backticks and tildes: + +```````````````````````````````` example +~~~ aa ``` ~~~ +foo +~~~ +. +
    foo
    +
    +```````````````````````````````` + + +Closing code fences cannot have [info strings]: + +```````````````````````````````` example +``` +``` aaa +``` +. +
    ``` aaa
    +
    +```````````````````````````````` + + + diff --git a/documents/markdown/commonmark/foo.md b/documents/markdown/commonmark/foo.md new file mode 100644 index 0000000..e1b1a80 --- /dev/null +++ b/documents/markdown/commonmark/foo.md @@ -0,0 +1,34 @@ +## foo + +**** +. +
    +

    foo

    +
    +```````````````````````````````` + + +```````````````````````````````` example +Foo bar +# baz +Bar foo +. +

    Foo bar

    +

    baz

    +

    Bar foo

    +```````````````````````````````` + + +ATX headings can be empty: + +```````````````````````````````` example +## +# +### ### +. +

    +

    +

    +```````````````````````````````` + + diff --git a/documents/markdown/commonmark/hard-line-breaks.md b/documents/markdown/commonmark/hard-line-breaks.md new file mode 100644 index 0000000..e88fdba --- /dev/null +++ b/documents/markdown/commonmark/hard-line-breaks.md @@ -0,0 +1,151 @@ +## Hard line breaks + + +A line ending (not in a code span or HTML tag) that is preceded +by two or more spaces and does not occur at the end of a block +is parsed as a [hard line break](@) (rendered +in HTML as a `
    <br />` tag): + +```````````````````````````````` example +foo +baz +. +

    foo
    +baz

    +```````````````````````````````` + + +For a more visible alternative, a backslash before the +[line ending] may be used instead of two or more spaces: + +```````````````````````````````` example +foo\ +baz +. +

    foo
    +baz

    +```````````````````````````````` + + +More than two spaces can be used: + +```````````````````````````````` example +foo +baz +. +

    foo
    +baz

    +```````````````````````````````` + + +Leading spaces at the beginning of the next line are ignored: + +```````````````````````````````` example +foo + bar +. +

    foo
    +bar

    +```````````````````````````````` + + +```````````````````````````````` example +foo\ + bar +. +

    foo
    +bar

    +```````````````````````````````` + + +Hard line breaks can occur inside emphasis, links, and other constructs +that allow inline content: + +```````````````````````````````` example +*foo +bar* +. +

    foo
    +bar

    +```````````````````````````````` + + +```````````````````````````````` example +*foo\ +bar* +. +

    foo
    +bar

    +```````````````````````````````` + + +Hard line breaks do not occur inside code spans + +```````````````````````````````` example +`code +span` +. +

    code span

    +```````````````````````````````` + + +```````````````````````````````` example +`code\ +span` +. +

    code\ span

    +```````````````````````````````` + + +or HTML tags: + +```````````````````````````````` example + +. +

    +```````````````````````````````` + + +```````````````````````````````` example + +. +

    +```````````````````````````````` + + +Hard line breaks are for separating inline content within a block. +Neither syntax for hard line breaks works at the end of a paragraph or +other block element: + +```````````````````````````````` example +foo\ +. +

    foo\

    +```````````````````````````````` + + +```````````````````````````````` example +foo +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +### foo\ +. +

    foo\

    +```````````````````````````````` + + +```````````````````````````````` example +### foo +. +

    foo

    +```````````````````````````````` + + diff --git a/documents/markdown/commonmark/html-blocks.md b/documents/markdown/commonmark/html-blocks.md new file mode 100644 index 0000000..8e010c9 --- /dev/null +++ b/documents/markdown/commonmark/html-blocks.md @@ -0,0 +1,822 @@ +## HTML blocks + + +An [HTML block](@) is a group of lines that is treated +as raw HTML (and will not be escaped in HTML output). + +There are seven kinds of [HTML block], which can be defined by their +start and end conditions. The block begins with a line that meets a +[start condition](@) (after up to three optional spaces of indentation). +It ends with the first subsequent line that meets a matching +[end condition](@), or the last line of the document, or the last line of +the [container block](#container-blocks) containing the current HTML +block, if no line is encountered that meets the [end condition]. If +the first line meets both the [start condition] and the [end +condition], the block will contain just that line. + +1. **Start condition:** line begins with the string ``, or the end of the line.\ +**End condition:** line contains an end tag +``, ``, ``, or `` (case-insensitive; it +need not match the start tag). + +2. **Start condition:** line begins with the string ``. + +3. **Start condition:** line begins with the string ``. + +4. **Start condition:** line begins with the string ``. + +5. **Start condition:** line begins with the string +``. + +6. **Start condition:** line begins with the string `<` or ``, or +the string `/>`.\ +**End condition:** line is followed by a [blank line]. + +7. **Start condition:** line begins with a complete [open tag] +(with any [tag name] other than `pre`, `script`, +`style`, or `textarea`) or a complete [closing tag], +all on a single line, followed by zero or more spaces and tabs, +followed by the end of the line.\ +**End condition:** line is followed by a [blank line]. 
+ +HTML blocks continue until they are closed by their appropriate +[end condition], or the last line of the document or other [container +block](#container-blocks). This means any HTML **within an HTML +block** that might otherwise be recognised as a start condition will +be ignored by the parser and passed through as-is, without changing +the parser's state. + +For instance, `
    <pre>` within an HTML block started by `<table>` will not affect
    +the parser state; as the HTML block was started by start condition 6, it
    +will end at any blank line. This can be surprising:
    +
    +```````````````````````````````` example
    +
    +
    +**Hello**,
    +
    +_world_.
    +
    +
    +. +
    +
    +**Hello**,
    +

    world. +

    +
    +```````````````````````````````` + +In this case, the HTML block is terminated by the blank line — the `**Hello**` +text remains verbatim — and regular parsing resumes, with a paragraph, +emphasised `world` and inline and block HTML following. + +All types of [HTML blocks] except type 7 may interrupt +a paragraph. Blocks of type 7 cannot interrupt a paragraph. +(This restriction is intended to prevent unwanted interpretation +of long tags inside a wrapped paragraph as starting HTML blocks.) + +Some simple examples follow. Here are some basic HTML blocks +of type 6: + +```````````````````````````````` example + + + + +
    + hi +
    + +okay. +. + + + + +
    + hi +
    +

    okay.

    +```````````````````````````````` + + +```````````````````````````````` example +
    +*foo* +```````````````````````````````` + + +Here we have two HTML blocks with a Markdown paragraph between them: + +```````````````````````````````` example +
    + +*Markdown* + +
    +. +
    +

    Markdown

    +
    +```````````````````````````````` + + +The tag on the first line can be partial, as long +as it is split where there would be whitespace: + +```````````````````````````````` example +
    +
    +. +
    +
    +```````````````````````````````` + + +```````````````````````````````` example +
    +
    +. +
    +
    +```````````````````````````````` + + +An open tag need not be closed: +```````````````````````````````` example +
    +*foo* + +*bar* +. +
    +*foo* +

    bar

    +```````````````````````````````` + + + +A partial tag need not even be completed (garbage +in, garbage out): + +```````````````````````````````` example +
    +. + +```````````````````````````````` + + +```````````````````````````````` example +
    +foo +
    +. +
    +foo +
    +```````````````````````````````` + + +Everything until the next blank line or end of document +gets included in the HTML block. So, in the following +examples, what looks like a Markdown code block or block quote +is actually part of the HTML block, which continues until a blank +line or the end of the document is reached: + +```````````````````````````````` example +
    +``` c +int x = 33; +``` +. +
    +``` c +int x = 33; +``` +```````````````````````````````` + + +```````````````````````````````` example +
    not quoted text +. +
    not quoted text +```````````````````````````````` + + +To start an [HTML block] with a tag that is *not* in the +list of block-level tags in (6), you must put the tag by +itself on the first line (and it must be complete): + +```````````````````````````````` example + +*bar* + +. + +*bar* + +```````````````````````````````` + + +In type 7 blocks, the [tag name] can be anything: + +```````````````````````````````` example + +*bar* + +. + +*bar* + +```````````````````````````````` + + +```````````````````````````````` example + +*bar* + +. + +*bar* + +```````````````````````````````` + + +```````````````````````````````` example + +*bar* +. + +*bar* +```````````````````````````````` + + +These rules are designed to allow us to work with tags that +can function as either block-level or inline-level tags. +The `` tag is a nice example. We can surround content with +`` tags in three different ways. In this case, we get a raw +HTML block, because the `` tag is on a line by itself: + +```````````````````````````````` example + +*foo* + +. + +*foo* + +```````````````````````````````` + + +In this case, we get a raw HTML block that just includes +the `` tag (because it ends with the following blank +line). So the contents get interpreted as CommonMark: + +```````````````````````````````` example + + +*foo* + + +. + +

    foo

    +
    +```````````````````````````````` + + +Finally, in these cases, the `` tags are interpreted +as [raw HTML] *inside* the CommonMark paragraph. (Because +the tag is not on a line by itself, we get inline HTML +rather than an [HTML block].) + +```````````````````````````````` example +*foo* +. +

    foo

    +```````````````````````````````` + +```````````````````````````````` example + +*foo* + +. +

    +foo +

    +```````````````````````````````` + + +HTML tags designed to contain literal content +(`pre`, `script`, `style`, `textarea`), comments, processing instructions, +and declarations are treated somewhat differently. +Instead of ending at the first blank line, these blocks +end at the first line containing a corresponding end tag. +As a result, these blocks can contain blank lines: + +A pre tag (type 1): + +```````````````````````````````` example +
    
    +import Text.HTML.TagSoup
    +
    +main :: IO ()
    +main = print $ parseTags tags
    +
    +okay +. +
    
    +import Text.HTML.TagSoup
    +
    +main :: IO ()
    +main = print $ parseTags tags
    +
    +

    okay

    +```````````````````````````````` + + +A script tag (type 1): + +```````````````````````````````` example + +okay +. + +

    okay

    +```````````````````````````````` + + +A textarea tag (type 1): + +```````````````````````````````` example + +. + +```````````````````````````````` + +A style tag (type 1): + +```````````````````````````````` example + +okay +. + +

    okay

    +```````````````````````````````` + + +If there is no matching end tag, the block will end at the +end of the document (or the enclosing [block quote][block quotes] +or [list item][list items]): + +```````````````````````````````` example + +*foo* +. + +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +*bar* +*baz* +. +*bar* +

    baz

    +```````````````````````````````` + + +Note that anything on the last line after the +end tag will be included in the [HTML block]: + +```````````````````````````````` example +1. *bar* +. +1. *bar* +```````````````````````````````` + + +A comment (type 2): + +```````````````````````````````` example + +okay +. + +

    okay

    +```````````````````````````````` + + + +A processing instruction (type 3): + +```````````````````````````````` example +'; + +?> +okay +. +'; + +?> +

    okay

    +```````````````````````````````` + + +A declaration (type 4): + +```````````````````````````````` example + +. + +```````````````````````````````` + + +CDATA (type 5): + +```````````````````````````````` example + +okay +. + +

    okay

    +```````````````````````````````` + + +The opening tag can be preceded by up to three spaces of indentation, but not +four: + +```````````````````````````````` example + + + +. + +
    <!-- foo -->
    +
    +```````````````````````````````` + + +```````````````````````````````` example +
    + +
    +. +
    +
    <div>
    +
    +```````````````````````````````` + + +An HTML block of types 1--6 can interrupt a paragraph, and need not be +preceded by a blank line. + +```````````````````````````````` example +Foo +
    +bar +
    +. +

    Foo

    +
    +bar +
    +```````````````````````````````` + + +However, a following blank line is needed, except at the end of +a document, and except for blocks of types 1--5, [above][HTML +block]: + +```````````````````````````````` example +
    +bar +
    +*foo* +. +
    +bar +
    +*foo* +```````````````````````````````` + + +HTML blocks of type 7 cannot interrupt a paragraph: + +```````````````````````````````` example +Foo + +baz +. +

    Foo + +baz

    +```````````````````````````````` + + +This rule differs from John Gruber's original Markdown syntax +specification, which says: + +> The only restrictions are that block-level HTML elements — +> e.g. `
    <div>`, `<table>`, `<pre>`, `<p>
    `, etc. — must be separated from +> surrounding content by blank lines, and the start and end tags of the +> block should not be indented with spaces or tabs. + +In some ways Gruber's rule is more restrictive than the one given +here: + +- It requires that an HTML block be preceded by a blank line. +- It does not allow the start tag to be indented. +- It requires a matching end tag, which it also does not allow to + be indented. + +Most Markdown implementations (including some of Gruber's own) do not +respect all of these restrictions. + +There is one respect, however, in which Gruber's rule is more liberal +than the one given here, since it allows blank lines to occur inside +an HTML block. There are two reasons for disallowing them here. +First, it removes the need to parse balanced tags, which is +expensive and can require backtracking from the end of the document +if no matching end tag is found. Second, it provides a very simple +and flexible way of including Markdown content inside HTML tags: +simply separate the Markdown from the HTML using blank lines: + +Compare: + +```````````````````````````````` example +

    + +*Emphasized* text. + +
    +. +
    +

    Emphasized text.

    +
    +```````````````````````````````` + + +```````````````````````````````` example +
    +*Emphasized* text. +
    +. +
    +*Emphasized* text. +
    +```````````````````````````````` + + +Some Markdown implementations have adopted a convention of +interpreting content inside tags as text if the open tag has +the attribute `markdown=1`. The rule given above seems a simpler and +more elegant way of achieving the same expressive power, which is also +much simpler to parse. + +The main potential drawback is that one can no longer paste HTML +blocks into Markdown documents with 100% reliability. However, +*in most cases* this will work fine, because the blank lines in +HTML are usually followed by HTML block tags. For example: + +```````````````````````````````` example +
    + + + + + + + +
    +Hi +
    +. + + + + +
    +Hi +
    +```````````````````````````````` + + +There are problems, however, if the inner tags are indented +*and* separated by spaces, as then they will be interpreted as +an indented code block: + +```````````````````````````````` example + + + + + + + + +
    + Hi +
    +. + + +
    <td>
    +  Hi
    +</td>
    +
    + +
    +```````````````````````````````` + + +Fortunately, blank lines are usually not necessary and can be +deleted. The exception is inside `
    <pre>` tags, but as described
    +[above][HTML blocks], raw HTML blocks starting with `<pre>`
    +*can* contain blank lines.
    +
    diff --git a/documents/markdown/commonmark/images.md b/documents/markdown/commonmark/images.md
    new file mode 100644
    index 0000000..7ef241f
    --- /dev/null
    +++ b/documents/markdown/commonmark/images.md
    @@ -0,0 +1,228 @@
    +## Images
    +
    +
    +Syntax for images is like the syntax for links, with one
    +difference. Instead of [link text], we have an
    +[image description](@).  The rules for this are the
    +same as for [link text], except that (a) an
    +image description starts with `![` rather than `[`, and
    +(b) an image description may contain links.
    +An image description has inline elements
    +as its contents.  When an image is rendered to HTML,
    +this is standardly used as the image's `alt` attribute.
    +
    +```````````````````````````````` example
    +![foo](/url "title")
    +.
    +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +![foo *bar*] + +[foo *bar*]: train.jpg "train & tracks" +. +

    foo bar

    +```````````````````````````````` + + +```````````````````````````````` example +![foo ![bar](/url)](/url2) +. +

    foo bar

    +```````````````````````````````` + + +```````````````````````````````` example +![foo [bar](/url)](/url2) +. +

    foo bar

    +```````````````````````````````` + + +Though this spec is concerned with parsing, not rendering, it is +recommended that in rendering to HTML, only the plain string content +of the [image description] be used. Note that in +the above example, the alt attribute's value is `foo bar`, not `foo +[bar](/url)` or `foo
    <a href="/url">bar</a>`. Only the plain string +content is rendered, without formatting. + +```````````````````````````````` example +![foo *bar*][] + +[foo *bar*]: train.jpg "train & tracks" +. +

    foo bar

    +```````````````````````````````` + + +```````````````````````````````` example +![foo *bar*][foobar] + +[FOOBAR]: train.jpg "train & tracks" +. +

    foo bar

    +```````````````````````````````` + + +```````````````````````````````` example +![foo](train.jpg) +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +My ![foo bar](/path/to/train.jpg "title" ) +. +

    My foo bar

    +```````````````````````````````` + + +```````````````````````````````` example +![foo]() +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +![](/url) +. +

    +```````````````````````````````` + + +Reference-style: + +```````````````````````````````` example +![foo][bar] + +[bar]: /url +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +![foo][bar] + +[BAR]: /url +. +

    foo

    +```````````````````````````````` + + +Collapsed: + +```````````````````````````````` example +![foo][] + +[foo]: /url "title" +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +![*foo* bar][] + +[*foo* bar]: /url "title" +. +

    foo bar

    +```````````````````````````````` + + +The labels are case-insensitive: + +```````````````````````````````` example +![Foo][] + +[foo]: /url "title" +. +

    Foo

    +```````````````````````````````` + + +As with reference links, spaces, tabs, and line endings, are not allowed +between the two sets of brackets: + +```````````````````````````````` example +![foo] +[] + +[foo]: /url "title" +. +

    foo +[]

    +```````````````````````````````` + + +Shortcut: + +```````````````````````````````` example +![foo] + +[foo]: /url "title" +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +![*foo* bar] + +[*foo* bar]: /url "title" +. +

    foo bar

    +```````````````````````````````` + + +Note that link labels cannot contain unescaped brackets: + +```````````````````````````````` example +![[foo]] + +[[foo]]: /url "title" +. +

    ![[foo]]

    +

    [[foo]]: /url "title"

    +```````````````````````````````` + + +The link labels are case-insensitive: + +```````````````````````````````` example +![Foo] + +[foo]: /url "title" +. +

    Foo

    +```````````````````````````````` + + +If you just want a literal `!` followed by bracketed text, you can +backslash-escape the opening `[`: + +```````````````````````````````` example +!\[foo] + +[foo]: /url "title" +. +

    ![foo]

    +```````````````````````````````` + + +If you want a link after a literal `!`, backslash-escape the +`!`: + +```````````````````````````````` example +\![foo] + +[foo]: /url "title" +. +

    !foo

    +```````````````````````````````` + + diff --git a/documents/markdown/commonmark/indented-code-blocks.md b/documents/markdown/commonmark/indented-code-blocks.md new file mode 100644 index 0000000..1ff76d3 --- /dev/null +++ b/documents/markdown/commonmark/indented-code-blocks.md @@ -0,0 +1,201 @@ +## Indented code blocks + + +An [indented code block](@) is composed of one or more +[indented chunks] separated by blank lines. +An [indented chunk](@) is a sequence of non-blank lines, +each preceded by four or more spaces of indentation. The contents of the code +block are the literal contents of the lines, including trailing +[line endings], minus four spaces of indentation. +An indented code block has no [info string]. + +An indented code block cannot interrupt a paragraph, so there must be +a blank line between a paragraph and a following indented code block. +(A blank line is not needed, however, between a code block and a following +paragraph.) + +```````````````````````````````` example + a simple + indented code block +. +
    a simple
    +  indented code block
    +
    +```````````````````````````````` + + +If there is any ambiguity between an interpretation of indentation +as a code block and as indicating that material belongs to a [list +item][list items], the list item interpretation takes precedence: + +```````````````````````````````` example + - foo + + bar +. +
      +
    • +

      foo

      +

      bar

      +
    • +
    +```````````````````````````````` + + +```````````````````````````````` example +1. foo + + - bar +. +
      +
    1. +

      foo

      +
        +
      • bar
      • +
      +
    2. +
    +```````````````````````````````` + + + +The contents of a code block are literal text, and do not get parsed +as Markdown: + +```````````````````````````````` example + + *hi* + + - one +. +
    <a/>
    +*hi*
    +
    +- one
    +
    +```````````````````````````````` + + +Here we have three chunks separated by blank lines: + +```````````````````````````````` example + chunk1 + + chunk2 + + + + chunk3 +. +
    chunk1
    +
    +chunk2
    +
    +
    +
    +chunk3
    +
    +```````````````````````````````` + + +Any initial spaces or tabs beyond four spaces of indentation will be included in +the content, even in interior blank lines: + +```````````````````````````````` example + chunk1 + + chunk2 +. +
    chunk1
    +  
    +  chunk2
    +
    +```````````````````````````````` + + +An indented code block cannot interrupt a paragraph. (This +allows hanging indents and the like.) + +```````````````````````````````` example +Foo + bar + +. +

    Foo +bar

    +```````````````````````````````` + + +However, any non-blank line with fewer than four spaces of indentation ends +the code block immediately. So a paragraph may occur immediately +after indented code: + +```````````````````````````````` example + foo +bar +. +
    foo
    +
    +

    bar

    +```````````````````````````````` + + +And indented code can occur immediately before and after other kinds of +blocks: + +```````````````````````````````` example +# Heading + foo +Heading +------ + foo +---- +. +

    Heading

    +
    foo
    +
    +

    Heading

    +
    foo
    +
    +
    +```````````````````````````````` + + +The first line can be preceded by more than four spaces of indentation: + +```````````````````````````````` example + foo + bar +. +
        foo
    +bar
    +
    +```````````````````````````````` + + +Blank lines preceding or following an indented code block +are not included in it: + +```````````````````````````````` example + + + foo + + +. +
    foo
    +
    +```````````````````````````````` + + +Trailing spaces or tabs are included in the code block's content: + +```````````````````````````````` example + foo +. +
    foo  
    +
    +```````````````````````````````` + + + diff --git a/documents/markdown/commonmark/insecure-characters.md b/documents/markdown/commonmark/insecure-characters.md new file mode 100644 index 0000000..63c742b --- /dev/null +++ b/documents/markdown/commonmark/insecure-characters.md @@ -0,0 +1,7 @@ +## Insecure characters + + +For security reasons, the Unicode character `U+0000` must be replaced +with the REPLACEMENT CHARACTER (`U+FFFD`). + + diff --git a/documents/markdown/commonmark/link-reference-definitions.md b/documents/markdown/commonmark/link-reference-definitions.md new file mode 100644 index 0000000..aeabb69 --- /dev/null +++ b/documents/markdown/commonmark/link-reference-definitions.md @@ -0,0 +1,356 @@ +## Link reference definitions + + +A [link reference definition](@) +consists of a [link label], optionally preceded by up to three spaces of +indentation, followed +by a colon (`:`), optional spaces or tabs (including up to one +[line ending]), a [link destination], +optional spaces or tabs (including up to one +[line ending]), and an optional [link +title], which if it is present must be separated +from the [link destination] by spaces or tabs. +No further character may occur. + +A [link reference definition] +does not correspond to a structural element of a document. Instead, it +defines a label which can be used in [reference links] +and reference-style [images] elsewhere in the document. [Link +reference definitions] can come either before or after the links that use +them. + +```````````````````````````````` example +[foo]: /url "title" + +[foo] +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example + [foo]: + /url + 'the title' + +[foo] +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +[Foo*bar\]]:my_(url) 'title (with parens)' + +[Foo*bar\]] +. +

    Foo*bar]

    +```````````````````````````````` + + +```````````````````````````````` example +[Foo bar]: +<my url> +'title' + +[Foo bar] +. +

    Foo bar

    +```````````````````````````````` + + +The title may extend over multiple lines: + +```````````````````````````````` example +[foo]: /url ' +title +line1 +line2 +' + +[foo] +. +

    foo

    +```````````````````````````````` + + +However, it must not contain a [blank line]: + +```````````````````````````````` example +[foo]: /url 'title + +with blank line' + +[foo] +. +

    [foo]: /url 'title

    +

    with blank line'

    +

    [foo]

    +```````````````````````````````` + + +The title may be omitted: + +```````````````````````````````` example +[foo]: +/url + +[foo] +. +

    foo

    +```````````````````````````````` + + +The link destination must not be omitted: + +```````````````````````````````` example +[foo]: + +[foo] +. +

    [foo]:

    +

    [foo]

    +```````````````````````````````` + + However, an empty link destination may be specified using + angle brackets: + +```````````````````````````````` example +[foo]: <> + +[foo] +. +

    foo

    +```````````````````````````````` + +The title must be separated from the link destination by +spaces or tabs: + +```````````````````````````````` example +[foo]: <bar>(baz) + +[foo] +. +

    [foo]: (baz)

    +

    [foo]

    +```````````````````````````````` + + +Both title and destination can contain backslash escapes +and literal backslashes: + +```````````````````````````````` example +[foo]: /url\bar\*baz "foo\"bar\baz" + +[foo] +. +

    foo

    +```````````````````````````````` + + +A link can come before its corresponding definition: + +```````````````````````````````` example +[foo] + +[foo]: url +. +

    foo

    +```````````````````````````````` + + +If there are several matching definitions, the first one takes +precedence: + +```````````````````````````````` example +[foo] + +[foo]: first +[foo]: second +. +

    foo

    +```````````````````````````````` + + +As noted in the section on [Links], matching of labels is +case-insensitive (see [matches]). + +```````````````````````````````` example +[FOO]: /url + +[Foo] +. +

    Foo

    +```````````````````````````````` + + +```````````````````````````````` example +[ΑΓΩ]: /φου + +[αγω] +. +

    αγω

    +```````````````````````````````` + + +Whether something is a [link reference definition] is +independent of whether the link reference it defines is +used in the document. Thus, for example, the following +document contains just a link reference definition, and +no visible content: + +```````````````````````````````` example +[foo]: /url +. +```````````````````````````````` + + +Here is another one: + +```````````````````````````````` example +[ +foo +]: /url +bar +. +

    bar

    +```````````````````````````````` + + +This is not a link reference definition, because there are +characters other than spaces or tabs after the title: + +```````````````````````````````` example +[foo]: /url "title" ok +. +

    [foo]: /url "title" ok

    +```````````````````````````````` + + +This is a link reference definition, but it has no title: + +```````````````````````````````` example +[foo]: /url +"title" ok +. +

    "title" ok

    +```````````````````````````````` + + +This is not a link reference definition, because it is indented +four spaces: + +```````````````````````````````` example + [foo]: /url "title" + +[foo] +. +
    [foo]: /url "title"
    +
    +

    [foo]

    +```````````````````````````````` + + +This is not a link reference definition, because it occurs inside +a code block: + +```````````````````````````````` example +``` +[foo]: /url +``` + +[foo] +. +
    [foo]: /url
    +
    +

    [foo]

    +```````````````````````````````` + + +A [link reference definition] cannot interrupt a paragraph. + +```````````````````````````````` example +Foo +[bar]: /baz + +[bar] +. +

    Foo +[bar]: /baz

    +

    [bar]

    +```````````````````````````````` + + +However, it can directly follow other block elements, such as headings +and thematic breaks, and it need not be followed by a blank line. + +```````````````````````````````` example +# [Foo] +[foo]: /url +> bar +. +

    Foo

    +
    +

    bar

    +
    +```````````````````````````````` + +```````````````````````````````` example +[foo]: /url +bar +=== +[foo] +. +

    bar

    +

    foo

    +```````````````````````````````` + +```````````````````````````````` example +[foo]: /url +=== +[foo] +. +

    === +foo

    +```````````````````````````````` + + +Several [link reference definitions] +can occur one after another, without intervening blank lines. + +```````````````````````````````` example +[foo]: /foo-url "foo" +[bar]: /bar-url + "bar" +[baz]: /baz-url + +[foo], +[bar], +[baz] +. +

    foo, +bar, +baz

    +```````````````````````````````` + + +[Link reference definitions] can occur +inside block containers, like lists and block quotations. They +affect the entire document, not just the container in which they +are defined: + +```````````````````````````````` example +[foo] + +> [foo]: /url +. +

    foo

    +
    +
    +```````````````````````````````` + + diff --git a/documents/markdown/commonmark/links.md b/documents/markdown/commonmark/links.md new file mode 100644 index 0000000..774fb9a --- /dev/null +++ b/documents/markdown/commonmark/links.md @@ -0,0 +1,1071 @@ +## Links + + +A link contains [link text] (the visible text), a [link destination] +(the URI that is the link destination), and optionally a [link title]. +There are two basic kinds of links in Markdown. In [inline links] the +destination and title are given immediately after the link text. In +[reference links] the destination and title are defined elsewhere in +the document. + +A [link text](@) consists of a sequence of zero or more +inline elements enclosed by square brackets (`[` and `]`). The +following rules apply: + +- Links cannot contain other links, at any level of nesting. If + multiple otherwise valid link definitions appear nested inside each + other, the inner-most definition is used. + +- Brackets are allowed in the [link text] only if (a) they + are backslash-escaped or (b) they appear as a matched pair of brackets, + with an open bracket `[`, a sequence of zero or more inlines, and + a close bracket `]`. + +- Backtick [code spans], [autolinks], and raw [HTML tags] bind more tightly + than the brackets in link text. Thus, for example, + `` [foo`]` `` could not be a link text, since the second `]` + is part of a code span. + +- The brackets in link text bind more tightly than markers for + [emphasis and strong emphasis]. Thus, for example, `*[foo*](url)` is a link. 
+ +A [link destination](@) consists of either + +- a sequence of zero or more characters between an opening `<` and a + closing `>` that contains no line endings or unescaped + `<` or `>` characters, or + +- a nonempty sequence of characters that does not start with `<`, + does not include [ASCII control characters][ASCII control character] + or [space] character, and includes parentheses only if (a) they are + backslash-escaped or (b) they are part of a balanced pair of + unescaped parentheses. + (Implementations may impose limits on parentheses nesting to + avoid performance issues, but at least three levels of nesting + should be supported.) + +A [link title](@) consists of either + +- a sequence of zero or more characters between straight double-quote + characters (`"`), including a `"` character only if it is + backslash-escaped, or + +- a sequence of zero or more characters between straight single-quote + characters (`'`), including a `'` character only if it is + backslash-escaped, or + +- a sequence of zero or more characters between matching parentheses + (`(...)`), including a `(` or `)` character only if it is + backslash-escaped. + +Although [link titles] may span multiple lines, they must not contain +a [blank line]. + +An [inline link](@) consists of a [link text] followed immediately +by a left parenthesis `(`, an optional [link destination], an optional +[link title], and a right parenthesis `)`. +These four components may be separated by spaces, tabs, and up to one line +ending. +If both [link destination] and [link title] are present, they *must* be +separated by spaces, tabs, and up to one line ending. + +The link's text consists of the inlines contained +in the [link text] (excluding the enclosing square brackets). +The link's URI consists of the link destination, excluding enclosing +`<...>` if present, with backslash-escapes in effect as described +above. 
The link's title consists of the link title, excluding its +enclosing delimiters, with backslash-escapes in effect as described +above. + +Here is a simple inline link: + +```````````````````````````````` example +[link](/uri "title") +. +

    link

    +```````````````````````````````` + + +The title, the link text and even +the destination may be omitted: + +```````````````````````````````` example +[link](/uri) +. +

    link

    +```````````````````````````````` + +```````````````````````````````` example +[](./target.md) +. +

    +```````````````````````````````` + + +```````````````````````````````` example +[link]() +. +

    link

    +```````````````````````````````` + + +```````````````````````````````` example +[link](<>) +. +

    link

    +```````````````````````````````` + + +```````````````````````````````` example +[]() +. +

    +```````````````````````````````` + +The destination can only contain spaces if it is +enclosed in angle brackets: + +```````````````````````````````` example +[link](/my uri) +. +

    [link](/my uri)

    +```````````````````````````````` + +```````````````````````````````` example +[link](</my uri>) +. +

    link

    +```````````````````````````````` + +The destination cannot contain line endings, +even if enclosed in angle brackets: + +```````````````````````````````` example +[link](foo +bar) +. +

    [link](foo +bar)

    +```````````````````````````````` + +```````````````````````````````` example +[link](<foo +bar>) +. +

    [link]()

    +```````````````````````````````` + +The destination can contain `)` if it is enclosed +in angle brackets: + +```````````````````````````````` example +[a](<b)c>) +. +

    a

    +```````````````````````````````` + +Angle brackets that enclose links must be unescaped: + +```````````````````````````````` example +[link](<foo\>) +. +

    [link](<foo>)

    +```````````````````````````````` + +These are not links, because the opening angle bracket +is not matched properly: + +```````````````````````````````` example +[a](<b)c +[a](<b)c> +[a](<b>c) +. +

    [a](<b)c +[a](<b)c> +[a](c)

    +```````````````````````````````` + +Parentheses inside the link destination may be escaped: + +```````````````````````````````` example +[link](\(foo\)) +. +

    link

    +```````````````````````````````` + +Any number of parentheses are allowed without escaping, as long as they are +balanced: + +```````````````````````````````` example +[link](foo(and(bar))) +. +

    link

    +```````````````````````````````` + +However, if you have unbalanced parentheses, you need to escape or use the +`<...>` form: + +```````````````````````````````` example +[link](foo(and(bar)) +. +

    [link](foo(and(bar))

    +```````````````````````````````` + + +```````````````````````````````` example +[link](foo\(and\(bar\)) +. +

    link

    +```````````````````````````````` + + +```````````````````````````````` example +[link](<foo(and(bar)>) +. +

    link

    +```````````````````````````````` + + +Parentheses and other symbols can also be escaped, as usual +in Markdown: + +```````````````````````````````` example +[link](foo\)\:) +. +

    link

    +```````````````````````````````` + + +A link can contain fragment identifiers and queries: + +```````````````````````````````` example +[link](#fragment) + +[link](https://example.com#fragment) + +[link](https://example.com?foo=3#frag) +. +

    link

    +

    link

    +

    link

    +```````````````````````````````` + + +Note that a backslash before a non-escapable character is +just a backslash: + +```````````````````````````````` example +[link](foo\bar) +. +

    link

    +```````````````````````````````` + + +URL-escaping should be left alone inside the destination, as all +URL-escaped characters are also valid URL characters. Entity and +numerical character references in the destination will be parsed +into the corresponding Unicode code points, as usual. These may +be optionally URL-escaped when written as HTML, but this spec +does not enforce any particular policy for rendering URLs in +HTML or other formats. Renderers may make different decisions +about how to escape or normalize URLs in the output. + +```````````````````````````````` example +[link](foo%20bä) +. +

    link

    +```````````````````````````````` + + +Note that, because titles can often be parsed as destinations, +if you try to omit the destination and keep the title, you'll +get unexpected results: + +```````````````````````````````` example +[link]("title") +. +

    link

    +```````````````````````````````` + + +Titles may be in single quotes, double quotes, or parentheses: + +```````````````````````````````` example +[link](/url "title") +[link](/url 'title') +[link](/url (title)) +. +

    link +link +link

    +```````````````````````````````` + + +Backslash escapes and entity and numeric character references +may be used in titles: + +```````````````````````````````` example +[link](/url "title \""") +. +

    link

    +```````````````````````````````` + + +Titles must be separated from the link using spaces, tabs, and up to one line +ending. +Other [Unicode whitespace] like non-breaking space doesn't work. + +```````````````````````````````` example +[link](/url "title") +. +

    link

    +```````````````````````````````` + + +Nested balanced quotes are not allowed without escaping: + +```````````````````````````````` example +[link](/url "title "and" title") +. +

    [link](/url "title "and" title")

    +```````````````````````````````` + + +But it is easy to work around this by using a different quote type: + +```````````````````````````````` example +[link](/url 'title "and" title') +. +

    link

    +```````````````````````````````` + + +(Note: `Markdown.pl` did allow double quotes inside a double-quoted +title, and its test suite included a test demonstrating this. +But it is hard to see a good rationale for the extra complexity this +brings, since there are already many ways---backslash escaping, +entity and numeric character references, or using a different +quote type for the enclosing title---to write titles containing +double quotes. `Markdown.pl`'s handling of titles has a number +of other strange features. For example, it allows single-quoted +titles in inline links, but not reference links. And, in +reference links but not inline links, it allows a title to begin +with `"` and end with `)`. `Markdown.pl` 1.0.1 even allows +titles with no closing quotation mark, though 1.0.2b8 does not. +It seems preferable to adopt a simple, rational rule that works +the same way in inline links and link reference definitions.) + +Spaces, tabs, and up to one line ending are allowed around the destination and +title: + +```````````````````````````````` example +[link]( /uri + "title" ) +. +

    link

    +```````````````````````````````` + + +But it is not allowed between the link text and the +following parenthesis: + +```````````````````````````````` example +[link] (/uri) +. +

    [link] (/uri)

    +```````````````````````````````` + + +The link text may contain balanced brackets, but not unbalanced ones, +unless they are escaped: + +```````````````````````````````` example +[link [foo [bar]]](/uri) +. +

    link [foo [bar]]

    +```````````````````````````````` + + +```````````````````````````````` example +[link] bar](/uri) +. +

    [link] bar](/uri)

    +```````````````````````````````` + + +```````````````````````````````` example +[link [bar](/uri) +. +

    [link bar

    +```````````````````````````````` + + +```````````````````````````````` example +[link \[bar](/uri) +. +

    link [bar

    +```````````````````````````````` + + +The link text may contain inline content: + +```````````````````````````````` example +[link *foo **bar** `#`*](/uri) +. +

    link foo bar #

    +```````````````````````````````` + + +```````````````````````````````` example +[![moon](moon.jpg)](/uri) +. +

    moon

    +```````````````````````````````` + + +However, links cannot contain other links, at any level of nesting. + +```````````````````````````````` example +[foo [bar](/uri)](/uri) +. +

    [foo bar](/uri)

    +```````````````````````````````` + + +```````````````````````````````` example +[foo *[bar [baz](/uri)](/uri)*](/uri) +. +

    [foo [bar baz](/uri)](/uri)

    +```````````````````````````````` + + +```````````````````````````````` example +![[[foo](uri1)](uri2)](uri3) +. +

    [foo](uri2)

    +```````````````````````````````` + + +These cases illustrate the precedence of link text grouping over +emphasis grouping: + +```````````````````````````````` example +*[foo*](/uri) +. +

    *foo*

    +```````````````````````````````` + + +```````````````````````````````` example +[foo *bar](baz*) +. +

    foo *bar

    +```````````````````````````````` + + +Note that brackets that *aren't* part of links do not take +precedence: + +```````````````````````````````` example +*foo [bar* baz] +. +

    foo [bar baz]

    +```````````````````````````````` + + +These cases illustrate the precedence of HTML tags, code spans, +and autolinks over link grouping: + +```````````````````````````````` example +[foo <bar attr="](baz)"> +. +

    [foo

    +```````````````````````````````` + + +```````````````````````````````` example +[foo`](/uri)` +. +

    [foo](/uri)

    +```````````````````````````````` + + +```````````````````````````````` example +[foo<https://example.com/?search=](uri)> +. +

    [foohttps://example.com/?search=](uri)

    +```````````````````````````````` + + +There are three kinds of [reference link](@)s: +[full](#full-reference-link), [collapsed](#collapsed-reference-link), +and [shortcut](#shortcut-reference-link). + +A [full reference link](@) +consists of a [link text] immediately followed by a [link label] +that [matches] a [link reference definition] elsewhere in the document. + +A [link label](@) begins with a left bracket (`[`) and ends +with the first right bracket (`]`) that is not backslash-escaped. +Between these brackets there must be at least one character that is not a space, +tab, or line ending. +Unescaped square bracket characters are not allowed inside the +opening and closing square brackets of [link labels]. A link +label can have at most 999 characters inside the square +brackets. + +One label [matches](@) +another just in case their normalized forms are equal. To normalize a +label, strip off the opening and closing brackets, +perform the *Unicode case fold*, strip leading and trailing +spaces, tabs, and line endings, and collapse consecutive internal +spaces, tabs, and line endings to a single space. If there are multiple +matching reference link definitions, the one that comes first in the +document is used. (It is desirable in such cases to emit a warning.) + +The link's URI and title are provided by the matching [link +reference definition]. + +Here is a simple example: + +```````````````````````````````` example +[foo][bar] + +[bar]: /url "title" +. +

    foo

    +```````````````````````````````` + + +The rules for the [link text] are the same as with +[inline links]. Thus: + +The link text may contain balanced brackets, but not unbalanced ones, +unless they are escaped: + +```````````````````````````````` example +[link [foo [bar]]][ref] + +[ref]: /uri +. +

    link [foo [bar]]

    +```````````````````````````````` + + +```````````````````````````````` example +[link \[bar][ref] + +[ref]: /uri +. +

    link [bar

    +```````````````````````````````` + + +The link text may contain inline content: + +```````````````````````````````` example +[link *foo **bar** `#`*][ref] + +[ref]: /uri +. +

    link foo bar #

    +```````````````````````````````` + + +```````````````````````````````` example +[![moon](moon.jpg)][ref] + +[ref]: /uri +. +

    moon

    +```````````````````````````````` + + +However, links cannot contain other links, at any level of nesting. + +```````````````````````````````` example +[foo [bar](/uri)][ref] + +[ref]: /uri +. +

    [foo bar]ref

    +```````````````````````````````` + + +```````````````````````````````` example +[foo *bar [baz][ref]*][ref] + +[ref]: /uri +. +

    [foo bar baz]ref

    +```````````````````````````````` + + +(In the examples above, we have two [shortcut reference links] +instead of one [full reference link].) + +The following cases illustrate the precedence of link text grouping over +emphasis grouping: + +```````````````````````````````` example +*[foo*][ref] + +[ref]: /uri +. +

    *foo*

    +```````````````````````````````` + + +```````````````````````````````` example +[foo *bar][ref]* + +[ref]: /uri +. +

    foo *bar*

    +```````````````````````````````` + + +These cases illustrate the precedence of HTML tags, code spans, +and autolinks over link grouping: + +```````````````````````````````` example +[foo <bar attr="][ref]"> + +[ref]: /uri +. +

    [foo

    +```````````````````````````````` + + +```````````````````````````````` example +[foo`][ref]` + +[ref]: /uri +. +

    [foo][ref]

    +```````````````````````````````` + + +```````````````````````````````` example +[foo<https://example.com/?search=][ref]> + +[ref]: /uri +. +

    [foohttps://example.com/?search=][ref]

    +```````````````````````````````` + + +Matching is case-insensitive: + +```````````````````````````````` example +[foo][BaR] + +[bar]: /url "title" +. +

    foo

    +```````````````````````````````` + + +Unicode case fold is used: + +```````````````````````````````` example +[ẞ] + +[SS]: /url +. +

    +```````````````````````````````` + + +Consecutive internal spaces, tabs, and line endings are treated as one space for +purposes of determining matching: + +```````````````````````````````` example +[Foo + bar]: /url + +[Baz][Foo bar] +. +

    Baz

    +```````````````````````````````` + + +No spaces, tabs, or line endings are allowed between the [link text] and the +[link label]: + +```````````````````````````````` example +[foo] [bar] + +[bar]: /url "title" +. +

    [foo] bar

    +```````````````````````````````` + + +```````````````````````````````` example +[foo] +[bar] + +[bar]: /url "title" +. +

    [foo] +bar

    +```````````````````````````````` + + +This is a departure from John Gruber's original Markdown syntax +description, which explicitly allows whitespace between the link +text and the link label. It brings reference links in line with +[inline links], which (according to both original Markdown and +this spec) cannot have whitespace after the link text. More +importantly, it prevents inadvertent capture of consecutive +[shortcut reference links]. If whitespace is allowed between the +link text and the link label, then in the following we will have +a single reference link, not two shortcut reference links, as +intended: + +``` markdown +[foo] +[bar] + +[foo]: /url1 +[bar]: /url2 +``` + +(Note that [shortcut reference links] were introduced by Gruber +himself in a beta version of `Markdown.pl`, but never included +in the official syntax description. Without shortcut reference +links, it is harmless to allow space between the link text and +link label; but once shortcut references are introduced, it is +too dangerous to allow this, as it frequently leads to +unintended results.) + +When there are multiple matching [link reference definitions], +the first is used: + +```````````````````````````````` example +[foo]: /url1 + +[foo]: /url2 + +[bar][foo] +. +

    bar

    +```````````````````````````````` + + +Note that matching is performed on normalized strings, not parsed +inline content. So the following does not match, even though the +labels define equivalent inline content: + +```````````````````````````````` example +[bar][foo\!] + +[foo!]: /url +. +

    [bar][foo!]

    +```````````````````````````````` + + +[Link labels] cannot contain brackets, unless they are +backslash-escaped: + +```````````````````````````````` example +[foo][ref[] + +[ref[]: /uri +. +

    [foo][ref[]

    +

    [ref[]: /uri

    +```````````````````````````````` + + +```````````````````````````````` example +[foo][ref[bar]] + +[ref[bar]]: /uri +. +

    [foo][ref[bar]]

    +

    [ref[bar]]: /uri

    +```````````````````````````````` + + +```````````````````````````````` example +[[[foo]]] + +[[[foo]]]: /url +. +

    [[[foo]]]

    +

    [[[foo]]]: /url

    +```````````````````````````````` + + +```````````````````````````````` example +[foo][ref\[] + +[ref\[]: /uri +. +

    foo

    +```````````````````````````````` + + +Note that in this example `]` is not backslash-escaped: + +```````````````````````````````` example +[bar\\]: /uri + +[bar\\] +. +

    bar\

    +```````````````````````````````` + + +A [link label] must contain at least one character that is not a space, tab, or +line ending: + +```````````````````````````````` example +[] + +[]: /uri +. +

    []

    +

    []: /uri

    +```````````````````````````````` + + +```````````````````````````````` example +[ + ] + +[ + ]: /uri +. +

    [ +]

    +

    [ +]: /uri

    +```````````````````````````````` + + +A [collapsed reference link](@) +consists of a [link label] that [matches] a +[link reference definition] elsewhere in the +document, followed by the string `[]`. +The contents of the link label are parsed as inlines, +which are used as the link's text. The link's URI and title are +provided by the matching reference link definition. Thus, +`[foo][]` is equivalent to `[foo][foo]`. + +```````````````````````````````` example +[foo][] + +[foo]: /url "title" +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example +[*foo* bar][] + +[*foo* bar]: /url "title" +. +

    foo bar

    +```````````````````````````````` + + +The link labels are case-insensitive: + +```````````````````````````````` example +[Foo][] + +[foo]: /url "title" +. +

<p><a href="/url" title="title">Foo</a></p>

    +```````````````````````````````` + + + +As with full reference links, spaces, tabs, or line endings are not +allowed between the two sets of brackets: + +```````````````````````````````` example +[foo] +[] + +[foo]: /url "title" +. +

<p><a href="/url" title="title">foo</a> +[]</p>

    +```````````````````````````````` + + +A [shortcut reference link](@) +consists of a [link label] that [matches] a +[link reference definition] elsewhere in the +document and is not followed by `[]` or a link label. +The contents of the link label are parsed as inlines, +which are used as the link's text. The link's URI and title +are provided by the matching link reference definition. +Thus, `[foo]` is equivalent to `[foo][]`. + +```````````````````````````````` example +[foo] + +[foo]: /url "title" +. +

<p><a href="/url" title="title">foo</a></p>

    +```````````````````````````````` + + +```````````````````````````````` example +[*foo* bar] + +[*foo* bar]: /url "title" +. +

<p><a href="/url" title="title"><em>foo</em> bar</a></p>

    +```````````````````````````````` + + +```````````````````````````````` example +[[*foo* bar]] + +[*foo* bar]: /url "title" +. +

<p>[<a href="/url" title="title"><em>foo</em> bar</a>]</p>

    +```````````````````````````````` + + +```````````````````````````````` example +[[bar [foo] + +[foo]: /url +. +

<p>[[bar <a href="/url">foo</a></p>

    +```````````````````````````````` + + +The link labels are case-insensitive: + +```````````````````````````````` example +[Foo] + +[foo]: /url "title" +. +

<p><a href="/url" title="title">Foo</a></p>

    +```````````````````````````````` + + +A space after the link text should be preserved: + +```````````````````````````````` example +[foo] bar + +[foo]: /url +. +

<p><a href="/url">foo</a> bar</p>

    +```````````````````````````````` + + +If you just want bracketed text, you can backslash-escape the +opening bracket to avoid links: + +```````````````````````````````` example +\[foo] + +[foo]: /url "title" +. +

    [foo]

    +```````````````````````````````` + + +Note that this is a link, because a link label ends with the first +following closing bracket: + +```````````````````````````````` example +[foo*]: /url + +*[foo*] +. +

<p>*<a href="/url">foo*</a></p>

    +```````````````````````````````` + + +Full and collapsed references take precedence over shortcut +references: + +```````````````````````````````` example +[foo][bar] + +[foo]: /url1 +[bar]: /url2 +. +

<p><a href="/url2">foo</a></p>

    +```````````````````````````````` + +```````````````````````````````` example +[foo][] + +[foo]: /url1 +. +

<p><a href="/url1">foo</a></p>

    +```````````````````````````````` + +Inline links also take precedence: + +```````````````````````````````` example +[foo]() + +[foo]: /url1 +. +

<p><a href="">foo</a></p>

    +```````````````````````````````` + +```````````````````````````````` example +[foo](not a link) + +[foo]: /url1 +. +

<p><a href="/url1">foo</a>(not a link)</p>

    +```````````````````````````````` + +In the following case `[bar][baz]` is parsed as a reference, +`[foo]` as normal text: + +```````````````````````````````` example +[foo][bar][baz] + +[baz]: /url +. +

<p>[foo]<a href="/url">bar</a></p>

    +```````````````````````````````` + + +Here, though, `[foo][bar]` is parsed as a reference, since +`[bar]` is defined: + +```````````````````````````````` example +[foo][bar][baz] + +[baz]: /url1 +[bar]: /url2 +. +

<p><a href="/url2">foo</a><a href="/url1">baz</a></p>

    +```````````````````````````````` + + +Here `[foo]` is not parsed as a shortcut reference, because it +is followed by a link label (even though `[bar]` is not defined): + +```````````````````````````````` example +[foo][bar][baz] + +[baz]: /url1 +[foo]: /url2 +. +

<p>[foo]<a href="/url1">bar</a></p>

    +```````````````````````````````` + + + diff --git a/documents/markdown/commonmark/list-items.md b/documents/markdown/commonmark/list-items.md new file mode 100644 index 0000000..2b419c1 --- /dev/null +++ b/documents/markdown/commonmark/list-items.md @@ -0,0 +1,1120 @@ +## List items + + +A [list marker](@) is a +[bullet list marker] or an [ordered list marker]. + +A [bullet list marker](@) +is a `-`, `+`, or `*` character. + +An [ordered list marker](@) +is a sequence of 1--9 arabic digits (`0-9`), followed by either a +`.` character or a `)` character. (The reason for the length +limit is that with 10 digits we start seeing integer overflows +in some browsers.) + +The following rules define [list items]: + +1. **Basic case.** If a sequence of lines *Ls* constitute a sequence of + blocks *Bs* starting with a character other than a space or tab, and *M* is + a list marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces of indentation, + then the result of prepending *M* and the following spaces to the first line + of *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a + list item with *Bs* as its contents. The type of the list item + (bullet or ordered) is determined by the type of its list marker. + If the list item is ordered, then it is also assigned a start + number, based on the ordered list marker. + + Exceptions: + + 1. When the first list item in a [list] interrupts + a paragraph---that is, when it starts on a line that would + otherwise count as [paragraph continuation text]---then (a) + the lines *Ls* must not begin with a blank line, and (b) if + the list item is ordered, the start number must be 1. + 2. If any line is a [thematic break][thematic breaks] then + that line is not a list item. + +For example, let *Ls* be the lines + +```````````````````````````````` example +A paragraph +with two lines. + + indented code + +> A block quote. +. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +```````````````````````````````` + + +And let *M* be the marker `1.`, and *N* = 2. Then rule #1 says +that the following is an ordered list item with start number 1, +and the same contents as *Ls*: + +```````````````````````````````` example +1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
      +
    1. +

      A paragraph +with two lines.

      +
      indented code
      +
      +
      +

      A block quote.

      +
      +
    2. +
    +```````````````````````````````` + + +The most important thing to notice is that the position of +the text after the list marker determines how much indentation +is needed in subsequent blocks in the list item. If the list +marker takes up two spaces of indentation, and there are three spaces between +the list marker and the next character other than a space or tab, then blocks +must be indented five spaces in order to fall under the list +item. + +Here are some examples showing how far content must be indented to be +put under the list item: + +```````````````````````````````` example +- one + + two +. +
      +
    • one
    • +
    +

    two

    +```````````````````````````````` + + +```````````````````````````````` example +- one + + two +. +
      +
    • +

      one

      +

      two

      +
    • +
    +```````````````````````````````` + + +```````````````````````````````` example + - one + + two +. +
      +
    • one
    • +
    +
     two
    +
    +```````````````````````````````` + + +```````````````````````````````` example + - one + + two +. +
      +
    • +

      one

      +

      two

      +
    • +
    +```````````````````````````````` + + +It is tempting to think of this in terms of columns: the continuation +blocks must be indented at least to the column of the first character other than +a space or tab after the list marker. However, that is not quite right. +The spaces of indentation after the list marker determine how much relative +indentation is needed. Which column this indentation reaches will depend on +how the list item is embedded in other constructions, as shown by +this example: + +```````````````````````````````` example + > > 1. one +>> +>> two +. +
    +
    +
      +
    1. +

      one

      +

      two

      +
    2. +
    +
    +
    +```````````````````````````````` + + +Here `two` occurs in the same column as the list marker `1.`, +but is actually contained in the list item, because there is +sufficient indentation after the last containing blockquote marker. + +The converse is also possible. In the following example, the word `two` +occurs far to the right of the initial text of the list item, `one`, but +it is not considered part of the list item, because it is not indented +far enough past the blockquote marker: + +```````````````````````````````` example +>>- one +>> + > > two +. +
    +
    +
      +
    • one
    • +
    +

    two

    +
    +
    +```````````````````````````````` + + +Note that at least one space or tab is needed between the list marker and +any following content, so these are not list items: + +```````````````````````````````` example +-one + +2.two +. +

    -one

    +

    2.two

    +```````````````````````````````` + + +A list item may contain blocks that are separated by more than +one blank line. + +```````````````````````````````` example +- foo + + + bar +. +
      +
    • +

      foo

      +

      bar

      +
    • +
    +```````````````````````````````` + + +A list item may contain any kind of block: + +```````````````````````````````` example +1. foo + + ``` + bar + ``` + + baz + + > bam +. +
      +
    1. +

      foo

      +
      bar
      +
      +

      baz

      +
      +

      bam

      +
      +
    2. +
    +```````````````````````````````` + + +A list item that contains an indented code block will preserve +empty lines within the code block verbatim. + +```````````````````````````````` example +- Foo + + bar + + + baz +. +
      +
    • +

      Foo

      +
      bar
      +
      +
      +baz
      +
      +
    • +
    +```````````````````````````````` + +Note that ordered list start numbers must be nine digits or less: + +```````````````````````````````` example +123456789. ok +. +
<ol start="123456789"> +<li>ok</li> +</ol>
    +```````````````````````````````` + + +```````````````````````````````` example +1234567890. not ok +. +

    1234567890. not ok

    +```````````````````````````````` + + +A start number may begin with 0s: + +```````````````````````````````` example +0. ok +. +
<ol start="0"> +<li>ok</li> +</ol>
    +```````````````````````````````` + + +```````````````````````````````` example +003. ok +. +
<ol start="3"> +<li>ok</li> +</ol>
    +```````````````````````````````` + + +A start number must not be negative: + +```````````````````````````````` example +-1. not ok +. +

    -1. not ok

    +```````````````````````````````` + + + +2. **Item starting with indented code.** If a sequence of lines *Ls* + constitute a sequence of blocks *Bs* starting with an indented code + block, and *M* is a list marker of width *W* followed by + one space of indentation, then the result of prepending *M* and the + following space to the first line of *Ls*, and indenting subsequent lines + of *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents. + If a line is empty, then it need not be indented. The type of the + list item (bullet or ordered) is determined by the type of its list + marker. If the list item is ordered, then it is also assigned a + start number, based on the ordered list marker. + +An indented code block will have to be preceded by four spaces of indentation +beyond the edge of the region where text will be included in the list item. +In the following case that is 6 spaces: + +```````````````````````````````` example +- foo + + bar +. +
      +
    • +

      foo

      +
      bar
      +
      +
    • +
    +```````````````````````````````` + + +And in this case it is 11 spaces: + +```````````````````````````````` example + 10. foo + + bar +. +
<ol start="10"> +<li> +<p>foo</p> +<pre><code>bar +</code></pre> +</li> +</ol>
    +```````````````````````````````` + + +If the *first* block in the list item is an indented code block, +then by rule #2, the contents must be preceded by *one* space of indentation +after the list marker: + +```````````````````````````````` example + indented code + +paragraph + + more code +. +
    indented code
    +
    +

    paragraph

    +
    more code
    +
    +```````````````````````````````` + + +```````````````````````````````` example +1. indented code + + paragraph + + more code +. +
      +
    1. +
      indented code
      +
      +

      paragraph

      +
      more code
      +
      +
    2. +
    +```````````````````````````````` + + +Note that an additional space of indentation is interpreted as space +inside the code block: + +```````````````````````````````` example +1. indented code + + paragraph + + more code +. +
      +
    1. +
       indented code
      +
      +

      paragraph

      +
      more code
      +
      +
    2. +
    +```````````````````````````````` + + +Note that rules #1 and #2 only apply to two cases: (a) cases +in which the lines to be included in a list item begin with a +character other than a space or tab, and (b) cases in which +they begin with an indented code +block. In a case like the following, where the first block begins with +three spaces of indentation, the rules do not allow us to form a list item by +indenting the whole thing and prepending a list marker: + +```````````````````````````````` example + foo + +bar +. +

    foo

    +

    bar

    +```````````````````````````````` + + +```````````````````````````````` example +- foo + + bar +. +
      +
    • foo
    • +
    +

    bar

    +```````````````````````````````` + + +This is not a significant restriction, because when a block is preceded by up to +three spaces of indentation, the indentation can always be removed without +a change in interpretation, allowing rule #1 to be applied. So, in +the above case: + +```````````````````````````````` example +- foo + + bar +. +
      +
    • +

      foo

      +

      bar

      +
    • +
    +```````````````````````````````` + + +3. **Item starting with a blank line.** If a sequence of lines *Ls* + starting with a single [blank line] constitute a (possibly empty) + sequence of blocks *Bs*, and *M* is a list marker of width *W*, + then the result of prepending *M* to the first line of *Ls*, and + preceding subsequent lines of *Ls* by *W + 1* spaces of indentation, is a + list item with *Bs* as its contents. + If a line is empty, then it need not be indented. The type of the + list item (bullet or ordered) is determined by the type of its list + marker. If the list item is ordered, then it is also assigned a + start number, based on the ordered list marker. + +Here are some list items that start with a blank line but are not empty: + +```````````````````````````````` example +- + foo +- + ``` + bar + ``` +- + baz +. +
      +
    • foo
    • +
    • +
      bar
      +
      +
    • +
    • +
      baz
      +
      +
    • +
    +```````````````````````````````` + +When the list item starts with a blank line, the number of spaces +following the list marker doesn't change the required indentation: + +```````````````````````````````` example +- + foo +. +
      +
    • foo
    • +
    +```````````````````````````````` + + +A list item can begin with at most one blank line. +In the following example, `foo` is not part of the list +item: + +```````````````````````````````` example +- + + foo +. +
      +
    • +
    +

    foo

    +```````````````````````````````` + + +Here is an empty bullet list item: + +```````````````````````````````` example +- foo +- +- bar +. +
      +
    • foo
    • +
    • +
    • bar
    • +
    +```````````````````````````````` + + +It does not matter whether there are spaces or tabs following the [list marker]: + +```````````````````````````````` example +- foo +- +- bar +. +
      +
    • foo
    • +
    • +
    • bar
    • +
    +```````````````````````````````` + + +Here is an empty ordered list item: + +```````````````````````````````` example +1. foo +2. +3. bar +. +
      +
    1. foo
    2. +
    3. +
    4. bar
    5. +
    +```````````````````````````````` + + +A list may start or end with an empty list item: + +```````````````````````````````` example +* +. +
      +
    • +
    +```````````````````````````````` + +However, an empty list item cannot interrupt a paragraph: + +```````````````````````````````` example +foo +* + +foo +1. +. +

    foo +*

    +

    foo +1.

    +```````````````````````````````` + + +4. **Indentation.** If a sequence of lines *Ls* constitutes a list item + according to rule #1, #2, or #3, then the result of preceding each line + of *Ls* by up to three spaces of indentation (the same for each line) also + constitutes a list item with the same contents and attributes. If a line is + empty, then it need not be indented. + +Indented one space: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
      +
    1. +

      A paragraph +with two lines.

      +
      indented code
      +
      +
      +

      A block quote.

      +
      +
    2. +
    +```````````````````````````````` + + +Indented two spaces: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
      +
    1. +

      A paragraph +with two lines.

      +
      indented code
      +
      +
      +

      A block quote.

      +
      +
    2. +
    +```````````````````````````````` + + +Indented three spaces: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
      +
    1. +

      A paragraph +with two lines.

      +
      indented code
      +
      +
      +

      A block quote.

      +
      +
    2. +
    +```````````````````````````````` + + +Four spaces indent gives a code block: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    1.  A paragraph
    +    with two lines.
    +
    +        indented code
    +
    +    > A block quote.
    +
    +```````````````````````````````` + + + +5. **Laziness.** If a string of lines *Ls* constitute a [list + item](#list-items) with contents *Bs*, then the result of deleting + some or all of the indentation from one or more lines in which the + next character other than a space or tab after the indentation is + [paragraph continuation text] is a + list item with the same contents and attributes. The unindented + lines are called + [lazy continuation line](@)s. + +Here is an example with [lazy continuation lines]: + +```````````````````````````````` example + 1. A paragraph +with two lines. + + indented code + + > A block quote. +. +
      +
    1. +

      A paragraph +with two lines.

      +
      indented code
      +
      +
      +

      A block quote.

      +
      +
    2. +
    +```````````````````````````````` + + +Indentation can be partially deleted: + +```````````````````````````````` example + 1. A paragraph + with two lines. +. +
      +
    1. A paragraph +with two lines.
    2. +
    +```````````````````````````````` + + +These examples show how laziness can work in nested structures: + +```````````````````````````````` example +> 1. > Blockquote +continued here. +. +
    +
      +
    1. +
      +

      Blockquote +continued here.

      +
      +
    2. +
    +
    +```````````````````````````````` + + +```````````````````````````````` example +> 1. > Blockquote +> continued here. +. +
    +
      +
    1. +
      +

      Blockquote +continued here.

      +
      +
    2. +
    +
    +```````````````````````````````` + + + +6. **That's all.** Nothing that is not counted as a list item by rules + #1--5 counts as a [list item](#list-items). + +The rules for sublists follow from the general rules +[above][List items]. A sublist must be indented the same number +of spaces of indentation a paragraph would need to be in order to be included +in the list item. + +So, in this case we need two spaces indent: + +```````````````````````````````` example +- foo + - bar + - baz + - boo +. +
      +
    • foo +
        +
      • bar +
          +
        • baz +
            +
          • boo
          • +
          +
        • +
        +
      • +
      +
    • +
    +```````````````````````````````` + + +One is not enough: + +```````````````````````````````` example +- foo + - bar + - baz + - boo +. +
      +
    • foo
    • +
    • bar
    • +
    • baz
    • +
    • boo
    • +
    +```````````````````````````````` + + +Here we need four, because the list marker is wider: + +```````````````````````````````` example +10) foo + - bar +. +
<ol start="10"> +<li>foo +<ul> +<li>bar</li> +</ul> +</li> +</ol>
    +```````````````````````````````` + + +Three is not enough: + +```````````````````````````````` example +10) foo + - bar +. +
<ol start="10"> +<li>foo</li> +</ol> +<ul> +<li>bar</li> +</ul>
    +```````````````````````````````` + + +A list may be the first block in a list item: + +```````````````````````````````` example +- - foo +. +
      +
    • +
        +
      • foo
      • +
      +
    • +
    +```````````````````````````````` + + +```````````````````````````````` example +1. - 2. foo +. +
      +
    1. +
        +
      • +
          +
        1. foo
        2. +
        +
      • +
      +
    2. +
    +```````````````````````````````` + + +A list item can contain a heading: + +```````````````````````````````` example +- # Foo +- Bar + --- + baz +. +
      +
    • +

      Foo

      +
    • +
    • +

      Bar

      +baz
    • +
    +```````````````````````````````` + + +### Motivation + +John Gruber's Markdown spec says the following about list items: + +1. "List markers typically start at the left margin, but may be indented + by up to three spaces. List markers must be followed by one or more + spaces or a tab." + +2. "To make lists look nice, you can wrap items with hanging indents.... + But if you don't want to, you don't have to." + +3. "List items may consist of multiple paragraphs. Each subsequent + paragraph in a list item must be indented by either 4 spaces or one + tab." + +4. "It looks nice if you indent every line of the subsequent paragraphs, + but here again, Markdown will allow you to be lazy." + +5. "To put a blockquote within a list item, the blockquote's `>` + delimiters need to be indented." + +6. "To put a code block within a list item, the code block needs to be + indented twice — 8 spaces or two tabs." + +These rules specify that a paragraph under a list item must be indented +four spaces (presumably, from the left margin, rather than the start of +the list marker, but this is not said), and that code under a list item +must be indented eight spaces instead of the usual four. They also say +that a block quote must be indented, but not by how much; however, the +example given has four spaces indentation. Although nothing is said +about other kinds of block-level content, it is certainly reasonable to +infer that *all* block elements under a list item, including other +lists, must be indented four spaces. This principle has been called the +*four-space rule*. + +The four-space rule is clear and principled, and if the reference +implementation `Markdown.pl` had followed it, it probably would have +become the standard. However, `Markdown.pl` allowed paragraphs and +sublists to start with only two spaces indentation, at least on the +outer level. 
Worse, its behavior was inconsistent: a sublist of an +outer-level list needed two spaces indentation, but a sublist of this +sublist needed three spaces. It is not surprising, then, that different +implementations of Markdown have developed very different rules for +determining what comes under a list item. (Pandoc and python-Markdown, +for example, stuck with Gruber's syntax description and the four-space +rule, while discount, redcarpet, marked, PHP Markdown, and others +followed `Markdown.pl`'s behavior more closely.) + +Unfortunately, given the divergences between implementations, there +is no way to give a spec for list items that will be guaranteed not +to break any existing documents. However, the spec given here should +correctly handle lists formatted with either the four-space rule or +the more forgiving `Markdown.pl` behavior, provided they are laid out +in a way that is natural for a human to read. + +The strategy here is to let the width and indentation of the list marker +determine the indentation necessary for blocks to fall under the list +item, rather than having a fixed and arbitrary number. The writer can +think of the body of the list item as a unit which gets indented to the +right enough to fit the list marker (and any indentation on the list +marker). (The laziness rule, #5, then allows continuation lines to be +unindented if needed.) + +This rule is superior, we claim, to any rule requiring a fixed level of +indentation from the margin. The four-space rule is clear but +unnatural. It is quite unintuitive that + +``` markdown +- foo + + bar + + - baz +``` + +should be parsed as two lists with an intervening paragraph, + +``` html +
      +
    • foo
    • +
    +

    bar

    +
      +
    • baz
    • +
    +``` + +as the four-space rule demands, rather than a single list, + +``` html +
      +
    • +

      foo

      +

      bar

      +
        +
      • baz
      • +
      +
    • +
    +``` + +The choice of four spaces is arbitrary. It can be learned, but it is +not likely to be guessed, and it trips up beginners regularly. + +Would it help to adopt a two-space rule? The problem is that such +a rule, together with the rule allowing up to three spaces of indentation for +the initial list marker, allows text that is indented *less than* the +original list marker to be included in the list item. For example, +`Markdown.pl` parses + +``` markdown + - one + + two +``` + +as a single list item, with `two` a continuation paragraph: + +``` html +
      +
    • +

      one

      +

      two

      +
    • +
    +``` + +and similarly + +``` markdown +> - one +> +> two +``` + +as + +``` html +
    +
      +
    • +

      one

      +

      two

      +
    • +
    +
    +``` + +This is extremely unintuitive. + +Rather than requiring a fixed indent from the margin, we could require +a fixed indent (say, two spaces, or even one space) from the list marker (which +may itself be indented). This proposal would remove the last anomaly +discussed. Unlike the spec presented above, it would count the following +as a list item with a subparagraph, even though the paragraph `bar` +is not indented as far as the first paragraph `foo`: + +``` markdown + 10. foo + + bar +``` + +Arguably this text does read like a list item with `bar` as a subparagraph, +which may count in favor of the proposal. However, on this proposal indented +code would have to be indented six spaces after the list marker. And this +would break a lot of existing Markdown, which has the pattern: + +``` markdown +1. foo + + indented code +``` + +where the code is indented eight spaces. The spec above, by contrast, will +parse this text as expected, since the code block's indentation is measured +from the beginning of `foo`. + +The one case that needs special treatment is a list item that *starts* +with indented code. How much indentation is required in that case, since +we don't have a "first paragraph" to measure from? Rule #2 simply stipulates +that in such cases, we require one space indentation from the list marker +(and then the normal four spaces for the indented code). This will match the +four-space rule in cases where the list marker plus its initial indentation +takes four spaces (a common case), but diverge in other cases. + diff --git a/documents/markdown/commonmark/lists.md b/documents/markdown/commonmark/lists.md new file mode 100644 index 0000000..fee2fc5 --- /dev/null +++ b/documents/markdown/commonmark/lists.md @@ -0,0 +1,650 @@ +## Lists + + +A [list](@) is a sequence of one or more +list items [of the same type]. The list items +may be separated by any number of blank lines. 
+ +Two list items are [of the same type](@) +if they begin with a [list marker] of the same type. +Two list markers are of the +same type if (a) they are bullet list markers using the same character +(`-`, `+`, or `*`) or (b) they are ordered list numbers with the same +delimiter (either `.` or `)`). + +A list is an [ordered list](@) +if its constituent list items begin with +[ordered list markers], and a +[bullet list](@) if its constituent list +items begin with [bullet list markers]. + +The [start number](@) +of an [ordered list] is determined by the list number of +its initial list item. The numbers of subsequent list items are +disregarded. + +A list is [loose](@) if any of its constituent +list items are separated by blank lines, or if any of its constituent +list items directly contain two block-level elements with a blank line +between them. Otherwise a list is [tight](@). +(The difference in HTML output is that paragraphs in a loose list are +wrapped in `<p>` tags, while paragraphs in a tight list are not.) + +Changing the bullet or ordered list delimiter starts a new list: + +```````````````````````````````` example +- foo +- bar ++ baz +. +

      +
    • foo
    • +
    • bar
    • +
    +
      +
    • baz
    • +
    +```````````````````````````````` + + +```````````````````````````````` example +1. foo +2. bar +3) baz +. +
      +
    1. foo
    2. +
    3. bar
    4. +
    +
      +
    1. baz
    2. +
    +```````````````````````````````` + + +In CommonMark, a list can interrupt a paragraph. That is, +no blank line is needed to separate a paragraph from a following +list: + +```````````````````````````````` example +Foo +- bar +- baz +. +

    Foo

    +
      +
    • bar
    • +
    • baz
    • +
    +```````````````````````````````` + +`Markdown.pl` does not allow this, through fear of triggering a list +via a numeral in a hard-wrapped line: + +``` markdown +The number of windows in my house is +14. The number of doors is 6. +``` + +Oddly, though, `Markdown.pl` *does* allow a blockquote to +interrupt a paragraph, even though the same considerations might +apply. + +In CommonMark, we do allow lists to interrupt paragraphs, for +two reasons. First, it is natural and not uncommon for people +to start lists without blank lines: + +``` markdown +I need to buy +- new shoes +- a coat +- a plane ticket +``` + +Second, we are attracted to a + +> [principle of uniformity](@): +> if a chunk of text has a certain +> meaning, it will continue to have the same meaning when put into a +> container block (such as a list item or blockquote). + +(Indeed, the spec for [list items] and [block quotes] presupposes +this principle.) This principle implies that if + +``` markdown + * I need to buy + - new shoes + - a coat + - a plane ticket +``` + +is a list item containing a paragraph followed by a nested sublist, +as all Markdown implementations agree it is (though the paragraph +may be rendered without `<p>` tags, since the list is "tight"), +then + +``` markdown +I need to buy +- new shoes +- a coat +- a plane ticket +``` + +by itself should be a paragraph followed by a nested sublist. + +Since it is well established Markdown practice to allow lists to +interrupt paragraphs inside list items, the [principle of +uniformity] requires us to allow this outside list items as +well. ([reStructuredText](https://docutils.sourceforge.net/rst.html) +takes a different approach, requiring blank lines before lists +even inside other list items.) + +In order to solve the problem of unwanted lists in paragraphs with +hard-wrapped numerals, we allow only lists starting with `1` to +interrupt paragraphs. Thus, + +```````````````````````````````` example +The number of windows in my house is +14. The number of doors is 6. +. +

    The number of windows in my house is +14. The number of doors is 6.

    +```````````````````````````````` + +We may still get an unintended result in cases like + +```````````````````````````````` example +The number of windows in my house is +1. The number of doors is 6. +. +

    The number of windows in my house is

    +
      +
    1. The number of doors is 6.
    2. +
    +```````````````````````````````` + +but this rule should prevent most spurious list captures. + +There can be any number of blank lines between items: + +```````````````````````````````` example +- foo + +- bar + + +- baz +. +
      +
    • +

      foo

      +
    • +
    • +

      bar

      +
    • +
    • +

      baz

      +
    • +
    +```````````````````````````````` + +```````````````````````````````` example +- foo + - bar + - baz + + + bim +. +
      +
    • foo +
        +
      • bar +
          +
        • +

          baz

          +

          bim

          +
        • +
        +
      • +
      +
    • +
    +```````````````````````````````` + + +To separate consecutive lists of the same type, or to separate a +list from an indented code block that would otherwise be parsed +as a subparagraph of the final list item, you can insert a blank HTML +comment: + +```````````````````````````````` example +- foo +- bar + + + +- baz +- bim +. +
      +
    • foo
    • +
    • bar
    • +
    + +
      +
    • baz
    • +
    • bim
    • +
    +```````````````````````````````` + + +```````````````````````````````` example +- foo + + notcode + +- foo + + + + code +. +
      +
    • +

      foo

      +

      notcode

      +
    • +
    • +

      foo

      +
    • +
    + +
    code
    +
    +```````````````````````````````` + + +List items need not be indented to the same level. The following +list items will be treated as items at the same list level, +since none is indented enough to belong to the previous list +item: + +```````````````````````````````` example +- a + - b + - c + - d + - e + - f +- g +. +
      +
    • a
    • +
    • b
    • +
    • c
    • +
    • d
    • +
    • e
    • +
    • f
    • +
    • g
    • +
    +```````````````````````````````` + + +```````````````````````````````` example +1. a + + 2. b + + 3. c +. +
      +
    1. +

      a

      +
    2. +
    3. +

      b

      +
    4. +
    5. +

      c

      +
    6. +
    +```````````````````````````````` + +Note, however, that list items must not be preceded by more than +three spaces of indentation. Here `- e` is treated as a paragraph continuation +line, because it is indented more than three spaces: + +```````````````````````````````` example +- a + - b + - c + - d + - e +. +
      +
    • a
    • +
    • b
    • +
    • c
    • +
    • d +- e
    • +
    +```````````````````````````````` + +And here, `3. c` is treated as an indented code block, +because it is indented four spaces and preceded by a +blank line. + +```````````````````````````````` example +1. a + + 2. b + + 3. c +. +
      +
    1. +

      a

      +
    2. +
    3. +

      b

      +
    4. +
    +
    3. c
    +
    +```````````````````````````````` + + +This is a loose list, because there is a blank line between +two of the list items: + +```````````````````````````````` example +- a +- b + +- c +. +
      +
    • +

      a

      +
    • +
    • +

      b

      +
    • +
    • +

      c

      +
    • +
    +```````````````````````````````` + + +So is this, with an empty second item: + +```````````````````````````````` example +* a +* + +* c +. +
      +
    • +

      a

      +
    • +
    • +
    • +

      c

      +
    • +
    +```````````````````````````````` + + +These are loose lists, even though there are no blank lines between the items, +because one of the items directly contains two block-level elements +with a blank line between them: + +```````````````````````````````` example +- a +- b + + c +- d +. +
      +
    • +

      a

      +
    • +
    • +

      b

      +

      c

      +
    • +
    • +

      d

      +
    • +
    +```````````````````````````````` + + +```````````````````````````````` example +- a +- b + + [ref]: /url +- d +. +
      +
    • +

      a

      +
    • +
    • +

      b

      +
    • +
    • +

      d

      +
    • +
    +```````````````````````````````` + + +This is a tight list, because the blank lines are in a code block: + +```````````````````````````````` example +- a +- ``` + b + + + ``` +- c +. +
      +
    • a
    • +
    • +
      b
      +
      +
      +
      +
    • +
    • c
    • +
    +```````````````````````````````` + + +This is a tight list, because the blank line is between two +paragraphs of a sublist. So the sublist is loose while +the outer list is tight: + +```````````````````````````````` example +- a + - b + + c +- d +. +
      +
    • a +
        +
      • +

        b

        +

        c

        +
      • +
      +
    • +
    • d
    • +
    +```````````````````````````````` + + +This is a tight list, because the blank line is inside the +block quote: + +```````````````````````````````` example +* a + > b + > +* c +. +
      +
    • a +
      +

      b

      +
      +
    • +
    • c
    • +
    +```````````````````````````````` + + +This list is tight, because the consecutive block elements +are not separated by blank lines: + +```````````````````````````````` example +- a + > b + ``` + c + ``` +- d +. +
      +
    • a +
      +

      b

      +
      +
      c
      +
      +
    • +
    • d
    • +
    +```````````````````````````````` + + +A single-paragraph list is tight: + +```````````````````````````````` example +- a +. +
      +
    • a
    • +
    +```````````````````````````````` + + +```````````````````````````````` example +- a + - b +. +
      +
    • a +
        +
      • b
      • +
      +
    • +
    +```````````````````````````````` + + +This list is loose, because of the blank line between the +two block elements in the list item: + +```````````````````````````````` example +1. ``` + foo + ``` + + bar +. +
      +
    1. +
      foo
      +
      +

      bar

      +
    2. +
    +```````````````````````````````` + + +Here the outer list is loose, the inner list tight: + +```````````````````````````````` example +* foo + * bar + + baz +. +
      +
    • +

      foo

      +
        +
      • bar
      • +
      +

      baz

      +
    • +
    +```````````````````````````````` + + +```````````````````````````````` example +- a + - b + - c + +- d + - e + - f +. +
      +
    • +

      a

      +
        +
      • b
      • +
      • c
      • +
      +
    • +
    • +

      d

      +
        +
      • e
      • +
      • f
      • +
      +
    • +
    +```````````````````````````````` + + +# Inlines + +Inlines are parsed sequentially from the beginning of the character +stream to the end (left to right, in left-to-right languages). +Thus, for example, in + +```````````````````````````````` example +`hi`lo` +. +

    hilo`

    +```````````````````````````````` + +`hi` is parsed as code, leaving the backtick at the end as a literal +backtick. + + + diff --git a/documents/markdown/commonmark/overview.md b/documents/markdown/commonmark/overview.md new file mode 100644 index 0000000..49f7521 --- /dev/null +++ b/documents/markdown/commonmark/overview.md @@ -0,0 +1,39 @@ +## Overview + + +Parsing has two phases: + +1. In the first phase, lines of input are consumed and the block +structure of the document---its division into paragraphs, block quotes, +list items, and so on---is constructed. Text is assigned to these +blocks but not parsed. Link reference definitions are parsed and a +map of links is constructed. + +2. In the second phase, the raw text contents of paragraphs and headings +are parsed into sequences of Markdown inline elements (strings, +code spans, links, emphasis, and so on), using the map of link +references constructed in phase 1. + +At each point in processing, the document is represented as a tree of +**blocks**. The root of the tree is a `document` block. The `document` +may have any number of other blocks as **children**. These children +may, in turn, have other blocks as children. The last child of a block +is normally considered **open**, meaning that subsequent lines of input +can alter its contents. (Blocks that are not open are **closed**.) +Here, for example, is a possible document tree, with the open blocks +marked by arrows: + +``` tree +-> document + -> block_quote + paragraph + "Lorem ipsum dolor\nsit amet." 
+ -> list (type=bullet tight=true bullet_char=-) + list_item + paragraph + "Qui *quodsi iracundia*" + -> list_item + -> paragraph + "aliquando id" +``` + diff --git a/documents/markdown/commonmark/paragraphs.md b/documents/markdown/commonmark/paragraphs.md new file mode 100644 index 0000000..91f8ada --- /dev/null +++ b/documents/markdown/commonmark/paragraphs.md @@ -0,0 +1,111 @@ +## Paragraphs + + +A sequence of non-blank lines that cannot be interpreted as other +kinds of blocks forms a [paragraph](@). +The contents of the paragraph are the result of parsing the +paragraph's raw content as inlines. The paragraph's raw content +is formed by concatenating the lines and removing initial and final +spaces or tabs. + +A simple example with two paragraphs: + +```````````````````````````````` example +aaa + +bbb +. +

    aaa

    +

    bbb

    +```````````````````````````````` + + +Paragraphs can contain multiple lines, but no blank lines: + +```````````````````````````````` example +aaa +bbb + +ccc +ddd +. +

    aaa +bbb

    +

    ccc +ddd

    +```````````````````````````````` + + +Multiple blank lines between paragraphs have no effect: + +```````````````````````````````` example +aaa + + +bbb +. +

    aaa

    +

    bbb

    +```````````````````````````````` + + +Leading spaces or tabs are skipped: + +```````````````````````````````` example + aaa + bbb +. +

    aaa +bbb

    +```````````````````````````````` + + +Lines after the first may be indented any amount, since indented +code blocks cannot interrupt paragraphs. + +```````````````````````````````` example +aaa + bbb + ccc +. +

    aaa +bbb +ccc

    +```````````````````````````````` + + +However, the first line may be preceded by up to three spaces of indentation. +Four spaces of indentation is too many: + +```````````````````````````````` example + aaa +bbb +. +

    aaa +bbb

    +```````````````````````````````` + + +```````````````````````````````` example + aaa +bbb +. +
    aaa
    +
    +

    bbb

    +```````````````````````````````` + + +Final spaces or tabs are stripped before inline parsing, so a paragraph +that ends with two or more spaces will not end with a [hard line +break]: + +```````````````````````````````` example +aaa +bbb +. +

    aaa
    +bbb

    +```````````````````````````````` + + diff --git a/documents/markdown/commonmark/phase-1-block-structure.md b/documents/markdown/commonmark/phase-1-block-structure.md new file mode 100644 index 0000000..7061d86 --- /dev/null +++ b/documents/markdown/commonmark/phase-1-block-structure.md @@ -0,0 +1,143 @@ +## Phase 1: block structure + + +Each line that is processed has an effect on this tree. The line is +analyzed and, depending on its contents, the document may be altered +in one or more of the following ways: + +1. One or more open blocks may be closed. +2. One or more new blocks may be created as children of the + last open block. +3. Text may be added to the last (deepest) open block remaining + on the tree. + +Once a line has been incorporated into the tree in this way, +it can be discarded, so input can be read in a stream. + +For each line, we follow this procedure: + +1. First we iterate through the open blocks, starting with the +root document, and descending through last children down to the last +open block. Each block imposes a condition that the line must satisfy +if the block is to remain open. For example, a block quote requires a +`>` character. A paragraph requires a non-blank line. +In this phase we may match all or just some of the open +blocks. But we cannot close unmatched blocks yet, because we may have a +[lazy continuation line]. + +2. Next, after consuming the continuation markers for existing +blocks, we look for new block starts (e.g. `>` for a block quote). +If we encounter a new block start, we close any blocks unmatched +in step 1 before creating the new block as a child of the last +matched container block. + +3. Finally, we look at the remainder of the line (after block +markers like `>`, list markers, and indentation have been consumed). +This is text that can be incorporated into the last open +block (a paragraph, code block, heading, or raw HTML). 
+ +Setext headings are formed when we see a line of a paragraph +that is a [setext heading underline]. + +Reference link definitions are detected when a paragraph is closed; +the accumulated text lines are parsed to see if they begin with +one or more reference link definitions. Any remainder becomes a +normal paragraph. + +We can see how this works by considering how the tree above is +generated by four lines of Markdown: + +``` markdown +> Lorem ipsum dolor +sit amet. +> - Qui *quodsi iracundia* +> - aliquando id +``` + +At the outset, our document model is just + +``` tree +-> document +``` + +The first line of our text, + +``` markdown +> Lorem ipsum dolor +``` + +causes a `block_quote` block to be created as a child of our +open `document` block, and a `paragraph` block as a child of +the `block_quote`. Then the text is added to the last open +block, the `paragraph`: + +``` tree +-> document + -> block_quote + -> paragraph + "Lorem ipsum dolor" +``` + +The next line, + +``` markdown +sit amet. +``` + +is a "lazy continuation" of the open `paragraph`, so it gets added +to the paragraph's text: + +``` tree +-> document + -> block_quote + -> paragraph + "Lorem ipsum dolor\nsit amet." +``` + +The third line, + +``` markdown +> - Qui *quodsi iracundia* +``` + +causes the `paragraph` block to be closed, and a new `list` block +opened as a child of the `block_quote`. A `list_item` is also +added as a child of the `list`, and a `paragraph` as a child of +the `list_item`. The text is then added to the new `paragraph`: + +``` tree +-> document + -> block_quote + paragraph + "Lorem ipsum dolor\nsit amet." + -> list (type=bullet tight=true bullet_char=-) + -> list_item + -> paragraph + "Qui *quodsi iracundia*" +``` + +The fourth line, + +``` markdown +> - aliquando id +``` + +causes the `list_item` (and its child the `paragraph`) to be closed, +and a new `list_item` opened up as child of the `list`. 
A `paragraph` +is added as a child of the new `list_item`, to contain the text. +We thus obtain the final tree: + +``` tree +-> document + -> block_quote + paragraph + "Lorem ipsum dolor\nsit amet." + -> list (type=bullet tight=true bullet_char=-) + list_item + paragraph + "Qui *quodsi iracundia*" + -> list_item + -> paragraph + "aliquando id" +``` + diff --git a/documents/markdown/commonmark/phase-2-inline-structure.md b/documents/markdown/commonmark/phase-2-inline-structure.md new file mode 100644 index 0000000..c741ac6 --- /dev/null +++ b/documents/markdown/commonmark/phase-2-inline-structure.md @@ -0,0 +1,169 @@ +## Phase 2: inline structure + + +Once all of the input has been parsed, all open blocks are closed. + +We then "walk the tree," visiting every node, and parse raw +string contents of paragraphs and headings as inlines. At this +point we have seen all the link reference definitions, so we can +resolve reference links as we go. + +``` tree +document + block_quote + paragraph + str "Lorem ipsum dolor" + softbreak + str "sit amet." + list (type=bullet tight=true bullet_char=-) + list_item + paragraph + str "Qui " + emph + str "quodsi iracundia" + list_item + paragraph + str "aliquando id" +``` + +Notice how the [line ending] in the first paragraph has +been parsed as a `softbreak`, and the asterisks in the first list item +have become an `emph`. + +### An algorithm for parsing nested emphasis and links + +By far the trickiest part of inline parsing is handling emphasis, +strong emphasis, links, and images. This is done using the following +algorithm. + +When we're parsing inlines and we hit either + +- a run of `*` or `_` characters, or +- a `[` or `![` + +we insert a text node with these symbols as its literal content, and we +add a pointer to this text node to the [delimiter stack](@). + +The [delimiter stack] is a doubly linked list. 
Each +element contains a pointer to a text node, plus information about + +- the type of delimiter (`[`, `![`, `*`, `_`) +- the number of delimiters, +- whether the delimiter is "active" (all are active to start), and +- whether the delimiter is a potential opener, a potential closer, + or both (which depends on what sort of characters precede + and follow the delimiters). + +When we hit a `]` character, we call the *look for link or image* +procedure (see below). + +When we hit the end of the input, we call the *process emphasis* +procedure (see below), with `stack_bottom` = NULL. + +#### *look for link or image* + +Starting at the top of the delimiter stack, we look backwards +through the stack for an opening `[` or `![` delimiter. + +- If we don't find one, we return a literal text node `]`. + +- If we do find one, but it's not *active*, we remove the inactive + delimiter from the stack, and return a literal text node `]`. + +- If we find one and it's active, then we parse ahead to see if + we have an inline link/image, reference link/image, collapsed reference + link/image, or shortcut reference link/image. + + + If we don't, then we remove the opening delimiter from the + delimiter stack and return a literal text node `]`. + + + If we do, then + + * We return a link or image node whose children are the inlines + after the text node pointed to by the opening delimiter. + + * We run *process emphasis* on these inlines, with the `[` opener + as `stack_bottom`. + + * We remove the opening delimiter. + + * If we have a link (and not an image), we also set all + `[` delimiters before the opening delimiter to *inactive*. (This + will prevent us from getting links within links.) + +#### *process emphasis* + +Parameter `stack_bottom` sets a lower bound to how far we +descend in the [delimiter stack]. If it is NULL, we can +go all the way to the bottom. Otherwise, we stop before +visiting `stack_bottom`. 
+ +Let `current_position` point to the element on the [delimiter stack] +just above `stack_bottom` (or the first element if `stack_bottom` +is NULL). + +We keep track of the `openers_bottom` for each delimiter +type (`*`, `_`), indexed to the length of the closing delimiter run +(modulo 3) and to whether the closing delimiter can also be an +opener. Initialize this to `stack_bottom`. + +Then we repeat the following until we run out of potential +closers: + +- Move `current_position` forward in the delimiter stack (if needed) + until we find the first potential closer with delimiter `*` or `_`. + (This will be the potential closer closest + to the beginning of the input -- the first one in parse order.) + +- Now, look back in the stack (staying above `stack_bottom` and + the `openers_bottom` for this delimiter type) for the + first valid potential opener, where being "valid" requires that: + + + the token is a potential opener; and + + + the token has the same delimiter type as the current potential + closer; and + + + any of the following are true: + + * the closer is not a potential opener and the opener is not a + potential closer; or + + * the original length of the closing delimiter run is a multiple + of 3; or + + * the original length of the opening delimiter run + the original + length of the closing delimiter run is not a multiple of 3 + +- If one is found: + + + Figure out whether we have emphasis or strong emphasis: + if both closer and opener spans have length >= 2, we have + strong, otherwise regular. + + + Insert an emph or strong emph node accordingly, after + the text node corresponding to the opener. + + + Remove any delimiters between the opener and closer from + the delimiter stack. + + + Remove 1 (for regular emph) or 2 (for strong emph) delimiters + from the opening and closing text nodes. If they become empty + as a result, remove them and remove the corresponding element + of the delimiter stack. 
If the closing node is removed, reset + `current_position` to the next element in the stack. + +- If none is found: + + + Set `openers_bottom` to the element before `current_position`. + (We know that there are no openers for this kind of closer up to and + including this point, so this puts a lower bound on future searches.) + + + If the closer at `current_position` is not a potential opener, + remove it from the delimiter stack (since we know it can't + be a closer either). + + + Advance `current_position` to the next element in the stack. + +After we're done, we remove all delimiters above `stack_bottom` from the +delimiter stack. diff --git a/documents/markdown/commonmark/precedence.md b/documents/markdown/commonmark/precedence.md new file mode 100644 index 0000000..ba95844 --- /dev/null +++ b/documents/markdown/commonmark/precedence.md @@ -0,0 +1,27 @@ +## Precedence + + +Indicators of block structure always take precedence over indicators +of inline structure. So, for example, the following is a list with +two items, not a list with one item containing a code span: + +```````````````````````````````` example +- `one +- two` +. +
      +
    • `one
    • +
    • two`
    • +
    +```````````````````````````````` + + +This means that parsing can proceed in two steps: first, the block +structure of the document can be discerned; second, text lines inside +paragraphs, headings, and other block constructs can be parsed for inline +structure. The second step requires information about link reference +definitions that will be available only at the end of the first +step. Note that the first step requires processing lines in sequence, +but the second can be parallelized, since the inline parsing of +one block element does not affect the inline parsing of any other. + diff --git a/documents/markdown/commonmark/raw-html.md b/documents/markdown/commonmark/raw-html.md new file mode 100644 index 0000000..c277617 --- /dev/null +++ b/documents/markdown/commonmark/raw-html.md @@ -0,0 +1,277 @@ +## Raw HTML + + +Text between `<` and `>` that looks like an HTML tag is parsed as a +raw HTML tag and will be rendered in HTML without escaping. +Tag and attribute names are not limited to current HTML tags, +so custom tags (and even, say, DocBook tags) may be used. + +Here is the grammar for tags: + +A [tag name](@) consists of an ASCII letter +followed by zero or more ASCII letters, digits, or +hyphens (`-`). + +An [attribute](@) consists of spaces, tabs, and up to one line ending, +an [attribute name], and an optional +[attribute value specification]. + +An [attribute name](@) +consists of an ASCII letter, `_`, or `:`, followed by zero or more ASCII +letters, digits, `_`, `.`, `:`, or `-`. (Note: This is the XML +specification restricted to ASCII. HTML5 is laxer.) + +An [attribute value specification](@) +consists of optional spaces, tabs, and up to one line ending, +a `=` character, optional spaces, tabs, and up to one line ending, +and an [attribute value]. + +An [attribute value](@) +consists of an [unquoted attribute value], +a [single-quoted attribute value], or a [double-quoted attribute value]. 
+ +An [unquoted attribute value](@) +is a nonempty string of characters not +including spaces, tabs, line endings, `"`, `'`, `=`, `<`, `>`, or `` ` ``. + +A [single-quoted attribute value](@) +consists of `'`, zero or more +characters not including `'`, and a final `'`. + +A [double-quoted attribute value](@) +consists of `"`, zero or more +characters not including `"`, and a final `"`. + +An [open tag](@) consists of a `<` character, a [tag name], +zero or more [attributes], optional spaces, tabs, and up to one line ending, +an optional `/` character, and a `>` character. + +A [closing tag](@) consists of the string ``. + +An [HTML comment](@) consists of ``, ``, or ``, and `-->` (see the +[HTML spec](https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state)). + +A [processing instruction](@) +consists of the string ``, and the string +`?>`. + +A [declaration](@) consists of the string ``, and the character `>`. + +A [CDATA section](@) consists of +the string ``, and the string `]]>`. + +An [HTML tag](@) is an [open tag], a [closing tag], +an [HTML comment], a [processing instruction], a [declaration], +or a [CDATA section]. + +Here are some simple open tags: + +```````````````````````````````` example + +. +

    +```````````````````````````````` + + +Empty elements: + +```````````````````````````````` example + +. +

    +```````````````````````````````` + + +Whitespace is allowed: + +```````````````````````````````` example + +. +

    +```````````````````````````````` + + +With attributes: + +```````````````````````````````` example + +. +

    +```````````````````````````````` + + +Custom tag names can be used: + +```````````````````````````````` example +Foo +. +

    Foo

    +```````````````````````````````` + + +Illegal tag names, not parsed as HTML: + +```````````````````````````````` example +<33> <__> +. +

    <33> <__>

    +```````````````````````````````` + + +Illegal attribute names: + +```````````````````````````````` example +
    +. +

    <a h*#ref="hi">

    +```````````````````````````````` + + +Illegal attribute values: + +```````````````````````````````` example +
    +. +

    </a href="foo">

    +```````````````````````````````` + + +Comments: + +```````````````````````````````` example +foo +. +

    foo

    +```````````````````````````````` + +```````````````````````````````` example +foo foo --> + +foo foo --> +. +

    foo foo -->

    +

    foo foo -->

    +```````````````````````````````` + + +Processing instructions: + +```````````````````````````````` example +foo +. +

    foo

    +```````````````````````````````` + + +Declarations: + +```````````````````````````````` example +foo +. +

    foo

    +```````````````````````````````` + + +CDATA sections: + +```````````````````````````````` example +foo &<]]> +. +

    foo &<]]>

    +```````````````````````````````` + + +Entity and numeric character references are preserved in HTML +attributes: + +```````````````````````````````` example +foo
    +. +

    foo

    +```````````````````````````````` + + +Backslash escapes do not work in HTML attributes: + +```````````````````````````````` example +foo +. +

    foo

    +```````````````````````````````` + + +```````````````````````````````` example + +. +

    <a href=""">

    +```````````````````````````````` + + +A block quote can prevent a line from being parsed as inline HTML, +even though line breaks are allowed in tags: + +```````````````````````````````` example +
    quoted text +. +

    <a

    +
    +

    quoted text

    +
    +```````````````````````````````` + + diff --git a/documents/markdown/commonmark/setext-headings.md b/documents/markdown/commonmark/setext-headings.md new file mode 100644 index 0000000..8729491 --- /dev/null +++ b/documents/markdown/commonmark/setext-headings.md @@ -0,0 +1,417 @@ +## Setext headings + + +A [setext heading](@) consists of one or more +lines of text, not interrupted by a blank line, of which the first line does not +have more than 3 spaces of indentation, followed by +a [setext heading underline]. The lines of text must be such +that, were they not followed by the setext heading underline, +they would be interpreted as a paragraph: they cannot be +interpretable as a [code fence], [ATX heading][ATX headings], +[block quote][block quotes], [thematic break][thematic breaks], +[list item][list items], or [HTML block][HTML blocks]. + +A [setext heading underline](@) is a sequence of +`=` characters or a sequence of `-` characters, with no more than 3 +spaces of indentation and any number of trailing spaces or tabs. + +The heading is a level 1 heading if `=` characters are used in +the [setext heading underline], and a level 2 heading if `-` +characters are used. The contents of the heading are the result +of parsing the preceding lines of text as CommonMark inline +content. + +In general, a setext heading need not be preceded or followed by a +blank line. However, it cannot interrupt a paragraph, so when a +setext heading comes after a paragraph, a blank line is needed between +them. + +Simple examples: + +```````````````````````````````` example +Foo *bar* +========= + +Foo *bar* +--------- +. +

    Foo bar

    +

    Foo bar

    +```````````````````````````````` + + +The content of the header may span more than one line: + +```````````````````````````````` example +Foo *bar +baz* +==== +. +

    Foo bar +baz

    +```````````````````````````````` + +The contents are the result of parsing the heading's raw +content as inlines. The heading's raw content is formed by +concatenating the lines and removing initial and final +spaces or tabs. + +```````````````````````````````` example + Foo *bar +baz*→ +==== +. +

    Foo bar +baz

    +```````````````````````````````` + + +The underlining can be any length: + +```````````````````````````````` example +Foo +------------------------- + +Foo += +. +

    Foo

    +

    Foo

    +```````````````````````````````` + + +The heading content can be preceded by up to three spaces of indentation, and +need not line up with the underlining: + +```````````````````````````````` example + Foo +--- + + Foo +----- + + Foo + === +. +

    Foo

    +

    Foo

    +

    Foo

    +```````````````````````````````` + + +Four spaces of indentation is too many: + +```````````````````````````````` example + Foo + --- + + Foo +--- +. +
    Foo
    +---
    +
    +Foo
    +
    +
    +```````````````````````````````` + + +The setext heading underline can be preceded by up to three spaces of +indentation, and may have trailing spaces or tabs: + +```````````````````````````````` example +Foo + ---- +. +

    Foo

    +```````````````````````````````` + + +Four spaces of indentation is too many: + +```````````````````````````````` example +Foo + --- +. +

    Foo +---

    +```````````````````````````````` + + +The setext heading underline cannot contain internal spaces or tabs: + +```````````````````````````````` example +Foo += = + +Foo +--- - +. +

    Foo += =

    +

    Foo

    +
    +```````````````````````````````` + + +Trailing spaces or tabs in the content line do not cause a hard line break: + +```````````````````````````````` example +Foo +----- +. +

    Foo

    +```````````````````````````````` + + +Nor does a backslash at the end: + +```````````````````````````````` example +Foo\ +---- +. +

    Foo\

    +```````````````````````````````` + + +Since indicators of block structure take precedence over +indicators of inline structure, the following are setext headings: + +```````````````````````````````` example +`Foo +---- +` + +
    +. +

    `Foo

    +

    `

    +

    <a title="a lot

    +

    of dashes"/>

    +```````````````````````````````` + + +The setext heading underline cannot be a [lazy continuation +line] in a list item or block quote: + +```````````````````````````````` example +> Foo +--- +. +
    +

    Foo

    +
    +
    +```````````````````````````````` + + +```````````````````````````````` example +> foo +bar +=== +. +
    +

    foo +bar +===

    +
    +```````````````````````````````` + + +```````````````````````````````` example +- Foo +--- +. +
      +
    • Foo
    • +
    +
    +```````````````````````````````` + + +A blank line is needed between a paragraph and a following +setext heading, since otherwise the paragraph becomes part +of the heading's content: + +```````````````````````````````` example +Foo +Bar +--- +. +

    Foo +Bar

    +```````````````````````````````` + + +But in general a blank line is not required before or after +setext headings: + +```````````````````````````````` example +--- +Foo +--- +Bar +--- +Baz +. +
    +

    Foo

    +

    Bar

    +

    Baz

    +```````````````````````````````` + + +Setext headings cannot be empty: + +```````````````````````````````` example + +==== +. +

    ====

    +```````````````````````````````` + + +Setext heading text lines must not be interpretable as block +constructs other than paragraphs. So, the line of dashes +in these examples gets interpreted as a thematic break: + +```````````````````````````````` example +--- +--- +. +
    +
    +```````````````````````````````` + + +```````````````````````````````` example +- foo +----- +. +
      +
    • foo
    • +
    +
    +```````````````````````````````` + + +```````````````````````````````` example + foo +--- +. +
    foo
    +
    +
    +```````````````````````````````` + + +```````````````````````````````` example +> foo +----- +. +
    +

    foo

    +
    +
    +```````````````````````````````` + + +If you want a heading with `> foo` as its literal text, you can +use backslash escapes: + +```````````````````````````````` example +\> foo +------ +. +

    > foo

    +```````````````````````````````` + + +**Compatibility note:** Most existing Markdown implementations +do not allow the text of setext headings to span multiple lines. +But there is no consensus about how to interpret + +``` markdown +Foo +bar +--- +baz +``` + +One can find four different interpretations: + +1. paragraph "Foo", heading "bar", paragraph "baz" +2. paragraph "Foo bar", thematic break, paragraph "baz" +3. paragraph "Foo bar --- baz" +4. heading "Foo bar", paragraph "baz" + +We find interpretation 4 most natural, and interpretation 4 +increases the expressive power of CommonMark, by allowing +multiline headings. Authors who want interpretation 1 can +put a blank line after the first paragraph: + +```````````````````````````````` example +Foo + +bar +--- +baz +. +

    Foo

    +

    bar

    +

    baz

    +```````````````````````````````` + + +Authors who want interpretation 2 can put blank lines around +the thematic break, + +```````````````````````````````` example +Foo +bar + +--- + +baz +. +

    Foo +bar

    +
    +

    baz

    +```````````````````````````````` + + +or use a thematic break that cannot count as a [setext heading +underline], such as + +```````````````````````````````` example +Foo +bar +* * * +baz +. +

    Foo +bar

    +
    +

    baz

    +```````````````````````````````` + + +Authors who want interpretation 3 can use backslash escapes: + +```````````````````````````````` example +Foo +bar +\--- +baz +. +

    Foo +bar +--- +baz

    +```````````````````````````````` + + diff --git a/documents/markdown/commonmark/soft-line-breaks.md b/documents/markdown/commonmark/soft-line-breaks.md new file mode 100644 index 0000000..dbb6f00 --- /dev/null +++ b/documents/markdown/commonmark/soft-line-breaks.md @@ -0,0 +1,36 @@ +## Soft line breaks + + +A regular line ending (not in a code span or HTML tag) that is not +preceded by two or more spaces or a backslash is parsed as a +[softbreak](@). (A soft line break may be rendered in HTML either as a +[line ending] or as a space. The result will be the same in +browsers. In the examples here, a [line ending] will be used.) + +```````````````````````````````` example +foo +baz +. +

    foo +baz

    +```````````````````````````````` + + +Spaces at the end of the line and beginning of the next line are +removed: + +```````````````````````````````` example +foo + baz +. +

    foo +baz

    +```````````````````````````````` + + +A conforming parser may render a soft line break in HTML either as a +line ending or as a space. + +A renderer may also provide an option to render soft line breaks +as hard line breaks. + diff --git a/documents/markdown/commonmark/tabs.md b/documents/markdown/commonmark/tabs.md new file mode 100644 index 0000000..7f38a87 --- /dev/null +++ b/documents/markdown/commonmark/tabs.md @@ -0,0 +1,137 @@ +## Tabs + + +Tabs in lines are not expanded to [spaces]. However, +in contexts where spaces help to define block structure, +tabs behave as if they were replaced by spaces with a tab stop +of 4 characters. + +Thus, for example, a tab can be used instead of four spaces +in an indented code block. (Note, however, that internal +tabs are passed through as literal tabs, not expanded to +spaces.) + +```````````````````````````````` example +→foo→baz→→bim +. +
    foo→baz→→bim
    +
    +```````````````````````````````` + +```````````````````````````````` example + →foo→baz→→bim +. +
    foo→baz→→bim
    +
    +```````````````````````````````` + +```````````````````````````````` example + a→a + ὐ→a +. +
    a→a
    +ὐ→a
    +
    +```````````````````````````````` + +In the following example, a continuation paragraph of a list +item is indented with a tab; this has exactly the same effect +as indentation with four spaces would: + +```````````````````````````````` example + - foo + +→bar +. +
      +
    • +

      foo

      +

      bar

      +
    • +
    +```````````````````````````````` + +```````````````````````````````` example +- foo + +→→bar +. +
      +
    • +

      foo

      +
        bar
      +
      +
    • +
    +```````````````````````````````` + +Normally the `>` that begins a block quote may be followed +optionally by a space, which is not considered part of the +content. In the following case `>` is followed by a tab, +which is treated as if it were expanded into three spaces. +Since one of these spaces is considered part of the +delimiter, `foo` is considered to be indented six spaces +inside the block quote context, so we get an indented +code block starting with two spaces. + +```````````````````````````````` example +>→→foo +. +
    +
      foo
    +
    +
    +```````````````````````````````` + +```````````````````````````````` example +-→→foo +. +
      +
    • +
        foo
      +
      +
    • +
    +```````````````````````````````` + + +```````````````````````````````` example + foo +→bar +. +
    foo
    +bar
    +
    +```````````````````````````````` + +```````````````````````````````` example + - foo + - bar +→ - baz +. +
      +
    • foo +
        +
      • bar +
          +
        • baz
        • +
        +
      • +
      +
    • +
    +```````````````````````````````` + +```````````````````````````````` example +#→Foo +. +

    Foo

    +```````````````````````````````` + +```````````````````````````````` example +*→*→*→ +. +
    +```````````````````````````````` + + diff --git a/documents/markdown/commonmark/textual-content.md b/documents/markdown/commonmark/textual-content.md new file mode 100644 index 0000000..b446912 --- /dev/null +++ b/documents/markdown/commonmark/textual-content.md @@ -0,0 +1,36 @@ +## Textual content + + +Any characters not given an interpretation by the above rules will +be parsed as plain textual content. + +```````````````````````````````` example +hello $.;'there +. +

    hello $.;'there

    +```````````````````````````````` + + +```````````````````````````````` example +Foo χρῆν +. +

    Foo χρῆν

    +```````````````````````````````` + + +Internal spaces are preserved verbatim: + +```````````````````````````````` example +Multiple spaces +. +

    Multiple spaces

    +```````````````````````````````` + + + + +# Appendix: A parsing strategy + +In this appendix we describe some features of the parsing strategy +used in the CommonMark reference implementations. + diff --git a/documents/markdown/commonmark/thematic-breaks.md b/documents/markdown/commonmark/thematic-breaks.md new file mode 100644 index 0000000..dcc5cc7 --- /dev/null +++ b/documents/markdown/commonmark/thematic-breaks.md @@ -0,0 +1,225 @@ +## Thematic breaks + + +A line consisting of optionally up to three spaces of indentation, followed by a +sequence of three or more matching `-`, `_`, or `*` characters, each followed +optionally by any number of spaces or tabs, forms a +[thematic break](@). + +```````````````````````````````` example +*** +--- +___ +. +
    +
    +
    +```````````````````````````````` + + +Wrong characters: + +```````````````````````````````` example ++++ +. +

    +++

    +```````````````````````````````` + + +```````````````````````````````` example +=== +. +

    ===

    +```````````````````````````````` + + +Not enough characters: + +```````````````````````````````` example +-- +** +__ +. +

    -- +** +__

    +```````````````````````````````` + + +Up to three spaces of indentation are allowed: + +```````````````````````````````` example + *** + *** + *** +. +
    +
    +
    +```````````````````````````````` + + +Four spaces of indentation is too many: + +```````````````````````````````` example + *** +. +
    ***
    +
    +```````````````````````````````` + + +```````````````````````````````` example +Foo + *** +. +

    Foo +***

    +```````````````````````````````` + + +More than three characters may be used: + +```````````````````````````````` example +_____________________________________ +. +
    +```````````````````````````````` + + +Spaces and tabs are allowed between the characters: + +```````````````````````````````` example + - - - +. +
    +```````````````````````````````` + + +```````````````````````````````` example + ** * ** * ** * ** +. +
    +```````````````````````````````` + + +```````````````````````````````` example +- - - - +. +
    +```````````````````````````````` + + +Spaces and tabs are allowed at the end: + +```````````````````````````````` example +- - - - +. +
    +```````````````````````````````` + + +However, no other characters may occur in the line: + +```````````````````````````````` example +_ _ _ _ a + +a------ + +---a--- +. +

    _ _ _ _ a

    +

    a------

    +

    ---a---

    +```````````````````````````````` + + +It is required that all of the characters other than spaces or tabs be the same. +So, this is not a thematic break: + +```````````````````````````````` example + *-* +. +

    -

    +```````````````````````````````` + + +Thematic breaks do not need blank lines before or after: + +```````````````````````````````` example +- foo +*** +- bar +. +
      +
    • foo
    • +
    +
    +
      +
    • bar
    • +
    +```````````````````````````````` + + +Thematic breaks can interrupt a paragraph: + +```````````````````````````````` example +Foo +*** +bar +. +

    Foo

    +
    +

    bar

    +```````````````````````````````` + + +If a line of dashes that meets the above conditions for being a +thematic break could also be interpreted as the underline of a [setext +heading], the interpretation as a +[setext heading] takes precedence. Thus, for example, +this is a setext heading, not a paragraph followed by a thematic break: + +```````````````````````````````` example +Foo +--- +bar +. +

    Foo

    +

    bar

    +```````````````````````````````` + + +When both a thematic break and a list item are possible +interpretations of a line, the thematic break takes precedence: + +```````````````````````````````` example +* Foo +* * * +* Bar +. +
      +
    • Foo
    • +
    +
    +
      +
    • Bar
    • +
    +```````````````````````````````` + + +If you want a thematic break in a list item, use a different bullet: + +```````````````````````````````` example +- Foo +- * * * +. +
      +
    • Foo
    • +
    • +
      +
    • +
    +```````````````````````````````` + + diff --git a/documents/markdown/commonmark/what-is-markdown.md b/documents/markdown/commonmark/what-is-markdown.md new file mode 100644 index 0000000..31b548d --- /dev/null +++ b/documents/markdown/commonmark/what-is-markdown.md @@ -0,0 +1,93 @@ +## What is Markdown? + + +Markdown is a plain text format for writing structured documents, +based on conventions for indicating formatting in email +and usenet posts. It was developed by John Gruber (with +help from Aaron Swartz) and released in 2004 in the form of a +[syntax description](https://daringfireball.net/projects/markdown/syntax) +and a Perl script (`Markdown.pl`) for converting Markdown to +HTML. In the next decade, dozens of implementations were +developed in many languages. Some extended the original +Markdown syntax with conventions for footnotes, tables, and +other document elements. Some allowed Markdown documents to be +rendered in formats other than HTML. Websites like Reddit, +StackOverflow, and GitHub had millions of people using Markdown. +And Markdown started to be used beyond the web, to author books, +articles, slide shows, letters, and lecture notes. + +What distinguishes Markdown from many other lightweight markup +syntaxes, which are often easier to write, is its readability. +As Gruber writes: + +> The overriding design goal for Markdown's formatting syntax is +> to make it as readable as possible. The idea is that a +> Markdown-formatted document should be publishable as-is, as +> plain text, without looking like it's been marked up with tags +> or formatting instructions. +> () + +The point can be illustrated by comparing a sample of +[AsciiDoc](https://asciidoc.org/) with +an equivalent sample of Markdown. Here is a sample of +AsciiDoc from the AsciiDoc manual: + +``` +1. List item one. ++ +List item one continued with a second paragraph followed by an +Indented block. ++ +................. +$ ls *.sh +$ mv *.sh ~/tmp +................. 
++ +List item continued with a third paragraph. + +2. List item two continued with an open block. ++ +-- +This paragraph is part of the preceding list item. + +a. This list is nested and does not require explicit item +continuation. ++ +This paragraph is part of the preceding list item. + +b. List item b. + +This paragraph belongs to item two of the outer list. +-- +``` + +And here is the equivalent in Markdown: +``` +1. List item one. + + List item one continued with a second paragraph followed by an + Indented block. + + $ ls *.sh + $ mv *.sh ~/tmp + + List item continued with a third paragraph. + +2. List item two continued with an open block. + + This paragraph is part of the preceding list item. + + 1. This list is nested and does not require explicit item continuation. + + This paragraph is part of the preceding list item. + + 2. List item b. + + This paragraph belongs to item two of the outer list. +``` + +The AsciiDoc version is, arguably, easier to write. You don't need +to worry about indentation. But the Markdown version is much easier +to read. The nesting of list items is apparent to the eye in the +source, not just in the processed document. + diff --git a/documents/markdown/commonmark/why-is-a-spec-needed.md b/documents/markdown/commonmark/why-is-a-spec-needed.md new file mode 100644 index 0000000..0a0559c --- /dev/null +++ b/documents/markdown/commonmark/why-is-a-spec-needed.md @@ -0,0 +1,154 @@ +## Why is a spec needed? + + +John Gruber's [canonical description of Markdown's +syntax](https://daringfireball.net/projects/markdown/syntax) +does not specify the syntax unambiguously. Here are some examples of +questions it does not answer: + +1. How much indentation is needed for a sublist? The spec says that + continuation paragraphs need to be indented four spaces, but is + not fully explicit about sublists. It is natural to think that + they, too, must be indented four spaces, but `Markdown.pl` does + not require that. 
This is hardly a "corner case," and divergences + between implementations on this issue often lead to surprises for + users in real documents. (See [this comment by John + Gruber](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/1997).) + +2. Is a blank line needed before a block quote or heading? + Most implementations do not require the blank line. However, + this can lead to unexpected results in hard-wrapped text, and + also to ambiguities in parsing (note that some implementations + put the heading inside the blockquote, while others do not). + (John Gruber has also spoken [in favor of requiring the blank + lines](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2146).) + +3. Is a blank line needed before an indented code block? + (`Markdown.pl` requires it, but this is not mentioned in the + documentation, and some implementations do not require it.) + + ``` markdown + paragraph + code? + ``` + +4. What is the exact rule for determining when list items get + wrapped in `

    ` tags? Can a list be partially "loose" and partially + "tight"? What should we do with a list like this? + + ``` markdown + 1. one + + 2. two + 3. three + ``` + + Or this? + + ``` markdown + 1. one + - a + + - b + 2. two + ``` + + (There are some relevant comments by John Gruber + [here](https://web.archive.org/web/20170611172104/http://article.gmane.org/gmane.text.markdown.general/2554).) + +5. Can list markers be indented? Can ordered list markers be right-aligned? + + ``` markdown + 8. item 1 + 9. item 2 + 10. item 2a + ``` + +6. Is this one list with a thematic break in its second item, + or two lists separated by a thematic break? + + ``` markdown + * a + * * * * * + * b + ``` + +7. When list markers change from numbers to bullets, do we have + two lists or one? (The Markdown syntax description suggests two, + but the perl scripts and many other implementations produce one.) + + ``` markdown + 1. fee + 2. fie + - foe + - fum + ``` + +8. What are the precedence rules for the markers of inline structure? + For example, is the following a valid link, or does the code span + take precedence ? + + ``` markdown + [a backtick (`)](/url) and [another backtick (`)](/url). + ``` + +9. What are the precedence rules for markers of emphasis and strong + emphasis? For example, how should the following be parsed? + + ``` markdown + *foo *bar* baz* + ``` + +10. What are the precedence rules between block-level and inline-level + structure? For example, how should the following be parsed? + + ``` markdown + - `a long code span can contain a hyphen like this + - and it can screw things up` + ``` + +11. Can list items include section headings? (`Markdown.pl` does not + allow this, but does allow blockquotes to include headings.) + + ``` markdown + - # Heading + ``` + +12. Can list items be empty? + + ``` markdown + * a + * + * b + ``` + +13. Can link references be defined inside block quotes or list items? + + ``` markdown + > Blockquote [foo]. 
+ > + > [foo]: /url + ``` + +14. If there are multiple definitions for the same reference, which takes + precedence? + + ``` markdown + [foo]: /url1 + [foo]: /url2 + + [foo][] + ``` + +In the absence of a spec, early implementers consulted `Markdown.pl` +to resolve these ambiguities. But `Markdown.pl` was quite buggy, and +gave manifestly bad results in many cases, so it was not a +satisfactory replacement for a spec. + +Because there is no unambiguous spec, implementations have diverged +considerably. As a result, users are often surprised to find that +a document that renders one way on one system (say, a GitHub wiki) +renders differently on another (say, converting to docbook using +pandoc). To make matters worse, because nothing in Markdown counts +as a "syntax error," the divergence often isn't discovered right away. + diff --git a/documents/markdown/docusaurus/configuration.md b/documents/markdown/docusaurus/configuration.md new file mode 100644 index 0000000..d84d07a --- /dev/null +++ b/documents/markdown/docusaurus/configuration.md @@ -0,0 +1,294 @@ +--- +description: Configuring your site's behavior through docusaurus.config.js and more. +--- + +# Configuration + +import TOCInline from '@theme/TOCInline'; + +:::info + +Check the [**`docusaurus.config.js` API reference**](api/docusaurus.config.js.mdx) for an exhaustive list of options. + +::: + +Docusaurus has a unique take on configurations. We encourage you to congregate information about your site into one place. We guard the fields of this file and facilitate making this data object accessible across your site. + +Keeping a well-maintained `docusaurus.config.js` helps you, your collaborators, and your open source contributors to be able to focus on documentation while still being able to customize the site. 
+ +## Syntax to declare `docusaurus.config.js` {/* #syntax-to-declare-docusaurus-config */} + +The `docusaurus.config.js` file is run in Node.js and should export either: + +- a **config object** +- a **function** that creates the config object + +:::info + +The `docusaurus.config.js` file supports: + +- [**ES Modules**](https://flaviocopes.com/es-modules/) +- [**CommonJS**](https://flaviocopes.com/commonjs/) +- [**TypeScript**](./typescript-support.mdx#typing-config) + +Constraints: + +- **Required:** use `export default /* your config*/` (or `module.exports`) to export your Docusaurus config +- **Optional:** use `import Lib from 'lib'` (or `require('lib')`) to import Node.js packages + +::: + +Docusaurus gives us the ability to declare its configuration in various **equivalent ways**, and all the following config examples lead to the exact same result: + +```js title="docusaurus.config.js" +export default { + title: 'Docusaurus', + url: 'https://docusaurus.io', + // your site config ... +}; +``` + +```js title="docusaurus.config.js" +module.exports = { + title: 'Docusaurus', + url: 'https://docusaurus.io', + // your site config ... +}; +``` + +```ts title="docusaurus.config.ts" +import type {Config} from '@docusaurus/types'; + +export default { + title: 'Docusaurus', + url: 'https://docusaurus.io', + // your site config ... +} satisfies Config; +``` + +```js title="docusaurus.config.js" +const config = { + title: 'Docusaurus', + url: 'https://docusaurus.io', + // your site config ... +}; + +export default config; +``` + +```js title="docusaurus.config.js" +export default function configCreator() { + return { + title: 'Docusaurus', + url: 'https://docusaurus.io', + // your site config ... + }; +} +``` + +```js title="docusaurus.config.js" +export default async function createConfigAsync() { + return { + title: 'Docusaurus', + url: 'https://docusaurus.io', + // your site config ... 
+ }; +} +``` + +:::tip Using ESM-only packages + +Using an async config creator can be useful to import ESM-only modules (notably most Remark plugins). It is possible to import such modules thanks to dynamic imports: + +```js title="docusaurus.config.js" +export default async function createConfigAsync() { + // Use a dynamic import instead of require('esm-lib') + // highlight-next-line + const lib = await import('lib'); + + return { + title: 'Docusaurus', + url: 'https://docusaurus.io', + // rest of your site config... + }; +} +``` + +::: + +## What goes into a `docusaurus.config.js`? {/* #what-goes-into-a-docusaurusconfigjs */} + +You should not have to write your `docusaurus.config.js` from scratch even if you are developing your site. All templates come with a `docusaurus.config.js` that includes defaults for the common options. + +However, it can be helpful if you have a high-level understanding of how the configurations are designed and implemented. + +The high-level overview of Docusaurus configuration can be categorized into: + + + +### Site metadata {/* #site-metadata */} + +Site metadata contains the essential global metadata such as `title`, `url`, `baseUrl`, and `favicon`. + +They are used in several places such as your site's title and headings, browser tab icon, social sharing (Facebook, X) information or even to generate the correct path to serve your static files. + +### Deployment configurations {/* #deployment-configurations */} + +Deployment configurations such as `projectName`, `organizationName`, and optionally `deploymentBranch` are used when you deploy your site with the `deploy` command. + +It is recommended to check the [deployment docs](deployment.mdx) for more information. 
+ +### Theme, plugin, and preset configurations {/* #theme-plugin-and-preset-configurations */} + +List the [themes](./using-plugins.mdx#using-themes), [plugins](./using-plugins.mdx), and [presets](./using-plugins.mdx#using-presets) for your site in the `themes`, `plugins`, and `presets` fields, respectively. These are typically npm packages: + +```js title="docusaurus.config.js" +export default { + // ... + plugins: [ + '@docusaurus/plugin-content-blog', + '@docusaurus/plugin-content-pages', + ], + themes: ['@docusaurus/theme-classic'], +}; +``` + +:::tip + +Docusaurus supports [**module shorthands**](./using-plugins.mdx#module-shorthands), allowing you to simplify the above configuration as: + +```js title="docusaurus.config.js" +export default { + // ... + plugins: ['content-blog', 'content-pages'], + themes: ['classic'], +}; +``` + +::: + +They can also be loaded from local directories: + +```js title="docusaurus.config.js" +import path from 'path'; + +export default { + // ... + themes: [path.resolve(__dirname, '/path/to/docusaurus-local-theme')], +}; +``` + +To specify options for a plugin or theme, replace the name of the plugin or theme in the config file with an array containing the name and an options object: + +```js title="docusaurus.config.js" +export default { + // ... + plugins: [ + [ + 'content-blog', + { + path: 'blog', + routeBasePath: 'blog', + include: ['*.md', '*.mdx'], + // ... + }, + ], + 'content-pages', + ], +}; +``` + +To specify options for a plugin or theme that is bundled in a preset, pass the options through the `presets` field. In this example, `docs` refers to `@docusaurus/plugin-content-docs` and `theme` refers to `@docusaurus/theme-classic`. + +```js title="docusaurus.config.js" +export default { + // ... 
+ presets: [ + [ + '@docusaurus/preset-classic', + { + docs: { + sidebarPath: './sidebars.js', + }, + theme: { + customCss: ['./src/css/custom.css'], + }, + }, + ], + ], +}; +``` + +:::tip + +The `presets: [['classic', {...}]]` shorthand works as well. + +::: + +For further help configuring themes, plugins, and presets, see [Using Plugins](./using-plugins.mdx). + +### Custom configurations {/* #custom-configurations */} + +Docusaurus guards `docusaurus.config.js` from unknown fields. To add custom fields, define them in `customFields`. + +Example: + +```js title="docusaurus.config.js" +export default { + // ... + // highlight-start + customFields: { + image: '', + keywords: [], + }, + // highlight-end + // ... +}; +``` + +## Accessing configuration from components {/* #accessing-configuration-from-components */} + +Your configuration object will be made available to all the components of your site. And you may access them via React context as `siteConfig`. + +Basic example: + +```jsx +import React from 'react'; +// highlight-next-line +import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; + +const Hello = () => { + // highlight-start + const {siteConfig} = useDocusaurusContext(); + // highlight-end + const {title, tagline} = siteConfig; + + return

    {`${title} · ${tagline}`}
    ; +}; +``` + +:::tip + +If you just want to use those fields on the client side, you could create your own JS files and import them as ES6 modules; there is no need to put them in `docusaurus.config.js`. + +::: + +## Customizing Babel Configuration {/* #customizing-babel-configuration */} + +Docusaurus transpiles your site's source code using Babel by default. If you want to customize the Babel configuration, you can do so by creating a `babel.config.js` file in your project root. + +To use the built-in preset as a base configuration, install the following package: + +```bash npm2yarn +npm install --save @docusaurus/babel +``` + +Then use the preset in your `babel.config.js` file: + +```js title="babel.config.js" +export default { + presets: ['@docusaurus/babel/preset'], +}; +``` + +Most of the time, the default preset configuration will work just fine. If you want to customize your Babel configuration (e.g. to add support for Flow), you can directly edit this file. For your changes to take effect, you need to restart the Docusaurus dev server. diff --git a/documents/markdown/docusaurus/creating-pages.md b/documents/markdown/docusaurus/creating-pages.md new file mode 100644 index 0000000..55a9e73 --- /dev/null +++ b/documents/markdown/docusaurus/creating-pages.md @@ -0,0 +1,140 @@ +--- +slug: /creating-pages +sidebar_label: Pages +--- + +# Creating Pages + +In this section, we will learn about creating pages in Docusaurus. + +The `@docusaurus/plugin-content-pages` plugin empowers you to create **one-off standalone pages** like a showcase page, playground page, or support page. You can use React components, or Markdown. + +:::note + +Pages do not have sidebars, only [docs](./docs/docs-introduction.mdx) do. + +::: + +:::info + +Check the [Pages Plugin API Reference documentation](./../api/plugins/plugin-content-pages.mdx) for an exhaustive list of options. 
+ +::: + +## Add a React page {/* #add-a-react-page */} + +React is used as the UI library to create pages. Every page component should export a React component, and you can leverage the expressiveness of React to build rich and interactive content. + +Create a file `/src/pages/helloReact.js`: + +```jsx title="/src/pages/helloReact.js" +import React from 'react'; +import Layout from '@theme/Layout'; + +export default function Hello() { + return ( + +
    +

    + Edit pages/helloReact.js and save to reload. +

    +
    +
    + ); +} +``` + +Once you save the file, the development server will automatically reload the changes. Now open [`http://localhost:3000/helloReact`](http://localhost:3000/helloReact) and you will see the new page you just created. + +Each page doesn't come with any styling. You will need to import the `Layout` component from `@theme/Layout` and wrap your contents within that component if you want the navbar and/or footer to appear. + +:::tip + +You can also create TypeScript pages with the `.tsx` extension (`helloReact.tsx`). + +::: + +## Add a Markdown page {/* #add-a-markdown-page */} + +Create a file `/src/pages/helloMarkdown.md`: + +```md title="/src/pages/helloMarkdown.md" +--- +title: my hello page title +description: my hello page description +hide_table_of_contents: true +--- + +# Hello + +How are you? +``` + +In the same way, a page will be created at [`http://localhost:3000/helloMarkdown`](http://localhost:3000/helloMarkdown). + +Markdown pages are less flexible than React pages because they always use the theme layout. + +Here's an [example Markdown page](/examples/markdownPageExample). + +:::tip + +You can use the full power of React in Markdown pages too; refer to the [MDX](https://mdxjs.com/) documentation. + +::: + +## Routing {/* #routing */} + +If you are familiar with other static site generators like Jekyll and Next, this routing approach will feel familiar to you. Any JavaScript file you create under the `/src/pages/` directory will be automatically converted to a website page, following the `/src/pages/` directory hierarchy. For example: + +- `/src/pages/index.js` → `[baseUrl]` +- `/src/pages/foo.js` → `[baseUrl]/foo` +- `/src/pages/foo/test.js` → `[baseUrl]/foo/test` +- `/src/pages/foo/index.js` → `[baseUrl]/foo/` + +In this component-based development era, it is encouraged to co-locate your styling, markup, and behavior together into components. 
Each page is a component, and if you need to customize your page design with your own styles, we recommend co-locating your styles with the page component in its own directory. For example, to create a "Support" page, you could do one of the following: + +- Add a `/src/pages/support.js` file +- Create a `/src/pages/support/` directory and a `/src/pages/support/index.js` file. + +The latter is preferred as it has the benefit of letting you put files related to the page within that directory. For example, a CSS module file (`styles.module.css`) with styles meant to only be used on the "Support" page. + +:::note + +This is merely a recommended directory structure, and you will still need to manually import the CSS module file within your component module (`support/index.js`). + +::: + +By default, any Markdown or JavaScript file starting with `_` will be ignored and no routes will be created for that file (see the `exclude` option). + +```bash +my-website +├── src +│ └── pages +│ ├── styles.module.css +│ ├── index.js +│ ├── _ignored.js +│ ├── _ignored-folder +│ │ ├── Component1.js +│ │ └── Component2.js +│ └── support +│ ├── index.js +│ └── styles.module.css +. +``` + +:::warning + +All JavaScript/TypeScript files within the `src/pages/` directory will have corresponding website paths generated for them. If you want to create reusable components in that directory, use the `exclude` option (by default, files prefixed with `_`, test files (`.test.js`), and files in the `__tests__` directory are not turned into pages). + +::: + +### Duplicate Routes {/* #duplicate-routes */} + +You may accidentally create multiple pages that are meant to be accessed on the same route. When this happens, Docusaurus will warn you about duplicate routes when you run `yarn start` or `yarn build` (behavior configurable through the [`onDuplicateRoutes`](../api/docusaurus.config.js.mdx#onDuplicateRoutes) config), but the site will still be built successfully. 
The page that was created last will be accessible, but it will override other conflicting pages. To resolve this issue, you should modify or remove any conflicting routes. diff --git a/documents/markdown/docusaurus/deployment.md b/documents/markdown/docusaurus/deployment.md new file mode 100644 index 0000000..290a62e --- /dev/null +++ b/documents/markdown/docusaurus/deployment.md @@ -0,0 +1,942 @@ +--- +description: Deploy your Docusaurus app for production on a range of static site hosting services. +--- + +# Deployment + +To build the static files of your website for production, run: + +```bash npm2yarn +npm run build +``` + +Once it finishes, the static files will be generated within the `build` directory. + +:::note + +The only responsibility of Docusaurus is to build your site and emit static files in `build`. + +It is now up to you to choose how to host those static files. + +::: + +You can deploy your site to static site hosting services such as [Vercel](https://vercel.com/), [GitHub Pages](https://pages.github.com/), [Netlify](https://www.netlify.com/), [Render](https://render.com/docs/static-sites), and [Surge](https://surge.sh/help/getting-started-with-surge). + +A Docusaurus site is statically rendered, and it can generally work without JavaScript! + +## Configuration {/* #configuration */} + +The following parameters are required in `docusaurus.config.js` to optimize routing and serve files from the correct location: + +| Name | Description | +| --- | --- | +| `url` | URL for your site. For a site deployed at `https://my-org.com/my-project/`, `url` is `https://my-org.com/`. | +| `baseUrl` | Base URL for your project, with a trailing slash. For a site deployed at `https://my-org.com/my-project/`, `baseUrl` is `/my-project/`. | + +## Testing your Build Locally {/* #testing-build-locally */} + +It is important to test your build locally before deploying it for production. 
Docusaurus provides a [`docusaurus serve`](cli.mdx#docusaurus-serve-sitedir) command for that: + +```bash npm2yarn +npm run serve +``` + +By default, this will load your site at [`http://localhost:3000/`](http://localhost:3000/). + +## Trailing slash configuration {/* #trailing-slashes */} + +Docusaurus has a [`trailingSlash` config](./api/docusaurus.config.js.mdx#trailingSlash) to allow customizing URLs/links and emitted filename patterns. + +The default value generally works fine. Unfortunately, each static hosting provider has a **different behavior**, and deploying the exact same site to various hosts can lead to distinct results. Depending on your host, it can be useful to change this config. + +:::tip + +Use [slorber/trailing-slash-guide](https://github.com/slorber/trailing-slash-guide) to understand better the behavior of your host and configure `trailingSlash` appropriately. + +::: + +## Using environment variables {/* #using-environment-variables */} + +Putting potentially sensitive information in the environment is common practice. However, in a typical Docusaurus website, the `docusaurus.config.js` file is the only interface to the Node.js environment (see [our architecture overview](advanced/architecture.mdx)), while everything else (MDX pages, React components, etc.) are client side and do not have direct access to the `process` global variable. In this case, you can consider using [`customFields`](api/docusaurus.config.js.mdx#customFields) to pass environment variables to the client side. 
+ +```js title="docusaurus.config.js" +// If you are using dotenv (https://www.npmjs.com/package/dotenv) +import 'dotenv/config'; + +export default { + title: '...', + url: process.env.URL, // You can use environment variables to control site specifics as well + // highlight-start + customFields: { + // Put your custom environment here + teamEmail: process.env.EMAIL, + }, + // highlight-end +}; +``` + +```jsx title="home.jsx" +import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; + +export default function Home() { + const { + siteConfig: {customFields}, + } = useDocusaurusContext(); + return
    Contact us through {customFields.teamEmail}!
    ; +} +``` + +## Choosing a hosting provider {/* #choosing-a-hosting-provider */} + +There are a few common hosting options: + +- [Self hosting](#self-hosting) with an HTTP server like Apache2 or Nginx. +- Jamstack providers (e.g. [Netlify](#deploying-to-netlify) and [Vercel](#deploying-to-vercel)). We will use them as references, but the same reasoning can apply to other providers. +- [GitHub Pages](#deploying-to-github-pages) (by definition, it is also Jamstack, but we compare it separately). + +If you are unsure of which one to choose, ask the following questions: + +
    + + + How many resources (money, person-hours, etc.) am I willing to invest in this? + + +- 🔴 Self-hosting requires experience in networking as well as Linux and web server administration. It's the most difficult option, and would require the most time to manage successfully. Expense-wise, cloud services are almost never free, and purchasing/deploying an onsite server can be even more costly. +- 🟢 Jamstack providers can help you set up a working website in almost no time and offer features like server-side redirects that are easily configurable. Many providers offer generous build-time quotas even for free plans that you would almost never exceed. However, free plans have limits, and you would need to pay once you hit those limits. Check the pricing page of your provider for details. +- 🟡 The GitHub Pages deployment workflow can be tedious to set up. (Evidence: see the length of [Deploying to GitHub Pages](#deploying-to-github-pages)!) However, this service (including build and deployment) is always free for public repositories, and we have detailed instructions to help you make it work. + +
    + +
    + +How much server-side customization do I need? + +- 🟢 With self-hosting, you have access to the entire server's configuration. You can configure the virtual host to serve different content based on the request URL, you can do complicated server-side redirects, you can implement authentication, and so on. If you need a lot of server-side features, self-host your website. +- 🟡 Jamstack usually offers some server-side configuration (e.g. URL formatting (trailing slashes), server-side redirects, etc.). +- 🔴 GitHub Pages doesn't expose server-side configuration besides enforcing HTTPS and setting CNAME records. + +
    + +
    + +Do I need collaboration-friendly deployment workflows? + +- 🟡 Self-hosted services can leverage continuous deployment functionality like Netlify, but more heavy-lifting is involved. Usually, you would designate a specific person to manage the deployment, and the workflow wouldn't be very git-based as opposed to the other two options. +- 🟢 Netlify and Vercel have deploy previews for every pull request, which is useful for a team to review work before merging to production. You can also manage a team with different member access to the deployment. +- 🟡 GitHub Pages cannot do deploy previews in a non-convoluted way. One repo can only be associated with one site deployment. On the other hand, you can control who has write access to the site's deployment. + +
    + +There isn't a silver bullet. You need to weigh your needs and resources before making a choice. + +## Self-Hosting {/* #self-hosting */} + +Docusaurus can be self-hosted using [`docusaurus serve`](cli.mdx#docusaurus-serve-sitedir). Use `--port` to change the port and `--host` to change the host. + +```bash npm2yarn +npm run serve -- --build --port 80 --host 0.0.0.0 +``` + +:::warning + +It is not the best option, compared to a static hosting provider / CDN. + +::: + +:::warning + +In the following sections, we will introduce a few common hosting providers and how they should be configured to deploy Docusaurus sites most efficiently. Docusaurus is not affiliated with any of these services, and this information is provided for convenience only. Some of the write-ups are provided by third-parties, and recent API changes may not be reflected on our side. If you see outdated content, PRs are welcome. + +Because we can provide this content on a best-effort basis only, we have stopped accepting PRs adding new hosting options. You can, however, publish your writeup on a separate site (e.g. your blog, or the provider's official website), and ask us to include a link to your writeup. + +::: + +## Deploying to Netlify {/* #deploying-to-netlify */} + +To deploy your Docusaurus sites to [Netlify](https://www.netlify.com/), first make sure the following options are properly configured: + +```js title="docusaurus.config.js" +export default { + // highlight-start + url: 'https://docusaurus-2.netlify.app', // Url to your site with no trailing slash + baseUrl: '/', // Base directory of your site relative to your repo + // highlight-end + // ... +}; +``` + +Then, [create your site with Netlify](https://app.netlify.com/start). 
+ +While you set up the site, specify the build commands and directories as follows: + +- build command: `npm run build` +- publish directory: `build` + +If you did not configure these build options, you may still go to "Site settings" -> "Build & deploy" after your site is created. + +Once properly configured with the above options, your site should deploy and automatically redeploy upon merging to your deploy branch, which defaults to `main`. + +:::warning + +Some Docusaurus sites put the `docs` folder outside of `website` (most likely former Docusaurus v1 sites): + +```bash +repo # git root +├── docs # MD files +└── website # Docusaurus root +``` + +If you decide to use the `website` folder as Netlify's base directory, Netlify will not trigger builds when you update the `docs` folder, and you need to configure a [custom `ignore` command](https://docs.netlify.com/configure-builds/common-configurations/ignore-builds/): + +```toml title="website/netlify.toml" +[build] + ignore = "git diff --quiet $CACHED_COMMIT_REF $COMMIT_REF . ../docs/" +``` + +::: + +:::warning + +By default, Netlify adds trailing slashes to Docusaurus URLs. + +It is recommended to disable the Netlify setting `Post Processing > Asset Optimization > Pretty Urls` to prevent lowercase URLs, unnecessary redirects, and 404 errors. + +**Be very careful**: the `Disable asset optimization` global checkbox is broken and does not really disable the `Pretty URLs` setting in practice. Please make sure to **uncheck it independently**. + +If you want to keep the `Pretty Urls` Netlify setting on, adjust the `trailingSlash` Docusaurus config appropriately. + +Refer to [slorber/trailing-slash-guide](https://github.com/slorber/trailing-slash-guide) for more information. + +::: + +## Deploying to Vercel {/* #deploying-to-vercel */} + +Deploying your Docusaurus project to [Vercel](https://vercel.com/) will provide you with [various benefits](https://vercel.com/) in the areas of performance and ease of use. 
+ +To deploy your Docusaurus project with a [Vercel for Git Integration](https://vercel.com/docs/concepts/git), make sure it has been pushed to a Git repository. + +Import the project into Vercel using the [Import Flow](https://vercel.com/import/git). During the import, you will find all relevant options preconfigured for you; however, you can choose to change any of these [options](https://vercel.com/docs/build-step#build-&-development-settings). + +After your project has been imported, all subsequent pushes to branches will generate [Preview Deployments](https://vercel.com/docs/platform/deployments#preview), and all changes made to the [Production Branch](https://vercel.com/docs/git-integrations#production-branch) (usually "main" or "master") will result in a [Production Deployment](https://vercel.com/docs/platform/deployments#production). + +## Deploying to GitHub Pages {/* #deploying-to-github-pages */} + +Docusaurus provides an easy way to publish to [GitHub Pages](https://pages.github.com/), which comes free with every GitHub repository. + +### Overview {/* #github-pages-overview */} + +Usually, there are two repositories (at least two branches) involved in a publishing process: the branch containing the source files, and the branch containing the build output to be served with GitHub Pages. In the following tutorial, they will be referred to as **"source"** and **"deployment"**, respectively. + +Each GitHub repository is associated with a GitHub Pages service. If the deployment repository is called `my-org/my-project` (where `my-org` is the organization name or username), the deployed site will appear at `https://my-org.github.io/my-project/`. If the deployment repository is called `my-org/my-org.github.io` (the _organization GitHub Pages repo_), the site will appear at `https://my-org.github.io/`. + +:::info + +In case you want to use your custom domain for GitHub Pages, create a `CNAME` file in the `static` directory. 
Anything within the `static` directory will be copied to the root of the `build` directory for deployment. When using a custom domain, you should be able to move back from `baseUrl: '/projectName/'` to `baseUrl: '/'`, and also set your `url` to your custom domain. + +You may refer to GitHub Pages' documentation [User, Organization, and Project Pages](https://help.github.com/en/articles/user-organization-and-project-pages) for more details. + +::: + +GitHub Pages picks up deploy-ready files (the output from `docusaurus build`) from the default branch (`master` / `main`, usually) or the `gh-pages` branch, and either from the root or the `/docs` folder. You can configure that through `Settings > Pages` in your repository. This branch will be called the "deployment branch". + +We provide a `docusaurus deploy` command that helps you deploy your site from the source branch to the deployment branch in one command: clone, build, and commit. + +### `docusaurus.config.js` settings {/* #docusaurusconfigjs-settings */} + +First, modify your `docusaurus.config.js` and add the following params: + +| Name | Description | +| --- | --- | +| `organizationName` | The GitHub user or organization that owns the deployment repository. | +| `projectName` | The name of the deployment repository. | +| `deploymentBranch` | The name of the deployment branch. It defaults to `'gh-pages'` for non-organization GitHub Pages repos (`projectName` not ending in `.github.io`). Otherwise, it needs to be explicit as a config field or environment variable. | + +These fields also have their environment variable counterparts which have a higher priority: `ORGANIZATION_NAME`, `PROJECT_NAME`, and `DEPLOYMENT_BRANCH`. + +:::warning + +GitHub Pages adds a trailing slash to Docusaurus URLs by default. It is recommended to set a `trailingSlash` config (`true` or `false`, not `undefined`). + +::: + +Example: + +```js title="docusaurus.config.js" +export default { + // ... 
+ url: 'https://endiliey.github.io', // Your website URL + baseUrl: '/', + // highlight-start + projectName: 'endiliey.github.io', + organizationName: 'endiliey', + trailingSlash: false, + // highlight-end + // ... +}; +``` + +:::warning + +By default, GitHub Pages runs published files through [Jekyll](https://jekyllrb.com/). Since Jekyll will discard any files that begin with `_`, it is recommended that you disable Jekyll by adding an empty file named `.nojekyll` to your `static` directory. + +::: + +### Environment settings {/* #environment-settings */} + +| Name | Description | +| --- | --- | +| `USE_SSH` | Set to `true` to use SSH instead of the default HTTPS for the connection to the GitHub repo. If the source repo URL is an SSH URL (e.g. `git@github.com:facebook/docusaurus.git`), `USE_SSH` is inferred to be `true`. | +| `GIT_USER` | The username for a GitHub account that **has push access to the deployment repo**. For your own repositories, this will usually be your GitHub username. Required if not using SSH, and ignored otherwise. | +| `GIT_PASS` | Personal access token of the git user (specified by `GIT_USER`), to facilitate non-interactive deployment (e.g. continuous deployment) | +| `CURRENT_BRANCH` | The source branch. Usually, the branch will be `main` or `master`, but it could be any branch except for `gh-pages`. If nothing is set for this variable, then the current branch from which `docusaurus deploy` is invoked will be used. 
| +| `GIT_USER_NAME` | The `git config user.name` value to use when pushing to the deployment repo | +| `GIT_USER_EMAIL` | The `git config user.email` value to use when pushing to the deployment repo | + +GitHub enterprise installations should work in the same manner as github.com; you only need to set the organization's GitHub Enterprise host as an environment variable: + +| Name | Description | +| ------------- | ----------------------------------------------- | +| `GITHUB_HOST` | The domain name of your GitHub enterprise site. | +| `GITHUB_PORT` | The port of your GitHub enterprise site. | + +### Deploy {/* #deploy */} + +Finally, to deploy your site to GitHub Pages, run: + +```mdx-code-block + + +``` + +```bash +GIT_USER= yarn deploy +``` + +```mdx-code-block + + +``` + +```batch +cmd /C "set "GIT_USER=" && yarn deploy" +``` + +```mdx-code-block + + +``` + +```powershell +cmd /C 'set "GIT_USER=" && yarn deploy' +``` + +```mdx-code-block + + +``` + +:::warning + +Beginning in August 2021, GitHub requires every command-line sign-in to use the **personal access token** instead of the password. When GitHub prompts for your password, enter the PAT instead. See the [GitHub documentation](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) for more information. + +Alternatively, you can use SSH (`USE_SSH=true`) to log in. + +::: + +### Triggering deployment with GitHub Actions {/* #triggering-deployment-with-github-actions */} + +[GitHub Actions](https://help.github.com/en/actions) allow you to automate, customize, and execute your software development workflows right in your repository. 
+ +The workflow examples below assume your website source resides in the `main` branch of your repository (the _source branch_ is `main`), and your [publishing source](https://help.github.com/en/github/working-with-github-pages/configuring-a-publishing-source-for-your-github-pages-site) is configured for [publishing with a custom GitHub Actions Workflow](https://docs.github.com/en/pages/getting-started-with-github-pages/configuring-a-publishing-source-for-your-github-pages-site#publishing-with-a-custom-github-actions-workflow). + +Our goal is that: + +1. When a new pull request is made to `main`, there's an action that ensures the site builds successfully, without actually deploying. This job will be called `test-deploy`. +2. When a pull request is merged to the `main` branch or someone pushes to the `main` branch directly, it will be built and deployed to GitHub Pages. This job will be called `deploy`. + +Here are two approaches to deploying your docs with GitHub Actions. Based on the location of your deployment repository, choose the relevant tab below: + +- Source repo and deployment repo are the **same** repository. +- The deployment repo is a **remote** repository, different from the source. Instructions for this scenario assume [publishing source](https://help.github.com/en/github/working-with-github-pages/configuring-a-publishing-source-for-your-github-pages-site) is the `gh-pages` branch. + +```mdx-code-block + + +``` + +While you can have both jobs defined in the same workflow file, the original `deploy` workflow will always be listed as skipped in the PR check suite status, which is not indicative of the actual status and provides no value to the review process. We therefore propose to manage them as separate workflows instead. + +
    +GitHub action files + +Add these two workflow files: + +:::warning Tweak the parameters for your setup + +If your Docusaurus project is not at the root of your repo, you may need to configure a [default working directory](https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#example-set-the-default-shell-and-working-directory), and adjust the paths accordingly. + +::: + + + + +```yml title=".github/workflows/deploy.yml" +name: Deploy to GitHub Pages + +on: + push: + branches: + - main + # Review gh actions docs if you want to further define triggers, paths, etc + # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on + +jobs: + build: + name: Build Docusaurus + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-node@v4 + with: + node-version: 20 + cache: npm + + - name: Install dependencies + run: npm ci + - name: Build website + run: npm run build + + - name: Upload Build Artifact + uses: actions/upload-pages-artifact@v3 + with: + path: build + + deploy: + name: Deploy to GitHub Pages + needs: build + + # Grant GITHUB_TOKEN the permissions required to make a Pages deployment + permissions: + pages: write # to deploy to Pages + id-token: write # to verify the deployment originates from an appropriate source + + # Deploy to the github-pages environment + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + + runs-on: ubuntu-latest + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 +``` + +```yml title=".github/workflows/test-deploy.yml" +name: Test deployment + +on: + pull_request: + branches: + - main + # Review gh actions docs if you want to further define triggers, paths, etc + # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on + +jobs: + test-deploy: + name: Test deployment + runs-on: ubuntu-latest + steps: + - uses: 
actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-node@v4 + with: + node-version: 20 + cache: npm + + - name: Install dependencies + run: npm ci + - name: Test build website + run: npm run build +``` + + + + +```yml title=".github/workflows/deploy.yml" +name: Deploy to GitHub Pages + +on: + push: + branches: + - main + # Review gh actions docs if you want to further define triggers, paths, etc + # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on + +jobs: + build: + name: Build Docusaurus + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-node@v4 + with: + node-version: 20 + cache: yarn + + - name: Install dependencies + run: yarn install --frozen-lockfile + - name: Build website + run: yarn build + + - name: Upload Build Artifact + uses: actions/upload-pages-artifact@v3 + with: + path: build + + deploy: + name: Deploy to GitHub Pages + needs: build + + # Grant GITHUB_TOKEN the permissions required to make a Pages deployment + permissions: + pages: write # to deploy to Pages + id-token: write # to verify the deployment originates from an appropriate source + + # Deploy to the github-pages environment + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + + runs-on: ubuntu-latest + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 +``` + +```yml title=".github/workflows/test-deploy.yml" +name: Test deployment + +on: + pull_request: + branches: + - main + # Review gh actions docs if you want to further define triggers, paths, etc + # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on + +jobs: + test-deploy: + name: Test deployment + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-node@v4 + with: + node-version: 20 + cache: yarn + + - name: Install dependencies + run: yarn install 
--frozen-lockfile + - name: Test build website + run: yarn build +``` + + + + +
    + +```mdx-code-block +
    + +``` + +A cross-repo publish is more difficult to set up because you need to push to another repo with permission checks. We will be using SSH to do the authentication. + +1. Generate a new [SSH key](https://help.github.com/en/github/authenticating-to-github/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent). Since this SSH key will be used in CI, make sure to not enter any passphrase. +2. By default, your public key should have been created in `~/.ssh/id_rsa.pub`; otherwise, use the name you've provided in the previous step to add your key to [GitHub deploy keys](https://developer.github.com/v3/guides/managing-deploy-keys/). +3. Copy the key to clipboard with `pbcopy < ~/.ssh/id_rsa.pub` and paste it as a [deploy key](https://developer.github.com/v3/guides/managing-deploy-keys/#deploy-keys) in the deployment repository. Copy the file content if the command line doesn't work for you. Check the box for `Allow write access` before saving your deployment key. +4. You'll need your private key as a [GitHub secret](https://help.github.com/en/actions/configuring-and-managing-workflows/creating-and-storing-encrypted-secrets) to allow Docusaurus to run the deployment for you. +5. Copy your private key with `pbcopy < ~/.ssh/id_rsa` and paste a GitHub secret with the name `GH_PAGES_DEPLOY` on your source repository. Copy the file content if the command line doesn't work for you. Save your secret. +6. Create your [documentation workflow](https://docs.github.com/en/actions/use-cases-and-examples/creating-an-example-workflow) in the `.github/workflows/` directory. In this example it's the `deploy.yml` file. + +At this point, you should have: + +- the source repo with the GitHub workflow set with the private SSH key as the GitHub Secret, and +- your deployment repo set with the public SSH key in GitHub Deploy Keys. + +
    + +GitHub action file + +:::warning + +Please make sure that you replace `actions@github.com` with your GitHub email and `gh-actions` with your name. + +This file assumes you are using Yarn. If you use npm, change `cache: yarn`, `yarn install --frozen-lockfile`, `yarn build` to `cache: npm`, `npm ci`, `npm run build` accordingly. + +::: + +```yml title=".github/workflows/deploy.yml" +name: Deploy to GitHub Pages + +on: + pull_request: + branches: [main] + push: + branches: [main] + +permissions: + contents: write + +jobs: + test-deploy: + if: github.event_name != 'push' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-node@v4 + with: + node-version: 20 + cache: yarn + - name: Install dependencies + run: yarn install --frozen-lockfile + - name: Test build website + run: yarn build + deploy: + if: github.event_name != 'pull_request' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - uses: actions/setup-node@v4 + with: + node-version: 20 + cache: yarn + - uses: webfactory/ssh-agent@v0.5.0 + with: + ssh-private-key: ${{ secrets.GH_PAGES_DEPLOY }} + - name: Deploy to GitHub Pages + env: + USE_SSH: true + run: | + git config --global user.email "actions@github.com" + git config --global user.name "gh-actions" + yarn install --frozen-lockfile + yarn deploy +``` + +
    + +```mdx-code-block +
    +
    +``` + +
    + +Site not deployed properly? + +After pushing to main, if you don't see your site published at the desired location (for example, it says "There isn't a GitHub Pages site here", or it's showing your repo's README.md file), try the following: + +- Wait about three minutes and refresh. It may take a few minutes for GitHub pages to pick up the new files. +- Check your repo's landing page for a little green tick next to the last commit's title, indicating the CI has passed. If you see a cross, it means the build or deployment failed, and you should check the log for more debugging information. +- Click on the tick and make sure you see a "Deploy to GitHub Pages" workflow. Names like "pages build and deployment / deploy" are GitHub's default workflows, indicating your custom deployment workflow failed to be triggered at all. Make sure the YAML files are placed under the `.github/workflows` folder, and that the trigger condition is set correctly (e.g., if your default branch is "master" instead of "main", you need to change the `on.push` property). +- Under your repo's Settings > Pages, make sure the "Source" (which is the source for the _deployment_ files, not "source" as in our terminology) is set to "gh-pages" + "/ (root)", since we are using `gh-pages` as the deployment branch. + +If you are using a custom domain: + +- Verify that you have the correct DNS records set up if you're using a custom domain. See [GitHub pages documentation on configuring custom domains](https://docs.github.com/en/pages/configuring-a-custom-domain-for-your-github-pages-site/about-custom-domains-and-github-pages). Also, please be aware that it may take up to 24 hours for DNS changes to propagate through the internet. + +
    + +### Triggering deployment with Travis CI {/* #triggering-deployment-with-travis-ci */} + +Continuous integration (CI) services are typically used to perform routine tasks whenever new commits are checked in to source control. These tasks can be any combination of running unit tests and integration tests, automating builds, publishing packages to npm, and deploying changes to your website. All you need to do to automate the deployment of your website is to invoke the `yarn deploy` script whenever your website is updated. The following section covers how to do just that using [Travis CI](https://travis-ci.com/), a popular continuous integration service provider. + +1. Go to https://github.com/settings/tokens and generate a new [personal access token](https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/). When creating the token, grant it the `repo` scope so that it has the permissions it needs. +2. Using your GitHub account, [add the Travis CI app](https://github.com/marketplace/travis-ci) to the repository you want to activate. +3. Open your Travis CI dashboard. The URL looks like `https://travis-ci.com/USERNAME/REPO`, and navigate to the `More options > Setting > Environment Variables` section of your repository. +4. Create a new environment variable named `GH_TOKEN` with your newly generated token as its value, then `GH_EMAIL` (your email address) and `GH_NAME` (your GitHub username). +5. 
Create a `.travis.yml` on the root of your repository with the following: + +```yml title=".travis.yml" +language: node_js +node_js: + - 20 +branches: + only: + - main +cache: + yarn: true +script: + - git config --global user.name "${GH_NAME}" + - git config --global user.email "${GH_EMAIL}" + - echo "machine github.com login ${GH_NAME} password ${GH_TOKEN}" > ~/.netrc + - yarn install + - GIT_USER="${GH_NAME}" yarn deploy +``` + +Now, whenever a new commit lands in `main`, Travis CI will run your suite of tests and if everything passes, your website will be deployed via the `yarn deploy` script. + +### Triggering deployment with Buddy {/* #triggering-deployment-with-buddy */} + +[Buddy](https://buddy.works/) is an easy-to-use CI/CD tool that allows you to automate the deployment of your portal to different environments, including GitHub Pages. + +Follow these steps to create a pipeline that automatically deploys a new version of your website whenever you push changes to the selected branch of your project: + +1. Go to https://github.com/settings/tokens and generate a new [personal access token](https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/). When creating the token, grant it the `repo` scope so that it has the permissions it needs. +2. Sign in to your Buddy account and create a new project. +3. Choose GitHub as your git hosting provider and select the repository with the code of your website. +4. Using the left navigation panel, switch to the `Pipelines` view. +5. Create a new pipeline. Define its name, set the trigger mode to `On push`, and select the branch that triggers the pipeline execution. +6. Add a `Node.js` action. +7. 
Add these commands in the action's terminal: + +```bash +GIT_USER=<GH_PERSONAL_ACCESS_TOKEN> +git config --global user.email "<YOUR_GH_EMAIL>" +git config --global user.name "<YOUR_GH_USERNAME>" +yarn deploy +``` + +After creating this simple pipeline, each new commit pushed to the branch you selected deploys your website to GitHub Pages using `yarn deploy`. Read [this guide](https://buddy.works/guides/react-docusaurus) to learn more about setting up a CI/CD pipeline for Docusaurus. + +### Using Azure Pipelines {/* #using-azure-pipelines */} + +1. Sign up at [Azure Pipelines](https://azure.microsoft.com/en-us/services/devops/pipelines/) if you haven't already. +2. Create an organization. Within the organization, create a project and connect your repository from GitHub. +3. Go to https://github.com/settings/tokens and generate a new [personal access token](https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/) with the `repo` scope. +4. In the project page (which looks like `https://dev.azure.com/ORG_NAME/REPO_NAME/_build`), create a new pipeline with the following text. Also, click on edit and add a new environment variable named `GH_TOKEN` with your newly generated token as its value, then `GH_EMAIL` (your email address) and `GH_NAME` (your GitHub username). Make sure to mark them as secret. Alternatively, you can also add a file named `azure-pipelines.yml` at your repository root. 
+ +```yml title="azure-pipelines.yml" +trigger: + - main + +pool: + vmImage: ubuntu-latest + +steps: + - checkout: self + persistCredentials: true + + - task: NodeTool@0 + inputs: + versionSpec: '20' + displayName: Install Node.js + + - script: | + git config --global user.name "${GH_NAME}" + git config --global user.email "${GH_EMAIL}" + git checkout -b main + echo "machine github.com login ${GH_NAME} password ${GH_TOKEN}" > ~/.netrc + yarn install + GIT_USER="${GH_NAME}" yarn deploy + env: + GH_NAME: $(GH_NAME) + GH_EMAIL: $(GH_EMAIL) + GH_TOKEN: $(GH_TOKEN) + displayName: Install and build +``` + +### Using Drone {/* #using-drone */} + +1. Create a new SSH key that will be the [deploy key](https://docs.github.com/en/free-pro-team@latest/developers/overview/managing-deploy-keys#deploy-keys) for your project. +2. Give your private and public keys specific names so that they do not overwrite your other [SSH keys](https://docs.github.com/en/free-pro-team@latest/github/authenticating-to-github/generating-a-new-ssh-key-and-adding-it-to-the-ssh-agent). +3. Go to `https://github.com/USERNAME/REPO/settings/keys` and add a new deploy key by pasting in the public key you just generated. +4. Open your Drone.io dashboard and log in. The URL looks like `https://cloud.drone.io/USERNAME/REPO`. +5. Click on the repository, click on activate repository, and add a secret called `git_deploy_private_key` with your private key value that you just generated. +6. Create a `.drone.yml` on the root of your repository with the below text. 
+ +```yml title=".drone.yml" +kind: pipeline +type: docker +trigger: + event: + - tag +- name: Website + image: node + commands: + - mkdir -p $HOME/.ssh + - ssh-keyscan -t rsa github.com >> $HOME/.ssh/known_hosts + - echo "$GITHUB_PRIVATE_KEY" > "$HOME/.ssh/id_rsa" + - chmod 0600 $HOME/.ssh/id_rsa + - cd website + - yarn install + - yarn deploy + environment: + USE_SSH: true + GITHUB_PRIVATE_KEY: + from_secret: git_deploy_private_key +``` + +Now, whenever you push a new tag to GitHub, this trigger will start the drone CI job to publish your website. + +## Deploying to Flightcontrol {/* #deploying-to-flightcontrol */} + +[Flightcontrol](https://www.flightcontrol.dev/?ref=docusaurus) is a service that automatically builds and deploys your web apps to AWS Fargate directly from your Git repository. It gives you full access to inspect and make infrastructure changes without the limitations of a traditional PaaS. + +Get started by following [Flightcontrol's step-by-step Docusaurus guide](https://www.flightcontrol.dev/docs/reference/examples/docusaurus/?ref=docusaurus). + +## Deploying to Koyeb {/* #deploying-to-koyeb */} + +[Koyeb](https://www.koyeb.com) is a developer-friendly serverless platform to deploy apps globally. The platform lets you seamlessly run Docker containers, web apps, and APIs with git-based deployment, native autoscaling, a global edge network, and built-in service mesh and discovery. Check out the [Koyeb's Docusaurus deployment guide](https://www.koyeb.com/tutorials/deploy-docusaurus-on-koyeb) to get started. + +## Deploying to Render {/* #deploying-to-render */} + +[Render](https://render.com) offers [free static site hosting](https://render.com/docs/static-sites) with fully managed SSL, custom domains, a global CDN, and continuous auto-deploy from your Git repo. Get started in just a few minutes by following [Render's guide to deploying Docusaurus](https://render.com/docs/deploy-docusaurus). 
+ +## Deploying to Qovery {/* #deploying-to-qovery */} + +[Qovery](https://www.qovery.com) is a fully-managed cloud platform that runs on your AWS, Digital Ocean, and Scaleway account where you can host static sites, backend APIs, databases, cron jobs, and all your other apps in one place. + +1. Create a Qovery account. Visit the [Qovery dashboard](https://console.qovery.com) to create an account if you don't already have one. +2. Create a project. + - Click on **Create project** and give a name to your project. + - Click on **Next**. +3. Create a new environment. + - Click on **Create environment** and give a name (e.g. staging, production). +4. Add an application. + - Click on **Create an application**, give a name and select your GitHub or GitLab repository where your Docusaurus app is located. + - Define the main branch name and the root application path. + - Click on **Create**. After the application is created: + - Navigate to your application **Settings** + - Select **Port** + - Add port used by your Docusaurus application +5. Deploy + - All you have to do now is to navigate to your application and click on **Deploy**. + +![Deploy the app](https://hub.qovery.com/img/heroku/heroku-1.png) + +That's it. Watch the status and wait till the app is deployed. To open the application in your browser, click on **Action** and **Open** in your application overview. + +## Deploying to Hostman {/* #deploying-to-hostman */} + +[Hostman](https://hostman.com/) allows you to host static websites for free. Hostman automates everything, you just need to connect your repository and follow these easy steps: + +1. Create a service. + + - To deploy a Docusaurus static website, click **Create** in the top-left corner of your [Dashboard](https://dashboard.hostman.com/) and choose **Front-end app or static website**. + +2. Select the project to deploy. 
+ + - If you are logged in to Hostman with your GitHub, GitLab, or Bitbucket account, you will see the repository with your projects, including the private ones. + + - Choose the project you want to deploy. It must contain the directory with the project's files (e.g. `website`). + + - To access a different repository, click **Connect another repository**. + + - If you didn't use your Git account credentials to log in, you'll be able to access the necessary account now, and then select the project. + +3. Configure the build settings. + + - Next, the **Website customization** window will appear. Choose the **Static website** option from the list of frameworks. + + - The **Directory with app** points at the directory that will contain the project's files after the build. If you selected the repository with the contents of the website (or `my_website`) directory during Step 2, you can leave it empty. + + - The standard build command for Docusaurus is: + + ```bash npm2yarn + npm run build + ``` + + - You can modify the build command if needed. You can enter multiple commands separated by `&&`. + +4. Deploy. + + - Click **Deploy** to start the build process. + + - Once it starts, you will enter the deployment log. If there are any issues with the code, you will get warning or error messages in the log specifying the cause of the problem. Usually, the log contains all the debugging data you'll need. + + - When the deployment is complete, you will receive an email notification and also see a log entry. All done! Your project is up and ready. + +## Deploying to Surge {/* #deploying-to-surge */} + +Surge is a [static web hosting platform](https://surge.sh/help/getting-started-with-surge) that you can use to deploy your Docusaurus project from the command line in seconds. Deploying your project to Surge is easy and free (including custom domains and SSL certs). + +Deploy your app in a matter of seconds using Surge with the following steps: + +1. 
First, install Surge using npm by running the following command: + ```bash npm2yarn + npm install -g surge + ``` +2. To build the static files of your site for production in the root directory of your project, run: + ```bash npm2yarn + npm run build + ``` +3. Then, run this command inside the root directory of your project: + ```bash + surge build/ + ``` + +First-time users of Surge will be prompted to create an account from the command line (which happens only once). + +Confirm that the site you want to publish is in the `build` directory. A randomly generated `*.surge.sh` subdomain is always given (which can be edited). + +### Using your domain {/* #using-your-domain */} + +If you have a domain name, you can deploy your site using the command: + +```bash +surge build/ your-domain.com +``` + +Your site is now deployed for free at `subdomain.surge.sh` or `your-domain.com` depending on the method you chose. + +### Setting up CNAME file {/* #setting-up-cname-file */} + +Store your domain in a CNAME file for future deployments with the following command: + +```bash +echo subdomain.surge.sh > CNAME +``` + +You can deploy any other changes in the future with the command `surge`. + +## Deploying to Stormkit {/* #deploying-to-stormkit */} + +You can deploy your Docusaurus project to [Stormkit](https://www.stormkit.io), a deployment platform for static websites, single-page applications (SPAs), and serverless functions. For detailed instructions, refer to this [guide](https://www.stormkit.io/blog/how-to-deploy-docusarous). + +## Deploying to QuantCDN {/* #deploying-to-quantcdn */} + +1. Install [Quant CLI](https://docs.quantcdn.io/docs/cli/get-started) +2. Create a QuantCDN account by [signing up](https://dashboard.quantcdn.io/register) +3. Initialize your project with `quant init` and fill in your credentials: + ```bash + quant init + ``` +4. Deploy your site. 
+ ```bash + quant deploy + ``` + +See [docs](https://docs.quantcdn.io/docs/cli/continuous-integration) and [blog](https://www.quantcdn.io/blog) for more examples and use cases for deploying to QuantCDN. + +## Deploying to Cloudflare {/* #deploying-to-cloudflare */} + +[Cloudflare](https://cloudflare.com/) offers two approaches for deploying your Docusaurus site: **Cloudflare Workers** and **Cloudflare Pages**. + +- [Cloudflare's framework guide for deploying Docusaurus with Workers](https://developers.cloudflare.com/workers/framework-guides/web-apps/more-web-frameworks/docusaurus/). +- [Cloudflare's guide to deploying Docusaurus with Pages](https://developers.cloudflare.com/pages/framework-guides/deploy-a-docusaurus-site/). + +## Deploying to Azure Static Web Apps {/* #deploying-to-azure-static-web-apps */} + +[Azure Static Web Apps](https://docs.microsoft.com/en-us/azure/static-web-apps/overview) is a service that automatically builds and deploys full-stack web apps to Azure directly from the code repository, simplifying the developer experience for CI/CD. Static Web Apps separates the web application's static assets from its dynamic (API) endpoints. Static assets are served from globally-distributed content servers, making it faster for clients to retrieve files using servers nearby. Dynamic APIs are scaled with serverless architectures using an event-driven functions-based approach that is more cost-effective and scales on demand. Get started in a few minutes by following [this step-by-step guide](https://dev.to/azure/11-share-content-with-docusaurus-azure-static-web-apps-30hc). + +## Deploying to Kinsta {/* #deploying-to-kinsta */} + +[Kinsta Static Site Hosting](https://kinsta.com/static-site-hosting) lets you deploy up to 100 static sites for free, with custom domains with SSL, 100 GB of monthly bandwidth, and 260+ Cloudflare CDN locations. 
+ +Get started in just a few clicks by following our [Docusaurus on Kinsta](https://kinsta.com/docs/docusaurus-example/) article. diff --git a/documents/markdown/docusaurus/docs-create-doc.md b/documents/markdown/docusaurus/docs-create-doc.md new file mode 100644 index 0000000..e659e36 --- /dev/null +++ b/documents/markdown/docusaurus/docs-create-doc.md @@ -0,0 +1,202 @@ +--- +id: create-doc +description: Create a Markdown Document +slug: /create-doc +--- + +# Create a doc + +Create a Markdown file, `greeting.md`, and place it under the `docs` directory. + +```bash +website # root directory of your site +├── docs +│ └── greeting.md +├── src +│ └── pages +├── docusaurus.config.js +├── ... +``` + +```md +--- +description: Create a doc page with rich content. +--- + +# Hello from Docusaurus + +Are you ready to create the documentation site for your open source project? + +## Headers + +will show up on the table of contents on the upper right + +So that your users will know what this page is all about without scrolling down or even without reading too much. + +## Only h2 and h3 will be in the TOC by default. + +You can configure the TOC heading levels either per-document or in the theme configuration. + +The headers are well-spaced so that the hierarchy is clear. + +- lists will help you +- present the key points +- that you want your users to remember + - and you may nest them + - multiple times +``` + +:::note + +All files prefixed with an underscore (`_`) under the `docs` directory are treated as "partial" pages and will be ignored by default. + +Read more about [importing partial pages](../markdown-features/markdown-features-react.mdx#importing-markdown). + +::: + +## Doc front matter {/* #doc-front-matter */} + +The [front matter](../markdown-features/markdown-features-intro.mdx#front-matter) is used to provide additional metadata for your doc page. Front matter is optional—Docusaurus will be able to infer all necessary metadata without the front matter. 
However, some features do require it: for example, the [doc tags](#doc-tags) feature introduced below requires using front matter. For all possible fields, see [the API documentation](../../api/plugins/plugin-content-docs.mdx#markdown-front-matter). + +## Doc tags {/* #doc-tags */} + +Tags are declared in the front matter and introduce another dimension of categorization in addition to the [docs sidebar](./sidebar/index.mdx). + +It is possible to define tags inline, or to reference predefined tags declared in a [`tags file`](../../api/plugins/plugin-content-docs.mdx#tags-file) (optional, usually `docs/tags.yml`). + +In the following example: + +- `docusaurus` references a predefined tag key declared in `docs/tags.yml` +- `Releases` is an inline tag, because it does not exist in `docs/tags.yml` + +```md title="docs/my-doc.md" +--- +tags: + - Releases + - docusaurus +--- + +# Title + +Content +``` + +```yml title="docs/tags.yml" +docusaurus: + label: 'Docusaurus' + permalink: '/docusaurus' + description: 'Docs related to the Docusaurus framework' +``` + +:::tip + +Tags can also be declared with `tags: [Demo, Getting started]`. + +Read more about all the possible [YAML array syntaxes](https://www.w3schools.io/file/yaml-arrays/). + +::: + +## Organizing folder structure {/* #organizing-folder-structure */} + +How the Markdown files are arranged under the `docs` folder can have multiple impacts on Docusaurus content generation. However, most of them can be decoupled from the file structure. + +### Document ID {/* #document-id */} + +Every document has a unique `id`. By default, a document `id` is the name of the document (without the extension) relative to the root docs directory. + +For example, the ID of `greeting.md` is `greeting`, and the ID of `guide/hello.md` is `guide/hello`. + +```bash +website # Root directory of your site +└── docs + ├── greeting.md + └── guide + └── hello.md +``` + +However, the **last part** of the `id` can be defined by the user in the front matter. 
For example, if `guide/hello.md`'s content is defined as below, its final `id` is `guide/part1`. + +```md +--- +id: part1 +--- + +Lorem ipsum +``` + +The ID is used to refer to a document when hand-writing sidebars, or when using docs-related layout components or hooks. + +### Doc URLs {/* #doc-urls */} + +By default, the document's URL location is derived from the [document `id`](#document-id), which in turn is based on the document's file path. + +If a file is named one of the following, the file name won't be included in the URL: + +- Named as `index` (case-insensitive): `docs/Guides/index.md` +- Named as `README` (case-insensitive): `docs/Guides/README.mdx` +- Same name as parent folder: `docs/Guides/Guides.md` + +In all cases, the default `slug` would only be `/Guides`, without the `/index`, `/README`, or duplicate `/Guides` segment. + +:::note + +This convention is exactly the same as [the category index convention](./sidebar/autogenerated.mdx#category-index-convention). However, the `isCategoryIndex` configuration does _not_ affect the document URL. + +::: + +Use the `slug` front matter to provide an explicit document URL and override the default one. + +For example, suppose your site structure looks like this: + +```bash +website # Root directory of your site +└── docs + └── guide + └── hello.md +``` + +By default, `hello.md` will be available at `/docs/guide/hello`. You can change its URL location to `/docs/bonjour`: + +```md +--- +slug: /bonjour +--- + +Lorem ipsum +``` + +`slug` will be appended to the doc plugin's `routeBasePath`, which is `/docs` by default. See [Docs-only mode](docs-introduction.mdx#docs-only-mode) for how to remove the `/docs` part from the URL. + +:::note + +It is possible to use: + +- absolute slugs: `slug: /mySlug`, `slug: /`... +- relative slugs: `slug: mySlug`, `slug: ./../mySlug`... + +::: + +:::tip + +Changing a document's filename or `id` will change its default URL. 
To prevent breaking permalinks when renaming files, we recommend setting an explicit `slug` to keep your URLs stable. + +::: + +#### Making a document available at the root {/* #making-a-document-available-at-the-root */} + +If you want a document to be available at the root, and have a path like `https://docusaurus.io/docs/`, you can use the slug front matter: + +```md +--- +id: my-home-doc +slug: / +--- + +Lorem ipsum +``` + +### Sidebars {/* #sidebars */} + +When using [autogenerated sidebars](./sidebar/autogenerated.mdx), the file structure will determine the sidebar structure. + +Our recommendation for file system organization is: make your file system mirror the sidebar structure (so you don't need to handwrite your `sidebars.js` file), and use the `slug` front matter to customize URLs of each document. diff --git a/documents/markdown/docusaurus/docs-introduction.md b/documents/markdown/docusaurus/docs-introduction.md new file mode 100644 index 0000000..f8cb4a0 --- /dev/null +++ b/documents/markdown/docusaurus/docs-introduction.md @@ -0,0 +1,120 @@ +--- +id: introduction +sidebar_label: Introduction +slug: /docs-introduction +--- + +# Docs Introduction + +The docs feature provides users with a way to organize Markdown files in a hierarchical format. + +:::info + +Check the [Docs Plugin API Reference documentation](./../../api/plugins/plugin-content-docs.mdx) for an exhaustive list of options. + +::: + +Your site's documentation is organized by four levels, from lowest to highest: + +1. Individual pages. +2. Sidebars. +3. Versions. +4. Plugin instances. + +The guide will introduce them in that order: starting from [how individual pages can be configured](./docs-create-doc.mdx), to [how to create a sidebar or multiple ones](./sidebar/index.mdx), to [how to create and manage versions](./versioning.mdx), to [how to use multiple docs plugin instances](./docs-multi-instance.mdx). 
+ +## Docs-only mode {/* #docs-only-mode */} + +A freshly initialized Docusaurus site has the following structure: + +``` +example.com/ -> generated from `src/pages/index.js` + +example.com/docs/intro -> generated from `docs/intro.md` +example.com/docs/tutorial-basics/... -> generated from `docs/tutorial-basics/...` +... + +example.com/blog/2021/08/26/welcome -> generated from `blog/2021-08-26-welcome/index.md` +example.com/blog/2021/08/01/mdx-blog-post -> generated from `blog/2021-08-01-mdx-blog-post.mdx` +... +``` + +All docs will be served under the subroute `docs/`. But what if **your site only has docs**, or you want to prioritize your docs by putting them at the root? + +Assume that you have the following in your configuration: + +```js title="docusaurus.config.js" +export default { + // ... + presets: [ + [ + '@docusaurus/preset-classic', + { + docs: { + /* docs plugin options */ + }, + blog: { + /* blog plugin options */ + }, + // ... + }, + ], + ], +}; +``` + +To enter docs-only mode, change it to something like this: + +```js title="docusaurus.config.js" +export default { + // ... + presets: [ + [ + '@docusaurus/preset-classic', + { + docs: { + // highlight-next-line + routeBasePath: '/', // Serve the docs at the site's root + /* other docs plugin options */ + }, + // highlight-next-line + blog: false, // Optional: disable the blog plugin + // ... + }, + ], + ], +}; +``` + +Note that you **don't necessarily have to give up on using the blog** or other plugins; all that `routeBasePath: '/'` does is that instead of serving the docs through `https://example.com/docs/some-doc`, they are now at the site root: `https://example.com/some-doc`. The blog, if enabled, can still be accessed through the `blog/` subroute. + +Don't forget to put some page at the root (`https://example.com/`) by adding the front matter: + +```md title="docs/intro.md" +--- +# highlight-next-line +slug: / +--- + +This page will be the home page when users visit https://example.com/. 
+``` + +:::warning + +If you added `slug: /` to a doc to make it the homepage, you should delete the existing homepage at `./src/pages/index.js`, or else there will be two files mapping to the same route! + +::: + +Now, the site's structure will be like the following: + +``` +example.com/ -> generated from `docs/intro.md` +example.com/tutorial-basics/... -> generated from `docs/tutorial-basics/...` +... +``` + +:::tip + +There's also a "blog-only mode" for those who only want to use the blog feature of Docusaurus. You can use the same method detailed above. Follow the setup instructions on [Blog-only mode](../../blog.mdx#blog-only-mode). + +::: diff --git a/documents/markdown/docusaurus/installation.md b/documents/markdown/docusaurus/installation.md new file mode 100644 index 0000000..0251d76 --- /dev/null +++ b/documents/markdown/docusaurus/installation.md @@ -0,0 +1,194 @@ +--- +description: How to install Docusaurus locally, and start a Docusaurus site in no time. +--- + +# Installation + +```mdx-code-block +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +``` + +Docusaurus consists of a set of npm [packages](https://github.com/facebook/docusaurus/tree/main/packages). + +:::tip + +Use the **[Fast Track](introduction.mdx#fast-track)** to understand Docusaurus in **5 minutes ⏱**! + +Use **[docusaurus.new](https://docusaurus.new)** to test Docusaurus immediately in your browser! + +::: + +## Requirements {/* #requirements */} + +- [Node.js](https://nodejs.org/en/download/) version 20.0 or above (which can be checked by running `node -v`). You can use [nvm](https://github.com/nvm-sh/nvm) to manage multiple Node.js versions on a single machine. + - When installing Node.js, it is recommended to check all checkboxes related to dependencies. 
+ +## Scaffold project website {/* #scaffold-project-website */} + +The easiest way to install Docusaurus is to use the [`create-docusaurus`](./api/misc/create-docusaurus.mdx) command line tool that helps you scaffold a skeleton Docusaurus website. You can run this command anywhere in a new empty repository or within an existing repository; it will create a new directory containing the scaffolded files. + +```bash +npx create-docusaurus@latest my-website classic +``` + +We recommend the `classic` template so that you can get started quickly, and it contains features found in Docusaurus 1. The `classic` template contains `@docusaurus/preset-classic`, which includes standard documentation, a blog, custom pages, and a CSS framework (with dark mode support). You can get up and running extremely quickly with the classic template and customize things later on when you have gained more familiarity with Docusaurus. + +You can also use the template's TypeScript variant by passing the `--typescript` flag. See [TypeScript support](./typescript-support.mdx) for more information. + +```bash +npx create-docusaurus@latest my-website classic --typescript +``` + +:::info Meta-Only + +If you are setting up a new Docusaurus website for a Meta open source project, run this command inside an internal repository, which comes with some useful Meta-specific defaults: + +```bash +scarf static-docs-bootstrap +``` + +::: + +
    + Alternative installation commands + +You can also initialize a new project using your preferred project manager: + +```bash npm2yarn +npm init docusaurus +``` + +
    + +Run `npx create-docusaurus@latest --help`, or check out its [API docs](./api/misc/create-docusaurus.mdx) for more information about all available flags. + +## Project structure {/* #project-structure */} + +Assuming you chose the classic template and named your site `my-website`, you will see the following files generated under a new directory `my-website/`: + +```bash +my-website +├── blog +│ ├── 2019-05-28-hola.md +│ ├── 2019-05-29-hello-world.md +│ └── 2020-05-30-welcome.md +├── docs +│ ├── doc1.md +│ ├── doc2.md +│ ├── doc3.md +│ └── mdx.md +├── src +│ ├── css +│ │ └── custom.css +│ └── pages +│ ├── styles.module.css +│ └── index.js +├── static +│ └── img +├── docusaurus.config.js +├── package.json +├── README.md +├── sidebars.js +└── yarn.lock +``` + +### Project structure rundown {/* #project-structure-rundown */} + +- `/blog/` - Contains the blog Markdown files. You can delete the directory if you've disabled the blog plugin, or you can change its name after setting the `path` option. More details can be found in the [blog guide](blog.mdx) +- `/docs/` - Contains the Markdown files for the docs. Customize the order of the docs sidebar in `sidebars.js`. You can delete the directory if you've disabled the docs plugin, or you can change its name after setting the `path` option. More details can be found in the [docs guide](./guides/docs/docs-introduction.mdx) +- `/src/` - Non-documentation files like pages or custom React components. You don't have to strictly put your non-documentation files here, but putting them under a centralized directory makes it easier to specify in case you need to do some sort of linting/processing + - `/src/pages` - Any JSX/TSX/MDX file within this directory will be converted into a website page. More details can be found in the [pages guide](guides/creating-pages.mdx) +- `/static/` - Static directory. 
Any contents inside here will be copied into the root of the final `build` directory +- `/docusaurus.config.js` - A config file containing the site configuration. This is the equivalent of `siteConfig.js` in Docusaurus v1 +- `/package.json` - A Docusaurus website is a React app. You can install and use any npm packages you like in it. +- `/sidebars.js` - Used by the documentation to specify the order of documents in the sidebar + +### Monorepos {/* #monorepos */} + +If you are using Docusaurus for documentation of an existing project, a monorepo may be the solution for you. Monorepos allow you to share dependencies between similar projects. For example, your website may use your local packages to showcase latest features instead of depending on a released version. Then, your contributors can update the docs as they implement features. An example monorepo folder structure is below: + +```bash +my-monorepo +├── package-a # Another package, your actual project +│ ├── src +│ └── package.json # Package A's dependencies +├── website # Docusaurus root +│ ├── docs +│ ├── src +│ └── package.json # Docusaurus' dependencies +├── package.json # Monorepo's shared dependencies +``` + +In this case, you should run `npx create-docusaurus` within the `./my-monorepo` folder. + +If you're using a hosting provider such as Netlify or Vercel, you will need to change the `Base directory` of the site to where your Docusaurus root is. In this case, that would be `./website`. Read more about configuring ignore commands in the [deployment docs](./deployment.mdx#deploying-to-netlify). + +Read more about monorepos in the [Yarn documentation](https://yarnpkg.com/features/workspaces) (Yarn is not the only way to set up a monorepo, but it's a common solution), or check out [Docusaurus](https://github.com/facebook/docusaurus) and [Jest](https://github.com/facebook/jest) for some real-world examples. 
+ +## Running the development server {/* #running-the-development-server */} + +To preview your changes as you edit the files, you can run a local development server that will serve your website and reflect the latest changes. + +```bash npm2yarn +cd my-website +npm run start +``` + +By default, a browser window will open at [`http://localhost:3000`](http://localhost:3000). + +Congratulations! You have just created your first Docusaurus site! Browse around the site to see what's available. + +## Build {/* #build */} + +Docusaurus is a modern static website generator, so we need to build the website into a directory of static contents and put it on a web server so that it can be viewed. To build the website: + +```bash npm2yarn +npm run build +``` + +and contents will be generated within the `/build` directory, which can be copied to any static file hosting service like [GitHub pages](https://pages.github.com/), [Vercel](https://vercel.com/) or [Netlify](https://www.netlify.com/). Check out the docs on [deployment](deployment.mdx) for more details. + +## Updating your Docusaurus version {/* #updating-your-docusaurus-version */} + +There are many ways to update your Docusaurus version. One guaranteed way is to manually change the version number in `package.json` to the desired version. Note that all `@docusaurus/`-namespaced packages should be using the same version. + +import UpgradeGuide from '@site/src/components/UpgradeGuide'; + + + +Then, in the directory containing `package.json`, run your package manager's install command: + +```bash npm2yarn +npm install +``` + +:::tip + +`npm install` may report several vulnerabilities and recommend running `npm audit` to address them. Typically, these reported vulnerabilities, such as RegExp DOS vulnerabilities, are harmless and can be safely ignored. Also read this article, which reflects our thinking: [npm audit: Broken by Design](https://overreacted.io/npm-audit-broken-by-design/). 
+ +::: + +To check that the update occurred successfully, run: + +```bash +npx docusaurus --version +``` + +You should see the correct version as output. + +Alternatively, if you are using Yarn, you can do: + +```bash +yarn add @docusaurus/core @docusaurus/preset-classic +``` + +:::tip + +Use new unreleased features of Docusaurus with the [`@canary` npm dist tag](/community/canary) + +::: + +## Problems? {/* #problems */} + +Ask for help on [Stack Overflow](https://stackoverflow.com/questions/tagged/docusaurus), on our [GitHub repository](https://github.com/facebook/docusaurus), our [Discord server](https://discordapp.com/invite/docusaurus), or [X](https://x.com/docusaurus). diff --git a/documents/markdown/docusaurus/markdown-features-admonitions.md b/documents/markdown/docusaurus/markdown-features-admonitions.md new file mode 100644 index 0000000..802f60a --- /dev/null +++ b/documents/markdown/docusaurus/markdown-features-admonitions.md @@ -0,0 +1,420 @@ +--- +id: admonitions +description: Handling admonitions/callouts in Docusaurus Markdown +slug: /markdown-features/admonitions +--- + +# Admonitions + +import BrowserWindow from '@site/src/components/BrowserWindow'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import Admonition from '@theme/Admonition'; + +In addition to the basic Markdown syntax, we have a special admonitions syntax by wrapping text with a set of 3 colons, followed by a label denoting its type. + +Example: + +```md +:::note + +Some **content** with _Markdown_ `syntax`. Check [this `api`](#). + +::: + +:::tip + +Some **content** with _Markdown_ `syntax`. Check [this `api`](#). + +::: + +:::info + +Some **content** with _Markdown_ `syntax`. Check [this `api`](#). + +::: + +:::warning + +Some **content** with _Markdown_ `syntax`. Check [this `api`](#). + +::: + +:::danger + +Some **content** with _Markdown_ `syntax`. Check [this `api`](#). 
+ +::: +``` + +```mdx-code-block + + +:::note + +Some **content** with _Markdown_ `syntax`. Check [this `api`](#). + +::: + +:::tip + +Some **content** with _Markdown_ `syntax`. Check [this `api`](#). + +::: + +:::info + +Some **content** with _Markdown_ `syntax`. Check [this `api`](#). + +::: + +:::warning + +Some **content** with _Markdown_ `syntax`. Check [this `api`](#). + +::: + +:::danger + +Some **content** with _Markdown_ `syntax`. Check [this `api`](#). + +::: + + +``` + +## Usage with Prettier {/* #usage-with-prettier */} + +If you use [Prettier](https://prettier.io) to format your Markdown files, Prettier might auto-format your code to invalid admonition syntax. To avoid this problem, add empty lines around the starting and ending directives. This is also why the examples we show here all have empty lines around the content. + +{/* prettier-ignore */} +```md + +:::note + +Hello world + +::: + + +:::note +Hello world +::: + + +::: note Hello world::: +``` + +## Specifying title {/* #specifying-title */} + +You may also specify an optional title. + +```md +:::note[Your Title **with** some _Markdown_ `syntax`!] + +Some **content** with some _Markdown_ `syntax`. + +::: +``` + +```mdx-code-block + + +:::note[Your Title **with** some _Markdown_ `syntax`!] + +Some **content** with some _Markdown_ `syntax`. + +::: + + +``` + +## Specifying attributes {/* #specifying-attributes */} + +You may also provide classes or IDs to admonitions. + +```md +:::note[With css classes]{.padding--lg .text--italic} + +Note the padding and the italicized text. + +::: + +:::note{#admonition-id} + +The admonition container has now the id `admonition-id`. + +::: + +:::note{.padding--lg #admonition-id-2} + +Use id and classes together. + +::: +``` + +```mdx-code-block + + +:::note[With css classes]{.padding--lg .text--italic} + +Note the padding and the italicized text. + +::: + +:::note{#admonition-id} + +The admonition container has now the id `admonition-id`. 
+ +::: + +:::note{.padding--lg #admonition-id-2} + +Use id and classes together. + +::: + + +``` + +## Nested admonitions {/* #nested-admonitions */} + +Admonitions can be nested. Use more colons `:` for each parent admonition level. + +```md +:::::info Parent + +Parent content + +::::danger Child + +Child content + +:::tip Deep Child + +Deep child content + +::: + +:::: + +::::: +``` + +```mdx-code-block + + +:::::info Parent + +Parent content + +::::danger Child + +Child content + +:::tip Deep Child + +Deep child content + +::: + +:::: + +::::: + + +``` + +## Admonitions with MDX {/* #admonitions-with-mdx */} + +You can use MDX inside admonitions too! + +```jsx +import Tabs from '@theme/Tabs'; + +import TabItem from '@theme/TabItem'; + +:::tip[Use tabs in admonitions] + + + This is an apple 🍎 + This is an orange 🍊 + This is a banana 🍌 + + +::: +``` + +```mdx-code-block + + +:::tip[Use tabs in admonitions] + + + This is an apple 🍎 + This is an orange 🍊 + This is a banana 🍌 + + +::: + + +``` + +## Usage in JSX {/* #usage-in-jsx */} + +Outside of Markdown, you can use the `@theme/Admonition` component to get the same output. + +```jsx title="MyReactPage.jsx" +import Admonition from '@theme/Admonition'; + +export default function MyReactPage() { + return ( +
    + +

    Some information

    +
    +
    + ); +} +``` + +The types that are accepted are the same as above: `note`, `tip`, `danger`, `info`, `warning`. Optionally, you can specify an icon by passing a JSX element or a string, or a title: + +```jsx title="MyReactPage.jsx" + + Use plugins to introduce shorter syntax for the most commonly used JSX + elements in your project. + +``` + +```mdx-code-block + + + Use plugins to introduce shorter syntax for the most commonly used JSX + elements in your project. + + +``` + +## Customizing admonitions {/* #customizing-admonitions */} + +There are two kinds of customizations possible with admonitions: **parsing** and **rendering**. + +### Customizing rendering behavior {/* #customizing-rendering-behavior */} + +You can customize how each individual admonition type is rendered through [swizzling](../../swizzling.mdx). You can often achieve your goal through a simple wrapper. For example, in the following example, we swap out the icon for `info` admonitions only. + +```jsx title="src/theme/Admonition.js" +import React from 'react'; +import Admonition from '@theme-original/Admonition'; +import MyCustomNoteIcon from '@site/static/img/info.svg'; + +export default function AdmonitionWrapper(props) { + if (props.type !== 'info') { + return ; + } + return } {...props} />; +} +``` + +### Customizing parsing behavior {/* #customizing-parsing-behavior */} + +Admonitions are implemented with a [Remark plugin](./markdown-features-plugins.mdx). The plugin is designed to be configurable. To customize the Remark plugin for a specific content plugin (docs, blog, pages), pass the options through the `admonitions` key. 
+ +```js title="docusaurus.config.js" +export default { + presets: [ + [ + '@docusaurus/preset-classic', + { + docs: { + admonitions: { + keywords: ['note', 'tip', 'info', 'warning', 'danger'], + extendDefaults: true, + }, + }, + }, + ], + ], +}; +``` + +The plugin accepts the following options: + +- `keywords`: An array of keywords that can be used as the type for the admonition. +- `extendDefaults`: Should the provided options (such as `keywords`) be merged into the existing defaults. Defaults to `true`. + +The `keyword` will be passed as the `type` prop of the `Admonition` component. + +### Custom admonition type components {/* #custom-admonition-type-components */} + +By default, the theme doesn't know what to do with custom admonition keywords such as `:::my-custom-admonition`. It is your responsibility to map each admonition keyword to a React component so that the theme knows how to render them. + +If you registered a new admonition type `my-custom-admonition` via the following config: + +```js title="docusaurus.config.js" +export default { + // ... + presets: [ + [ + 'classic', + { + // ... + docs: { + admonitions: { + keywords: ['my-custom-admonition'], + extendDefaults: true, + }, + }, + }, + ], + ], +}; +``` + +You can provide the corresponding React component for `:::my-custom-admonition` by creating the following file (unfortunately, since it's not a React component file, it's not swizzlable): + +```js title="src/theme/Admonition/Types.js" +import React from 'react'; +import DefaultAdmonitionTypes from '@theme-original/Admonition/Types'; + +function MyCustomAdmonition(props) { + return ( +
    +
    {props.title}
    +
    {props.children}
    +
    + ); +} + +const AdmonitionTypes = { + ...DefaultAdmonitionTypes, + + // Add all your custom admonition types here... + // You can also override the default ones if you want + 'my-custom-admonition': MyCustomAdmonition, +}; + +export default AdmonitionTypes; +``` + +Now you can use your new admonition keyword in a Markdown file, and it will be parsed and rendered with your custom logic: + +```md +:::my-custom-admonition[My Title] + +It works! + +::: +``` + + + +:::my-custom-admonition[My Title] + +It works! + +::: + + diff --git a/documents/markdown/docusaurus/markdown-features-assets.md b/documents/markdown/docusaurus/markdown-features-assets.md new file mode 100644 index 0000000..7e89fb3 --- /dev/null +++ b/documents/markdown/docusaurus/markdown-features-assets.md @@ -0,0 +1,235 @@ +--- +id: assets +description: Handling assets in Docusaurus Markdown +slug: /markdown-features/assets +--- + +# Assets + +import BrowserWindow from '@site/src/components/BrowserWindow'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +Sometimes you want to link to assets (e.g. docx files, images...) directly from Markdown files, and it is convenient to co-locate the asset next to the Markdown file using it. + +Let's imagine the following file structure: + +``` +# Your doc +/website/docs/myFeature.mdx + +# Some assets you want to use +/website/docs/assets/docusaurus-asset-example-banner.png +/website/docs/assets/docusaurus-asset-example.docx +``` + +## Images {/* #images */} + +You can display images in three different ways: Markdown syntax, CJS require, or ES imports syntax. 
+ +```mdx-code-block + + +``` + +Display images using simple Markdown syntax: + +```md +![Example banner](./assets/docusaurus-asset-example-banner.png) +``` + +```mdx-code-block + + +``` + +Display images using inline CommonJS `require` in JSX image tag: + +```jsx +Example banner +``` + +```mdx-code-block + + +``` + +Display images using ES `import` syntax and JSX image tag: + +```jsx +import myImageUrl from './assets/docusaurus-asset-example-banner.png'; + +Example banner; +``` + +```mdx-code-block + + +``` + +All of the above result in displaying the image: + + + +![My image alternative text](../../assets/docusaurus-asset-example-banner.png) + + + +:::note + +If you are using [@docusaurus/plugin-ideal-image](../../api/plugins/plugin-ideal-image.mdx), you need to use the dedicated image component, as documented. + +::: + +## Files {/* #files */} + +In the same way, you can link to existing assets by `require`'ing them and using the returned URL in `video`s, `a` anchor links, etc. + +```md +# My Markdown page + +
    Download this docx + +or + +[Download this docx using Markdown](./assets/docusaurus-asset-example.docx) +``` + + + + + {'Download this docx'} + + +[Download this docx using Markdown](../../assets/docusaurus-asset-example.docx) + + + +:::info Markdown links are always file paths + +If you use the Markdown image or link syntax, all asset paths will be resolved as file paths by Docusaurus and automatically converted to `require()` calls. You don't need to use `require()` in Markdown unless you use the JSX syntax, which you do have to handle yourself. + +::: + +## Inline SVGs {/* #inline-svgs */} + +Docusaurus supports inlining SVGs out of the box. + +```jsx +import DocusaurusSvg from './docusaurus.svg'; + +; +``` + + + +import DocusaurusSvg from '@site/static/img/docusaurus.svg'; + + + + + +This can be useful if you want to alter the part of the SVG image via CSS. For example, you can change one of the SVG colors based on the current theme. + +```jsx +import DocusaurusSvg from './docusaurus.svg'; + +; +``` + +```css +[data-theme='light'] .themedDocusaurus [fill='#FFFF50'] { + fill: greenyellow; +} + +[data-theme='dark'] .themedDocusaurus [fill='#FFFF50'] { + fill: seagreen; +} +``` + + + + + +## Themed Images {/* #themed-images */} + +Docusaurus supports themed images: the `ThemedImage` component (included in the themes) allows you to switch the image source based on the current theme. + +```jsx +import useBaseUrl from '@docusaurus/useBaseUrl'; +import ThemedImage from '@theme/ThemedImage'; + +; +``` + +```mdx-code-block +import useBaseUrl from '@docusaurus/useBaseUrl'; +import ThemedImage from '@theme/ThemedImage'; + + + + +``` + +### GitHub-style themed images {/* #github-style-themed-images */} + +GitHub uses its own [image theming approach](https://github.blog/changelog/2021-11-24-specify-theme-context-for-images-in-markdown/) with path fragments, which you can easily implement yourself. 
+ +To toggle the visibility of an image using the path fragment (for GitHub, it's `#gh-dark-mode-only` and `#gh-light-mode-only`), add the following to your custom CSS (you can also use your own suffix if you don't want to be coupled to GitHub): + +```css title="src/css/custom.css" +[data-theme='light'] img[src$='#gh-dark-mode-only'], +[data-theme='dark'] img[src$='#gh-light-mode-only'] { + display: none; +} +``` + +```md +![Docusaurus themed image](/img/docusaurus_keytar.svg#gh-light-mode-only)![Docusaurus themed image](/img/docusaurus_speed.svg#gh-dark-mode-only) +``` + + + +![Docusaurus themed image](/img/docusaurus_keytar.svg#gh-light-mode-only)![Docusaurus themed image](/img/docusaurus_speed.svg#gh-dark-mode-only) + + + +## Static assets {/* #static-assets */} + +If a Markdown link or image has an absolute path, the path will be seen as a file path and will be resolved from the static directories. For example, if you have configured [static directories](../../static-assets.mdx) to be `['public', 'static']`, then for the following image: + +```md title="my-doc.md" +![An image from the static](/img/docusaurus.png) +``` + +Docusaurus will try to look for it in both `static/img/docusaurus.png` and `public/img/docusaurus.png`. The link will then be converted to a `require()` call instead of staying as a URL. This is desirable in two regards: + +1. You don't have to worry about the base URL, which Docusaurus will take care of when serving the asset; +2. The image enters Webpack's build pipeline and its name will be appended by a hash, which enables browsers to aggressively cache the image and improves your site's performance. + +If you intend to write URLs, you can use the `pathname://` protocol to disable automatic asset linking. + +```md +![banner](pathname:///img/docusaurus-asset-example-banner.png) +``` + +This link will be generated as `banner`, without any processing or file existence checking. 
diff --git a/documents/markdown/docusaurus/markdown-features-code-blocks.md b/documents/markdown/docusaurus/markdown-features-code-blocks.md new file mode 100644 index 0000000..a5a3fb3 --- /dev/null +++ b/documents/markdown/docusaurus/markdown-features-code-blocks.md @@ -0,0 +1,848 @@ +--- +id: code-blocks +description: Handling code blocks in Docusaurus Markdown +slug: /markdown-features/code-blocks +--- + +# Code blocks + +import BrowserWindow from '@site/src/components/BrowserWindow'; +import CodeBlock from '@theme/CodeBlock'; + +Code blocks within documentation are super-powered 💪. + +## Code title {/* #code-title */} + +You can add a title to the code block by adding a `title` key after the language (leave a space between them). + +````md +```jsx title="/src/components/HelloCodeTitle.js" +function HelloCodeTitle(props) { + return

    Hello, {props.name}

    ; +} +``` +```` + +```mdx-code-block + +``` + +```jsx title="/src/components/HelloCodeTitle.js" +function HelloCodeTitle(props) { + return

    Hello, {props.name}

    ; +} +``` + +```mdx-code-block +
    +``` + +## Syntax highlighting {/* #syntax-highlighting */} + +Code blocks are text blocks wrapped around by strings of 3 backticks. You may check out [this reference](https://mdxjs.com/docs/) for the specifications of MDX. + +````md +```js +console.log('Every repo must come with a mascot.'); +``` +```` + +Use the matching language meta string for your code block, and Docusaurus will pick up syntax highlighting automatically, powered by [Prism React Renderer](https://github.com/FormidableLabs/prism-react-renderer). + + + +```js +console.log('Every repo must come with a mascot.'); +``` + + + +### Theming {/* #theming */} + +By default, the Prism [syntax highlighting theme](https://github.com/FormidableLabs/prism-react-renderer#theming) we use is [Palenight](https://github.com/FormidableLabs/prism-react-renderer/blob/master/packages/prism-react-renderer/src/themes/palenight.ts). You can change this to another theme by passing `theme` field in `prism` as `themeConfig` in your docusaurus.config.js. + +For example, if you prefer to use the `dracula` highlighting theme: + +```js title="docusaurus.config.js" +import {themes as prismThemes} from 'prism-react-renderer'; + +export default { + themeConfig: { + prism: { + // highlight-next-line + theme: prismThemes.dracula, + }, + }, +}; +``` + +Because a Prism theme is just a JS object, you can also write your own theme if you are not satisfied with the default. Docusaurus enhances the `github` and `vsDark` themes to provide richer highlight, and you can check our implementations for the [light](https://github.com/facebook/docusaurus/blob/main/website/src/utils/prismLight.ts) and [dark](https://github.com/facebook/docusaurus/blob/main/website/src/utils/prismDark.ts) code block themes. 
+ +### Supported Languages {/* #supported-languages */} + +By default, Docusaurus comes with a subset of [commonly used languages](https://github.com/FormidableLabs/prism-react-renderer/blob/master/packages/generate-prism-languages/index.ts#L10-L25). + +:::warning + +Some popular languages like Java, C#, or PHP are not enabled by default. + +::: + +To add syntax highlighting for any of the other [Prism-supported languages](https://prismjs.com/#supported-languages), define it in an array of additional languages. + +:::note + +Each additional language has to be a valid Prism component name. For example, Prism would map the _language_ `cs` to `csharp`, but only `prism-csharp.js` exists as a _component_, so you need to use `additionalLanguages: ['csharp']`. You can look into `node_modules/prismjs/components` to find all components (languages) available. + +::: + +For example, if you want to add highlighting for the PowerShell language: + +```js title="docusaurus.config.js" +export default { + // ... + themeConfig: { + prism: { + // highlight-next-line + additionalLanguages: ['powershell'], + }, + // ... + }, +}; +``` + +After adding `additionalLanguages`, restart Docusaurus. + +If you want to add highlighting for languages not yet supported by Prism, you can swizzle `prism-include-languages`: + +```bash npm2yarn +npm run swizzle @docusaurus/theme-classic prism-include-languages +``` + +It will produce `prism-include-languages.js` in your `src/theme` folder. You can add highlighting support for custom languages by editing `prism-include-languages.js`: + +```js title="src/theme/prism-include-languages.js" +const prismIncludeLanguages = (Prism) => { + // ... + + additionalLanguages.forEach((lang) => { + require(`prismjs/components/prism-${lang}`); + }); + + // highlight-next-line + require('/path/to/your/prism-language-definition'); + + // ... 
+}; +``` + +You can refer to [Prism's official language definitions](https://github.com/PrismJS/prism/tree/master/components) when you are writing your own language definitions. + +When adding a custom language definition, you do not need to add the language to the `additionalLanguages` config array, since Docusaurus only looks up the `additionalLanguages` strings in languages that Prism provides. Adding the language import in `prism-include-languages.js` is sufficient. + +## Line highlighting {/* #line-highlighting */} + +### Highlighting with comments {/* #highlighting-with-comments */} + +You can use comments with `highlight-next-line`, `highlight-start`, and `highlight-end` to select which lines are highlighted. + +````md +```js +function HighlightSomeText(highlight) { + if (highlight) { + // highlight-next-line + return 'This text is highlighted!'; + } + + return 'Nothing highlighted'; +} + +function HighlightMoreText(highlight) { + // highlight-start + if (highlight) { + return 'This range is highlighted!'; + } + // highlight-end + + return 'Nothing highlighted'; +} +``` +```` + +```mdx-code-block + +``` + +```js +function HighlightSomeText(highlight) { + if (highlight) { + // highlight-next-line + return 'This text is highlighted!'; + } + + return 'Nothing highlighted'; +} + +function HighlightMoreText(highlight) { + // highlight-start + if (highlight) { + return 'This range is highlighted!'; + } + // highlight-end + + return 'Nothing highlighted'; +} +``` + +```mdx-code-block + +``` + +Supported commenting syntax: + +| Style | Syntax | +| ---------- | ------------------------ | +| C-style | `/* ... */` and `// ...` | +| JSX-style | `{/* ... */}` | +| Bash-style | `# ...` | +| HTML-style | `` | + +We will do our best to infer which set of comment styles to use based on the language, and default to allowing _all_ comment styles. If there's a comment style that is not currently supported, we are open to adding them! Pull requests welcome. 
Note that different comment styles have no semantic difference, only their content does. + +You can set your own background color for highlighted code line in your `src/css/custom.css` which will better fit to your selected syntax highlighting theme. The color given below works for the default highlighting theme (Palenight), so if you are using another theme, you will have to tweak the color accordingly. + +```css title="/src/css/custom.css" +:root { + --docusaurus-highlighted-code-line-bg: rgb(72, 77, 91); +} + +/* If you have a different syntax highlighting theme for dark mode. */ +[data-theme='dark'] { + /* Color which works with dark mode syntax highlighting theme */ + --docusaurus-highlighted-code-line-bg: rgb(100, 100, 100); +} +``` + +If you also need to style the highlighted code line in some other way, you can target on `theme-code-block-highlighted-line` CSS class. + +### Highlighting with metadata string {/* #highlighting-with-metadata-string */} + +You can also specify highlighted line ranges within the language meta string (leave a space after the language). To highlight multiple lines, separate the line numbers by commas or use the range syntax to select a chunk of lines. This feature uses the `parse-numeric-range` library and you can find [more syntax](https://www.npmjs.com/package/parse-numeric-range) on their project details. + +````md +```jsx {1,4-6,11} +import React from 'react'; + +function MyComponent(props) { + if (props.isBar) { + return
    Bar
    ; + } + + return
    Foo
    ; +} + +export default MyComponent; +``` +```` + +```mdx-code-block + +``` + +```jsx {1,4-6,11} +import React from 'react'; + +function MyComponent(props) { + if (props.isBar) { + return
    Bar
    ; + } + + return
    Foo
    ; +} + +export default MyComponent; +``` + +```mdx-code-block +
    +``` + +:::tip prefer comments + +Prefer highlighting with comments where you can. By inlining highlight in the code, you don't have to manually count the lines if your code block becomes long. If you add/remove lines, you also don't have to offset your line ranges. + +````diff +- ```jsx {3} ++ ```jsx {4} + function HighlightSomeText(highlight) { + if (highlight) { ++ console.log('Highlighted text found'); + return 'This text is highlighted!'; + } + + return 'Nothing highlighted'; + } + ``` +```` + +Below, we will introduce how the magic comment system can be extended to define custom directives and their functionalities. The magic comments would only be parsed if a highlight metastring is not present. + +::: + +### Custom magic comments {/* #custom-magic-comments */} + +`// highlight-next-line` and `// highlight-start` etc. are called "magic comments", because they will be parsed and removed, and their purposes are to add metadata to the next line, or the section that the pair of start- and end-comments enclose. + +You can declare custom magic comments through theme config. For example, you can register another magic comment that adds a `code-block-error-line` class name: + +```mdx-code-block + + +``` + +```js +export default { + themeConfig: { + prism: { + magicComments: [ + // Remember to extend the default highlight class name as well! 
+ { + className: 'theme-code-block-highlighted-line', + line: 'highlight-next-line', + block: {start: 'highlight-start', end: 'highlight-end'}, + }, + // highlight-start + { + className: 'code-block-error-line', + line: 'This will error', + }, + // highlight-end + ], + }, + }, +}; +``` + +```mdx-code-block + + +``` + +```css +.code-block-error-line { + background-color: #ff000020; + display: block; + margin: 0 calc(-1 * var(--ifm-pre-padding)); + padding: 0 var(--ifm-pre-padding); + border-left: 3px solid #ff000080; +} +``` + +```mdx-code-block + + +``` + +````md +In JavaScript, trying to access properties on `null` will error. + +```js +const name = null; +// This will error +console.log(name.toUpperCase()); +// Uncaught TypeError: Cannot read properties of null (reading 'toUpperCase') +``` +```` + +```mdx-code-block + + +``` + +```mdx-code-block + +``` + +In JavaScript, trying to access properties on `null` will error. + +```js +const name = null; +// This will error +console.log(name.toUpperCase()); +// Uncaught TypeError: Cannot read properties of null (reading 'toUpperCase') +``` + +```mdx-code-block + +``` + +If you use number ranges in metastring (the `{1,3-4}` syntax), Docusaurus will apply the **first `magicComments` entry**'s class name. This, by default, is `theme-code-block-highlighted-line`, but if you change the `magicComments` config and use a different entry as the first one, the meaning of the metastring range will change as well. + +You can disable the default line highlighting comments with `magicComments: []`. If there's no magic comment config, but Docusaurus encounters a code block containing a metastring range, it will error because there will be no class name to apply—the highlighting class name, after all, is just a magic comment entry. 
+ +Every magic comment entry will contain three keys: `className` (required), `line`, which applies to the directly next line, or `block` (containing `start` and `end`), which applies to the entire block enclosed by the two comments. + +Using CSS to target the class can already do a lot, but you can unlock the full potential of this feature through [swizzling](../../swizzling.mdx). + +```bash npm2yarn +npm run swizzle @docusaurus/theme-classic CodeBlock/Line +``` + +The `Line` component will receive the list of class names, based on which you can conditionally render different markup. + +## Line numbering {/* #line-numbering */} + +You can enable line numbering for your code block by using `showLineNumbers` key within the language meta string (don't forget to add space directly before the key). + +````md +```jsx showLineNumbers +import React from 'react'; + +export default function MyComponent(props) { + return
    Foo
    ; +} +``` +```` + +```mdx-code-block + +``` + +```jsx showLineNumbers +import React from 'react'; + +export default function MyComponent(props) { + return
    Foo
    ; +} +``` + +```mdx-code-block +
    +``` + +By default, the counter starts at line number 1. It's possible to pass a custom counter start value to split large code blocks for readability: + +````md +```jsx showLineNumbers=3 +export default function MyComponent(props) { + return
    Foo
    ; +} +``` +```` + +```mdx-code-block + +``` + +```jsx showLineNumbers=3 +export default function MyComponent(props) { + return
    Foo
    ; +} +``` + +```mdx-code-block +
    +``` + +## Interactive code editor {/* #interactive-code-editor */} + +(Powered by [React Live](https://github.com/FormidableLabs/react-live)) + +You can create an interactive coding editor with the `@docusaurus/theme-live-codeblock` plugin. First, add the plugin to your package. + +```bash npm2yarn +npm install --save @docusaurus/theme-live-codeblock +``` + +You will also need to add the plugin to your `docusaurus.config.js`. + +```js {3} +export default { + // ... + themes: ['@docusaurus/theme-live-codeblock'], + // ... +}; +``` + +To use the plugin, create a code block with `live` attached to the language meta string. + +````md +```jsx live +function Clock(props) { + const [date, setDate] = useState(new Date()); + + useEffect(() => { + const id = setInterval(() => { + setDate(new Date()); + }, 1000); + return () => clearInterval(id); + }, []); + + return

    It is {date.toLocaleTimeString()}.

    ; +} +``` +```` + +The code block will be rendered as an interactive editor. Changes to the code will reflect on the result panel live. + +```mdx-code-block + +``` + +```jsx live +function Clock(props) { + const [date, setDate] = useState(new Date()); + + useEffect(() => { + const id = setInterval(() => { + setDate(new Date()); + }, 1000); + return () => clearInterval(id); + }, []); + + return

    It is {date.toLocaleTimeString()}.

    ; +} +``` + +```mdx-code-block +
    +``` + +### Imports {/* #imports */} + +:::warning react-live and imports + +It is not possible to import components directly from the react-live code editor, you have to define available imports upfront. + +::: + +By default, all React imports are available. If you need more imports available, swizzle the react-live scope: + +```bash npm2yarn +npm run swizzle @docusaurus/theme-live-codeblock ReactLiveScope -- --eject +``` + +```jsx title="src/theme/ReactLiveScope/index.js" +import React from 'react'; + +// highlight-start +const ButtonExample = (props) => ( +