From 6b69d58d7571343385dc10a5091bfcad0e28a4e7 Mon Sep 17 00:00:00 2001 From: Radin Hamidi Rad Date: Fri, 22 May 2026 18:16:45 -0400 Subject: [PATCH] leaderboard: port to Pyserini-2cr-inspired layout with in-place reproduce panels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rewrite the home matrix and per-X pages (datasets/methods/models/retrievers) on a new `.lb-*` table layout: sticky two-row thead + sticky axis cols, segmented metric control (Both/nDCG/Recall), grouped dataset multi-select dropdown, two-level dataset/metric header, expandable rows with tabbed reproduce panels (one per dataset) that render the three-step pipeline (reformulate → retrieve → evaluate) inline. Clicking a row replaces a navigation to `/runs/[id]` with copy-ready commands in place. - Add `src/lib/reproduce.ts` to centralize the three-step command generation shared by every expand panel. - Single-scrollbar layout: `Default.astro` body is fixed-viewport flex column, table card fills the remaining space, only the inner table scrolls. Sticky thead anchors to the scroll container's top. - Shared `Header.astro` / `Footer.astro` widened to `w-[90%]` so the leaderboard can use the full viewport. - Remove unused `InteractiveTable.astro`, `FilterChips.astro`, `MatrixCell.astro` and the `.qg-table-card` / `.qg-axis-*` / `.qg-cell-best` / `.qg-itable` CSS blocks they backed. Co-Authored-By: Claude Opus 4.7 --- .gitignore | 4 +- .../site/src/components/FilterChips.astro | 133 ---- .../src/components/InteractiveTable.astro | 198 ------ .../site/src/components/MatrixCell.astro | 51 -- .../site/src/layouts/Default.astro | 4 +- reproducibility/site/src/lib/reproduce.ts | 127 ++++ .../site/src/pages/datasets/[id].astro | 412 +++++++++--- reproducibility/site/src/pages/index.astro | 631 +++++++++++++++--- .../site/src/pages/methods/[id].astro | 572 ++++++++++++++-- .../site/src/pages/models/[id].astro | 559 ++++++++++++++-- .../site/src/pages/retrievers/[id].astro | 561 ++++++++++++++-- reproducibility/site/src/styles/global.css | 527 ++++++++++++--- web/shared/components/Footer.astro | 2 +- web/shared/components/Header.astro | 2 +- 14 files changed, 2897 insertions(+), 886 deletions(-) delete mode 100644 reproducibility/site/src/components/FilterChips.astro delete mode 100644 reproducibility/site/src/components/InteractiveTable.astro delete mode 100644 reproducibility/site/src/components/MatrixCell.astro create mode 100644 reproducibility/site/src/lib/reproduce.ts diff --git a/.gitignore b/.gitignore index b403297..609f10c 100644 --- a/.gitignore +++ b/.gitignore @@ -171,4 +171,6 @@ new_baselines.py outputs/ docs/leaderboard-demo.html -test_examples.jsonl \ No newline at end of file +test_examples.jsonl +# local mockups, design scratchpads — not for the repo +tmp/ diff --git a/reproducibility/site/src/components/FilterChips.astro b/reproducibility/site/src/components/FilterChips.astro deleted file mode 100644 index a9aa251..0000000 --- a/reproducibility/site/src/components/FilterChips.astro +++ /dev/null @@ -1,133 +0,0 @@ ---- -/** - * Chip-style filter bar for any leaderboard table. - * - * Each group corresponds to a column on the table's attributes - * (e.g. data-method, data-model). Clicking a chip hides rows whose attribute - * doesn't match, by toggling the .qg-chip-hidden class and dispatching - * "qg-itable-reapply" on the nearest .qg-itable wrapper so InteractiveTable - * re-syncs its row-visibility + shown-count. - * - * The optional `metric` group is special: it swaps .qg-cell-primary / - * .qg-cell-secondary visibility and the matching column-label spans across - * the whole page, then re-keys cells' data-sort-value to the now-visible - * metric so sort follows what's on screen. - */ -interface ChipValue { - value: string; - label: string; -} -interface ChipGroup { - /** "method" | "model" | "retriever" | "metric"; matches */ - key: string; - /** Visible header text. */ - label: string; - /** First item is shown as the active default. For `metric`, use - * [{value:"primary", label:"nDCG@10"}, {value:"secondary", label:"Recall"}]. */ - values: ChipValue[]; -} -interface Props { - /** id of the table to filter (used to scope row queries to this table). */ - tableId: string; - groups: ChipGroup[]; -} -const { tableId, groups } = Astro.props; ---- - -
- {groups.map((g) => ( -
- {g.label}: -
- {g.values.map((v, i) => ( - - ))} -
-
- ))} -
- - - - diff --git a/reproducibility/site/src/components/InteractiveTable.astro b/reproducibility/site/src/components/InteractiveTable.astro deleted file mode 100644 index 03dc988..0000000 --- a/reproducibility/site/src/components/InteractiveTable.astro +++ /dev/null @@ -1,198 +0,0 @@ ---- -/** - * Wraps a server-rendered with a global search input + click-to-sort - * column headers. Vanilla JS — no framework island. - * - * Conventions inside the wrapped table: - * - Every is sortable. Add data-sort-skip on a in is searched against the input by concatenated - * textContent (case-insensitive). - */ - -interface Props { - /** Search placeholder text. */ - searchPlaceholder?: string; - /** Initial sort: { columnIndex, direction } */ - initialSort?: { column: number; direction: "asc" | "desc" }; -} - -const { searchPlaceholder = "Search rows…", initialSort } = Astro.props; -const initialSortAttr = initialSort - ? `${initialSort.column}:${initialSort.direction}` - : ""; ---- - -
-
-
- - -
- - 0 / 0 rows - -
- -
- - - - diff --git a/reproducibility/site/src/components/MatrixCell.astro b/reproducibility/site/src/components/MatrixCell.astro deleted file mode 100644 index 8dbd542..0000000 --- a/reproducibility/site/src/components/MatrixCell.astro +++ /dev/null @@ -1,51 +0,0 @@ ---- -/** - * One cell in any of the leaderboard tables. - * - * Always renders two metric spans (primary + secondary) layered on top of - * each other; FilterChips' metric-toggle flips visibility via the global - * .qg-cell-primary / .qg-cell-secondary classes. The cell exposes - * data-primary-value and data-secondary-value so InteractiveTable's sort - * picks up whichever metric is currently visible. Missing values render - * as a muted em-dash so layout stays stable when one metric is absent. - */ -interface Cell { - value: number; - best: boolean; -} -interface Props { - primary?: Cell; - secondary?: Cell | null; - runId?: string | null; - digits?: number; -} -const { primary, secondary, runId, digits = 4 } = Astro.props; -const primaryValue = primary?.value ?? ""; -const secondaryValue = secondary?.value ?? ""; -const hasPrimary = primary !== undefined; -const hasSecondary = secondary != null && secondary !== undefined; -const linkable = runId && (hasPrimary || hasSecondary); -const primaryText = hasPrimary ? primary!.value.toFixed(digits) : "—"; -const secondaryText = hasSecondary ? secondary!.value.toFixed(digits) : "—"; -const primaryClass = `qg-cell-primary${primary?.best ? " qg-cell-best" : ""}`; -const secondaryClass = `qg-cell-secondary hidden${secondary?.best ? " qg-cell-best" : ""}`; ---- - - diff --git a/reproducibility/site/src/layouts/Default.astro b/reproducibility/site/src/layouts/Default.astro index d425a5a..9a306c9 100644 --- a/reproducibility/site/src/layouts/Default.astro +++ b/reproducibility/site/src/layouts/Default.astro @@ -36,13 +36,13 @@ const navLinks = [ - +
-
+
diff --git a/reproducibility/site/src/lib/reproduce.ts b/reproducibility/site/src/lib/reproduce.ts new file mode 100644 index 0000000..ba77217 --- /dev/null +++ b/reproducibility/site/src/lib/reproduce.ts @@ -0,0 +1,127 @@ +/** + * Build the three-step reproduce commands (reformulate → retrieve → evaluate) + * from a run summary. Shared by the home-matrix expand panel and the + * `/runs/[run_id]` detail page so the commands stay in sync. + */ + +export interface RunLike { + run_id?: string; + dataset_id: string; + method_id: string; + model: string; + retriever_id?: string; + metrics?: Record; + config?: { + method_params?: Record; + llm_config?: { temperature?: number; max_tokens?: number; [k: string]: unknown }; + dataset_config?: { topics?: string; index?: string; [k: string]: unknown }; + retrieval?: { + paradigm?: string; + retriever_id?: string; + params?: Record; + }; + }; +} + +export interface ReproduceCmds { + reformulate: string; + retrieve: string | null; + evaluate: string; + paradigm: string; + qrels: string; +} + +// method_params we surface in the reproduce snippet — strip locally-pathy keys +// that won't apply on a fresh checkout. +const PARAM_KEYS_TO_DROP = new Set([ + "judge_rel_mode", + "collection_path", + "train_queries_path", + "train_qrels_path", + "dataset_type", +]); + +export function buildReproduceCmds(run: RunLike): ReproduceCmds { + const cfg = run.config ?? {}; + const retrieval = cfg.retrieval ?? {}; + const dsCfg = cfg.dataset_config ?? {}; + const llm = cfg.llm_config ?? {}; + const methodParams = (cfg.method_params ?? {}) as Record; + + const cleanParams: Record = {}; + for (const [k, v] of Object.entries(methodParams)) { + if (!PARAM_KEYS_TO_DROP.has(k)) cleanParams[k] = v; + } + const paramsJson = Object.keys(cleanParams).length + ? JSON.stringify(cleanParams) + : null; + + const reformulate = `python examples/querygym_pyserini/pipeline.py \\ + --dataset ${run.dataset_id} \\ + --method ${run.method_id} \\ + --model ${run.model} \\ + --steps reformulate \\ + --temperature ${llm.temperature ?? 1.0} \\ + --max-tokens ${llm.max_tokens ?? 128} \\${paramsJson ? ` + --method-params '${paramsJson}' \\` : ""} + --output-dir outputs/reproduce`; + + const paradigm = retrieval.paradigm ?? ""; + const params = (retrieval.params ?? {}) as Record; + // BEIR BM25 indexes carry a `.flat` suffix; SPLADE/BGE variants drop it. + const baseIndex = String(dsCfg.index ?? "").replace(/\.flat$/, ""); + + let retrieve: string | null = null; + if (paradigm === "lexical") { + retrieve = `python -m pyserini.search.lucene \\ + --threads 16 --batch-size 128 \\ + --index ${dsCfg.index ?? ""} \\ + --topics outputs/reproduce/queries/reformulated_queries.tsv \\ + --bm25 --k1 ${params.k1 ?? 0.9} --b ${params.b ?? 0.4} \\ + --output run.txt \\ + --hits 1000`; + } else if (paradigm === "learned_sparse") { + retrieve = `python -m pyserini.search.lucene \\ + --threads 16 --batch-size 128 \\ + --index ${baseIndex || ""}.splade-pp-ed \\ + --topics outputs/reproduce/queries/reformulated_queries.tsv \\ + --encoder ${params.model ?? "naver/splade-cocondenser-ensembledistil"} \\ + --output run.txt \\ + --hits 1000 --impact`; + } else if (paradigm === "dense") { + retrieve = `python -m pyserini.search.faiss \\ + --threads 16 --batch-size 128 \\ + --index ${baseIndex || ""}.bge-base-en-v1.5 \\ + --topics outputs/reproduce/queries/reformulated_queries.tsv \\ + --encoder ${params.encoder ?? "BAAI/bge-base-en-v1.5"} \\ + --output run.txt \\ + --hits 1000`; + } + + const trecMetrics = Object.keys(run.metrics ?? {}) + .map((m) => m.replace(/_/g, ".")) + .join(" -m "); + const qrels = dsCfg.topics ?? ""; + const evaluate = `python -m pyserini.eval.trec_eval -c -m ${trecMetrics || "ndcg_cut.10"} \\ + ${qrels} run.txt`; + + return { reformulate, retrieve, evaluate, paradigm, qrels }; +} + +/** Pretty hint for the retrieve step header. */ +export function retrieveHint(retrieverDisplay: string, paradigm: string): string { + return `pyserini · ${retrieverDisplay}${paradigm ? ` (${paradigm})` : ""}`; +} + +/** Pretty hint for the evaluate step header. */ +export function evaluateHint(metricKeys: string[]): string { + if (!metricKeys.length) return "trec_eval"; + const pretty = metricKeys.map((k) => + k === "ndcg_cut_10" ? "nDCG@10" + : k === "recall_100" ? "R@100" + : k === "recall_1000" ? "R@1k" + : k === "map" ? "MAP" + : k + ); + return `trec_eval · ${pretty.join(" + ")}`; +} diff --git a/reproducibility/site/src/pages/datasets/[id].astro b/reproducibility/site/src/pages/datasets/[id].astro index ae78727..936d1d4 100644 --- a/reproducibility/site/src/pages/datasets/[id].astro +++ b/reproducibility/site/src/pages/datasets/[id].astro @@ -1,9 +1,9 @@ --- import Default from "../../layouts/Default.astro"; import EmptyState from "../../components/EmptyState.astro"; -import InteractiveTable from "../../components/InteractiveTable.astro"; -import FilterChips from "../../components/FilterChips.astro"; import datasetsList from "../../data/datasets.json"; +import runs from "../../data/runs.json"; +import { buildReproduceCmds, retrieveHint, evaluateHint, type RunLike } from "../../lib/reproduce"; const shards = import.meta.glob<{ default: any }>( "../../data/views/dataset-*.json", @@ -26,113 +26,341 @@ const METRIC_LABEL: Record = { map: "MAP", }; +const SHORT: Record = { + "msmarco-v1-passage.trecdl2019": "DL 2019", + "msmarco-v1-passage.trecdl2020": "DL 2020", + "msmarco-v1-passage.dlhard": "DL-HARD", + "beir-v1.0.0-scifact": "SciFact", + "beir-v1.0.0-arguana": "ArguAna", + "beir-v1.0.0-trec-covid": "COVID", + "beir-v1.0.0-fiqa": "FiQA", + "beir-v1.0.0-dbpedia-entity": "DBPedia", + "beir-v1.0.0-trec-news": "News", +}; + const { id } = Astro.params; const view = shardFor(id!); const datasetMeta = datasetsList.find((d: any) => d.id === id); -const runs = (view?.runs ?? []) as any[]; +const dataRows = (view?.runs ?? []) as any[]; const metricCols: string[] = view?.metric_columns ?? []; -const tableId = "qg-dataset-table"; +const title = SHORT[id!] ?? datasetMeta?.name ?? id ?? "Dataset"; const uniq = (xs: any[], key: string, displayKey?: string) => { const m = new Map(); for (const r of xs) m.set(r[key], r[displayKey ?? key] ?? r[key]); return Array.from(m.entries()).sort((a, b) => a[0].localeCompare(b[0])); }; -const methodChoices = uniq(runs, "method_id", "method_display"); -const modelChoices = uniq(runs, "model", "model_display"); -const retrieverChoices = uniq(runs, "retriever_id", "retriever_display"); +const methodChoices = uniq(dataRows, "method_id", "method_display"); +const modelChoices = uniq(dataRows, "model", "model_display"); +const retrieverChoices = uniq(dataRows, "retriever_id", "retriever_display"); + +const runsMap = runs as Record; +type Step = { num: number; title: string; hint: string; code: string }; +const reproCache: Record = {}; +function stepsFor(runId: string): Step[] | null { + if (!runId) return null; + if (reproCache[runId]) return reproCache[runId]; + const r = runsMap[runId] as RunLike | undefined; + if (!r) return null; + const cmds = buildReproduceCmds(r); + const retrieverDisp = (r as any).retriever_display ?? (r as any).retriever_id ?? ""; + const steps: Step[] = [ + { num: 1, title: "reformulate", hint: "querygym → reformulated_queries.tsv", code: cmds.reformulate }, + ]; + if (cmds.retrieve) { + steps.push({ num: 2, title: "retrieve", hint: retrieveHint(retrieverDisp, cmds.paradigm), code: cmds.retrieve }); + } + steps.push({ + num: cmds.retrieve ? 3 : 2, + title: "evaluate", + hint: evaluateHint(Object.keys(r.metrics ?? {})), + code: cmds.evaluate, + }); + reproCache[runId] = steps; + return steps; +} + +const colCount = 4 + metricCols.length; --- - - ← All datasets -

- {datasetMeta?.name ?? id} -

-
{id}
- - {runs.length === 0 ? ( -
- -
+ + {dataRows.length === 0 ? ( + <> + ← All datasets +
+ +
+ ) : ( -
-
- ({ value: v, label: l }))] }, - { key: "model", label: "Model", - values: [{ value: "", label: "All" }, ...modelChoices.map(([v, l]) => ({ value: v, label: l }))] }, - { key: "retriever", label: "Retriever", - values: [{ value: "", label: "All" }, ...retrieverChoices.map(([v, l]) => ({ value: v, label: l }))] }, - ]} - /> + <> +
+
+ ← Datasets +

{title}

+ {id} +
+ + All results produced by + QueryGym + · fully reproducible! + +
+

+ {dataRows.length} (method × LLM × retriever) configurations evaluated on this dataset. +
+ Click any row or the + button to expand. The three steps + (reformulate → retrieve → evaluate) for that run appear inline. +

+
+ +
+
+
+ Retriever + + {retrieverChoices.map(([v, l]) => ( + + ))} +
+
+ Model + + {modelChoices.map(([v, l]) => ( + + ))} +
+
+ Method + + {methodChoices.map(([v, l]) => ( + + ))} +
+
+
+
+
+ + +
+ {dataRows.length} / {dataRows.length} runs +
+ best in column +
- -
-
-
in
to skip it. - * - Each can have data-sort-value="" to override the - * visible text for sorting (useful for cells containing links or formatted - * numbers). When absent, the cell's textContent is used. - * - Each
- {linkable ? ( - - {primaryText} - {secondaryText} - - ) : ( - <> - {primaryText} - {secondaryText} - - )} -
- - - - - - {metricCols.map((m) => ( - - ))} - - - - {runs.map((r: any) => ( - - - - - {metricCols.map((m) => { - const v = r.metrics?.[m]; - const isBest = !!r.best_for?.[m]; - const runId = r.run_ids?.[m] ?? r.run_id; - return ( - - ); - })} - +
+
+
MethodModelRetriever - {METRIC_LABEL[m] ?? m} -
{r.method_display ?? r.method_id}{r.model_display ?? r.model}{r.retriever_display ?? r.retriever_id} - {v == null ? ( - - ) : runId ? ( - - {v.toFixed(4)} - - ) : ( - {v.toFixed(4)} - )} -
+ + + + + + + {metricCols.map((m, i) => ( + ))} - -
MethodLLMRetriever + {METRIC_LABEL[m] ?? m} +
- + + + + {dataRows.map((r: any, i: number) => { + const steps = stepsFor(r.run_id); + return ( + <> + + + {r.method_display ?? r.method_id} + {r.model_display ?? r.model} + {r.retriever_display ?? r.retriever_id} + {metricCols.map((m, mi) => { + const v = r.metrics?.[m]; + const isBest = !!r.best_for?.[m]; + return ( + {v == null ? "—" : v.toFixed(4)} + ); + })} + + + +
+
+
+ method{r.method_display ?? r.method_id} + llm{r.model_display ?? r.model} + retriever{r.retriever_display ?? r.retriever_id} + dataset{title} +
+ {steps ? ( + <> + {steps.map((st) => ( +
+
+
+ {st.num} + {st.title} + {st.hint} +
+ +
+
{st.code}
+
+ ))} + + + ) : ( +
No run config available.
+ )} +
+
+ + + + ); + })} + + - - + + )} + + diff --git a/reproducibility/site/src/pages/index.astro b/reproducibility/site/src/pages/index.astro index 10feda8..46e6a72 100644 --- a/reproducibility/site/src/pages/index.astro +++ b/reproducibility/site/src/pages/index.astro @@ -1,13 +1,12 @@ --- import Default from "../layouts/Default.astro"; -import Stat from "../components/Stat.astro"; -import InteractiveTable from "../components/InteractiveTable.astro"; -import FilterChips from "../components/FilterChips.astro"; -import MatrixCell from "../components/MatrixCell.astro"; import overview from "../data/overview.json"; import matrix from "../data/matrix.json"; +import runs from "../data/runs.json"; import retrievers from "../data/retrievers.json"; import models from "../data/models.json"; +import methods from "../data/methods.json"; +import { buildReproduceCmds, retrieveHint, evaluateHint, type RunLike } from "../lib/reproduce"; const populated = overview.run_count > 0; @@ -37,124 +36,548 @@ const rows = [...matrix.rows].sort((a: any, b: any) => { }); const datasetCols = matrix.dataset_columns; + +const beirCols = datasetCols.filter((d: any) => d.id.startsWith("beir-")); +const dlCols = datasetCols.filter((d: any) => !d.id.startsWith("beir-")); + +// pre-build reproduce cmds for every (row, dataset) cell that has a run. +// Keyed by run_id so the same lookup powers all tabs. +const runsMap = runs as Record; +type Step = { num: number; title: string; hint: string; code: string }; +const reproCache: Record = {}; +function stepsFor(runId: string): Step[] | null { + if (!runId) return null; + if (reproCache[runId]) return reproCache[runId]; + const r = runsMap[runId] as RunLike | undefined; + if (!r) return null; + const cmds = buildReproduceCmds(r); + const retrieverDisp = (r as any).retriever_display ?? (r as any).retriever_id ?? ""; + const steps: Step[] = [ + { num: 1, title: "reformulate", hint: "querygym → reformulated_queries.tsv", code: cmds.reformulate }, + ]; + if (cmds.retrieve) { + steps.push({ num: 2, title: "retrieve", hint: retrieveHint(retrieverDisp, cmds.paradigm), code: cmds.retrieve }); + } + steps.push({ + num: cmds.retrieve ? 3 : 2, + title: "evaluate", + hint: evaluateHint(Object.keys(r.metrics ?? {})), + code: cmds.evaluate, + }); + reproCache[runId] = steps; + return steps; +} --- -
-

QueryGym Leaderboard

-

- Reproducible LLM-based query reformulation results across MS MARCO DL, - DL-HARD, and BEIR — for BM25, SPLADE++, and BGE retrievers. Click any - score to see how to reproduce that run. -

-
- -
- - - - - -
+ {populated && ( +
+
+

Main Results

+
+ + All results produced by + QueryGym + · fully reproducible! + +
+

+ Query reformulation methods × LLMs × retrievers benchmarked across BEIR, MS MARCO DL, and DL-HARD. +
+ Click any row or the + button to expand. Tabs switch dataset + context. The three steps (reformulate → retrieve → evaluate) update accordingly. +

+
+ )} {populated && ( -
- ({ value: r.id, label: r.display_name })), - ], - }, - { - key: "model", - label: "Model", - values: [ - { value: "", label: "All" }, - ...models.map((m: any) => ({ value: m.id, label: m.display ?? m.id })), - ], - }, - { - key: "metric", - label: "Metric", - values: [ - { value: "primary", label: "nDCG@10" }, - { value: "secondary", label: "Recall" }, - ], - }, - ]} - /> +
+
+
+ Retriever + + {retrievers.map((r: any) => ( + + ))} +
+
+ Model + + {models.map((m: any) => ( + + ))} +
+
+ Method + + {methods.map((m: any) => ( + + ))} +
+
+ Datasets +
+ + +
+
+
+ Metric +
+ + + +
+
+
+
+
+
+ + +
+ {rows.length} / {rows.length} configs · {overview.run_count} runs +
+ best in column +
)} {populated ? ( - -
-
- - - - - - - {datasetCols.map((d: any) => ( - + + + {rows.map((row: any, i: number) => { + const datasetsWithRuns = datasetCols.filter((d: any) => row.run_ids?.[d.id]); + const firstDsId = datasetsWithRuns[0]?.id; + return ( + <> + + + + + + {datasetCols.map((d: any, di: number) => { + const cell = row.values?.[d.id] ?? {}; + const p = cell[d.primary_metric]; + const s = d.secondary_metric ? cell[d.secondary_metric] : null; + const pTxt = p ? p.value.toFixed(4) : "—"; + const sTxt = s ? s.value.toFixed(4) : "—"; + const pBest = p?.best ? " best" : ""; + const sBest = s?.best ? " best" : ""; + const first = di === 0 ? " first" : ""; + return ( + <> + + + + ); + })} + + + + + + ); + })} + +
MethodModelRetriever -
{SHORT[d.id] ?? d.name}
-
- {METRIC_LABEL[d.primary_metric] ?? d.primary_metric} -
- +
+
+ + + + + + + + {datasetCols.map((d: any) => ( + + ))} + + + {datasetCols.map((d: any, di: number) => ( + <> + - ))} - - - - {rows.map((row: any) => ( - - - - - {datasetCols.map((d: any) => { - const cell = row.values?.[d.id] ?? {}; - return ( - - ); - })} - + + ))} - -
MethodLLMRetriever{SHORT[d.id] ?? d.name}
+ {METRIC_LABEL[d.primary_metric] ?? d.primary_metric}
{row.method_display ?? row.method_id}{row.model_display ?? row.model}{row.retriever_display ?? row.retriever_id}
+ {METRIC_LABEL[d.secondary_metric] ?? d.secondary_metric} +
-
+
{row.method_display ?? row.method_id}{row.model_display ?? row.model}{row.retriever_display ?? row.retriever_id}{pTxt}{sTxt}
+
+
+
+ method{row.method_display ?? row.method_id} + llm{row.model_display ?? row.model} + retriever{row.retriever_display ?? row.retriever_id} +
+ {datasetsWithRuns.length > 0 ? ( + <> +
+ {datasetsWithRuns.map((d: any, ti: number) => { + const cell = row.values?.[d.id] ?? {}; + const p = cell[d.primary_metric]; + return ( + + ); + })} +
+
+ {datasetsWithRuns.map((d: any, ti: number) => { + const runId = row.run_ids[d.id]; + const steps = stepsFor(runId); + return ( +
+ {steps?.map((st) => ( +
+
+
+ {st.num} + {st.title} + {st.hint} +
+ +
+
{st.code}
+
+ ))} + +
+ ); + })} +
+ + ) : ( +
No runs recorded for this configuration.
+ )} +
+
+
- +
) : (
No runs yet. The matrix will populate when results land.
)} + + + diff --git a/reproducibility/site/src/pages/methods/[id].astro b/reproducibility/site/src/pages/methods/[id].astro index 6e14178..add49d2 100644 --- a/reproducibility/site/src/pages/methods/[id].astro +++ b/reproducibility/site/src/pages/methods/[id].astro @@ -1,10 +1,10 @@ --- import Default from "../../layouts/Default.astro"; import EmptyState from "../../components/EmptyState.astro"; -import InteractiveTable from "../../components/InteractiveTable.astro"; -import FilterChips from "../../components/FilterChips.astro"; -import MatrixCell from "../../components/MatrixCell.astro"; import methods from "../../data/methods.json"; +import runs from "../../data/runs.json"; +import matrix from "../../data/matrix.json"; +import { buildReproduceCmds, retrieveHint, evaluateHint, type RunLike } from "../../lib/reproduce"; const shards = import.meta.glob<{ default: any }>( "../../data/views/method-*.json", @@ -40,10 +40,13 @@ const METRIC_LABEL: Record = { ndcg_cut_10: "nDCG@10", recall_1000: "R@1k", recall_100: "R@100", map: "MAP", }; -const datasetCols = (await import("../../data/matrix.json")).default.dataset_columns; -const title = view?.method_display ?? meta?.display ?? id ?? "Method"; -const tableId = "qg-method-table"; +const datasetCols = matrix.dataset_columns; +const beirCols = datasetCols.filter((d: any) => d.id.startsWith("beir-")); +const dlCols = datasetCols.filter((d: any) => !d.id.startsWith("beir-")); +const title = view?.method_display ?? meta?.display_name ?? meta?.display ?? id ?? "Method"; + +// distinct axis values for chip filters const uniq = (xs: any[], key: string, displayKey?: string) => { const m = new Map(); for (const r of xs) m.set(r[key], r[displayKey ?? key] ?? r[key]); @@ -51,85 +54,506 @@ const uniq = (xs: any[], key: string, displayKey?: string) => { }; const modelChoices = uniq(rows, "model", "model_display"); const retrieverChoices = uniq(rows, "retriever_id", "retriever_display"); + +// build the reproduce steps for each (row, dataset) cell that has a run +const runsMap = runs as Record; +type Step = { num: number; title: string; hint: string; code: string }; +const reproCache: Record = {}; +function stepsFor(runId: string): Step[] | null { + if (!runId) return null; + if (reproCache[runId]) return reproCache[runId]; + const r = runsMap[runId] as RunLike | undefined; + if (!r) return null; + const cmds = buildReproduceCmds(r); + const retrieverDisp = (r as any).retriever_display ?? (r as any).retriever_id ?? ""; + const steps: Step[] = [ + { num: 1, title: "reformulate", hint: "querygym → reformulated_queries.tsv", code: cmds.reformulate }, + ]; + if (cmds.retrieve) { + steps.push({ num: 2, title: "retrieve", hint: retrieveHint(retrieverDisp, cmds.paradigm), code: cmds.retrieve }); + } + steps.push({ + num: cmds.retrieve ? 3 : 2, + title: "evaluate", + hint: evaluateHint(Object.keys(r.metrics ?? {})), + code: cmds.evaluate, + }); + reproCache[runId] = steps; + return steps; +} --- - ← All methods -

{title}

-
{id}
-
{rows.length} model × retriever combinations
- {rows.length === 0 ? ( -
- -
+ <> + ← All methods +
+ ) : ( <> -
- ({ value: v, label: l }))] }, - { key: "retriever", label: "Retriever", - values: [{ value: "", label: "All" }, ...retrieverChoices.map(([v, l]) => ({ value: v, label: l }))] }, - { key: "metric", label: "Metric", - values: [{ value: "primary", label: "nDCG@10" }, { value: "secondary", label: "Recall" }] }, - ]} - /> +
+
+ ← Methods +

{title}

+ {id} +
+ + All results produced by + QueryGym + · fully reproducible! + +
+

+ {rows.length} model × retriever configurations for this method across BEIR, MS MARCO DL, and DL-HARD. +
+ Click any row or the + button to expand. Tabs switch dataset + context. The three steps (reformulate → retrieve → evaluate) update accordingly. +

+
+ +
+
+
+ Retriever + + {retrieverChoices.map(([v, l]) => ( + + ))} +
+
+ Model + + {modelChoices.map(([v, l]) => ( + + ))} +
+
+ Datasets +
+ + +
+
+
+ Metric +
+ + + +
+
+
+
+
+
+ + +
+ {rows.length} / {rows.length} configs +
+ best in column +
- -
-
- - - - - - {datasetCols.map((d: any) => ( - + + + {rows.map((row: any, i: number) => { + const datasetsWithRuns = datasetCols.filter((d: any) => row.run_ids?.[d.id]); + return ( + <> + + + + + {datasetCols.map((d: any, di: number) => { + const cell = row.values?.[d.id] ?? {}; + const p = cell[d.primary_metric]; + const s = d.secondary_metric ? cell[d.secondary_metric] : null; + const pTxt = p ? p.value.toFixed(4) : "—"; + const sTxt = s ? s.value.toFixed(4) : "—"; + return ( + <> + + + + ); + })} + + + + + + ); + })} + +
ModelRetriever -
{SHORT[d.id] ?? d.name}
-
- {METRIC_LABEL[d.primary_metric] ?? d.primary_metric} -
- +
+
+ + + + + + + {datasetCols.map((d: any) => ( + + ))} + + + {datasetCols.map((d: any, di: number) => ( + <> + - ))} - - - - {rows.map((row: any) => ( - - - - {datasetCols.map((d: any) => { - const cell = row.values?.[d.id] ?? {}; - return ( - - ); - })} - + + ))} - -
ModelRetriever{SHORT[d.id] ?? d.name}
+ {METRIC_LABEL[d.primary_metric] ?? d.primary_metric}
{row.model_display ?? row.model}{row.retriever_display ?? row.retriever_id}
+ {METRIC_LABEL[d.secondary_metric] ?? d.secondary_metric} +
-
+
{row.model_display ?? row.model}{row.retriever_display ?? row.retriever_id}{pTxt}{sTxt}
+
+
+
+ method{title} + llm{row.model_display ?? row.model} + retriever{row.retriever_display ?? row.retriever_id} +
+ {datasetsWithRuns.length > 0 ? ( + <> +
+ {datasetsWithRuns.map((d: any, ti: number) => { + const cell = row.values?.[d.id] ?? {}; + const p = cell[d.primary_metric]; + return ( + + ); + })} +
+
+ {datasetsWithRuns.map((d: any, ti: number) => { + const runId = row.run_ids[d.id]; + const steps = stepsFor(runId); + return ( +
+ {steps?.map((st) => ( +
+
+
+ {st.num} + {st.title} + {st.hint} +
+ +
+
{st.code}
+
+ ))} + +
+ ); + })} +
+ + ) : ( +
No runs recorded.
+ )} +
+
+
- +
)} + + diff --git a/reproducibility/site/src/pages/models/[id].astro b/reproducibility/site/src/pages/models/[id].astro index b7ae9c9..1d554fb 100644 --- a/reproducibility/site/src/pages/models/[id].astro +++ b/reproducibility/site/src/pages/models/[id].astro @@ -1,10 +1,10 @@ --- import Default from "../../layouts/Default.astro"; import EmptyState from "../../components/EmptyState.astro"; -import InteractiveTable from "../../components/InteractiveTable.astro"; -import FilterChips from "../../components/FilterChips.astro"; -import MatrixCell from "../../components/MatrixCell.astro"; import models from "../../data/models.json"; +import runs from "../../data/runs.json"; +import matrix from "../../data/matrix.json"; +import { buildReproduceCmds, retrieveHint, evaluateHint, type RunLike } from "../../lib/reproduce"; const shards = import.meta.glob<{ default: any }>( "../../data/views/model-*.json", @@ -40,9 +40,11 @@ const METRIC_LABEL: Record = { ndcg_cut_10: "nDCG@10", recall_1000: "R@1k", recall_100: "R@100", map: "MAP", }; -const datasetCols = (await import("../../data/matrix.json")).default.dataset_columns; +const datasetCols = matrix.dataset_columns; +const beirCols = datasetCols.filter((d: any) => d.id.startsWith("beir-")); +const dlCols = datasetCols.filter((d: any) => !d.id.startsWith("beir-")); + const title = meta?.display ?? view?.model ?? id ?? "Model"; -const tableId = "qg-model-table"; const uniq = (xs: any[], key: string, displayKey?: string) => { const m = new Map(); @@ -51,84 +53,497 @@ const uniq = (xs: any[], key: string, displayKey?: string) => { }; const methodChoices = uniq(rows, "method_id", "method_display"); const retrieverChoices = uniq(rows, "retriever_id", "retriever_display"); + +const runsMap = runs as Record; +type Step = { num: number; title: string; hint: string; code: string }; +const reproCache: Record = {}; +function stepsFor(runId: string): Step[] | null { + if (!runId) return null; + if (reproCache[runId]) return reproCache[runId]; + const r = runsMap[runId] as RunLike | undefined; + if (!r) return null; + const cmds = buildReproduceCmds(r); + const retrieverDisp = (r as any).retriever_display ?? (r as any).retriever_id ?? ""; + const steps: Step[] = [ + { num: 1, title: "reformulate", hint: "querygym → reformulated_queries.tsv", code: cmds.reformulate }, + ]; + if (cmds.retrieve) { + steps.push({ num: 2, title: "retrieve", hint: retrieveHint(retrieverDisp, cmds.paradigm), code: cmds.retrieve }); + } + steps.push({ + num: cmds.retrieve ? 3 : 2, + title: "evaluate", + hint: evaluateHint(Object.keys(r.metrics ?? {})), + code: cmds.evaluate, + }); + reproCache[runId] = steps; + return steps; +} --- - ← All models -

{title}

-
{rows.length} method × retriever combinations
- {rows.length === 0 ? ( -
- -
+ <> + ← All models +
+ ) : ( <> -
- ({ value: v, label: l }))] }, - { key: "retriever", label: "Retriever", - values: [{ value: "", label: "All" }, ...retrieverChoices.map(([v, l]) => ({ value: v, label: l }))] }, - { key: "metric", label: "Metric", - values: [{ value: "primary", label: "nDCG@10" }, { value: "secondary", label: "Recall" }] }, - ]} - /> +
+
+ ← Models +

{title}

+
+ + All results produced by + QueryGym + · fully reproducible! + +
+

+ {rows.length} method × retriever configurations using this LLM across BEIR, MS MARCO DL, and DL-HARD. +
+ Click any row or the + button to expand. Tabs switch dataset + context. The three steps (reformulate → retrieve → evaluate) update accordingly. +

+
+ +
+
+
+ Retriever + + {retrieverChoices.map(([v, l]) => ( + + ))} +
+
+ Method + + {methodChoices.map(([v, l]) => ( + + ))} +
+
+ Datasets +
+ + +
+
+
+ Metric +
+ + + +
+
+
+
+
+
+ + +
+ {rows.length} / {rows.length} configs +
+ best in column +
- -
-
- - - - - - {datasetCols.map((d: any) => ( - + + + {rows.map((row: any, i: number) => { + const datasetsWithRuns = datasetCols.filter((d: any) => row.run_ids?.[d.id]); + return ( + <> + + + + + {datasetCols.map((d: any, di: number) => { + const cell = row.values?.[d.id] ?? {}; + const p = cell[d.primary_metric]; + const s = d.secondary_metric ? cell[d.secondary_metric] : null; + const pTxt = p ? p.value.toFixed(4) : "—"; + const sTxt = s ? s.value.toFixed(4) : "—"; + return ( + <> + + + + ); + })} + + + + + + ); + })} + +
MethodRetriever -
{SHORT[d.id] ?? d.name}
-
- {METRIC_LABEL[d.primary_metric] ?? d.primary_metric} -
- +
+
+ + + + + + + {datasetCols.map((d: any) => ( + + ))} + + + {datasetCols.map((d: any, di: number) => ( + <> + + - ))} - - - - {rows.map((row: any) => ( - - - - {datasetCols.map((d: any) => { - const cell = row.values?.[d.id] ?? {}; - return ( - - ); - })} - + ))} - -
MethodRetriever{SHORT[d.id] ?? d.name}
+ {METRIC_LABEL[d.primary_metric] ?? d.primary_metric} + + {METRIC_LABEL[d.secondary_metric] ?? d.secondary_metric}
{row.method_display ?? row.method_id}{row.retriever_display ?? row.retriever_id}
-
+
{row.method_display ?? row.method_id}{row.retriever_display ?? row.retriever_id}{pTxt}{sTxt}
+
+
+
+ method{row.method_display ?? row.method_id} + llm{title} + retriever{row.retriever_display ?? row.retriever_id} +
+ {datasetsWithRuns.length > 0 ? ( + <> +
+ {datasetsWithRuns.map((d: any, ti: number) => { + const cell = row.values?.[d.id] ?? {}; + const p = cell[d.primary_metric]; + return ( + + ); + })} +
+
+ {datasetsWithRuns.map((d: any, ti: number) => { + const runId = row.run_ids[d.id]; + const steps = stepsFor(runId); + return ( +
+ {steps?.map((st) => ( +
+
+
+ {st.num} + {st.title} + {st.hint} +
+ +
+
{st.code}
+
+ ))} + +
+ ); + })} +
+ + ) : ( +
No runs recorded.
+ )} +
+
+
- +
)} + + diff --git a/reproducibility/site/src/pages/retrievers/[id].astro b/reproducibility/site/src/pages/retrievers/[id].astro index 5166c7b..145a9a9 100644 --- a/reproducibility/site/src/pages/retrievers/[id].astro +++ b/reproducibility/site/src/pages/retrievers/[id].astro @@ -1,10 +1,10 @@ --- import Default from "../../layouts/Default.astro"; import EmptyState from "../../components/EmptyState.astro"; -import InteractiveTable from "../../components/InteractiveTable.astro"; -import FilterChips from "../../components/FilterChips.astro"; -import MatrixCell from "../../components/MatrixCell.astro"; import retrievers from "../../data/retrievers.json"; +import runs from "../../data/runs.json"; +import matrix from "../../data/matrix.json"; +import { buildReproduceCmds, retrieveHint, evaluateHint, type RunLike } from "../../lib/reproduce"; const shards = import.meta.glob<{ default: any }>( "../../data/views/retriever-*.json", @@ -40,9 +40,11 @@ const METRIC_LABEL: Record = { ndcg_cut_10: "nDCG@10", recall_1000: "R@1k", recall_100: "R@100", map: "MAP", }; -const datasetCols = (await import("../../data/matrix.json")).default.dataset_columns; +const datasetCols = matrix.dataset_columns; +const beirCols = datasetCols.filter((d: any) => d.id.startsWith("beir-")); +const dlCols = datasetCols.filter((d: any) => !d.id.startsWith("beir-")); + const title = meta?.display_name ?? id ?? "Retriever"; -const tableId = "qg-retriever-table"; const uniq = (xs: any[], key: string, displayKey?: string) => { const m = new Map(); @@ -51,85 +53,498 @@ const uniq = (xs: any[], key: string, displayKey?: string) => { }; const methodChoices = uniq(rows, "method_id", "method_display"); const modelChoices = uniq(rows, "model", "model_display"); + +const runsMap = runs as Record; +type Step = { num: number; title: string; hint: string; code: string }; +const reproCache: Record = {}; +function stepsFor(runId: string): Step[] | null { + if (!runId) return null; + if (reproCache[runId]) return reproCache[runId]; + const r = runsMap[runId] as RunLike | undefined; + if (!r) return null; + const cmds = buildReproduceCmds(r); + const retrieverDisp = (r as any).retriever_display ?? (r as any).retriever_id ?? ""; + const steps: Step[] = [ + { num: 1, title: "reformulate", hint: "querygym → reformulated_queries.tsv", code: cmds.reformulate }, + ]; + if (cmds.retrieve) { + steps.push({ num: 2, title: "retrieve", hint: retrieveHint(retrieverDisp, cmds.paradigm), code: cmds.retrieve }); + } + steps.push({ + num: cmds.retrieve ? 3 : 2, + title: "evaluate", + hint: evaluateHint(Object.keys(r.metrics ?? {})), + code: cmds.evaluate, + }); + reproCache[runId] = steps; + return steps; +} --- - ← All retrievers -

{title}

-
{id} · {meta?.paradigm}
-
{rows.length} method × model combinations
- {rows.length === 0 ? ( -
- -
+ <> + ← All retrievers +
+ ) : ( <> -
- ({ value: v, label: l }))] }, - { key: "model", label: "Model", - values: [{ value: "", label: "All" }, ...modelChoices.map(([v, l]) => ({ value: v, label: l }))] }, - { key: "metric", label: "Metric", - values: [{ value: "primary", label: "nDCG@10" }, { value: "secondary", label: "Recall" }] }, - ]} - /> +
+
+ ← Retrievers +

{title}

+ {meta?.paradigm ?? ""} +
+ + All results produced by + QueryGym + · fully reproducible! + +
+

+ {rows.length} method × LLM configurations using this retriever across BEIR, MS MARCO DL, and DL-HARD. +
+ Click any row or the + button to expand. Tabs switch dataset + context. The three steps (reformulate → retrieve → evaluate) update accordingly. +

+
+ +
+
+
+ Model + + {modelChoices.map(([v, l]) => ( + + ))} +
+
+ Method + + {methodChoices.map(([v, l]) => ( + + ))} +
+
+ Datasets +
+ + +
+
+
+ Metric +
+ + + +
+
+
+
+
+
+ + +
+ {rows.length} / {rows.length} configs +
+ best in column +
- -
-
- - - - - - {datasetCols.map((d: any) => ( - + + + {rows.map((row: any, i: number) => { + const datasetsWithRuns = datasetCols.filter((d: any) => row.run_ids?.[d.id]); + return ( + <> + + + + + {datasetCols.map((d: any, di: number) => { + const cell = row.values?.[d.id] ?? {}; + const p = cell[d.primary_metric]; + const s = d.secondary_metric ? cell[d.secondary_metric] : null; + const pTxt = p ? p.value.toFixed(4) : "—"; + const sTxt = s ? s.value.toFixed(4) : "—"; + return ( + <> + + + + ); + })} + + + + + + ); + })} + +
MethodModel -
{SHORT[d.id] ?? d.name}
-
- {METRIC_LABEL[d.primary_metric] ?? d.primary_metric} -
- +
+
+ + + + + + + {datasetCols.map((d: any) => ( + + ))} + + + {datasetCols.map((d: any, di: number) => ( + <> + + - ))} - - - - {rows.map((row: any) => ( - - - - {datasetCols.map((d: any) => { - const cell = row.values?.[d.id] ?? {}; - return ( - - ); - })} - + ))} - -
MethodLLM{SHORT[d.id] ?? d.name}
+ {METRIC_LABEL[d.primary_metric] ?? d.primary_metric} + + {METRIC_LABEL[d.secondary_metric] ?? d.secondary_metric}
{row.method_display ?? row.method_id}{row.model_display ?? row.model}
-
+
{row.method_display ?? row.method_id}{row.model_display ?? row.model}{pTxt}{sTxt}
+
+
+
+ method{row.method_display ?? row.method_id} + llm{row.model_display ?? row.model} + retriever{title} +
+ {datasetsWithRuns.length > 0 ? ( + <> +
+ {datasetsWithRuns.map((d: any, ti: number) => { + const cell = row.values?.[d.id] ?? {}; + const p = cell[d.primary_metric]; + return ( + + ); + })} +
+
+ {datasetsWithRuns.map((d: any, ti: number) => { + const runId = row.run_ids[d.id]; + const steps = stepsFor(runId); + return ( +
+ {steps?.map((st) => ( +
+
+
+ {st.num} + {st.title} + {st.hint} +
+ +
+
{st.code}
+
+ ))} + +
+ ); + })} +
+ + ) : ( +
No runs recorded.
+ )} +
+
+
- +
)} + + diff --git a/reproducibility/site/src/styles/global.css b/reproducibility/site/src/styles/global.css index 90940d2..6ea3a4c 100644 --- a/reproducibility/site/src/styles/global.css +++ b/reproducibility/site/src/styles/global.css @@ -10,120 +10,479 @@ .qg-card { @apply rounded-xl border border-qg-border bg-qg-bg-soft p-6 transition hover:border-qg-accent; } +} - /* Leaderboard table — the card that wraps a scrollable table with sticky - * thead and sticky axis columns. Fixed height keeps the filter card + - * page chrome in view while rows scroll inside. */ - .qg-table-card { - @apply flex h-[600px] flex-col overflow-hidden rounded-xl border border-qg-border bg-qg-bg-soft; - } - .qg-table-scroll { - @apply flex-grow overflow-auto; - } +/* ===================================================================== + * Leaderboard table — Pyserini-2cr-inspired layout (.lb-* namespace). + * Used by the home matrix and every per-X page. See + * tmp/mockups/leaderboard-pyserini-style.html for the design source. + * ===================================================================== */ - /* Filter strip — wraps chips for axes + metric toggle in one card. */ - .qg-filter-card { - @apply mb-4 rounded-xl border border-qg-border bg-qg-bg-soft p-4; - } +/* ---------- filter card -------------------------------------------------- */ +.lb-filter-card { + background: var(--qg-bg-soft); + border: 1px solid var(--qg-border); + border-radius: 12px; + padding: 12px 14px; + margin-bottom: 12px; +} +.lb-filter-row { display: flex; flex-wrap: wrap; align-items: center; gap: 12px; row-gap: 10px; } +.lb-filter-row + .lb-filter-row { + margin-top: 10px; padding-top: 10px; + border-top: 1px solid var(--qg-border-soft, var(--qg-border)); +} +.lb-filter-group { display: flex; flex-wrap: wrap; align-items: center; gap: 6px; } +.lb-filter-label { + color: var(--qg-fg-muted); font-size: 10.5px; + text-transform: uppercase; letter-spacing: 0.06em; + font-weight: 600; margin-right: 1px; } -/* ---------- Scrollbar styling ------------------------------------------- */ +.lb-chip { + font-size: 11.5px; padding: 4px 9px; border-radius: 999px; + border: 1px solid var(--qg-border); color: var(--qg-fg); + background: var(--qg-bg); cursor: pointer; user-select: none; + font-family: inherit; + transition: background 0.15s, border-color 0.15s, color 0.15s; +} +.lb-chip:hover { border-color: var(--qg-accent); } +.lb-chip.active { + background: var(--qg-accent); border-color: var(--qg-accent); color: #fff; +} -.qg-table-scroll::-webkit-scrollbar { - width: 8px; - height: 8px; +/* multi-select dropdown (Datasets) */ +.lb-multi { position: relative; } +.lb-multi-trigger { + display: inline-flex; align-items: center; gap: 7px; + font-size: 11.5px; padding: 4px 10px; border-radius: 999px; + border: 1px solid var(--qg-border); color: var(--qg-fg); + background: var(--qg-bg); cursor: pointer; user-select: none; + font-family: inherit; + transition: background 0.15s, border-color 0.15s, color 0.15s; } -.qg-table-scroll::-webkit-scrollbar-track { - background: var(--qg-bg-soft); +.lb-multi-trigger:hover { border-color: var(--qg-accent); } +.lb-multi-trigger.has-filter { + background: var(--qg-accent); border-color: var(--qg-accent); color: #fff; } -.qg-table-scroll::-webkit-scrollbar-thumb { - background: var(--qg-border); - border-radius: 4px; +.lb-multi-trigger .caret { font-size: 9px; opacity: 0.7; } +.lb-multi-panel { + position: absolute; top: calc(100% + 6px); left: 0; + min-width: 220px; + background: var(--qg-bg); + border: 1px solid var(--qg-border); + border-radius: 10px; + box-shadow: 0 10px 24px rgba(0,0,0,0.35); + padding: 8px; + z-index: 60; + display: none; +} +.lb-multi.open .lb-multi-panel { display: block; } +.lb-multi-section + .lb-multi-section { + border-top: 1px solid var(--qg-border-soft, var(--qg-border)); + margin-top: 4px; padding-top: 8px; +} +.lb-multi-section-head { + display: flex; align-items: center; justify-content: space-between; + padding: 0 4px 4px; +} +.lb-multi-section-head .label { + font-size: 10px; font-weight: 600; text-transform: uppercase; + letter-spacing: 0.06em; color: var(--qg-fg-muted); +} +.lb-multi-section-head .actions { + display: inline-flex; gap: 4px; align-items: center; font-size: 10px; + color: var(--qg-fg-muted); +} +.lb-multi-section-head .actions button { + background: transparent; border: 0; color: var(--qg-fg-muted); + cursor: pointer; padding: 0; font-family: inherit; font-size: 10px; +} +.lb-multi-section-head .actions button:hover { color: var(--qg-accent); } +.lb-multi-item { + display: flex; align-items: center; gap: 8px; + padding: 5px 6px; border-radius: 6px; + cursor: pointer; color: var(--qg-fg); font-size: 12px; } -.qg-table-scroll::-webkit-scrollbar-thumb:hover { - background: var(--qg-fg-muted); +.lb-multi-item:hover { background: var(--qg-bg-soft); } +.lb-multi-item input[type="checkbox"] { + width: 14px; height: 14px; + accent-color: var(--qg-accent); + margin: 0; cursor: pointer; } -.qg-table-scroll { + +/* segmented metric control */ +.lb-seg { + display: inline-flex; + border: 1px solid var(--qg-border); + border-radius: 8px; + overflow: hidden; + background: var(--qg-bg); + height: 26px; +} +.lb-seg-btn { + padding: 0 11px; font-size: 11.5px; + border: 0; background: transparent; + color: var(--qg-fg-muted); cursor: pointer; + font-family: inherit; font-weight: 500; + border-right: 1px solid var(--qg-border); + transition: background 0.15s, color 0.15s; + line-height: 24px; +} +.lb-seg-btn:last-child { border-right: 0; } +.lb-seg-btn:hover:not(.active) { color: var(--qg-fg); background: var(--qg-bg-soft); } +.lb-seg-btn.active { background: var(--qg-accent); color: #fff; } + +/* search input */ +.lb-search-wrap { display: flex; align-items: center; gap: 10px; flex-wrap: wrap; } +.lb-search-input { + display: flex; align-items: center; gap: 8px; + background: var(--qg-bg); border: 1px solid var(--qg-border); + border-radius: 8px; padding: 5px 9px; min-width: 240px; +} +.lb-search-input input { + background: transparent; border: 0; outline: 0; color: var(--qg-fg); + font-size: 12px; flex: 1; font-family: inherit; +} +.lb-search-input svg { color: var(--qg-fg-muted); } +.lb-row-count { font-size: 11px; color: var(--qg-fg-muted); white-space: nowrap; } + +/* best-in-column legend */ +.lb-best-legend { + display: inline-flex; align-items: center; gap: 7px; + font-size: 11px; color: var(--qg-fg-muted); + letter-spacing: 0.02em; white-space: nowrap; +} +.lb-best-legend .dot { + width: 7px; height: 7px; border-radius: 50%; + background: var(--qg-accent); + box-shadow: 0 0 5px rgba(236,72,153,0.55); +} + +/* ---------- table card --------------------------------------------------- + The card is sized by its flex parent (main) which fills the viewport. + It takes whatever vertical space remains after the filter card etc. + Inner scroll on both axes; sticky thead anchors to top:0 of the scroll + container; sticky axis cols anchor to its left. min-height:0 on the + scroll is the standard flex+overflow fix so it can actually shrink. */ +.lb-table-card { + background: var(--qg-bg-soft); + border: 1px solid var(--qg-border); + border-radius: 12px; + overflow: hidden; + display: flex; flex-direction: column; + flex: 1; + min-height: 0; +} +.lb-table-scroll { + flex: 1; overflow: auto; + min-height: 0; scrollbar-width: thin; scrollbar-color: var(--qg-border) var(--qg-bg-soft); } +.lb-table-scroll::-webkit-scrollbar { width: 9px; height: 9px; } +.lb-table-scroll::-webkit-scrollbar-track { background: var(--qg-bg-soft); } +.lb-table-scroll::-webkit-scrollbar-thumb { background: var(--qg-border); border-radius: 6px; } +.lb-table-scroll::-webkit-scrollbar-thumb:hover { background: var(--qg-fg-muted); } -/* ---------- Sticky thead inside the scroll container ------------------- */ +table.lb { + width: 100%; border-collapse: separate; border-spacing: 0; font-size: 12.5px; + --axis-w-1: 30px; --axis-w-2: 80px; --axis-w-3: 132px; --axis-w-4: 94px; +} -.qg-table-scroll thead th { +.lb thead th { position: sticky; - top: 0; - z-index: 10; - background: var(--qg-bg-soft); - box-shadow: inset 0 -1px 0 var(--qg-border); + z-index: 12; + background: var(--qg-bg-softer, var(--qg-bg-soft)); + color: var(--qg-fg-muted); + font-size: 10.5px; font-weight: 600; text-transform: uppercase; + letter-spacing: 0.05em; text-align: left; + padding: 8px 8px; + border-bottom: 1px solid var(--qg-border); + white-space: nowrap; +} +/* Two-row sticky thead: top row at top:0, bot row stacked just below it. + Without the explicit offset, both rows would pin at top:0 and the bot row + (later in DOM) would paint over the top row. */ +.lb thead tr.top th { top: 0; padding-bottom: 6px; } +.lb thead tr.bot th { + top: 28px; + padding-top: 7px; font-size: 10px; color: var(--qg-fg-muted); + border-bottom: 1px solid var(--qg-border); font-weight: 500; } +.lb thead tr.top th.group { + text-align: center; color: var(--qg-fg); + border-left: 1px solid var(--qg-border-soft, var(--qg-border)); +} +.lb thead tr.top th.group:first-of-type { border-left: 0; } +/* center-align matches the value cells; padding-left:15 compensates for the + ~13px of right-side space reserved by the inline sort arrow, so the name's + visual centerline stays aligned with the value below it. (Scope is just + `.lb thead .metric` — works whether the metric th is in tr.top or tr.bot.) */ +.lb thead .metric { + text-align: center; + padding-left: 15px; +} +.lb thead .metric.first { border-left: 1px solid var(--qg-border-soft, var(--qg-border)); } +.lb .hidden, +.lb .lb-hide-ds, +.lb .lb-hide-metric { display: none !important; } +/* tabs/panes outside the .lb table also need these */ +.lb-hide-ds, .lb-hide-metric { display: none !important; } -/* ---------- Sticky axis columns ---------------------------------------- * - * Each page declares per-table widths via inline style on the : - * style="--qg-axis-w-1: 120px; --qg-axis-w-2: 180px;" - * Then applies .qg-axis-1 / -2 / -3 on the relevant
+ . */ +/* small count badge for axis col headers ("Method (10)") */ +.lb-count { + display: inline-flex; align-items: center; justify-content: center; + min-width: 16px; padding: 0 5px; height: 14px; + margin-left: 5px; + border-radius: 999px; + background: var(--qg-bg); + border: 1px solid var(--qg-border); + font-size: 9px; font-weight: 500; + color: var(--qg-fg-muted); + letter-spacing: 0; + text-transform: none; + vertical-align: middle; +} -.qg-axis-1, -.qg-axis-2, -.qg-axis-3 { +/* sortable metric header — arrow always reserves 9px, visible only when sorted */ +.lb thead .metric { cursor: pointer; user-select: none; transition: color 0.15s; } +.lb thead .metric:hover { color: var(--qg-fg); } +.lb thead .metric .name { display: inline-block; } +.lb thead .metric .arrow { + display: inline-block; + margin-left: 4px; + width: 9px; text-align: center; + font-size: 9px; + visibility: hidden; + vertical-align: middle; + pointer-events: none; +} +.lb thead .metric.sort-asc .arrow, +.lb thead .metric.sort-desc .arrow { visibility: visible; } +.lb thead .metric.sort-asc, .lb thead .metric.sort-desc { color: var(--qg-accent); } +.lb thead .metric.sort-asc .arrow::after { content: '▲'; } +.lb thead .metric.sort-desc .arrow::after { content: '▼'; } + +/* sticky axis cols (width:1px + nowrap pins them to content width). + Z-index ladder: + thead axis intersection: 14 (covers everything when both scrolls move) + thead non-axis (datasets): 12 (from .lb thead th) + tbody axis (sticky-left): 11 + tbody non-axis: (auto, normal flow) +*/ +.lb .ax { position: sticky; - z-index: 20; - background: var(--qg-bg-soft); + background: var(--qg-bg-softer, var(--qg-bg-soft)); + z-index: 11; + width: 1px; + white-space: nowrap; } -.qg-axis-1 { left: 0; min-width: var(--qg-axis-w-1, 120px); } -.qg-axis-2 { left: var(--qg-axis-w-1, 120px); min-width: var(--qg-axis-w-2, 180px); } -.qg-axis-3 { - left: calc(var(--qg-axis-w-1, 120px) + var(--qg-axis-w-2, 180px)); - border-right: 1px solid var(--qg-border); +/* corner cells (axis × thead): stick both ways, above all */ +.lb thead .ax { z-index: 14; } +.lb tbody .ax { background: var(--qg-bg); } +.lb tbody tr.data:hover .ax { background: var(--qg-row-hover, var(--qg-bg-soft)); } +.lb tbody tr.data.expanded > .ax { background: var(--qg-bg-soft); } +.lb .ax-1 { left: 0; min-width: var(--axis-w-1); padding-left: 8px; padding-right: 4px; } +.lb .ax-2 { left: var(--axis-w-1); min-width: var(--axis-w-2); } +.lb .ax-3 { left: calc(var(--axis-w-1) + var(--axis-w-2)); min-width: var(--axis-w-3); } +.lb .ax-4 { + left: calc(var(--axis-w-1) + var(--axis-w-2) + var(--axis-w-3)); + min-width: var(--axis-w-4); border-right: 1px solid var(--qg-border); +} + +.lb tbody td { + padding: 7px 8px; + border-top: 1px solid var(--qg-border-soft, var(--qg-border)); + white-space: nowrap; +} +.lb tbody tr.data:hover td { background-color: var(--qg-row-hover, var(--qg-bg-soft)); } +.lb tbody tr.data.expanded > td { background-color: var(--qg-bg-soft); } + +/* metric value cells */ +.lb-metric-cell { + text-align: center; font-family: 'JetBrains Mono', monospace; + font-variant-numeric: tabular-nums; + min-width: 68px; +} +.lb-metric-cell.metric-primary { min-width: 80px; } +.lb-metric-cell .primary { color: var(--qg-fg); font-size: 12.5px; } +.lb-metric-cell .secondary { color: var(--qg-fg-muted); font-size: 11px; } +table.lb.mode-single .lb-metric-cell .secondary { color: var(--qg-fg); font-size: 12.5px; } +.lb-metric-cell.best .primary, +.lb-metric-cell.best .secondary { color: var(--qg-accent); font-weight: 700; } +table.lb.mode-single .lb-metric-cell.best .secondary { color: var(--qg-accent); font-weight: 700; } +[data-theme="dark"] .lb-metric-cell.best .primary, +[data-theme="dark"] .lb-metric-cell.best .secondary { text-shadow: 0 0 6px rgba(236,72,153,0.35); } + +.lb-method-name { font-weight: 600; } + +/* expand/collapse button (15x15 plus/minus rotation) */ +.lb-exp-btn { + position: relative; + width: 15px; height: 15px; + border-radius: 4px; + background: var(--qg-bg-soft); border: 1px solid var(--qg-border); + color: var(--qg-fg-muted); + cursor: pointer; + padding: 0; + transition: background 0.2s, border-color 0.2s, color 0.2s; +} +.lb-exp-btn:hover { color: var(--qg-fg); border-color: var(--qg-accent); } +.lb-exp-btn::before, .lb-exp-btn::after { + content: ''; position: absolute; + top: 50%; left: 50%; + width: 7px; height: 1.5px; + background: currentColor; + border-radius: 1px; + transform: translate(-50%, -50%); + transition: transform 0.28s cubic-bezier(.4,.0,.2,1); +} +.lb-exp-btn::after { transform: translate(-50%, -50%) rotate(90deg); } +tr.expanded .lb-exp-btn { + background: var(--qg-accent); border-color: var(--qg-accent); color: #fff; } +tr.expanded .lb-exp-btn::after { transform: translate(-50%, -50%) rotate(0deg); } -/* Body cells: lower z so sticky thead wins; different bg so columns pop - * against the soft-bg header. Hover keeps the columns in sync with the row. */ -.qg-table-scroll tbody td.qg-axis-1, -.qg-table-scroll tbody td.qg-axis-2, -.qg-table-scroll tbody td.qg-axis-3 { +/* expanded row panel — grid-rows transition for true height animation. + The td has full table width via colspan; we sticky-left the inner wrap + and clamp its width to the scroll-container's visible viewport (set as + --lb-vp-w in JS) so the panel stays in view regardless of horizontal + scroll, and DOES NOT extend the table's overall scroll width. */ +.lb-exp-row > td { + padding: 0 !important; + background: var(--qg-bg) !important; + border-top: 0 !important; +} +.lb-exp-wrap { + display: grid; + grid-template-rows: 0fr; + transition: grid-template-rows 0.32s cubic-bezier(.4,.0,.2,1); + position: sticky; + left: 0; + width: var(--lb-vp-w, 100%); + max-width: var(--lb-vp-w, 100%); +} +.lb-exp-row.show .lb-exp-wrap { grid-template-rows: 1fr; } +.lb-exp-wrap > .lb-exp-inner { overflow: hidden; min-height: 0; } +.lb-exp-panel { + padding: 16px 20px 20px; background: var(--qg-bg); - z-index: 5; -} -.qg-table-scroll tbody tr:hover td.qg-axis-1, -.qg-table-scroll tbody tr:hover td.qg-axis-2, -.qg-table-scroll tbody tr:hover td.qg-axis-3 { - background: var(--qg-row-hover); -} - -/* On narrow viewports the sticky columns would eat the whole screen; drop - * back to standard scroll so users can reach the data columns. */ -@media (max-width: 768px) { - .qg-axis-1, - .qg-axis-2, - .qg-axis-3 { - position: static; - min-width: 0; - } - .qg-axis-3 { border-right: none; } + border-top: 1px solid var(--qg-border); + border-bottom: 1px solid var(--qg-border); + opacity: 0; + transform: translateY(-4px); + transition: opacity 0.22s ease 0.06s, transform 0.22s ease 0.06s; } +.lb-exp-row.show .lb-exp-panel { opacity: 1; transform: translateY(0); } -/* ---------- Best-cell highlight ---------------------------------------- */ +.lb-exp-meta { + display: flex; flex-wrap: wrap; align-items: center; gap: 8px; + font-size: 12px; color: var(--qg-fg-muted); margin-bottom: 12px; +} +.lb-exp-meta .pill { + background: var(--qg-bg-soft); border: 1px solid var(--qg-border); + padding: 3px 9px; border-radius: 999px; font-family: 'JetBrains Mono', monospace; + font-size: 11px; color: var(--qg-fg); + display: inline-flex; align-items: center; gap: 6px; +} +.lb-exp-meta .pill strong { + color: var(--qg-fg-muted); font-weight: 500; + text-transform: uppercase; font-size: 10px; letter-spacing: 0.05em; +} -.qg-cell-best { - color: var(--qg-accent); - font-weight: 700; +/* tabs */ +.lb-tabs { + display: flex; gap: 0; + border-bottom: 1px solid var(--qg-border); + margin-bottom: 14px; overflow-x: auto; + scrollbar-width: thin; scrollbar-color: var(--qg-border) transparent; } -[data-theme="dark"] .qg-cell-best { - text-shadow: 0 0 6px rgba(236, 72, 153, 0.4); +.lb-tab { + padding: 8px 13px; font-size: 12.5px; color: var(--qg-fg-muted); + cursor: pointer; border: 0; background: transparent; + border-bottom: 2px solid transparent; white-space: nowrap; + font-family: inherit; font-weight: 500; + display: flex; align-items: center; gap: 7px; + transition: color 0.15s; } +.lb-tab:hover { color: var(--qg-fg); } +.lb-tab.active { color: var(--qg-fg); border-bottom-color: var(--qg-accent); } +.lb-tab .score { font-family: 'JetBrains Mono', monospace; font-size: 11px; color: var(--qg-fg-muted); } +.lb-tab.active .score { color: var(--qg-accent); } +.lb-tab-pane { display: none; } +.lb-tab-pane.active { display: block; } -/* ---------- Sort-arrow polish on InteractiveTable thead ---------------- */ +/* code-step blocks */ +.lb-step { + background: var(--qg-bg-soft); + border: 1px solid var(--qg-border); + border-radius: 9px; + margin-bottom: 10px; + overflow: hidden; +} +.lb-step-head { + display: flex; align-items: center; justify-content: space-between; + padding: 8px 12px; background: var(--qg-bg-softer, var(--qg-bg-soft)); + border-bottom: 1px solid var(--qg-border); +} +.lb-step-head .title { + display: flex; align-items: center; gap: 9px; + font-size: 12px; font-weight: 600; color: var(--qg-fg); +} +.lb-step-head .num { + width: 18px; height: 18px; border-radius: 999px; + background: var(--qg-accent); color: #fff; font-size: 10.5px; font-weight: 700; + display: inline-flex; align-items: center; justify-content: center; +} +.lb-step-head .hint { + color: var(--qg-fg-muted); font-size: 11px; font-weight: 400; margin-left: 4px; +} +.lb-copy-btn { + font-family: inherit; font-size: 11px; font-weight: 500; + padding: 4px 8px; border-radius: 6px; + border: 1px solid var(--qg-border); background: var(--qg-bg); + color: var(--qg-fg-muted); cursor: pointer; + display: inline-flex; align-items: center; gap: 5px; + transition: color 0.15s, border-color 0.15s; +} +.lb-copy-btn:hover { color: var(--qg-fg); border-color: var(--qg-accent); } +.lb-copy-btn.copied { color: var(--qg-accent); border-color: var(--qg-accent); } +.lb-step pre { + margin: 0; padding: 12px 14px; overflow-x: auto; + font-family: 'JetBrains Mono', monospace; font-size: 11.5px; line-height: 1.55; + color: var(--qg-fg); + scrollbar-width: thin; scrollbar-color: var(--qg-border) transparent; +} +.lb-step pre::-webkit-scrollbar { height: 7px; } +.lb-step pre::-webkit-scrollbar-thumb { background: var(--qg-border); border-radius: 6px; } +.lb-step pre .flag { color: #a78bfa; } +.lb-step pre .val { color: #6ee7b7; } + +.lb-exp-footer { + display: flex; gap: 10px; align-items: center; + margin-top: 6px; font-size: 11.5px; color: var(--qg-fg-muted); + flex-wrap: wrap; +} +.lb-exp-footer a { color: var(--qg-accent); text-decoration: none; } +.lb-exp-footer a:hover { text-decoration: underline; } -.qg-itable table thead th .qg-sort-arrow { - opacity: 0.4; - font-size: 0.75rem; - margin-left: 0.25rem; +/* responsive */ +@media (max-width: 1280px) { + table.lb { --axis-w-2: 76px; --axis-w-3: 122px; --axis-w-4: 88px; } + .lb-metric-cell { min-width: 62px; } + .lb-metric-cell.metric-primary { min-width: 74px; } } -.qg-itable table thead th:hover .qg-sort-arrow { - opacity: 0.7; +@media (max-width: 1100px) { + table.lb { font-size: 12px; } + .lb-metric-cell { min-width: 58px; } + .lb-metric-cell.metric-primary { min-width: 68px; } + .lb tbody td, .lb thead th { padding: 6px 6px; } } -.qg-itable table thead th[data-sort-dir] .qg-sort-arrow { - opacity: 1; - color: var(--qg-accent); +@media (max-width: 900px) { + .lb .ax { position: static; } + .lb .ax-4 { border-right: 0; } + .lb-table-card { height: auto; } + .lb-filter-row { gap: 12px; } + .lb-search-wrap { margin-left: 0; width: 100%; } + .lb-search-input { min-width: 0; flex: 1; } } diff --git a/web/shared/components/Footer.astro b/web/shared/components/Footer.astro index 0e7214f..515d072 100644 --- a/web/shared/components/Footer.astro +++ b/web/shared/components/Footer.astro @@ -26,7 +26,7 @@ const baseLinks: FooterLink[] = [ ---