From 30fac74f3071f6433068af85c3e000b430f8ecd8 Mon Sep 17 00:00:00 2001
From: renarchi <159624970+renarchi@users.noreply.github.com>
Date: Tue, 16 Jun 2026 00:37:58 +0300
Subject: [PATCH 1/3] feat: Add Datasets page and DIV2K
---
data/tag-categories.json | 12 +
data/tags.json | 16 +
data/users.json | 18 +
.../components/dataset-download-button.tsx | 197 ++++++++
src/elements/components/editable-tags.tsx | 39 +-
src/elements/tag-selector.tsx | 47 +-
src/lib/license.ts | 6 +
src/lib/util.ts | 2 +-
src/pages/add-dataset.tsx | 280 ++++++++++++
src/pages/datasets/[id].tsx | 426 ++++++++++++++++++
src/pages/datasets/index.tsx | 136 ++++++
11 files changed, 1162 insertions(+), 17 deletions(-)
create mode 100644 src/elements/components/dataset-download-button.tsx
create mode 100644 src/pages/add-dataset.tsx
create mode 100644 src/pages/datasets/[id].tsx
create mode 100644 src/pages/datasets/index.tsx
diff --git a/data/tag-categories.json b/data/tag-categories.json
index 193cc209..678b08fd 100644
--- a/data/tag-categories.json
+++ b/data/tag-categories.json
@@ -162,5 +162,17 @@
"helper:invalid-scale",
"helper:invalid-channels"
]
+ },
+ "dataset": {
+ "name": "Dataset",
+ "description": "Tags specific to datasets",
+ "order": 9,
+ "simple": true,
+ "tags": [
+ "dataset:realistic",
+ "dataset:anime",
+ "dataset:manga",
+ "dataset:game-textures"
+ ]
}
}
\ No newline at end of file
diff --git a/data/tags.json b/data/tags.json
index 07535cee..00736314 100644
--- a/data/tags.json
+++ b/data/tags.json
@@ -292,6 +292,22 @@
"name": "RGBA",
"description": "The model upscales images with transparency."
},
+ "dataset:anime": {
+ "name": "Anime",
+ "description": "Anime dataset"
+ },
+ "dataset:game-textures": {
+ "name": "Game Textures",
+ "description": "Game textures dataset"
+ },
+ "dataset:manga": {
+ "name": "Manga",
+ "description": "Manga dataset"
+ },
+ "dataset:realistic": {
+ "name": "Realistic",
+ "description": "Realistic dataset"
+ },
"helper:invalid-channels": {
"name": "Invalid channels",
"description": "The number of input or output channels of the model is not valid."
diff --git a/data/users.json b/data/users.json
index deec592b..46a6889a 100644
--- a/data/users.json
+++ b/data/users.json
@@ -14,6 +14,9 @@
"alsa": {
"name": "Alsa"
},
+ "andrey-ignatov": {
+ "name": "Andrey Ignatov"
+ },
"aptitude": {
"name": "aptitude"
},
@@ -83,6 +86,9 @@
"dinjerr": {
"name": "DinJerr"
},
+ "eirikur-agustsson": {
+ "name": "Eirikur Agustsson"
+ },
"eula": {
"name": "end user license agreement#9756"
},
@@ -128,6 +134,9 @@
"jingyunliang": {
"name": "JingyunLiang"
},
+ "jiqing-wu": {
+ "name": "Jiqing Wu"
+ },
"jixiaozhong": {
"name": "jixiaozhong"
},
@@ -164,6 +173,9 @@
"loinne": {
"name": "Loinne"
},
+ "luc-van-gool": {
+ "name": "Luc Van Gool"
+ },
"lyonhrt": {
"name": "LyonHrt"
},
@@ -218,6 +230,9 @@
"pokepress": {
"name": "pokepress"
},
+ "radu-timofte": {
+ "name": "Radu Timofte"
+ },
"rastrum": {
"name": "Rastrum"
},
@@ -248,6 +263,9 @@
"sharekhan": {
"name": "SharekhaN"
},
+ "shuhang-gu": {
+ "name": "Shuhang Gu"
+ },
"sirosky": {
"name": "Sirosky"
},
diff --git a/src/elements/components/dataset-download-button.tsx b/src/elements/components/dataset-download-button.tsx
new file mode 100644
index 00000000..a5bcb216
--- /dev/null
+++ b/src/elements/components/dataset-download-button.tsx
@@ -0,0 +1,197 @@
+import { Menu, Transition } from '@headlessui/react';
+import { Fragment } from 'react';
+import { BsChevronDown, BsFillTrashFill } from 'react-icons/bs';
+import { FiExternalLink } from 'react-icons/fi';
+import { SiDropbox, SiGithub, SiGoogledrive, SiMega, SiMicrosoftonedrive } from 'react-icons/si';
+import Logo from '../../../public/logo.svg';
+import { isSelfHosted, toDirectDownloadLink } from '../../lib/download-util';
+import { joinClasses } from '../../lib/util';
+import { Link } from './link';
+
+/** Props accepted by `DatasetDownloadButton`. */
+type DatasetDownloadButtonProps = {
+ /** Dataset link; the edit menu treats an empty string as "no URL set". */
+ url: string;
+ /** When true, the edit menu (edit / remove / add URL) is not shown. */
+ readonly?: boolean;
+ /** Receives the newly entered URL, or `''` when the URL is removed. */
+ onChange?: (url: string) => void;
+};
+
+/** Display names for hosters that are only recognised by their exact hostname. */
+const HOSTER_BY_HOSTNAME: ReadonlyMap<string, string> = new Map([
+    ['drive.google.com', 'Google Drive'],
+    ['cdn.discordapp.com', 'Discord'],
+]);
+
+/** Display names for hosters recognised by the last two labels of the hostname (subdomains included). */
+const HOSTER_BY_DOMAIN: ReadonlyMap<string, string> = new Map([
+    ['github.com', 'GitHub'],
+    ['1drv.ms', 'OneDrive'],
+    ['mega.nz', 'Mega'],
+    ['mediafire.com', 'MediaFire'],
+    ['pcloud.link', 'pCloud'],
+    ['icedrive.net', 'Icedrive'],
+    ['dropbox.com', 'Dropbox'],
+]);
+
+/**
+ * Turns a URL into a human-readable hoster name.
+ *
+ * @param url Absolute URL of the download.
+ * @returns The display name of a known hoster, otherwise the URL's hostname. If `url` cannot be
+ * parsed, the error is logged at debug level and `'an unknown hoster'` is returned.
+ */
+const hostFromUrl = (url: string): string => {
+    let hostname: string;
+    try {
+        hostname = new URL(url).hostname;
+    } catch (e) {
+        console.debug(e);
+        return 'an unknown hoster';
+    }
+
+    // e.g. "gist.github.com" -> "github.com"; a single-label host such as "localhost" stays as it is
+    const lastTwoLabels = hostname.split('.').slice(-2).join('.');
+    return HOSTER_BY_DOMAIN.get(lastTwoLabels) ?? HOSTER_BY_HOSTNAME.get(hostname) ?? hostname;
+};
+
+/**
+ * Picks the icon for a hoster name as produced by `hostFromUrl`; names without a dedicated
+ * case fall through to `default`.
+ *
+ * NOTE(review): the element after each `return` is missing in this copy of the patch, so which
+ * icon belongs to which case cannot be confirmed from here.
+ */
+const iconFromHost = (host: string) => {
+ switch (host) {
+ case 'GitHub':
+ return ;
+ case 'Google Drive':
+ return ;
+ case 'OneDrive':
+ return ;
+ case 'Mega':
+ return ;
+ case 'Dropbox':
+ return ;
+ default:
+ return ;
+ }
+};
+
+/** A link counts as external exactly when `isSelfHosted` rejects it. */
+const isMirrorExternal = (url: string) => {
+    return !isSelfHosted(url);
+};
+
+export const DatasetDownloadButton = ({ url, readonly, onChange }: DatasetDownloadButtonProps) => {
+ const isExternal = isMirrorExternal(url);
+ const host = hostFromUrl(url);
+
+ const showMenu = !readonly;
+
+ return (
+
+
+
+ {isExternal ? (
+
+ ) : (
+
+
+
+ )}
+ Visit Dataset Link
+ {isExternal && (
+
+ Hosted by {host}
+
+ )}
+
+
+
+ {showMenu && (
+
+
+
+
+
+
+
+
+
+ {url !== '' && (
+
{
+ const newUrl = prompt('Edit URL', url);
+ if (newUrl !== null && onChange) {
+ onChange(newUrl);
+ }
+ }}
+ >
+ {isExternal ? (
+
+
+ {iconFromHost(host)}
+
+
{host}
+
+ ) : (
+
+ )}
+ {!readonly && (
+ {
+ e.stopPropagation();
+ if (onChange) {
+ onChange('');
+ }
+ }}
+ >
+
+
+ )}
+
+ )}
+ {!readonly && url === '' && (
+
{
+ const newUrl = prompt('Enter a new URL');
+ if (newUrl && onChange) {
+ onChange(newUrl);
+ }
+ }}
+ >
+ + Add URL
+
+ )}
+
+
+
+
+ )}
+
+ );
+};
diff --git a/src/elements/components/editable-tags.tsx b/src/elements/components/editable-tags.tsx
index ac4b0494..eefc69bd 100644
--- a/src/elements/components/editable-tags.tsx
+++ b/src/elements/components/editable-tags.tsx
@@ -1,6 +1,6 @@
import { Popover, Transition } from '@headlessui/react';
import Link from 'next/link';
-import { Fragment, useEffect, useState } from 'react';
+import { Fragment, useEffect, useMemo, useState } from 'react';
import { BsChevronDown } from 'react-icons/bs';
import { useTags } from '../../lib/hooks/use-tags';
import { addImpliedTags, removeImplyingTags } from '../../lib/implied-tags';
@@ -11,13 +11,20 @@ import style from './editable-tags.module.scss';
export interface SmallTagProps {
tagId: TagId;
name: string;
+ context?: 'models' | 'datasets';
}
-export function SmallTag({ tagId, name }: SmallTagProps) {
+export function SmallTag({ tagId, name, context = 'models' }: SmallTagProps) {
return (
{name}
@@ -28,14 +35,16 @@ export interface EditableTagsProps {
tags: readonly TagId[];
onChange?: (value: TagId[]) => void;
readonly?: boolean;
+ context?: 'models' | 'datasets';
}
-export function EditableTags({ tags, onChange, readonly }: EditableTagsProps) {
+export function EditableTags({ tags, onChange, readonly, context = 'models' }: EditableTagsProps) {
const { tagData } = useTags();
return (
{!readonly && onChange && (
@@ -44,6 +53,7 @@ export function EditableTags({ tags, onChange, readonly }: EditableTagsProps) {
const name = tagData.get(tagId)?.name ?? `unknown tag:${tagId}`;
return (
void }) {
+function EditTags({
+ tags,
+ onChange,
+ context = 'models',
+}: {
+ tags: readonly TagId[];
+ onChange: (value: TagId[]) => void;
+ context?: 'models' | 'datasets';
+}) {
const { tagData, categoryOrder } = useTags();
+ const filteredCategoryOrder = useMemo(() => {
+ if (context === 'datasets') {
+ return categoryOrder.filter(([id]) => id === 'dataset');
+ }
+ return categoryOrder.filter(([id]) => id !== 'dataset');
+ }, [categoryOrder, context]);
+
const [currentTags, setCurrentTags] = useState(tags);
useEffect(() => {
setCurrentTags(tags);
@@ -104,7 +129,7 @@ function EditTags({ tags, onChange }: { tags: readonly TagId[]; onChange: (value
}`}
>
- {categoryOrder.map(([categoryId, category]) => {
+ {filteredCategoryOrder.map(([categoryId, category]) => {
const manual = category.tags.filter((tagId) => !isDerivedTag(tagId));
if (manual.length === 0) {
return
;
diff --git a/src/elements/tag-selector.tsx b/src/elements/tag-selector.tsx
index 6c2621a8..48d12d0b 100644
--- a/src/elements/tag-selector.tsx
+++ b/src/elements/tag-selector.tsx
@@ -56,9 +56,10 @@ export type TagSelectorStyle = 'simple' | 'advanced';
export interface TagSelectorProps {
selection: TagSelection;
onChange: (selection: TagSelection, style: TagSelectorStyle) => void;
+ context?: 'models' | 'datasets';
}
-export function TagSelector({ selection, onChange }: TagSelectorProps) {
+export function TagSelector({ selection, onChange, context = 'models' }: TagSelectorProps) {
const [simple, setSimple] = useState(true);
const { editMode } = useWebApi();
@@ -66,22 +67,28 @@ export function TagSelector({ selection, onChange }: TagSelectorProps) {
useEffect(() => {
if (simple) {
- const reduced = reduceToSimple(tagCategoryData.values(), selection);
+ const filteredCategories =
+ context === 'datasets'
+ ? [...tagCategoryData.entries()].filter(([id]) => id === 'dataset').map(([, c]) => c)
+ : [...tagCategoryData.entries()].filter(([id]) => id !== 'dataset').map(([, c]) => c);
+ const reduced = reduceToSimple(filteredCategories, selection);
if (reduced !== selection) {
setSimple(false);
}
}
- }, [simple, tagData, tagCategoryData, selection]);
+ }, [simple, tagData, tagCategoryData, selection, context]);
return (
{simple ? (
) : (
@@ -92,7 +99,15 @@ export function TagSelector({ selection, onChange }: TagSelectorProps) {
onClick={() => {
setSimple(!simple);
if (!simple) {
- const reduced = reduceToSimple(tagCategoryData.values(), selection);
+ const filteredCategories =
+ context === 'datasets'
+ ? [...tagCategoryData.entries()]
+ .filter(([id]) => id === 'dataset')
+ .map(([, c]) => c)
+ : [...tagCategoryData.entries()]
+ .filter(([id]) => id !== 'dataset')
+ .map(([, c]) => c);
+ const reduced = reduceToSimple(filteredCategories, selection);
if (reduced !== selection) onChange(reduced, 'simple');
}
}}
@@ -128,13 +143,20 @@ export function TagSelector({ selection, onChange }: TagSelectorProps) {
);
}
-function AdvancedTagSelector({ selection, onChange }: TagSelectorProps) {
+function AdvancedTagSelector({ selection, onChange, context = 'models' }: TagSelectorProps) {
const { tagData, categoryOrder } = useTags();
const { editMode } = useWebApi();
+ const filteredCategoryOrder = useMemo(() => {
+ if (context === 'datasets') {
+ return categoryOrder.filter(([id]) => id === 'dataset');
+ }
+ return categoryOrder.filter(([id]) => id !== 'dataset');
+ }, [categoryOrder, context]);
+
return (
- {categoryOrder.map(([categoryId, category]) => {
+ {filteredCategoryOrder.map(([categoryId, category]) => {
if (category.tags.length === 0 || (category.editOnly && !editMode))
return
;
@@ -166,11 +188,18 @@ function AdvancedTagSelector({ selection, onChange }: TagSelectorProps) {
);
}
-function SimpleTagSelector({ selection, onChange }: TagSelectorProps) {
+function SimpleTagSelector({ selection, onChange, context = 'models' }: TagSelectorProps) {
const { tagData, categoryOrder } = useTags();
+ const filteredCategoryOrder = useMemo(() => {
+ if (context === 'datasets') {
+ return categoryOrder.filter(([id]) => id === 'dataset');
+ }
+ return categoryOrder.filter(([id]) => id !== 'dataset');
+ }, [categoryOrder, context]);
+
const tags = useMemo(() => {
- return categoryOrder
+ return filteredCategoryOrder
.map(([, category]) => category)
.filter((category) => category.simple)
.flatMap(({ tags }) => {
@@ -183,7 +212,7 @@ function SimpleTagSelector({ selection, onChange }: TagSelectorProps) {
})
.filter(isNonNull);
});
- }, [categoryOrder, tagData]);
+ }, [filteredCategoryOrder, tagData]);
const selected: TagId | undefined = useMemo(() => {
const required = tags.filter(([tagId]) => selection.get(tagId) === SelectionState.Required);
diff --git a/src/lib/license.ts b/src/lib/license.ts
index 0a95e6ef..960c181c 100644
--- a/src/lib/license.ts
+++ b/src/lib/license.ts
@@ -21,6 +21,12 @@ const KNOWN_LICENSES_: Record
= {
conditions: ['include-copyright', 'state-changes'],
limitations: ['liability', 'trademark-use', 'warranty'],
},
+ 'Academic-Research-Only': {
+ name: 'Academic Research Purpose Only',
+ permissions: ['private-use', 'modifications'],
+ conditions: ['include-copyright'],
+ limitations: ['liability', 'warranty'],
+ },
'BSD-3-Clause': {
name: 'BSD 3-Clause "New" or "Revised" License',
permissions: ['commercial-use', 'distribution', 'modifications', 'private-use'],
diff --git a/src/lib/util.ts b/src/lib/util.ts
index 9608dabe..fba6a9f5 100644
--- a/src/lib/util.ts
+++ b/src/lib/util.ts
@@ -138,7 +138,7 @@ export function compareTagId(a: TagId, b: TagId): number {
return compareString(getTagCategory(a) ?? '', getTagCategory(b) ?? '') || compareString(a, b);
}
+/**
+ * Returns true for ids that contain a `:` separator, except ids starting with `dataset:`.
+ * The tag editor only offers tags for which this returns false, so the `dataset:` exception
+ * keeps dataset tags selectable there.
+ */
export function isDerivedTag(id: TagId): boolean {
- return id.includes(':');
+ return id.includes(':') && !id.startsWith('dataset:');
}
export function getColorMode(numberOfChannels: number) {
diff --git a/src/pages/add-dataset.tsx b/src/pages/add-dataset.tsx
new file mode 100644
index 00000000..16b719e8
--- /dev/null
+++ b/src/pages/add-dataset.tsx
@@ -0,0 +1,280 @@
+import { useRouter } from 'next/router';
+import React, { useEffect, useMemo, useState } from 'react';
+import { TextLink } from '../elements/components/link';
+import { HeadCommon } from '../elements/head-common';
+import { PageContainer } from '../elements/page';
+import { useArchitectures } from '../lib/hooks/use-architectures';
+import { useDatasets } from '../lib/hooks/use-datasets';
+import { useModels } from '../lib/hooks/use-models';
+import { useTags } from '../lib/hooks/use-tags';
+import { useWebApi } from '../lib/hooks/use-web-api';
+import { withImpliedTags } from '../lib/implied-tags';
+import { ParseResult, parseDiscordMessage } from '../lib/parse-discord-message';
+import { Dataset, DatasetId, Tag, TagId } from '../lib/schema';
+import { canonicalizeDatasetId } from '../lib/schema-util';
+import { IS_DEPLOYED } from '../lib/site-data';
+
+/** Keyword patterns paired with the tag they suggest, listed in the order the tags are added. */
+const DATASET_TAG_RULES: readonly (readonly [RegExp, string])[] = [
+    [/\b(?:realistic|photo|photography|real)\b/i, 'dataset:realistic'],
+    [/\b(?:anime|cartoon)\b/i, 'dataset:anime'],
+    [/\b(?:manga)\b/i, 'dataset:manga'],
+    [/\b(?:game[- ]textures?|textures?)\b/i, 'dataset:game-textures'],
+];
+
+/**
+ * Suggests dataset tags by matching whole-word keywords (case-insensitive) in the name and description.
+ *
+ * @param name Dataset name.
+ * @param description Dataset description.
+ * @param tagData Tag data handed to `withImpliedTags`.
+ * @returns Whatever `withImpliedTags` produces for the matched tags.
+ */
+function guessDatasetTags(name: string, description: string, tagData: ReadonlyMap): TagId[] {
+    const haystack = `${name} ${description}`;
+    const tags = new Set();
+
+    for (const [pattern, tagId] of DATASET_TAG_RULES) {
+        if (pattern.test(haystack)) {
+            tags.add(tagId as TagId);
+        }
+    }
+
+    return withImpliedTags(tags, tagData);
+}
+
+/** Parse result used while message-template parsing is switched off. */
+const EMPTY_PARSE_RESULT: ParseResult = { failed: [], parsed: {} };
+
+/**
+ * Example message showing the Name / License / Link / Description fields.
+ * NOTE(review): no use of this constant is visible in this copy of the file — presumably it is
+ * shown as the textarea placeholder; confirm.
+ */
+const discordMessageTemplate = `
+**Name:** DatasetNameThatIsCreative
+**License:** GNU GPL3 for example
+**Link:**
+**Description:** Your Description
+`.trim();
+
+function PageContent() {
+ const { datasetData } = useDatasets();
+ const { modelData } = useModels();
+ const { archData } = useArchitectures();
+ const { tagData } = useTags();
+ const router = useRouter();
+ const { webApi, editMode } = useWebApi(IS_DEPLOYED);
+
+ const [processing, setProcessing] = useState(false);
+ const [name, setName] = useState('Unknown');
+ const [partialId, setPartialId] = useState();
+ const [url, setUrl] = useState('');
+ const [description, setDescription] = useState('');
+
+ let fullId = canonicalizeDatasetId(partialId ?? name);
+ const partialIdFromFull = fullId;
+
+ const [parseMessageTemplate, setParseMessageTemplate] = useState(false);
+ const [messageTemplate, setMessageTemplate] = useState('');
+ const parsedMessage = useMemo((): ParseResult => {
+ if (!parseMessageTemplate) {
+ return EMPTY_PARSE_RESULT;
+ }
+ return parseDiscordMessage(messageTemplate, modelData, archData);
+ }, [parseMessageTemplate, messageTemplate, modelData, archData]);
+
+ useEffect(() => {
+ if (parsedMessage.parsed.name) {
+ setName(parsedMessage.parsed.name.replace(/[\s_\-]+/g, ' '));
+ setPartialId(undefined);
+ }
+ }, [parsedMessage.parsed.name]);
+
+ useEffect(() => {
+ if (parsedMessage.parsed.link) {
+ setUrl(parsedMessage.parsed.link);
+ }
+ }, [parsedMessage.parsed.link]);
+
+ useEffect(() => {
+ if (parsedMessage.parsed.description) {
+ setDescription(parsedMessage.parsed.description);
+ }
+ }, [parsedMessage.parsed.description]);
+
+ if (!editMode) return null;
+
+ /**
+  * Creates a dataset from the current form state and navigates to its page.
+  *
+  * Shows an alert and returns without changes if the id already exists. On a deployed site the
+  * real id and the dataset are stored in `sessionStorage` and the dataset is written under the
+  * placeholder id `OMDB_ADDDATASET_DUMMY`.
+  *
+  * If the update, the warm-up fetch or the navigation fails, `processing` is cleared again so the
+  * button can be used for a retry, and the error is re-thrown for the caller to handle.
+  */
+ const addDataset = async () => {
+     if (datasetData.has(fullId)) {
+         alert(`Dataset ${fullId} already exists`);
+         return;
+     }
+
+     const combinedDescription = [
+         parsedMessage.parsed.purpose ? `Purpose: ${parsedMessage.parsed.purpose}` : '',
+         description || parsedMessage.parsed.description || '',
+     ]
+         .join('\n\n')
+         .trim();
+
+     const dataset: Dataset = {
+         name,
+         author: [],
+         license: parsedMessage.parsed.license ?? null,
+         tags: guessDatasetTags(name, combinedDescription, tagData),
+         description: combinedDescription,
+         date: new Date().toISOString().split('T')[0],
+         url,
+         images: [],
+     };
+
+     setProcessing(true);
+
+     try {
+         if (IS_DEPLOYED) {
+             sessionStorage.setItem('dummy-datasetId', fullId);
+             sessionStorage.setItem('dummy-dataset', JSON.stringify(dataset));
+             fullId = 'OMDB_ADDDATASET_DUMMY' as DatasetId;
+         }
+
+         await webApi.datasets.update([[fullId, dataset]]);
+
+         // fetch before navigating to ensure the dataset page is available
+         const page = `/datasets/${fullId}`;
+         await fetch(page);
+         await router.push(page);
+     } catch (e) {
+         // Without this the button stays disabled with "Currently adding dataset" after a failure.
+         setProcessing(false);
+         throw e;
+     }
+ };
+
+ let inputError;
+ if (name.trim() === '') {
+ inputError = 'Name cannot be empty';
+ } else if (fullId.trim() === '') {
+ inputError = 'ID cannot be empty';
+ }
+
+ const canAddDataset = !inputError && !processing;
+
+ return (
+ <>
+ Add Dataset
+
+
{
+ setParseMessageTemplate(e.target.checked);
+ }}
+ />
+
Parse Discord Message Template?
+
+ {parseMessageTemplate && (
+
+
+ How to use: Paste a message from the{' '}
+
+ model-releases
+ {' '}
+ channel (or any message following the message template).
+ To copy a message: Move your mouse over the message > click on the three dots
+ ("More") > Copy Text.
+
+
+ )}
+
+
+
Name:
+
+ setName(e.target.value)}
+ />
+
+
+
Id:
+
+
+ {
+ const newPartialId = canonicalizeDatasetId(e.target.value);
+ setPartialId((prev) => {
+ if (prev === undefined && newPartialId === partialIdFromFull) {
+ return undefined;
+ }
+ return String(newPartialId) ? newPartialId : undefined;
+ });
+ }}
+ onChange={(e) => setPartialId(e.target.value)}
+ onKeyDown={(e) => {
+ if (e.key === 'Enter') {
+ const target = e.target as HTMLInputElement;
+ const newPartialId = canonicalizeDatasetId(String(target.value));
+ setPartialId(String(newPartialId) ? newPartialId : undefined);
+ }
+ }}
+ />
+
+
+
+
Homepage/URL:
+
+ setUrl(e.target.value)}
+ />
+
+
+
Description:
+
+
+
+
+
+ {
+ addDataset().catch((e) => console.error(e));
+ }}
+ >
+ {processing ? 'Currently adding dataset' : 'Add Dataset'}
+
+ {inputError}
+
+ >
+ );
+}
+
+export default function Page() {
+ return (
+ <>
+
+
+
+
+ >
+ );
+}
diff --git a/src/pages/datasets/[id].tsx b/src/pages/datasets/[id].tsx
new file mode 100644
index 00000000..98ed0c49
--- /dev/null
+++ b/src/pages/datasets/[id].tsx
@@ -0,0 +1,426 @@
+import { GetStaticPaths, GetStaticProps } from 'next';
+import Head from 'next/head';
+import { useRouter } from 'next/router';
+import { ParsedUrlQuery } from 'querystring';
+import React, { ReactNode, useCallback, useMemo } from 'react';
+import { BsFillTrashFill } from 'react-icons/bs';
+import { DatasetDownloadButton } from '../../elements/components/dataset-download-button';
+import { EditableLabel } from '../../elements/components/editable-label';
+import { EditableMarkdownContainer } from '../../elements/components/editable-markdown';
+import { EditableTags } from '../../elements/components/editable-tags';
+import { EditableUsers } from '../../elements/components/editable-users';
+import { ImageCarousel } from '../../elements/components/image-carousel';
+import { LicenseAttributes } from '../../elements/components/license-attributes';
+import { Link } from '../../elements/components/link';
+import { HeadCommon } from '../../elements/head-common';
+import { PageContainer } from '../../elements/page';
+import { useDatasets } from '../../lib/hooks/use-datasets';
+import { useUpdateDataset } from '../../lib/hooks/use-update-dataset';
+import { useUsers } from '../../lib/hooks/use-users';
+import { useWebApi } from '../../lib/hooks/use-web-api';
+import { KNOWN_LICENSES } from '../../lib/license';
+import { Dataset, DatasetId } from '../../lib/schema';
+import { getCachedDatasets } from '../../lib/server/cached';
+import { fileApi } from '../../lib/server/file-data';
+import { IS_DEPLOYED } from '../../lib/site-data';
+import { EMPTY_ARRAY, asArray } from '../../lib/util';
+import { validateDataset } from '../../lib/validate-dataset';
+
+/** Route parameters of the `/datasets/[id]` page. */
+interface Params extends ParsedUrlQuery {
+ id: DatasetId;
+}
+
+/** Page props produced by `getStaticProps`. */
+interface Props {
+ /** Id taken from the route parameter. */
+ datasetId: DatasetId;
+ /** Build-time dataset data; holds at most the entry for `datasetId`. */
+ staticDatasetData: Record;
+}
+
+/** Type guard that keeps only truthy values, narrowing away `null`, `undefined`, `false`, `''` and `0`. */
+function isTrue(value: T | null | undefined | false | '' | 0): value is T {
+    return !!value;
+}
+
+/**
+ * Renders label/value rows; falsy entries in `rows` are dropped first, so callers can write
+ * `condition && [label, value]`.
+ *
+ * NOTE(review): the JSX of this component is missing in this copy of the patch; only the
+ * surrounding logic is documented.
+ */
+function MetadataTable({ rows }: { rows: (false | null | undefined | readonly [string, ReactNode])[] }) {
+ const filteredRows = rows.filter(isTrue);
+ return (
+
+
+
+ {filteredRows.map((row, i) => {
+ const [label, value] = row;
+ // NOTE(review): with a single row, `i === 0` matches first, so that row gets 'pt-3' but never 'pb-3'.
+ const extraPadding = i === 0 ? 'pt-3' : i === filteredRows.length - 1 ? 'pb-3' : '';
+ const isLastRow = i === filteredRows.length - 1;
+ return (
+
+
+ {label}
+
+ {value}
+
+ );
+ })}
+
+
+
+ );
+}
+
+/**
+ * License display/editor for a dataset.
+ *
+ * Outside edit mode it shows the dataset's license id, or "None" when no license is set.
+ * In edit mode a change writes the chosen value to the `license` property through
+ * `updateDatasetProperty`; an empty value is stored as `null`. The options are the keys of
+ * `KNOWN_LICENSES`.
+ *
+ * NOTE(review): the JSX of this component is missing in this copy of the patch.
+ */
+function LicenseProp({
+ dataset,
+ updateDatasetProperty,
+ editMode,
+}: {
+ dataset: Dataset;
+ updateDatasetProperty: ReturnType['updateDatasetProperty'];
+ editMode: boolean;
+}) {
+ if (!editMode) {
+ return dataset.license ? (
+ <>
+ {dataset.license}
+
+ >
+ ) : (
+ <>None>
+ );
+ }
+
+ return (
+ <>
{
+ // NOTE(review): `as never` silences the property/value type check — confirm the intended value type.
+ updateDatasetProperty('license', (e.target.value || null) as never);
+ }}
+ >
+ None
+ {Object.entries(KNOWN_LICENSES).map(([key]) => (
+
+ {key}
+
+ ))}
+
+ {dataset.license && }
+ >
+ );
+}
+
+function PageContent({ datasetId, staticDatasetData }: Props) {
+ const { datasetData } = useDatasets(staticDatasetData);
+ const { userData } = useUsers();
+ const router = useRouter();
+
+ const realDatasetId =
+ datasetId === 'OMDB_ADDDATASET_DUMMY' ? (sessionStorage.getItem('dummy-datasetId') as DatasetId) : datasetId;
+
+ const dataset = datasetData.get(realDatasetId);
+
+ const { webApi, editMode } = useWebApi(IS_DEPLOYED);
+ const { updateDatasetProperty } = useUpdateDataset(webApi, realDatasetId);
+
+ const authors = useMemo(() => {
+ if (!dataset) return [];
+ return asArray(dataset.author);
+ }, [dataset]);
+ const authorsJoined = useMemo(
+ () => authors.map((userId) => userData.get(userId)?.name ?? userId).join(', '),
+ [authors, userData]
+ );
+
+ const title = dataset ? dataset.name : String(realDatasetId);
+ // Preview = first image of the dataset: the `SR` side of a paired image, otherwise its `url`.
+ const getDatasetPreviewImage = (d: Dataset) => {
+     const images = d.images;
+     if (images && images.length > 0) {
+         const first = images[0];
+         return first.type === 'paired' ? first.SR : first.url;
+     }
+     return undefined;
+ };
+ const previewImage = dataset ? getDatasetPreviewImage(dataset) : undefined;
+
+ // Asks for confirmation, deletes the dataset and returns to the dataset list; failures are logged and alerted.
+ const handleDelete = useCallback(async () => {
+     if (!webApi || !dataset) return;
+
+     const confirmed = confirm(`Are you sure you want to delete dataset "${title}"?`);
+     if (!confirmed) return;
+
+     try {
+         await webApi.datasets.delete([realDatasetId]);
+         await router.push('/datasets');
+     } catch (err) {
+         console.error(err);
+         alert(`Error deleting dataset: ${String(err)}`);
+     }
+ }, [webApi, dataset, realDatasetId, title, router]);
+
+ // Validates the current dataset; throws when the API or the dataset is not available.
+ const runDatasetValidation = useCallback(() => {
+     if (webApi && dataset) {
+         return validateDataset(dataset, realDatasetId, webApi);
+     }
+     throw new Error('API or dataset not available');
+ }, [dataset, realDatasetId, webApi]);
+
+ if (!dataset) {
+ return (
+
+
Dataset not found
+
+ Back to datasets
+
+
+ );
+ }
+
+ return (
+ <>
+
+ {previewImage && (
+
+
+
+ )}
+
+
+ {/* Image carousel */}
+
+ updateDatasetProperty('images', images)}
+ />
+
+
+
+ {/* Left Column */}
+
+
+
+ {editMode && (
+
+ {
+ handleDelete().catch(console.error);
+ }}
+ >
+ Delete Dataset
+
+ {IS_DEPLOYED && (
+ <>
+ {
+ navigator.clipboard
+ .readText()
+ .then((text) => {
+ try {
+ const parsedDataset = JSON.parse(text) as Dataset;
+ webApi.datasets
+ .update([[realDatasetId, parsedDataset]])
+ .catch(console.error);
+ } catch (e) {
+ console.error(e);
+ }
+ })
+ .catch(console.error);
+ }}
+ >
+ Load Dataset from clipboard
+
+ {
+ navigator.clipboard
+ .writeText(JSON.stringify(dataset, null, 2))
+ .catch(console.error);
+ }}
+ >
+ Copy Dataset to clipboard
+
+ {
+ try {
+ const errors = runDatasetValidation();
+ if (errors.length > 0) {
+ alert(errors.map(({ message }) => message).join('\n'));
+ return;
+ }
+ const path =
+ 'https://github.com/OpenModelDB/open-model-database/issues/new';
+ const datasetJson = JSON.stringify(dataset, null, 2);
+ const codeBlock = `\`\`\`json\n${datasetJson}\n\`\`\``;
+ const queryParams = new URLSearchParams({
+ title: `[DATASET ADD REQUEST] ${dataset.name}`,
+ body: codeBlock,
+ template: 'dataset-add-request.md',
+ });
+ const url = `${path}?${queryParams.toString()}`;
+ window.open(url, '_blank');
+ } catch (e) {
+ console.error(e);
+ }
+ }}
+ >
+ Submit Dataset as GitHub Issue
+
+ >
+ )}
+
+ )}
+
+ updateDatasetProperty('name', value)}
+ />
+
+
+ {
+ updateDatasetProperty('author', users.length === 1 ? users[0] : users);
+ }}
+ />
+
+
+
+ {editMode &&
tags:
}
+
updateDatasetProperty('tags', tags)}
+ />
+
+
+ updateDatasetProperty('description', value)}
+ />
+
+
+
+
+ {/* Right Column: Sidebar */}
+
+
+ {dataset.url && (
+
+ updateDatasetProperty('url', newUrl)}
+ />
+ {editMode && (
+ {
+ updateDatasetProperty('url', '');
+ }}
+ >
+
+
+ )}
+
+ )}
+ {editMode && !dataset.url && (
+
{
+ const newUrl = prompt('Enter dataset URL');
+ if (newUrl) {
+ updateDatasetProperty('url', newUrl);
+ }
+ }}
+ >
+ + Add URL
+
+ )}
+
+
+
+ ,
+ ],
+ [
+ 'Added on',
+ updateDatasetProperty('date', value || '')}
+ />,
+ ],
+ ]}
+ />
+
+
+
+
+ >
+ );
+}
+
+export default function Page({ datasetId, staticDatasetData }: Props) {
+ return (
+
+
+
+ );
+}
+
+/** Pre-renders one page per dataset id known to the file API; other ids are not served (`fallback: false`). */
+export const getStaticPaths: GetStaticPaths = async () => {
+    const ids = await fileApi.datasets.getIds();
+    const paths = ids.map((id) => ({ params: { id } }));
+
+    return { paths, fallback: false };
+};
+
+/**
+ * Supplies the page with the route's dataset id and, when the dataset exists in the cached
+ * data, its entry as static data.
+ *
+ * @throws Error when the `id` route parameter is missing.
+ */
+export const getStaticProps: GetStaticProps = async (context) => {
+    const datasetId = context.params?.id;
+    if (!datasetId) throw new Error("Missing path param 'id'");
+
+    const datasetData = await getCachedDatasets();
+
+    // Only this one dataset is needed, so no id list has to be built, sorted or looped over.
+    const relevantDatasetData: Record = {};
+    const dataset = datasetData.get(datasetId);
+    if (dataset) {
+        relevantDatasetData[datasetId] = dataset;
+    }
+
+    return {
+        props: {
+            datasetId,
+            staticDatasetData: relevantDatasetData,
+        },
+    };
+};
diff --git a/src/pages/datasets/index.tsx b/src/pages/datasets/index.tsx
new file mode 100644
index 00000000..8cfce7a1
--- /dev/null
+++ b/src/pages/datasets/index.tsx
@@ -0,0 +1,136 @@
+import { GetStaticProps } from 'next';
+import React, { useCallback, useMemo, useState } from 'react';
+import { DatasetResults } from '../../elements/components/dataset-results';
+import { SearchBar } from '../../elements/components/searchbar';
+import { HeadCommon } from '../../elements/head-common';
+import { PageContainer } from '../../elements/page';
+import { TagSelector } from '../../elements/tag-selector';
+import { useDatasets } from '../../lib/hooks/use-datasets';
+import { useSearch } from '../../lib/hooks/use-search';
+import { useTags } from '../../lib/hooks/use-tags';
+import { Dataset, DatasetId } from '../../lib/schema';
+import { createDatasetSearchIndex } from '../../lib/search/create';
+import { compileCondition } from '../../lib/search/logical-condition';
+import { SearchResult } from '../../lib/search/search-index';
+import { tokenize } from '../../lib/search/token';
+import { fileApi } from '../../lib/server/file-data';
+import { TagSelection, getTagCondition } from '../../lib/tag-condition';
+
+/** Page props produced by `getStaticProps`. */
+interface Props {
+ /** All datasets read at build time; handed to `useDatasets` as the static data. */
+ datasetData: Record;
+}
+
+export default function Page({ datasetData: staticDatasetData }: Props) {
+ const { datasetData } = useDatasets(staticDatasetData);
+ const { tagData, tagCategoryData } = useTags();
+
+ // Sorts in place: highest score first, equal scores ordered by id so the order is stable across runs.
+ // A single comparator with an explicit tie-break replaces two passes that relied on sort stability.
+ const sortSearchResults = useCallback((searchResults: SearchResult[]): void => {
+     searchResults.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));
+ }, []);
+
+ const searchIndex = useMemo(() => createDatasetSearchIndex(datasetData), [datasetData]);
+
+ const [selectedDatasets, setSelectedDatasets] = useState(() => {
+ const results: SearchResult[] = [...datasetData.keys()].map((id) => ({ id, score: 0 }));
+ sortSearchResults(results);
+ return results.map((r) => r.id);
+ });
+
+ // Re-runs the search for the given query and tag selection and stores the ids of the sorted results.
+ const updatedSelectedDatasets = useCallback(
+     (searchQuery: string, tags: TagSelection): void => {
+         const tokens = tokenize(searchQuery);
+         const condition = compileCondition(getTagCondition(tags, tagCategoryData.values()));
+
+         const results = searchIndex.retrieve(condition, tokens);
+         sortSearchResults(results);
+         setSelectedDatasets(results.map(({ id }) => id));
+     },
+     [searchIndex, tagCategoryData, sortSearchResults]
+ );
+
+ const { searchQuery, tagSelection, setSearchQuery, setTagSelection } = useSearch(tagData, updatedSelectedDatasets);
+
+ return (
+ <>
+
+
+
+ Training
+
+ Datasets
+
+
+
+
+ Explore and search for datasets used to train upscaling and restoration models.
+
+
+ {/* Search */}
+ setSearchQuery(e.target.value, 400)}
+ onEnter={(e) => {
+ setSearchQuery(e.currentTarget.value, 0);
+ if (window.innerWidth < 600 || window.navigator.maxTouchPoints > 0) {
+ e.currentTarget.blur();
+ const anchor = document.getElementById('scroll-anchor');
+ if (anchor) {
+ const headerOffset = 80;
+ const elementPosition = anchor.getBoundingClientRect().top;
+ const offsetPosition = elementPosition + window.pageYOffset - headerOffset;
+ window.scrollTo({
+ top: offsetPosition,
+ behavior: 'smooth',
+ });
+ }
+ }
+ }}
+ />
+
+ {/* Tags */}
+
+ {
+ setTagSelection(value, style === 'advanced' ? 800 : 0);
+ }}
+ />
+
+
+
+
+ {/* Dataset Cards */}
+ {selectedDatasets.length > 0 ? (
+
+ ) : (
+
+
No datasets found
+
Try changing your search filters
+
+ )}
+
+ >
+ );
+}
+
+/** Loads every dataset through the file API at build time and passes them to the page as a plain object. */
+export const getStaticProps: GetStaticProps = async (_context) => {
+    const allDatasets = await fileApi.datasets.getAll();
+
+    return {
+        props: {
+            datasetData: Object.fromEntries(allDatasets),
+        },
+    };
+};
From 1789c5a9541e2a97bfecc9597ddea266b5969a9e Mon Sep 17 00:00:00 2001
From: renarchi <159624970+renarchi@users.noreply.github.com>
Date: Tue, 16 Jun 2026 01:02:50 +0300
Subject: [PATCH 2/3] fix: Add missing dataset hooks, components, and API
routes to resolve CI build errors
---
data/datasets/div2k-dataset-diverse-2k.json | 26 +++
scripts/validate-db.ts | 5 +
src/elements/components/dataset-card-grid.tsx | 23 ++
src/elements/components/dataset-card.tsx | 205 ++++++++++++++++++
src/elements/components/dataset-results.tsx | 35 +++
src/elements/header.tsx | 67 +++++-
src/lib/data-api.ts | 3 +
src/lib/hooks/use-datasets.ts | 59 +++++
src/lib/hooks/use-update-dataset.ts | 26 +++
src/lib/schema-util.ts | 6 +-
src/lib/schema.ts | 12 +
src/lib/search/create.ts | 24 +-
src/lib/server/cached.ts | 9 +-
src/lib/server/file-data.ts | 102 ++++++++-
src/lib/validate-dataset.ts | 27 +++
src/lib/web-api.ts | 7 +-
src/pages/api/datasets.ts | 4 +
17 files changed, 625 insertions(+), 15 deletions(-)
create mode 100644 data/datasets/div2k-dataset-diverse-2k.json
create mode 100644 src/elements/components/dataset-card-grid.tsx
create mode 100644 src/elements/components/dataset-card.tsx
create mode 100644 src/elements/components/dataset-results.tsx
create mode 100644 src/lib/hooks/use-datasets.ts
create mode 100644 src/lib/hooks/use-update-dataset.ts
create mode 100644 src/lib/validate-dataset.ts
create mode 100644 src/pages/api/datasets.ts
diff --git a/data/datasets/div2k-dataset-diverse-2k.json b/data/datasets/div2k-dataset-diverse-2k.json
new file mode 100644
index 00000000..348b395d
--- /dev/null
+++ b/data/datasets/div2k-dataset-diverse-2k.json
@@ -0,0 +1,26 @@
+{
+ "name": "DIV2K dataset: DIVerse 2K",
+ "author": [
+ "radu-timofte",
+ "eirikur-agustsson",
+ "shuhang-gu",
+ "jiqing-wu",
+ "andrey-ignatov",
+ "luc-van-gool"
+ ],
+ "license": "Academic-Research-Only",
+ "tags": [
+ "dataset:realistic"
+ ],
+ "description": "The DIV2K dataset is divided into:\n\ntrain data:\nstarting from 800 high definition high resolution images we obtain corresponding low resolution images and provide both high and low resolution images for 2, 3, and 4 downscaling factors\n\nvalidation data:\n100 high definition high resolution images are used for generating low resolution corresponding images, the low res are provided from the beginning of the challenge and are meant for the participants to get online feedback from the validation server; the high resolution images will be released when the final phase of the challenge starts.\n\ntest data:\n100 diverse images are used to generate low resolution corresponding images; the participants will receive the low resolution images when the final evaluation phase starts and the results will be announced after the challenge is over and the winners are decided.",
+ "date": "2026-06-15",
+ "url": "https://data.vision.ee.ethz.ch/cvl/DIV2K/",
+ "images": [
+ {
+ "type": "paired",
+ "caption": "900",
+ "LR": "https://i.slow.pics/KWca9Mq7.png",
+ "SR": "https://i.slow.pics/7P7FzpFq.png"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/scripts/validate-db.ts b/scripts/validate-db.ts
index fb1f057a..0d965613 100644
--- a/scripts/validate-db.ts
+++ b/scripts/validate-db.ts
@@ -1,6 +1,7 @@
import fs from 'fs/promises';
import path from 'path';
import { fileApi } from '../src/lib/server/file-data';
+import { validateDataset } from '../src/lib/validate-dataset';
import { Report, validateModel } from '../src/lib/validate-model';
const getAllFiles = async (dir: string): Promise => {
@@ -23,6 +24,7 @@ const getAllFiles = async (dir: string): Promise => {
const getReports = async (): Promise => {
const modelData = await fileApi.models.getAll();
+ const datasetData = await fileApi.datasets.getAll();
const architectureData = await fileApi.architectures.getAll();
const tagData = await fileApi.tags.getAll();
const userData = await fileApi.users.getAll();
@@ -31,6 +33,9 @@ const getReports = async (): Promise => {
for (const [modelId, model] of modelData) {
errors.push(...validateModel(model, modelId, modelData, architectureData, tagData, userData, fileApi));
}
+ for (const [datasetId, dataset] of datasetData) {
+ errors.push(...validateDataset(dataset, datasetId, fileApi));
+ }
const jsonFiles = (await getAllFiles('data/')).filter((file) => file.endsWith('.json'));
await Promise.all(
diff --git a/src/elements/components/dataset-card-grid.tsx b/src/elements/components/dataset-card-grid.tsx
new file mode 100644
index 00000000..f87497e0
--- /dev/null
+++ b/src/elements/components/dataset-card-grid.tsx
@@ -0,0 +1,23 @@
+import React from 'react';
+import { Dataset, DatasetId } from '../../lib/schema';
+import { DatasetCard } from './dataset-card';
+import style from './model-card-grid.module.scss';
+
+export interface DatasetCardGridProps {
+ datasets: readonly (readonly [DatasetId, Dataset])[];
+}
+
+export function DatasetCardGrid({ datasets }: DatasetCardGridProps) {
+ return (
+
+ {datasets.map(([id, dataset], i) => (
+ = 12}
+ />
+ ))}
+
+ );
+}
diff --git a/src/elements/components/dataset-card.tsx b/src/elements/components/dataset-card.tsx
new file mode 100644
index 00000000..c0c09fc1
--- /dev/null
+++ b/src/elements/components/dataset-card.tsx
@@ -0,0 +1,205 @@
+/* eslint-disable @next/next/no-img-element */
+/* eslint-disable react/display-name */
+import React, { memo, useRef, useState } from 'react';
+import { LazyLoadComponent } from 'react-lazy-load-image-component';
+import { useDevicePixelRatio } from '../../lib/hooks/use-device-pixel-ratio';
+import { useUpdateDataset } from '../../lib/hooks/use-update-dataset';
+import { useUsers } from '../../lib/hooks/use-users';
+import { useWebApi } from '../../lib/hooks/use-web-api';
+import { joinList } from '../../lib/react-util';
+import { Dataset, DatasetId, ImageSize, PairedImage } from '../../lib/schema';
+import { asArray, assertNever, joinClasses } from '../../lib/util';
+import { EditableTags } from './editable-tags';
+import { Link } from './link';
+import style from './model-card.module.scss';
+
+export interface DatasetCardProps {
+ id: DatasetId;
+ dataset: Dataset;
+ lazy?: boolean;
+}
+
+// Zero-sized placeholder; used as the initial state of the LR/SR image dimensions in `SideBySideImage`.
+const EMPTY_SIZE: ImageSize = {
+ height: 0,
+ width: 0,
+};
+
+/**
+ * Returns the intrinsic pixel dimensions of an image element, i.e. its
+ * `naturalHeight`/`naturalWidth` rather than its rendered size.
+ */
+function getNaturalSize(image: HTMLImageElement): ImageSize {
+    const { naturalHeight: height, naturalWidth: width } = image;
+    return { height, width };
+}
+
+const SideBySideImage = ({ datasetName, image }: { datasetName: string; image: PairedImage }) => {
+ const [lrDimensions, setLrDimensions] = useState(EMPTY_SIZE);
+ const [srDimensions, setSrDimensions] = useState(EMPTY_SIZE);
+
+ const maxHeight = Math.max(lrDimensions.height, srDimensions.height);
+ const maxWidth = Math.max(lrDimensions.width, srDimensions.width);
+
+ const lrRef = useRef(null);
+ const srRef = useRef(null);
+
+ const dpr = useDevicePixelRatio();
+ const scale = (1 / dpr) * Math.max(1, Math.round(dpr + 0.16));
+
+ return (
+
+
+
{
+ setLrDimensions(getNaturalSize(e.target as HTMLImageElement));
+ }}
+ />
+
+
+
{
+ setSrDimensions(getNaturalSize(e.target as HTMLImageElement));
+ }}
+ />
+
+
+ );
+};
+
+const getDatasetCardImageComponent = (dataset: Dataset | undefined) => {
+ const image = dataset?.images?.[0];
+ if (!dataset || !image) {
+ return No Image
;
+ }
+ switch (image.type) {
+ case 'paired': {
+ return (
+
+ );
+ }
+ case 'standalone': {
+ const imageSrc = image.url;
+ return (
+
+ );
+ }
+ default:
+ return assertNever(image);
+ }
+};
+
+const DatasetCardContent = memo(({ id, dataset }: DatasetCardProps) => {
+ const { userData } = useUsers();
+ const { webApi, editMode } = useWebApi();
+ const { updateDatasetProperty } = useUpdateDataset(webApi, id);
+
+ const isPaired = dataset.images?.[0]?.type === 'paired' && !editMode;
+
+ return (
+
+
+ {getDatasetCardImageComponent(dataset)}
+
+
+
+
+ {dataset.name}
+
+
+ {'by '}
+ {joinList(
+ asArray(dataset.author).map((userId) => (
+
+ {userData.get(userId)?.name ?? `unknown user:${userId}`}
+
+ ))
+ )}
+
+
+ {/* Description */}
+
+ {dataset.description}
+
+
+ {/* Tags */}
+
+ updateDatasetProperty('tags', tags)}
+ />
+
+
+
+ );
+});
+
+export const DatasetCard = memo(({ id, dataset, lazy = false }: DatasetCardProps) => {
+ const { editMode } = useWebApi();
+
+ const inner = (
+
+
+
+ );
+
+ if (!lazy) return inner;
+
+ return (
+
+ }
+ >
+ {inner}
+
+ );
+});
diff --git a/src/elements/components/dataset-results.tsx b/src/elements/components/dataset-results.tsx
new file mode 100644
index 00000000..6b950dc8
--- /dev/null
+++ b/src/elements/components/dataset-results.tsx
@@ -0,0 +1,35 @@
+import React, { memo, useMemo } from 'react';
+import { Dataset, DatasetId } from '../../lib/schema';
+import { DatasetCardGrid } from './dataset-card-grid';
+import style from './model-results.module.scss';
+
+interface DatasetResultsProps {
+ datasetData: ReadonlyMap;
+ datasets: readonly DatasetId[];
+}
+
+// eslint-disable-next-line react/display-name
+export const DatasetResults = memo(({ datasets, datasetData }: DatasetResultsProps) => {
+ const dataPairs = useMemo(() => {
+ const pairs: (readonly [DatasetId, Dataset])[] = [];
+ for (const id of datasets) {
+ const data = datasetData.get(id);
+ if (data) {
+ pairs.push([id, data]);
+ }
+ }
+ return pairs;
+ }, [datasets, datasetData]);
+
+ return (
+ <>
+
+
+ Found {datasets.length} dataset
+ {datasets.length === 1 ? '' : 's'}
+
+
+
+ >
+ );
+});
diff --git a/src/elements/header.tsx b/src/elements/header.tsx
index 47c98147..71ee5401 100644
--- a/src/elements/header.tsx
+++ b/src/elements/header.tsx
@@ -59,17 +59,39 @@ export function Header({ searchBar }: HeaderProps) {
>
How To Upscale
+
+ Datasets
+
{editMode && (
-
- Add Model
-
+ <>
+
+ Add Model
+
+
+ Add Dataset
+
+ >
)}
@@ -146,6 +168,7 @@ export function Header({ searchBar }: HeaderProps) {
function HeaderDrawer() {
const [showDrawer, setShowDrawer] = useState(false);
+ const { editMode } = useEditModeToggle();
return (
<>
@@ -214,6 +237,30 @@ function HeaderDrawer() {
How To Upscale
+ Browse
+
+
Datasets
+
+ {editMode && (
+ <>
+ Edit
+
+
Add Model
+
+
+
Add Dataset
+
+ >
+ )}
Links
;
readonly architectures: CollectionApi
;
readonly collections: CollectionApi;
+ readonly datasets: CollectionApi;
}
/**
diff --git a/src/lib/hooks/use-datasets.ts b/src/lib/hooks/use-datasets.ts
new file mode 100644
index 00000000..453ffc13
--- /dev/null
+++ b/src/lib/hooks/use-datasets.ts
@@ -0,0 +1,59 @@
+import { useCallback, useEffect, useMemo, useState } from 'react';
+import { Dataset, DatasetId } from '../schema';
+import { EMPTY_MAP, typedEntries } from '../util';
+import { addUpdateListener, getWebApi, startListeningForUpdates } from '../web-api';
+
+export interface UseDatasets {
+ readonly datasetData: ReadonlyMap;
+}
+
+/**
+ * Provides the dataset collection to React components.
+ *
+ * The hook starts from the statically provided `datasets` (if any). It then re-fetches
+ * all datasets through the web API, when one is available, on mount and every time the
+ * registered update listener fires.
+ *
+ * Dataset objects whose content did not change keep their previous identity, and the
+ * map itself is only replaced when something actually changed. This way memoized
+ * consumers neither re-render nor recompute derived data on no-op refreshes.
+ *
+ * @param datasets Optional static data keyed by dataset id.
+ * @returns The current dataset map.
+ */
+export function useDatasets(datasets?: Readonly<Record<DatasetId, Dataset>>): UseDatasets {
+    const staticData: ReadonlyMap<DatasetId, Dataset> = useMemo(
+        () => (datasets ? new Map(typedEntries(datasets)) : EMPTY_MAP),
+        [datasets]
+    );
+    const [data, setData] = useState(staticData);
+
+    const update = useCallback((value: ReadonlyMap<DatasetId, Dataset>): void => {
+        setData((prev) => {
+            if (prev === value) return prev;
+
+            const newData = new Map<DatasetId, Dataset>();
+            // Stays true only while `value` has the same ids, in the same order, with equal contents.
+            let unchanged = prev.size === value.size;
+            const prevIds = prev.keys();
+            for (const [id, dataset] of value) {
+                const old = prev.get(id);
+                if (old && areEqual(old, dataset)) {
+                    // Reuse the old object so that memoized children see a stable reference.
+                    newData.set(id, old);
+                    if (prevIds.next().value !== id) {
+                        unchanged = false;
+                    }
+                } else {
+                    newData.set(id, dataset);
+                    unchanged = false;
+                }
+            }
+            // Returning `prev` lets React skip the state update entirely.
+            return unchanged ? prev : newData;
+        });
+    }, []);
+
+    const updateWithWebApi = useCallback((): void => {
+        getWebApi()
+            .then(async (webApi) => {
+                // No web API available: keep whatever data we currently have.
+                if (!webApi) return;
+                const datasets = await webApi.datasets.getAll();
+                update(datasets);
+            })
+            .catch((e) => console.error(e));
+    }, [update]);
+
+    useEffect(() => {
+        update(staticData);
+        updateWithWebApi();
+    }, [update, updateWithWebApi, staticData]);
+
+    useEffect(() => {
+        startListeningForUpdates();
+        // `addUpdateListener` returns the cleanup function for this effect.
+        return addUpdateListener(updateWithWebApi);
+    }, [updateWithWebApi]);
+
+    return { datasetData: data };
+}
+
+/**
+ * Structural equality for datasets, decided by comparing their JSON serializations.
+ * Note that this makes the result sensitive to property order.
+ */
+function areEqual(a: Dataset, b: Dataset): boolean {
+    const [left, right] = [a, b].map((dataset) => JSON.stringify(dataset));
+    return left === right;
+}
diff --git a/src/lib/hooks/use-update-dataset.ts b/src/lib/hooks/use-update-dataset.ts
new file mode 100644
index 00000000..eebde0b0
--- /dev/null
+++ b/src/lib/hooks/use-update-dataset.ts
@@ -0,0 +1,26 @@
+import { useMemo } from 'react';
+import { DBApi } from '../data-api';
+import { Dataset, DatasetId } from '../schema';
+import { noop } from '../util';
+
+export type UpdateDatasetPropertyFn = (key: K, value: Dataset[K]) => void;
+
+export interface UseUpdateDataset {
+ updateDatasetProperty: UpdateDatasetPropertyFn;
+}
+
+/**
+ * Creates a setter that changes a single property of the dataset `datasetId`.
+ *
+ * The setter fetches the current dataset through `webApi`, assigns the new value and
+ * writes the dataset back. It does not wait for the write; failures are logged to the
+ * console instead of being thrown. Without a `webApi` the setter is `noop`.
+ *
+ * @param webApi The API used to read and write the dataset, if available.
+ * @param datasetId Id of the dataset to modify.
+ */
+export function useUpdateDataset(webApi: DBApi | undefined, datasetId: DatasetId): UseUpdateDataset {
+    const updateDatasetProperty = useMemo(() => {
+        if (!webApi) {
+            return noop;
+        }
+
+        return <K extends keyof Dataset>(key: K, value: Dataset[K]) => {
+            // Fire-and-forget read-modify-write.
+            (async () => {
+                const current = await webApi.datasets.get(datasetId);
+                current[key] = value;
+                await webApi.datasets.update([[datasetId, current]]);
+            })().catch((e) => console.error(e));
+        };
+    }, [webApi, datasetId]);
+
+    return { updateDatasetProperty };
+}
diff --git a/src/lib/schema-util.ts b/src/lib/schema-util.ts
index b31bcf42..3d5f8669 100644
--- a/src/lib/schema-util.ts
+++ b/src/lib/schema-util.ts
@@ -1,6 +1,7 @@
-import { ArchId, ModelId, TagId, UserId } from './schema';
+import { ArchId, DatasetId, ModelId, TagId, UserId } from './schema';
export const ModelIdPattern = /^\d+x-[a-zA-Z0-9]+(?:-[a-zA-Z0-9]+)*$/;
+// Lowercase-only, like `UserIdPattern`: `canonicalizeDatasetId` normalizes ids with
+// `/[^a-z0-9]/`, so an id containing uppercase letters can never be canonical and would
+// be reported as an error by dataset validation.
+export const DatasetIdPattern = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
export const UserIdPattern = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
export const ArchIdPattern = /^[a-z0-9]+(?:-[a-z0-9]+|[+])*$/;
export const TagIdPattern = /^(?:[a-z0-9]+:)?[a-z0-9]+(?:-[a-z0-9]+|[+])*$/;
@@ -25,6 +26,9 @@ export function canonicalizeModelId(id: string): ModelId {
return id as ModelId;
}
+/**
+ * Converts an arbitrary string into the canonical form of a dataset id.
+ *
+ * The work is delegated to `lowerDashes`, which receives `/[^a-z0-9]/` as the pattern of
+ * characters that are not allowed in an id (presumably they end up as dashes; see
+ * `lowerDashes` for the exact rules).
+ */
+export function canonicalizeDatasetId(id: string): DatasetId {
+    const disallowed = /[^a-z0-9]/;
+    const canonical = lowerDashes(id, disallowed);
+    return canonical as DatasetId;
+}
export function canonicalizeUserId(id: string): UserId {
return lowerDashes(id, /[^a-z0-9]/) as UserId;
}
diff --git a/src/lib/schema.ts b/src/lib/schema.ts
index 131baed4..0f54d50f 100644
--- a/src/lib/schema.ts
+++ b/src/lib/schema.ts
@@ -14,6 +14,7 @@ export type TagId = string & { readonly TagId: never };
export type TagCategoryId = string & { readonly TagCategoryId: never };
export type ArchId = string & { readonly ArchId: never };
export type CollectionId = string & { readonly CollectionId: never };
+export type DatasetId = string & { readonly DatasetId: never };
export type MarkDownString = string;
export interface Model extends Partial {
@@ -142,3 +143,14 @@ export interface Collection {
models: ModelId[];
author: UserId | UserId[];
}
+
+/** A dataset entry of the database. */
+export interface Dataset {
+ /** Display name of the dataset. */
+ name: string;
+ /** User id(s) of the author(s); either a single id or a list of ids. */
+ author: UserId | UserId[];
+ /** License of the dataset; may be `null`. */
+ license: SPDXLicense | null;
+ /** Ids of the tags assigned to the dataset. */
+ tags: TagId[];
+ description: MarkDownString;
+ /** NOTE(review): the bundled DIV2K entry uses `YYYY-MM-DD`; confirm this is the required format. */
+ date: string;
+ /** Link associated with the dataset (the DIV2K entry points at its project page). */
+ url: string;
+ /** Optional images; the dataset card uses the first one as its preview. */
+ images?: Image[];
+}
diff --git a/src/lib/search/create.ts b/src/lib/search/create.ts
index ecd252f1..769b69be 100644
--- a/src/lib/search/create.ts
+++ b/src/lib/search/create.ts
@@ -1,5 +1,5 @@
import { deriveTags } from '../derive-tags';
-import { Model, ModelId, TagId } from '../schema';
+import { Dataset, DatasetId, Model, ModelId, TagId } from '../schema';
import { asArray } from '../util';
import { CorpusEntry, SearchIndex } from './search-index';
@@ -31,3 +31,25 @@ export function createModelSearchIndex(modelData: ReadonlyMap) {
})
);
}
+
+/**
+ * Builds a search index with one corpus entry per dataset.
+ *
+ * Each entry carries the dataset's tags plus three lowercased text fields with
+ * different weights: id and name (8), author ids (4) and the description (1).
+ */
+export function createDatasetSearchIndex(datasetData: ReadonlyMap) {
+ return new SearchIndex(
+ [...datasetData].map(([id, dataset]): CorpusEntry => {
+ return {
+ id,
+ tags: new Set(dataset.tags),
+ texts: [
+ {
+ // `filter(Boolean)` drops empty strings so they do not produce blank lines.
+ text: [id, dataset.name].filter(Boolean).join('\n').toLowerCase(),
+ weight: 8,
+ },
+ {
+ // `author` may be a single id or a list; `asArray` normalizes it to a list.
+ text: asArray(dataset.author).filter(Boolean).join('\n').toLowerCase(),
+ weight: 4,
+ },
+ { text: dataset.description.toLowerCase(), weight: 1 },
+ ],
+ };
+ })
+ );
+}
diff --git a/src/lib/server/cached.ts b/src/lib/server/cached.ts
index e2e89c63..6bd31b79 100644
--- a/src/lib/server/cached.ts
+++ b/src/lib/server/cached.ts
@@ -1,4 +1,4 @@
-import { Collection, CollectionId, Model, ModelId } from '../schema';
+import { Collection, CollectionId, Dataset, DatasetId, Model, ModelId } from '../schema';
import { fileApi, getFileApiMutationCounterUnsynchronized } from './file-data';
let cachedMutationCounter = 0;
@@ -29,3 +29,10 @@ export const getCachedModels = cached((): Promise> =
export const getCachedCollections = cached(
(): Promise> => fileApi.collections.getAll()
);
+
+/**
+ * This is a cached version of `fileApi.datasets.getAll()`.
+ *
+ * The caller is not allowed to mutate the returned map or any of its values.
+ */
+export const getCachedDatasets = cached((): Promise> => fileApi.datasets.getAll());
diff --git a/src/lib/server/file-data.ts b/src/lib/server/file-data.ts
index 13873bb7..eb6c14a3 100644
--- a/src/lib/server/file-data.ts
+++ b/src/lib/server/file-data.ts
@@ -1,5 +1,5 @@
import { FSWatcher } from 'chokidar';
-import { readFile, readdir, rename, unlink, writeFile } from 'fs/promises';
+import { mkdir, readFile, readdir, rename, unlink, writeFile } from 'fs/promises';
import { join } from 'path';
import { CollectionApi, DBApi, SynchronizedCollection, notifyOnWrite } from '../data-api';
import { RWLock } from '../lock';
@@ -8,6 +8,8 @@ import {
ArchId,
Collection,
CollectionId,
+ Dataset,
+ DatasetId,
Model,
ModelId,
Tag,
@@ -22,6 +24,7 @@ import { JsonFile, fileExists } from './fs-util';
export const DATA_DIR = './data/';
const MODEL_DIR = join(DATA_DIR, 'models');
+const DATASET_DIR = join(DATA_DIR, 'datasets');
const USERS_JSON = join(DATA_DIR, 'users.json');
const TAGS_JSON = join(DATA_DIR, 'tags.json');
const TAG_CATEGORIES_JSON = join(DATA_DIR, 'tag-categories.json');
@@ -219,6 +222,101 @@ const modelApi: CollectionApi = {
},
};
+/** Returns the path of the JSON file inside `DATASET_DIR` that stores the dataset with the given id. */
+function getDatasetDataPath(id: DatasetId): string {
+    const fileName = `${id}.json`;
+    return join(DATASET_DIR, fileName);
+}
+
+/**
+ * Lists the ids of all stored datasets, i.e. the names of the `.json` files in
+ * `DATASET_DIR` without their extension.
+ *
+ * If the directory does not exist yet, it is created first, so the result is then empty.
+ */
+async function getAllDatasetIds(): Promise<DatasetId[]> {
+    const dirExists = await fileExists(DATASET_DIR);
+    if (!dirExists) {
+        await mkdir(DATASET_DIR, { recursive: true });
+    }
+
+    const extension = '.json';
+    const ids: DatasetId[] = [];
+    for (const file of await readdir(DATASET_DIR)) {
+        if (file.endsWith(extension)) {
+            ids.push(file.slice(0, -extension.length) as DatasetId);
+        }
+    }
+    return ids;
+}
+
+/**
+ * Reads and parses the JSON file of one dataset.
+ *
+ * The parsed value is cast to `Dataset` without being validated. The promise rejects if
+ * the file cannot be read or does not contain valid JSON.
+ */
+async function getSingleDatasetData(id: DatasetId): Promise<Dataset> {
+    const file = getDatasetDataPath(id);
+    const json = await readFile(file, 'utf-8');
+    return JSON.parse(json) as Dataset;
+}
+
+/** Loads the given datasets concurrently; the result has the same order as `ids`. */
+function getDatasetData(ids: readonly DatasetId[]): Promise<Dataset[]> {
+    const pending = ids.map((id) => getSingleDatasetData(id));
+    return Promise.all(pending);
+}
+
+// Key order passed to `sortObjectKeys` in `writeDatasetData` before a dataset is serialized.
+// `satisfies` makes the compiler reject any entry that is not a key of `Dataset`.
+const datasetKeyOrder = [
+ 'name',
+ 'author',
+ 'license',
+ 'tags',
+ 'description',
+ 'date',
+ 'url',
+ 'images',
+] as const satisfies readonly (keyof Dataset)[];
+
+/**
+ * Serializes a dataset to its JSON file, using 4 spaces of indentation.
+ *
+ * Before serializing, `sortObjectKeys` is applied to the dataset and to each of its
+ * images, and the tag list is sorted with `compareTagId`.
+ *
+ * NOTE: `tags` is sorted in place and the results of `sortObjectKeys` are not used, so
+ * this function modifies the object passed by the caller despite the `Readonly` type.
+ */
+async function writeDatasetData(id: DatasetId, dataset: Readonly<Dataset>): Promise<void> {
+    sortObjectKeys(dataset, datasetKeyOrder);
+    (dataset.images || []).forEach((image) => {
+        sortObjectKeys(image, ['type', 'caption', 'LR', 'SR', 'url', 'thumbnail']);
+    });
+    dataset.tags.sort(compareTagId);
+
+    const json = JSON.stringify(dataset, undefined, 4);
+    await writeFile(getDatasetDataPath(id), json, 'utf-8');
+}
+
+// File-backed collection API for datasets: every dataset is one `<id>.json` file in `DATASET_DIR`.
+const datasetApi: CollectionApi = {
+ get: getSingleDatasetData,
+ getIds: getAllDatasetIds,
+ // Reads every dataset file and returns the contents keyed by dataset id.
+ async getAll(): Promise> {
+ const ids = await getAllDatasetIds();
+ const data = await getDatasetData(ids);
+ return new Map(ids.map((id, i) => [id, data[i]]));
+ },
+
+ // Writes the given datasets, creating the dataset directory first if it is missing.
+ async update(updates: Iterable): Promise {
+ if (!(await fileExists(DATASET_DIR))) {
+ await mkdir(DATASET_DIR, { recursive: true });
+ }
+ await Promise.all(
+ // Going through a Map means an id that occurs several times is written once, with its last value.
+ [...new Map(updates)].map(async ([id, value]) => {
+ await writeDatasetData(id, value);
+ console.warn(`Updated dataset data of ${id}`);
+ })
+ );
+ },
+ // Deletes the files of the given datasets; ids without a file only produce a warning.
+ async delete(ids: Iterable): Promise {
+ await Promise.all(
+ [...ids].map(async (id) => {
+ const file = getDatasetDataPath(id);
+ if (await fileExists(file)) {
+ await unlink(file);
+ console.warn(`Delete dataset data of ${id}`);
+ } else {
+ console.warn(`Dataset data of ${id} cannot be deleted because it doesn't exist`);
+ }
+ })
+ );
+ },
+ // Renames the file of dataset `id` to `newId`. Throws if `id` does not exist or `newId` is already taken.
+ async changeId(id: DatasetId, newId: DatasetId): Promise {
+ if (id === newId) return;
+
+ const datasetIds = await getAllDatasetIds();
+ if (!datasetIds.includes(id)) {
+ throw new Error(`Cannot change dataset id ${id} because it does not exist`);
+ }
+ if (datasetIds.includes(newId)) {
+ throw new Error(`Cannot change dataset id ${id} to ${newId} because ${newId} already exists`);
+ }
+
+ const from = getDatasetDataPath(id);
+ const to = getDatasetDataPath(newId);
+ // The rename goes through an intermediate `<target>.tmp` file.
+ const temp = `${to}.tmp`;
+ await rename(from, temp);
+ await rename(temp, to);
+ },
+};
+
function ofJsonFile(
file: JsonFile>,
{
@@ -378,6 +476,7 @@ export const fileApi: DBApi = {
tagCategories: wrapCollection(tagCategoryApi),
architectures: wrapCollection(archApi),
collections: wrapCollection(collectionApi),
+ datasets: wrapCollection(datasetApi),
};
export function getFileApiMutationCounter(): Promise {
@@ -389,6 +488,7 @@ export function getFileApiMutationCounterUnsynchronized(): number {
const watcher = new FSWatcher({ persistent: false, ignorePermissionErrors: true, usePolling: true });
watcher.add(MODEL_DIR);
+watcher.add(DATASET_DIR);
watcher.on('add', addMutation);
watcher.on('unlink', addMutation);
watcher.on('change', addMutation);
diff --git a/src/lib/validate-dataset.ts b/src/lib/validate-dataset.ts
new file mode 100644
index 00000000..35e64ccf
--- /dev/null
+++ b/src/lib/validate-dataset.ts
@@ -0,0 +1,27 @@
+import { DBApi } from './data-api';
+import { Dataset, DatasetId } from './schema';
+import { canonicalizeDatasetId } from './schema-util';
+import { Report } from './validate-model';
+
+/**
+ * Checks a single dataset for problems and returns one report per problem found.
+ *
+ * Two things are checked: that the dataset id equals its canonical form, and that no
+ * image carries a `thumbnail`. Both reports come with an automatic fix.
+ *
+ * @param dataset The dataset to check.
+ * @param datasetId The id under which the dataset is stored.
+ * @param api Database API used by the fixes.
+ * @returns The reports; empty if nothing was found.
+ */
+export const validateDataset = (dataset: Dataset, datasetId: DatasetId, api: DBApi): Report[] => {
+    const reports: Report[] = [];
+    const prefix = `Dataset ${datasetId}: `;
+
+    const canonicalId = canonicalizeDatasetId(datasetId);
+    if (canonicalId !== datasetId) {
+        reports.push({
+            message: `${prefix}Dataset ID should be ${canonicalId}`,
+            fix: () => api.datasets.changeId(datasetId, canonicalId),
+        });
+    }
+
+    const hasThumbnail = (dataset.images ?? []).some((image) => image.thumbnail);
+    if (hasThumbnail) {
+        reports.push({
+            message: `${prefix}Thumbnails are automatically generated and should not appear in the database`,
+            fix: async () => {
+                // Re-read the dataset so the fix operates on the stored state at the time it runs.
+                const stored = await api.datasets.get(datasetId);
+                for (const image of stored.images || []) {
+                    delete image.thumbnail;
+                }
+                await api.datasets.update([[datasetId, stored]]);
+            },
+        });
+    }
+
+    return reports;
+};
diff --git a/src/lib/web-api.ts b/src/lib/web-api.ts
index 3e0911bf..be270737 100644
--- a/src/lib/web-api.ts
+++ b/src/lib/web-api.ts
@@ -6,6 +6,8 @@ import {
ArchId,
Collection,
CollectionId,
+ Dataset,
+ DatasetId,
Model,
ModelId,
Tag,
@@ -106,13 +108,14 @@ async function createMapCollection(path: string): Promise => {
if (IS_DEPLOYED) {
- const [models, users, tags, tagCategories, architectures, collections] = await Promise.all([
+ const [models, users, tags, tagCategories, architectures, collections, datasets] = await Promise.all([
createMapCollection('/api/v1/models.json'),
createMapCollection('/api/v1/users.json'),
createMapCollection('/api/v1/tags.json'),
createMapCollection('/api/v1/tagCategories.json'),
createMapCollection('/api/v1/architectures.json'),
createMapCollection('/api/v1/collections.json'),
+ createMapCollection('/api/v1/datasets.json'),
]);
return {
@@ -122,6 +125,7 @@ const getDbAPI = async (): Promise => {
tagCategories,
architectures,
collections,
+ datasets,
};
}
return {
@@ -131,6 +135,7 @@ const getDbAPI = async (): Promise => {
tagCategories: createWebCollection('/api/tag-categories'),
architectures: createWebCollection('/api/architectures'),
collections: createWebCollection('/api/collections'),
+ datasets: createWebCollection('/api/datasets'),
};
};
diff --git a/src/pages/api/datasets.ts b/src/pages/api/datasets.ts
new file mode 100644
index 00000000..d771d3e9
--- /dev/null
+++ b/src/pages/api/datasets.ts
@@ -0,0 +1,4 @@
+import { handleJsonApi } from '../../lib/server/api-impl';
+import { fileApi } from '../../lib/server/file-data';
+
+export default handleJsonApi(fileApi.datasets);
From 0a2c0e876bf6070f7411af54f2023115c07a1d16 Mon Sep 17 00:00:00 2001
From: renarchi <159624970+renarchi@users.noreply.github.com>
Date: Tue, 16 Jun 2026 01:08:36 +0300
Subject: [PATCH 3/3] fix: Remove unnecessary conditionals to resolve CI
linting warnings
---
.../components/dataset-download-button.tsx | 24 +++++++++----------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/src/elements/components/dataset-download-button.tsx b/src/elements/components/dataset-download-button.tsx
index a5bcb216..380c58c9 100644
--- a/src/elements/components/dataset-download-button.tsx
+++ b/src/elements/components/dataset-download-button.tsx
@@ -159,21 +159,19 @@ export const DatasetDownloadButton = ({ url, readonly, onChange }: DatasetDownlo
) : (
)}
- {!readonly && (
- {
- e.stopPropagation();
- if (onChange) {
- onChange('');
- }
- }}
- >
-
-
- )}
+ {
+ e.stopPropagation();
+ if (onChange) {
+ onChange('');
+ }
+ }}
+ >
+
+
)}
- {!readonly && url === '' && (
+ {url === '' && (