From 30fac74f3071f6433068af85c3e000b430f8ecd8 Mon Sep 17 00:00:00 2001 From: renarchi <159624970+renarchi@users.noreply.github.com> Date: Tue, 16 Jun 2026 00:37:58 +0300 Subject: [PATCH 1/3] feat: Add Datasets page and DIV2K --- data/tag-categories.json | 12 + data/tags.json | 16 + data/users.json | 18 + .../components/dataset-download-button.tsx | 197 ++++++++ src/elements/components/editable-tags.tsx | 39 +- src/elements/tag-selector.tsx | 47 +- src/lib/license.ts | 6 + src/lib/util.ts | 2 +- src/pages/add-dataset.tsx | 280 ++++++++++++ src/pages/datasets/[id].tsx | 426 ++++++++++++++++++ src/pages/datasets/index.tsx | 136 ++++++ 11 files changed, 1162 insertions(+), 17 deletions(-) create mode 100644 src/elements/components/dataset-download-button.tsx create mode 100644 src/pages/add-dataset.tsx create mode 100644 src/pages/datasets/[id].tsx create mode 100644 src/pages/datasets/index.tsx diff --git a/data/tag-categories.json b/data/tag-categories.json index 193cc209..678b08fd 100644 --- a/data/tag-categories.json +++ b/data/tag-categories.json @@ -162,5 +162,17 @@ "helper:invalid-scale", "helper:invalid-channels" ] + }, + "dataset": { + "name": "Dataset", + "description": "Tags specific to datasets", + "order": 9, + "simple": true, + "tags": [ + "dataset:realistic", + "dataset:anime", + "dataset:manga", + "dataset:game-textures" + ] } } \ No newline at end of file diff --git a/data/tags.json b/data/tags.json index 07535cee..00736314 100644 --- a/data/tags.json +++ b/data/tags.json @@ -292,6 +292,22 @@ "name": "RGBA", "description": "The model upscales images with transparency." }, + "dataset:anime": { + "name": "Anime", + "description": "Anime dataset" + }, + "dataset:game-textures": { + "name": "Game Textures", + "description": "Game textures dataset" + }, + "dataset:manga": { + "name": "Manga", + "description": "Manga dataset" + }, + "dataset:realistic": { + "name": "Realistic", + "description": "Realistic dataset" + }, "helper:invalid-channels": { "name": "Invalid channels", "description": "The number of input or output channels of the model is not valid." diff --git a/data/users.json b/data/users.json index deec592b..46a6889a 100644 --- a/data/users.json +++ b/data/users.json @@ -14,6 +14,9 @@ "alsa": { "name": "Alsa" }, + "andrey-ignatov": { + "name": "Andrey Ignatov" + }, "aptitude": { "name": "aptitude" }, @@ -83,6 +86,9 @@ "dinjerr": { "name": "DinJerr" }, + "eirikur-agustsson": { + "name": "Eirikur Agustsson" + }, "eula": { "name": "end user license agreement#9756" }, @@ -128,6 +134,9 @@ "jingyunliang": { "name": "JingyunLiang" }, + "jiqing-wu": { + "name": "Jiqing Wu" + }, "jixiaozhong": { "name": "jixiaozhong" }, @@ -164,6 +173,9 @@ "loinne": { "name": "Loinne" }, + "luc-van-gool": { + "name": "Luc Van Gool" + }, "lyonhrt": { "name": "LyonHrt" }, @@ -218,6 +230,9 @@ "pokepress": { "name": "pokepress" }, + "radu-timofte": { + "name": "Radu Timofte" + }, "rastrum": { "name": "Rastrum" }, @@ -248,6 +263,9 @@ "sharekhan": { "name": "SharekhaN" }, + "shuhang-gu": { + "name": "Shuhang Gu" + }, "sirosky": { "name": "Sirosky" }, diff --git a/src/elements/components/dataset-download-button.tsx b/src/elements/components/dataset-download-button.tsx new file mode 100644 index 00000000..a5bcb216 --- /dev/null +++ b/src/elements/components/dataset-download-button.tsx @@ -0,0 +1,197 @@ +import { Menu, Transition } from '@headlessui/react'; +import { Fragment } from 'react'; +import { BsChevronDown, BsFillTrashFill } from 'react-icons/bs'; +import { FiExternalLink } from 'react-icons/fi'; +import { SiDropbox, SiGithub, SiGoogledrive, SiMega, SiMicrosoftonedrive } from 'react-icons/si'; +import Logo from '../../../public/logo.svg'; +import { isSelfHosted, toDirectDownloadLink } from '../../lib/download-util'; +import { joinClasses } from '../../lib/util'; +import { Link } from './link'; + +type DatasetDownloadButtonProps = { + url: string; + readonly?: boolean; + onChange?: (url: string) => void; +}; + +const hostFromUrl = (url: string): string => { + try { + const parsedUrl = new URL(url); + const domainParts = parsedUrl.hostname.split('.'); + const domainAndTld = domainParts.slice(domainParts.length - 2).join('.'); + + if (domainAndTld === 'github.com') { + return 'GitHub'; + } + if (parsedUrl.hostname === 'drive.google.com') { + return 'Google Drive'; + } + if (parsedUrl.hostname === 'cdn.discordapp.com') { + return 'Discord'; + } + if (domainAndTld === '1drv.ms') { + return 'OneDrive'; + } + if (domainAndTld === 'mega.nz') { + return 'Mega'; + } + if (domainAndTld === 'mediafire.com') { + return 'MediaFire'; + } + if (domainAndTld === 'pcloud.link') { + return 'pCloud'; + } + if (domainAndTld === 'icedrive.net') { + return 'Icedrive'; + } + if (domainAndTld === 'dropbox.com') { + return 'Dropbox'; + } + return parsedUrl.hostname; + } catch (e) { + console.debug(e); + return 'an unknown hoster'; + } +}; + +const iconFromHost = (host: string) => { + switch (host) { + case 'GitHub': + return ; + case 'Google Drive': + return ; + case 'OneDrive': + return ; + case 'Mega': + return ; + case 'Dropbox': + return ; + default: + return ; + } +}; + +const isMirrorExternal = (url: string) => !isSelfHosted(url); + +export const DatasetDownloadButton = ({ url, readonly, onChange }: DatasetDownloadButtonProps) => { + const isExternal = isMirrorExternal(url); + const host = hostFromUrl(url); + + const showMenu = !readonly; + + return ( +
+ +
+ {isExternal ? ( + + ) : ( + + + + )} + Visit Dataset Link + {isExternal && ( +
+ Hosted by {host} +
+ )} +
+ + + {showMenu && ( + +
+ + + +
+ + +
+ {url !== '' && ( + { + const newUrl = prompt('Edit URL', url); + if (newUrl !== null && onChange) { + onChange(newUrl); + } + }} + > + {isExternal ? ( +
+
+ {iconFromHost(host)} +
+
{host}
+
+ ) : ( + + )} + {!readonly && ( + + )} +
+ )} + {!readonly && url === '' && ( + { + const newUrl = prompt('Enter a new URL'); + if (newUrl && onChange) { + onChange(newUrl); + } + }} + > + + Add URL + + )} +
+
+
+
+ )} +
+ ); +}; diff --git a/src/elements/components/editable-tags.tsx b/src/elements/components/editable-tags.tsx index ac4b0494..eefc69bd 100644 --- a/src/elements/components/editable-tags.tsx +++ b/src/elements/components/editable-tags.tsx @@ -1,6 +1,6 @@ import { Popover, Transition } from '@headlessui/react'; import Link from 'next/link'; -import { Fragment, useEffect, useState } from 'react'; +import { Fragment, useEffect, useMemo, useState } from 'react'; import { BsChevronDown } from 'react-icons/bs'; import { useTags } from '../../lib/hooks/use-tags'; import { addImpliedTags, removeImplyingTags } from '../../lib/implied-tags'; @@ -11,13 +11,20 @@ import style from './editable-tags.module.scss'; export interface SmallTagProps { tagId: TagId; name: string; + context?: 'models' | 'datasets'; } -export function SmallTag({ tagId, name }: SmallTagProps) { +export function SmallTag({ tagId, name, context = 'models' }: SmallTagProps) { return ( {name} @@ -28,14 +35,16 @@ export interface EditableTagsProps { tags: readonly TagId[]; onChange?: (value: TagId[]) => void; readonly?: boolean; + context?: 'models' | 'datasets'; } -export function EditableTags({ tags, onChange, readonly }: EditableTagsProps) { +export function EditableTags({ tags, onChange, readonly, context = 'models' }: EditableTagsProps) { const { tagData } = useTags(); return (
{!readonly && onChange && ( @@ -44,6 +53,7 @@ export function EditableTags({ tags, onChange, readonly }: EditableTagsProps) { const name = tagData.get(tagId)?.name ?? `unknown tag:${tagId}`; return ( void }) { +function EditTags({ + tags, + onChange, + context = 'models', +}: { + tags: readonly TagId[]; + onChange: (value: TagId[]) => void; + context?: 'models' | 'datasets'; +}) { const { tagData, categoryOrder } = useTags(); + const filteredCategoryOrder = useMemo(() => { + if (context === 'datasets') { + return categoryOrder.filter(([id]) => id === 'dataset'); + } + return categoryOrder.filter(([id]) => id !== 'dataset'); + }, [categoryOrder, context]); + const [currentTags, setCurrentTags] = useState(tags); useEffect(() => { setCurrentTags(tags); @@ -104,7 +129,7 @@ function EditTags({ tags, onChange }: { tags: readonly TagId[]; onChange: (value }`} >
- {categoryOrder.map(([categoryId, category]) => { + {filteredCategoryOrder.map(([categoryId, category]) => { const manual = category.tags.filter((tagId) => !isDerivedTag(tagId)); if (manual.length === 0) { return ; diff --git a/src/elements/tag-selector.tsx b/src/elements/tag-selector.tsx index 6c2621a8..48d12d0b 100644 --- a/src/elements/tag-selector.tsx +++ b/src/elements/tag-selector.tsx @@ -56,9 +56,10 @@ export type TagSelectorStyle = 'simple' | 'advanced'; export interface TagSelectorProps { selection: TagSelection; onChange: (selection: TagSelection, style: TagSelectorStyle) => void; + context?: 'models' | 'datasets'; } -export function TagSelector({ selection, onChange }: TagSelectorProps) { +export function TagSelector({ selection, onChange, context = 'models' }: TagSelectorProps) { const [simple, setSimple] = useState(true); const { editMode } = useWebApi(); @@ -66,22 +67,28 @@ export function TagSelector({ selection, onChange }: TagSelectorProps) { useEffect(() => { if (simple) { - const reduced = reduceToSimple(tagCategoryData.values(), selection); + const filteredCategories = + context === 'datasets' + ? [...tagCategoryData.entries()].filter(([id]) => id === 'dataset').map(([, c]) => c) + : [...tagCategoryData.entries()].filter(([id]) => id !== 'dataset').map(([, c]) => c); + const reduced = reduceToSimple(filteredCategories, selection); if (reduced !== selection) { setSimple(false); } } - }, [simple, tagData, tagCategoryData, selection]); + }, [simple, tagData, tagCategoryData, selection, context]); return (
{simple ? ( ) : ( @@ -92,7 +99,15 @@ export function TagSelector({ selection, onChange }: TagSelectorProps) { onClick={() => { setSimple(!simple); if (!simple) { - const reduced = reduceToSimple(tagCategoryData.values(), selection); + const filteredCategories = + context === 'datasets' + ? [...tagCategoryData.entries()] + .filter(([id]) => id === 'dataset') + .map(([, c]) => c) + : [...tagCategoryData.entries()] + .filter(([id]) => id !== 'dataset') + .map(([, c]) => c); + const reduced = reduceToSimple(filteredCategories, selection); if (reduced !== selection) onChange(reduced, 'simple'); } }} @@ -128,13 +143,20 @@ export function TagSelector({ selection, onChange }: TagSelectorProps) { ); } -function AdvancedTagSelector({ selection, onChange }: TagSelectorProps) { +function AdvancedTagSelector({ selection, onChange, context = 'models' }: TagSelectorProps) { const { tagData, categoryOrder } = useTags(); const { editMode } = useWebApi(); + const filteredCategoryOrder = useMemo(() => { + if (context === 'datasets') { + return categoryOrder.filter(([id]) => id === 'dataset'); + } + return categoryOrder.filter(([id]) => id !== 'dataset'); + }, [categoryOrder, context]); + return (
- {categoryOrder.map(([categoryId, category]) => { + {filteredCategoryOrder.map(([categoryId, category]) => { if (category.tags.length === 0 || (category.editOnly && !editMode)) return ; @@ -166,11 +188,18 @@ function AdvancedTagSelector({ selection, onChange }: TagSelectorProps) { ); } -function SimpleTagSelector({ selection, onChange }: TagSelectorProps) { +function SimpleTagSelector({ selection, onChange, context = 'models' }: TagSelectorProps) { const { tagData, categoryOrder } = useTags(); + const filteredCategoryOrder = useMemo(() => { + if (context === 'datasets') { + return categoryOrder.filter(([id]) => id === 'dataset'); + } + return categoryOrder.filter(([id]) => id !== 'dataset'); + }, [categoryOrder, context]); + const tags = useMemo(() => { - return categoryOrder + return filteredCategoryOrder .map(([, category]) => category) .filter((category) => category.simple) .flatMap(({ tags }) => { @@ -183,7 +212,7 @@ function SimpleTagSelector({ selection, onChange }: TagSelectorProps) { }) .filter(isNonNull); }); - }, [categoryOrder, tagData]); + }, [filteredCategoryOrder, tagData]); const selected: TagId | undefined = useMemo(() => { const required = tags.filter(([tagId]) => selection.get(tagId) === SelectionState.Required); diff --git a/src/lib/license.ts b/src/lib/license.ts index 0a95e6ef..960c181c 100644 --- a/src/lib/license.ts +++ b/src/lib/license.ts @@ -21,6 +21,12 @@ const KNOWN_LICENSES_: Record = { conditions: ['include-copyright', 'state-changes'], limitations: ['liability', 'trademark-use', 'warranty'], }, + 'Academic-Research-Only': { + name: 'Academic Research Purpose Only', + permissions: ['private-use', 'modifications'], + conditions: ['include-copyright'], + limitations: ['liability', 'warranty'], + }, 'BSD-3-Clause': { name: 'BSD 3-Clause "New" or "Revised" License', permissions: ['commercial-use', 'distribution', 'modifications', 'private-use'], diff --git a/src/lib/util.ts b/src/lib/util.ts index 9608dabe..fba6a9f5 100644 --- a/src/lib/util.ts +++ b/src/lib/util.ts @@ -138,7 +138,7 @@ export function compareTagId(a: TagId, b: TagId): number { return compareString(getTagCategory(a) ?? '', getTagCategory(b) ?? '') || compareString(a, b); } export function isDerivedTag(id: TagId): boolean { - return id.includes(':'); + return id.includes(':') && !id.startsWith('dataset:'); } export function getColorMode(numberOfChannels: number) { diff --git a/src/pages/add-dataset.tsx b/src/pages/add-dataset.tsx new file mode 100644 index 00000000..16b719e8 --- /dev/null +++ b/src/pages/add-dataset.tsx @@ -0,0 +1,280 @@ +import { useRouter } from 'next/router'; +import React, { useEffect, useMemo, useState } from 'react'; +import { TextLink } from '../elements/components/link'; +import { HeadCommon } from '../elements/head-common'; +import { PageContainer } from '../elements/page'; +import { useArchitectures } from '../lib/hooks/use-architectures'; +import { useDatasets } from '../lib/hooks/use-datasets'; +import { useModels } from '../lib/hooks/use-models'; +import { useTags } from '../lib/hooks/use-tags'; +import { useWebApi } from '../lib/hooks/use-web-api'; +import { withImpliedTags } from '../lib/implied-tags'; +import { ParseResult, parseDiscordMessage } from '../lib/parse-discord-message'; +import { Dataset, DatasetId, Tag, TagId } from '../lib/schema'; +import { canonicalizeDatasetId } from '../lib/schema-util'; +import { IS_DEPLOYED } from '../lib/site-data'; + +function guessDatasetTags(name: string, description: string, tagData: ReadonlyMap): TagId[] { + const tags = new Set(); + + if (/\b(?:realistic|photo|photography|real)\b/i.test(`${name} ${description}`)) { + tags.add('dataset:realistic' as TagId); + } + if (/\b(?:anime|cartoon)\b/i.test(`${name} ${description}`)) { + tags.add('dataset:anime' as TagId); + } + if (/\b(?:manga)\b/i.test(`${name} ${description}`)) { + tags.add('dataset:manga' as TagId); + } + if (/\b(?:game[- ]textures?|textures?)\b/i.test(`${name} ${description}`)) { + tags.add('dataset:game-textures' as TagId); + } + + return withImpliedTags(tags, tagData); +} + +const EMPTY_PARSE_RESULT: ParseResult = { failed: [], parsed: {} }; + +const discordMessageTemplate = ` +**Name:** DatasetNameThatIsCreative +**License:** GNU GPL3 for example +**Link:** +**Description:** Your Description +`.trim(); + +function PageContent() { + const { datasetData } = useDatasets(); + const { modelData } = useModels(); + const { archData } = useArchitectures(); + const { tagData } = useTags(); + const router = useRouter(); + const { webApi, editMode } = useWebApi(IS_DEPLOYED); + + const [processing, setProcessing] = useState(false); + const [name, setName] = useState('Unknown'); + const [partialId, setPartialId] = useState(); + const [url, setUrl] = useState(''); + const [description, setDescription] = useState(''); + + let fullId = canonicalizeDatasetId(partialId ?? name); + const partialIdFromFull = fullId; + + const [parseMessageTemplate, setParseMessageTemplate] = useState(false); + const [messageTemplate, setMessageTemplate] = useState(''); + const parsedMessage = useMemo((): ParseResult => { + if (!parseMessageTemplate) { + return EMPTY_PARSE_RESULT; + } + return parseDiscordMessage(messageTemplate, modelData, archData); + }, [parseMessageTemplate, messageTemplate, modelData, archData]); + + useEffect(() => { + if (parsedMessage.parsed.name) { + setName(parsedMessage.parsed.name.replace(/[\s_\-]+/g, ' ')); + setPartialId(undefined); + } + }, [parsedMessage.parsed.name]); + + useEffect(() => { + if (parsedMessage.parsed.link) { + setUrl(parsedMessage.parsed.link); + } + }, [parsedMessage.parsed.link]); + + useEffect(() => { + if (parsedMessage.parsed.description) { + setDescription(parsedMessage.parsed.description); + } + }, [parsedMessage.parsed.description]); + + if (!editMode) return null; + + const addDataset = async () => { + if (datasetData.has(fullId)) { + alert(`Dataset ${fullId} already exists`); + return; + } + + const combinedDescription = [ + parsedMessage.parsed.purpose ? `Purpose: ${parsedMessage.parsed.purpose}` : '', + description || parsedMessage.parsed.description || '', + ] + .join('\n\n') + .trim(); + + const dataset: Dataset = { + name, + author: [], + license: parsedMessage.parsed.license ?? null, + tags: guessDatasetTags(name, combinedDescription, tagData), + description: combinedDescription, + date: new Date().toISOString().split('T')[0], + url, + images: [], + }; + + setProcessing(true); + + if (IS_DEPLOYED) { + sessionStorage.setItem('dummy-datasetId', fullId); + sessionStorage.setItem('dummy-dataset', JSON.stringify(dataset)); + fullId = 'OMDB_ADDDATASET_DUMMY' as DatasetId; + } + + await webApi.datasets.update([[fullId, dataset]]); + + // fetch before navigating to ensure the dataset page is available + const page = `/datasets/${fullId}`; + await fetch(page); + await router.push(`/datasets/${fullId}`); + }; + + let inputError; + if (name.trim() === '') { + inputError = 'Name cannot be empty'; + } else if (fullId.trim() === '') { + inputError = 'ID cannot be empty'; + } + + const canAddDataset = !inputError && !processing; + + return ( + <> +

Add Dataset

+
+ { + setParseMessageTemplate(e.target.checked); + }} + /> + + + {parseMessageTemplate && ( +
+

+ How to use: Paste a message from the{' '} + + model-releases + {' '} + channel (or any message following the message template).
+ To copy a message: Move your mouse over the message > click on the three dots + ("More") > Copy Text. +

+