diff --git a/data/datasets/div2k-dataset-diverse-2k.json b/data/datasets/div2k-dataset-diverse-2k.json new file mode 100644 index 00000000..348b395d --- /dev/null +++ b/data/datasets/div2k-dataset-diverse-2k.json @@ -0,0 +1,26 @@ +{ + "name": "DIV2K dataset: DIVerse 2K", + "author": [ + "radu-timofte", + "eirikur-agustsson", + "shuhang-gu", + "jiqing-wu", + "andrey-ignatov", + "luc-van-gool" + ], + "license": "Academic-Research-Only", + "tags": [ + "dataset:realistic" + ], + "description": "The DIV2K dataset is divided into:\n\ntrain data:\nstarting from 800 high definition high resolution images we obtain corresponding low resolution images and provide both high and low resolution images for 2, 3, and 4 downscaling factors\n\nvalidation data:\n100 high definition high resolution images are used for generating low resolution corresponding images, the low res are provided from the beginning of the challenge and are meant for the participants to get online feedback from the validation server; the high resolution images will be released when the final phase of the challenge starts.\n\ntest data:\n100 diverse images are used to generate low resolution corresponding images; the participants will receive the low resolution images when the final evaluation phase starts and the results will be announced after the challenge is over and the winners are decided.", + "date": "2026-06-15", + "url": "https://data.vision.ee.ethz.ch/cvl/DIV2K/", + "images": [ + { + "type": "paired", + "caption": "900", + "LR": "https://i.slow.pics/KWca9Mq7.png", + "SR": "https://i.slow.pics/7P7FzpFq.png" + } + ] +} \ No newline at end of file diff --git a/data/tag-categories.json b/data/tag-categories.json index 193cc209..678b08fd 100644 --- a/data/tag-categories.json +++ b/data/tag-categories.json @@ -162,5 +162,17 @@ "helper:invalid-scale", "helper:invalid-channels" ] + }, + "dataset": { + "name": "Dataset", + "description": "Tags specific to datasets", + "order": 9, + "simple": true, + "tags": [ + 
"dataset:realistic", + "dataset:anime", + "dataset:manga", + "dataset:game-textures" + ] } } \ No newline at end of file diff --git a/data/tags.json b/data/tags.json index 07535cee..00736314 100644 --- a/data/tags.json +++ b/data/tags.json @@ -292,6 +292,22 @@ "name": "RGBA", "description": "The model upscales images with transparency." }, + "dataset:anime": { + "name": "Anime", + "description": "Anime dataset" + }, + "dataset:game-textures": { + "name": "Game Textures", + "description": "Game textures dataset" + }, + "dataset:manga": { + "name": "Manga", + "description": "Manga dataset" + }, + "dataset:realistic": { + "name": "Realistic", + "description": "Realistic dataset" + }, "helper:invalid-channels": { "name": "Invalid channels", "description": "The number of input or output channels of the model is not valid." diff --git a/data/users.json b/data/users.json index deec592b..46a6889a 100644 --- a/data/users.json +++ b/data/users.json @@ -14,6 +14,9 @@ "alsa": { "name": "Alsa" }, + "andrey-ignatov": { + "name": "Andrey Ignatov" + }, "aptitude": { "name": "aptitude" }, @@ -83,6 +86,9 @@ "dinjerr": { "name": "DinJerr" }, + "eirikur-agustsson": { + "name": "Eirikur Agustsson" + }, "eula": { "name": "end user license agreement#9756" }, @@ -128,6 +134,9 @@ "jingyunliang": { "name": "JingyunLiang" }, + "jiqing-wu": { + "name": "Jiqing Wu" + }, "jixiaozhong": { "name": "jixiaozhong" }, @@ -164,6 +173,9 @@ "loinne": { "name": "Loinne" }, + "luc-van-gool": { + "name": "Luc Van Gool" + }, "lyonhrt": { "name": "LyonHrt" }, @@ -218,6 +230,9 @@ "pokepress": { "name": "pokepress" }, + "radu-timofte": { + "name": "Radu Timofte" + }, "rastrum": { "name": "Rastrum" }, @@ -248,6 +263,9 @@ "sharekhan": { "name": "SharekhaN" }, + "shuhang-gu": { + "name": "Shuhang Gu" + }, "sirosky": { "name": "Sirosky" }, diff --git a/scripts/validate-db.ts b/scripts/validate-db.ts index fb1f057a..0d965613 100644 --- a/scripts/validate-db.ts +++ b/scripts/validate-db.ts @@ -1,6 +1,7 @@ import fs 
from 'fs/promises'; import path from 'path'; import { fileApi } from '../src/lib/server/file-data'; +import { validateDataset } from '../src/lib/validate-dataset'; import { Report, validateModel } from '../src/lib/validate-model'; const getAllFiles = async (dir: string): Promise => { @@ -23,6 +24,7 @@ const getAllFiles = async (dir: string): Promise => { const getReports = async (): Promise => { const modelData = await fileApi.models.getAll(); + const datasetData = await fileApi.datasets.getAll(); const architectureData = await fileApi.architectures.getAll(); const tagData = await fileApi.tags.getAll(); const userData = await fileApi.users.getAll(); @@ -31,6 +33,9 @@ const getReports = async (): Promise => { for (const [modelId, model] of modelData) { errors.push(...validateModel(model, modelId, modelData, architectureData, tagData, userData, fileApi)); } + for (const [datasetId, dataset] of datasetData) { + errors.push(...validateDataset(dataset, datasetId, fileApi)); + } const jsonFiles = (await getAllFiles('data/')).filter((file) => file.endsWith('.json')); await Promise.all( diff --git a/src/elements/components/dataset-card-grid.tsx b/src/elements/components/dataset-card-grid.tsx new file mode 100644 index 00000000..f87497e0 --- /dev/null +++ b/src/elements/components/dataset-card-grid.tsx @@ -0,0 +1,23 @@ +import React from 'react'; +import { Dataset, DatasetId } from '../../lib/schema'; +import { DatasetCard } from './dataset-card'; +import style from './model-card-grid.module.scss'; + +export interface DatasetCardGridProps { + datasets: readonly (readonly [DatasetId, Dataset])[]; +} + +export function DatasetCardGrid({ datasets }: DatasetCardGridProps) { + return ( +
+ {datasets.map(([id, dataset], i) => ( + = 12} + /> + ))} +
+ ); +} diff --git a/src/elements/components/dataset-card.tsx b/src/elements/components/dataset-card.tsx new file mode 100644 index 00000000..c0c09fc1 --- /dev/null +++ b/src/elements/components/dataset-card.tsx @@ -0,0 +1,205 @@ +/* eslint-disable @next/next/no-img-element */ +/* eslint-disable react/display-name */ +import React, { memo, useRef, useState } from 'react'; +import { LazyLoadComponent } from 'react-lazy-load-image-component'; +import { useDevicePixelRatio } from '../../lib/hooks/use-device-pixel-ratio'; +import { useUpdateDataset } from '../../lib/hooks/use-update-dataset'; +import { useUsers } from '../../lib/hooks/use-users'; +import { useWebApi } from '../../lib/hooks/use-web-api'; +import { joinList } from '../../lib/react-util'; +import { Dataset, DatasetId, ImageSize, PairedImage } from '../../lib/schema'; +import { asArray, assertNever, joinClasses } from '../../lib/util'; +import { EditableTags } from './editable-tags'; +import { Link } from './link'; +import style from './model-card.module.scss'; + +export interface DatasetCardProps { + id: DatasetId; + dataset: Dataset; + lazy?: boolean; +} + +const EMPTY_SIZE: ImageSize = { + height: 0, + width: 0, +}; + +function getNaturalSize(image: HTMLImageElement): ImageSize { + return { + height: image.naturalHeight, + width: image.naturalWidth, + }; +} + +const SideBySideImage = ({ datasetName, image }: { datasetName: string; image: PairedImage }) => { + const [lrDimensions, setLrDimensions] = useState(EMPTY_SIZE); + const [srDimensions, setSrDimensions] = useState(EMPTY_SIZE); + + const maxHeight = Math.max(lrDimensions.height, srDimensions.height); + const maxWidth = Math.max(lrDimensions.width, srDimensions.width); + + const lrRef = useRef(null); + const srRef = useRef(null); + + const dpr = useDevicePixelRatio(); + const scale = (1 / dpr) * Math.max(1, Math.round(dpr + 0.16)); + + return ( +
+
+ {datasetName} { + setLrDimensions(getNaturalSize(e.target as HTMLImageElement)); + }} + /> +
+
+ {datasetName} { + setSrDimensions(getNaturalSize(e.target as HTMLImageElement)); + }} + /> +
+
+ ); +}; + +const getDatasetCardImageComponent = (dataset: Dataset | undefined) => { + const image = dataset?.images?.[0]; + if (!dataset || !image) { + return
No Image
; + } + switch (image.type) { + case 'paired': { + return ( + + ); + } + case 'standalone': { + const imageSrc = image.url; + return ( + {dataset.name} + ); + } + default: + return assertNever(image); + } +}; + +const DatasetCardContent = memo(({ id, dataset }: DatasetCardProps) => { + const { userData } = useUsers(); + const { webApi, editMode } = useWebApi(); + const { updateDatasetProperty } = useUpdateDataset(webApi, id); + + const isPaired = dataset.images?.[0]?.type === 'paired' && !editMode; + + return ( +
+ + {getDatasetCardImageComponent(dataset)} + + +
+ + {dataset.name} + +
+ {'by '} + {joinList( + asArray(dataset.author).map((userId) => ( + + {userData.get(userId)?.name ?? `unknown user:${userId}`} + + )) + )} +
+ + {/* Description */} +
+ {dataset.description} +
+ + {/* Tags */} +
+ updateDatasetProperty('tags', tags)} + /> +
+
+
+ ); +}); + +export const DatasetCard = memo(({ id, dataset, lazy = false }: DatasetCardProps) => { + const { editMode } = useWebApi(); + + const inner = ( +
+ +
+ ); + + if (!lazy) return inner; + + return ( + + } + > + {inner} + + ); +}); diff --git a/src/elements/components/dataset-download-button.tsx b/src/elements/components/dataset-download-button.tsx new file mode 100644 index 00000000..380c58c9 --- /dev/null +++ b/src/elements/components/dataset-download-button.tsx @@ -0,0 +1,195 @@ +import { Menu, Transition } from '@headlessui/react'; +import { Fragment } from 'react'; +import { BsChevronDown, BsFillTrashFill } from 'react-icons/bs'; +import { FiExternalLink } from 'react-icons/fi'; +import { SiDropbox, SiGithub, SiGoogledrive, SiMega, SiMicrosoftonedrive } from 'react-icons/si'; +import Logo from '../../../public/logo.svg'; +import { isSelfHosted, toDirectDownloadLink } from '../../lib/download-util'; +import { joinClasses } from '../../lib/util'; +import { Link } from './link'; + +type DatasetDownloadButtonProps = { + url: string; + readonly?: boolean; + onChange?: (url: string) => void; +}; + +const hostFromUrl = (url: string): string => { + try { + const parsedUrl = new URL(url); + const domainParts = parsedUrl.hostname.split('.'); + const domainAndTld = domainParts.slice(domainParts.length - 2).join('.'); + + if (domainAndTld === 'github.com') { + return 'GitHub'; + } + if (parsedUrl.hostname === 'drive.google.com') { + return 'Google Drive'; + } + if (parsedUrl.hostname === 'cdn.discordapp.com') { + return 'Discord'; + } + if (domainAndTld === '1drv.ms') { + return 'OneDrive'; + } + if (domainAndTld === 'mega.nz') { + return 'Mega'; + } + if (domainAndTld === 'mediafire.com') { + return 'MediaFire'; + } + if (domainAndTld === 'pcloud.link') { + return 'pCloud'; + } + if (domainAndTld === 'icedrive.net') { + return 'Icedrive'; + } + if (domainAndTld === 'dropbox.com') { + return 'Dropbox'; + } + return parsedUrl.hostname; + } catch (e) { + console.debug(e); + return 'an unknown hoster'; + } +}; + +const iconFromHost = (host: string) => { + switch (host) { + case 'GitHub': + return ; + case 'Google Drive': + 
return ; + case 'OneDrive': + return ; + case 'Mega': + return ; + case 'Dropbox': + return ; + default: + return ; + } +}; + +const isMirrorExternal = (url: string) => !isSelfHosted(url); + +export const DatasetDownloadButton = ({ url, readonly, onChange }: DatasetDownloadButtonProps) => { + const isExternal = isMirrorExternal(url); + const host = hostFromUrl(url); + + const showMenu = !readonly; + + return ( +
+ +
+ {isExternal ? ( + + ) : ( + + + + )} + Visit Dataset Link + {isExternal && ( +
+ Hosted by {host} +
+ )} +
+ + + {showMenu && ( + +
+ + + +
+ + +
+ {url !== '' && ( + { + const newUrl = prompt('Edit URL', url); + if (newUrl !== null && onChange) { + onChange(newUrl); + } + }} + > + {isExternal ? ( +
+
+ {iconFromHost(host)} +
+
{host}
+
+ ) : ( + + )} + +
+ )} + {url === '' && ( + { + const newUrl = prompt('Enter a new URL'); + if (newUrl && onChange) { + onChange(newUrl); + } + }} + > + + Add URL + + )} +
+
+
+
+ )} +
+ ); +}; diff --git a/src/elements/components/dataset-results.tsx b/src/elements/components/dataset-results.tsx new file mode 100644 index 00000000..6b950dc8 --- /dev/null +++ b/src/elements/components/dataset-results.tsx @@ -0,0 +1,35 @@ +import React, { memo, useMemo } from 'react'; +import { Dataset, DatasetId } from '../../lib/schema'; +import { DatasetCardGrid } from './dataset-card-grid'; +import style from './model-results.module.scss'; + +interface DatasetResultsProps { + datasetData: ReadonlyMap; + datasets: readonly DatasetId[]; +} + +// eslint-disable-next-line react/display-name +export const DatasetResults = memo(({ datasets, datasetData }: DatasetResultsProps) => { + const dataPairs = useMemo(() => { + const pairs: (readonly [DatasetId, Dataset])[] = []; + for (const id of datasets) { + const data = datasetData.get(id); + if (data) { + pairs.push([id, data]); + } + } + return pairs; + }, [datasets, datasetData]); + + return ( + <> +
+ + Found {datasets.length} dataset + {datasets.length === 1 ? '' : 's'} + +
+ + + ); +}); diff --git a/src/elements/components/editable-tags.tsx b/src/elements/components/editable-tags.tsx index ac4b0494..eefc69bd 100644 --- a/src/elements/components/editable-tags.tsx +++ b/src/elements/components/editable-tags.tsx @@ -1,6 +1,6 @@ import { Popover, Transition } from '@headlessui/react'; import Link from 'next/link'; -import { Fragment, useEffect, useState } from 'react'; +import { Fragment, useEffect, useMemo, useState } from 'react'; import { BsChevronDown } from 'react-icons/bs'; import { useTags } from '../../lib/hooks/use-tags'; import { addImpliedTags, removeImplyingTags } from '../../lib/implied-tags'; @@ -11,13 +11,20 @@ import style from './editable-tags.module.scss'; export interface SmallTagProps { tagId: TagId; name: string; + context?: 'models' | 'datasets'; } -export function SmallTag({ tagId, name }: SmallTagProps) { +export function SmallTag({ tagId, name, context = 'models' }: SmallTagProps) { return ( {name} @@ -28,14 +35,16 @@ export interface EditableTagsProps { tags: readonly TagId[]; onChange?: (value: TagId[]) => void; readonly?: boolean; + context?: 'models' | 'datasets'; } -export function EditableTags({ tags, onChange, readonly }: EditableTagsProps) { +export function EditableTags({ tags, onChange, readonly, context = 'models' }: EditableTagsProps) { const { tagData } = useTags(); return (
{!readonly && onChange && ( @@ -44,6 +53,7 @@ export function EditableTags({ tags, onChange, readonly }: EditableTagsProps) { const name = tagData.get(tagId)?.name ?? `unknown tag:${tagId}`; return ( void }) { +function EditTags({ + tags, + onChange, + context = 'models', +}: { + tags: readonly TagId[]; + onChange: (value: TagId[]) => void; + context?: 'models' | 'datasets'; +}) { const { tagData, categoryOrder } = useTags(); + const filteredCategoryOrder = useMemo(() => { + if (context === 'datasets') { + return categoryOrder.filter(([id]) => id === 'dataset'); + } + return categoryOrder.filter(([id]) => id !== 'dataset'); + }, [categoryOrder, context]); + const [currentTags, setCurrentTags] = useState(tags); useEffect(() => { setCurrentTags(tags); @@ -104,7 +129,7 @@ function EditTags({ tags, onChange }: { tags: readonly TagId[]; onChange: (value }`} >
- {categoryOrder.map(([categoryId, category]) => { + {filteredCategoryOrder.map(([categoryId, category]) => { const manual = category.tags.filter((tagId) => !isDerivedTag(tagId)); if (manual.length === 0) { return ; diff --git a/src/elements/header.tsx b/src/elements/header.tsx index 47c98147..71ee5401 100644 --- a/src/elements/header.tsx +++ b/src/elements/header.tsx @@ -59,17 +59,39 @@ export function Header({ searchBar }: HeaderProps) { > How To Upscale + + Datasets + {editMode && ( - - Add Model - + <> + + Add Model + + + Add Dataset + + )} @@ -146,6 +168,7 @@ export function Header({ searchBar }: HeaderProps) { function HeaderDrawer() { const [showDrawer, setShowDrawer] = useState(false); + const { editMode } = useEditModeToggle(); return ( <> @@ -214,6 +237,30 @@ function HeaderDrawer() { How To Upscale
+ Browse + +
Datasets
+ + {editMode && ( + <> + Edit + +
Add Model
+ + +
Add Dataset
+ + + )} Links void; + context?: 'models' | 'datasets'; } -export function TagSelector({ selection, onChange }: TagSelectorProps) { +export function TagSelector({ selection, onChange, context = 'models' }: TagSelectorProps) { const [simple, setSimple] = useState(true); const { editMode } = useWebApi(); @@ -66,22 +67,28 @@ export function TagSelector({ selection, onChange }: TagSelectorProps) { useEffect(() => { if (simple) { - const reduced = reduceToSimple(tagCategoryData.values(), selection); + const filteredCategories = + context === 'datasets' + ? [...tagCategoryData.entries()].filter(([id]) => id === 'dataset').map(([, c]) => c) + : [...tagCategoryData.entries()].filter(([id]) => id !== 'dataset').map(([, c]) => c); + const reduced = reduceToSimple(filteredCategories, selection); if (reduced !== selection) { setSimple(false); } } - }, [simple, tagData, tagCategoryData, selection]); + }, [simple, tagData, tagCategoryData, selection, context]); return (
{simple ? ( ) : ( @@ -92,7 +99,15 @@ export function TagSelector({ selection, onChange }: TagSelectorProps) { onClick={() => { setSimple(!simple); if (!simple) { - const reduced = reduceToSimple(tagCategoryData.values(), selection); + const filteredCategories = + context === 'datasets' + ? [...tagCategoryData.entries()] + .filter(([id]) => id === 'dataset') + .map(([, c]) => c) + : [...tagCategoryData.entries()] + .filter(([id]) => id !== 'dataset') + .map(([, c]) => c); + const reduced = reduceToSimple(filteredCategories, selection); if (reduced !== selection) onChange(reduced, 'simple'); } }} @@ -128,13 +143,20 @@ export function TagSelector({ selection, onChange }: TagSelectorProps) { ); } -function AdvancedTagSelector({ selection, onChange }: TagSelectorProps) { +function AdvancedTagSelector({ selection, onChange, context = 'models' }: TagSelectorProps) { const { tagData, categoryOrder } = useTags(); const { editMode } = useWebApi(); + const filteredCategoryOrder = useMemo(() => { + if (context === 'datasets') { + return categoryOrder.filter(([id]) => id === 'dataset'); + } + return categoryOrder.filter(([id]) => id !== 'dataset'); + }, [categoryOrder, context]); + return (
- {categoryOrder.map(([categoryId, category]) => { + {filteredCategoryOrder.map(([categoryId, category]) => { if (category.tags.length === 0 || (category.editOnly && !editMode)) return ; @@ -166,11 +188,18 @@ function AdvancedTagSelector({ selection, onChange }: TagSelectorProps) { ); } -function SimpleTagSelector({ selection, onChange }: TagSelectorProps) { +function SimpleTagSelector({ selection, onChange, context = 'models' }: TagSelectorProps) { const { tagData, categoryOrder } = useTags(); + const filteredCategoryOrder = useMemo(() => { + if (context === 'datasets') { + return categoryOrder.filter(([id]) => id === 'dataset'); + } + return categoryOrder.filter(([id]) => id !== 'dataset'); + }, [categoryOrder, context]); + const tags = useMemo(() => { - return categoryOrder + return filteredCategoryOrder .map(([, category]) => category) .filter((category) => category.simple) .flatMap(({ tags }) => { @@ -183,7 +212,7 @@ function SimpleTagSelector({ selection, onChange }: TagSelectorProps) { }) .filter(isNonNull); }); - }, [categoryOrder, tagData]); + }, [filteredCategoryOrder, tagData]); const selected: TagId | undefined = useMemo(() => { const required = tags.filter(([tagId]) => selection.get(tagId) === SelectionState.Required); diff --git a/src/lib/data-api.ts b/src/lib/data-api.ts index c9a6dea8..808a59fc 100644 --- a/src/lib/data-api.ts +++ b/src/lib/data-api.ts @@ -5,6 +5,8 @@ import { ArchId, Collection, CollectionId, + Dataset, + DatasetId, Model, ModelId, Tag, @@ -23,6 +25,7 @@ export interface DBApi { readonly users: CollectionApi; readonly architectures: CollectionApi; readonly collections: CollectionApi; + readonly datasets: CollectionApi; } /** diff --git a/src/lib/hooks/use-datasets.ts b/src/lib/hooks/use-datasets.ts new file mode 100644 index 00000000..453ffc13 --- /dev/null +++ b/src/lib/hooks/use-datasets.ts @@ -0,0 +1,59 @@ +import { useCallback, useEffect, useMemo, useState } from 'react'; +import { Dataset, DatasetId } from '../schema'; +import 
{ EMPTY_MAP, typedEntries } from '../util'; +import { addUpdateListener, getWebApi, startListeningForUpdates } from '../web-api'; + +export interface UseDatasets { + readonly datasetData: ReadonlyMap; +} + +export function useDatasets(datasets?: Readonly>): UseDatasets { + const staticData: ReadonlyMap = useMemo( + () => (datasets ? new Map(typedEntries(datasets)) : EMPTY_MAP), + [datasets] + ); + const [data, setData] = useState(staticData); + + const update = useCallback((value: ReadonlyMap): void => { + setData((prev) => { + if (prev === value) return prev; + + const newData = new Map(); + for (const [id, dataset] of value) { + const old = prev.get(id); + if (old && areEqual(old, dataset)) { + newData.set(id, old); + } else { + newData.set(id, dataset); + } + } + return newData; + }); + }, []); + + const updateWithWebApi = useCallback((): void => { + getWebApi() + .then(async (webApi) => { + if (!webApi) return; + const datasets = await webApi.datasets.getAll(); + update(datasets); + }) + .catch((e) => console.error(e)); + }, [update]); + + useEffect(() => { + update(staticData); + updateWithWebApi(); + }, [update, updateWithWebApi, staticData]); + + useEffect(() => { + startListeningForUpdates(); + return addUpdateListener(updateWithWebApi); + }, [updateWithWebApi]); + + return { datasetData: data }; +} + +function areEqual(a: Dataset, b: Dataset): boolean { + return JSON.stringify(a) === JSON.stringify(b); +} diff --git a/src/lib/hooks/use-update-dataset.ts b/src/lib/hooks/use-update-dataset.ts new file mode 100644 index 00000000..eebde0b0 --- /dev/null +++ b/src/lib/hooks/use-update-dataset.ts @@ -0,0 +1,26 @@ +import { useMemo } from 'react'; +import { DBApi } from '../data-api'; +import { Dataset, DatasetId } from '../schema'; +import { noop } from '../util'; + +export type UpdateDatasetPropertyFn = (key: K, value: Dataset[K]) => void; + +export interface UseUpdateDataset { + updateDatasetProperty: UpdateDatasetPropertyFn; +} + +export function 
useUpdateDataset(webApi: DBApi | undefined, datasetId: DatasetId): UseUpdateDataset { + const updateDatasetProperty = useMemo(() => { + if (!webApi) return noop; + return (key: K, value: Dataset[K]) => { + const fn = async () => { + const dataset = await webApi.datasets.get(datasetId); + dataset[key] = value; + await webApi.datasets.update([[datasetId, dataset]]); + }; + fn().catch((e) => console.error(e)); + }; + }, [webApi, datasetId]); + + return { updateDatasetProperty }; +} diff --git a/src/lib/license.ts b/src/lib/license.ts index 0a95e6ef..960c181c 100644 --- a/src/lib/license.ts +++ b/src/lib/license.ts @@ -21,6 +21,12 @@ const KNOWN_LICENSES_: Record = { conditions: ['include-copyright', 'state-changes'], limitations: ['liability', 'trademark-use', 'warranty'], }, + 'Academic-Research-Only': { + name: 'Academic Research Purpose Only', + permissions: ['private-use', 'modifications'], + conditions: ['include-copyright'], + limitations: ['liability', 'warranty'], + }, 'BSD-3-Clause': { name: 'BSD 3-Clause "New" or "Revised" License', permissions: ['commercial-use', 'distribution', 'modifications', 'private-use'], diff --git a/src/lib/schema-util.ts b/src/lib/schema-util.ts index b31bcf42..3d5f8669 100644 --- a/src/lib/schema-util.ts +++ b/src/lib/schema-util.ts @@ -1,6 +1,7 @@ -import { ArchId, ModelId, TagId, UserId } from './schema'; +import { ArchId, DatasetId, ModelId, TagId, UserId } from './schema'; export const ModelIdPattern = /^\d+x-[a-zA-Z0-9]+(?:-[a-zA-Z0-9]+)*$/; +export const DatasetIdPattern = /^[a-zA-Z0-9]+(?:-[a-zA-Z0-9]+)*$/; export const UserIdPattern = /^[a-z0-9]+(?:-[a-z0-9]+)*$/; export const ArchIdPattern = /^[a-z0-9]+(?:-[a-z0-9]+|[+])*$/; export const TagIdPattern = /^(?:[a-z0-9]+:)?[a-z0-9]+(?:-[a-z0-9]+|[+])*$/; @@ -25,6 +26,9 @@ export function canonicalizeModelId(id: string): ModelId { return id as ModelId; } +export function canonicalizeDatasetId(id: string): DatasetId { + return lowerDashes(id, /[^a-z0-9]/) as DatasetId; +} 
export function canonicalizeUserId(id: string): UserId { return lowerDashes(id, /[^a-z0-9]/) as UserId; } diff --git a/src/lib/schema.ts b/src/lib/schema.ts index 131baed4..0f54d50f 100644 --- a/src/lib/schema.ts +++ b/src/lib/schema.ts @@ -14,6 +14,7 @@ export type TagId = string & { readonly TagId: never }; export type TagCategoryId = string & { readonly TagCategoryId: never }; export type ArchId = string & { readonly ArchId: never }; export type CollectionId = string & { readonly CollectionId: never }; +export type DatasetId = string & { readonly DatasetId: never }; export type MarkDownString = string; export interface Model extends Partial { @@ -142,3 +143,14 @@ export interface Collection { models: ModelId[]; author: UserId | UserId[]; } + +export interface Dataset { + name: string; + author: UserId | UserId[]; + license: SPDXLicense | null; + tags: TagId[]; + description: MarkDownString; + date: string; + url: string; + images?: Image[]; +} diff --git a/src/lib/search/create.ts b/src/lib/search/create.ts index ecd252f1..769b69be 100644 --- a/src/lib/search/create.ts +++ b/src/lib/search/create.ts @@ -1,5 +1,5 @@ import { deriveTags } from '../derive-tags'; -import { Model, ModelId, TagId } from '../schema'; +import { Dataset, DatasetId, Model, ModelId, TagId } from '../schema'; import { asArray } from '../util'; import { CorpusEntry, SearchIndex } from './search-index'; @@ -31,3 +31,25 @@ export function createModelSearchIndex(modelData: ReadonlyMap) { }) ); } + +export function createDatasetSearchIndex(datasetData: ReadonlyMap) { + return new SearchIndex( + [...datasetData].map(([id, dataset]): CorpusEntry => { + return { + id, + tags: new Set(dataset.tags), + texts: [ + { + text: [id, dataset.name].filter(Boolean).join('\n').toLowerCase(), + weight: 8, + }, + { + text: asArray(dataset.author).filter(Boolean).join('\n').toLowerCase(), + weight: 4, + }, + { text: dataset.description.toLowerCase(), weight: 1 }, + ], + }; + }) + ); +} diff --git 
a/src/lib/server/cached.ts b/src/lib/server/cached.ts index e2e89c63..6bd31b79 100644 --- a/src/lib/server/cached.ts +++ b/src/lib/server/cached.ts @@ -1,4 +1,4 @@ -import { Collection, CollectionId, Model, ModelId } from '../schema'; +import { Collection, CollectionId, Dataset, DatasetId, Model, ModelId } from '../schema'; import { fileApi, getFileApiMutationCounterUnsynchronized } from './file-data'; let cachedMutationCounter = 0; @@ -29,3 +29,10 @@ export const getCachedModels = cached((): Promise> = export const getCachedCollections = cached( (): Promise> => fileApi.collections.getAll() ); + +/** + * This is a cached version of `fileApi.datasets.getAll()`. + * + * The caller is not allowed to mutate the returned map or any of its values. + */ +export const getCachedDatasets = cached((): Promise> => fileApi.datasets.getAll()); diff --git a/src/lib/server/file-data.ts b/src/lib/server/file-data.ts index 13873bb7..eb6c14a3 100644 --- a/src/lib/server/file-data.ts +++ b/src/lib/server/file-data.ts @@ -1,5 +1,5 @@ import { FSWatcher } from 'chokidar'; -import { readFile, readdir, rename, unlink, writeFile } from 'fs/promises'; +import { mkdir, readFile, readdir, rename, unlink, writeFile } from 'fs/promises'; import { join } from 'path'; import { CollectionApi, DBApi, SynchronizedCollection, notifyOnWrite } from '../data-api'; import { RWLock } from '../lock'; @@ -8,6 +8,8 @@ import { ArchId, Collection, CollectionId, + Dataset, + DatasetId, Model, ModelId, Tag, @@ -22,6 +24,7 @@ import { JsonFile, fileExists } from './fs-util'; export const DATA_DIR = './data/'; const MODEL_DIR = join(DATA_DIR, 'models'); +const DATASET_DIR = join(DATA_DIR, 'datasets'); const USERS_JSON = join(DATA_DIR, 'users.json'); const TAGS_JSON = join(DATA_DIR, 'tags.json'); const TAG_CATEGORIES_JSON = join(DATA_DIR, 'tag-categories.json'); @@ -219,6 +222,101 @@ const modelApi: CollectionApi = { }, }; +function getDatasetDataPath(id: DatasetId): string { + return join(DATASET_DIR, 
`${id}.json`); +} + +async function getAllDatasetIds(): Promise { + if (!(await fileExists(DATASET_DIR))) { + await mkdir(DATASET_DIR, { recursive: true }); + } + const files = await readdir(DATASET_DIR); + const ids = files.filter((f) => f.endsWith('.json')).map((f) => f.slice(0, -'.json'.length) as DatasetId); + return ids; +} + +async function getSingleDatasetData(id: DatasetId): Promise { + const content = await readFile(getDatasetDataPath(id), 'utf-8'); + return JSON.parse(content) as Dataset; +} + +function getDatasetData(ids: readonly DatasetId[]): Promise { + return Promise.all(ids.map(getSingleDatasetData)); +} + +const datasetKeyOrder = [ + 'name', + 'author', + 'license', + 'tags', + 'description', + 'date', + 'url', + 'images', +] as const satisfies readonly (keyof Dataset)[]; + +async function writeDatasetData(id: DatasetId, dataset: Readonly): Promise { + sortObjectKeys(dataset, datasetKeyOrder); + for (const i of dataset.images || []) { + sortObjectKeys(i, ['type', 'caption', 'LR', 'SR', 'url', 'thumbnail']); + } + dataset.tags.sort(compareTagId); + const file = getDatasetDataPath(id); + await writeFile(file, JSON.stringify(dataset, undefined, 4), 'utf-8'); +} + +const datasetApi: CollectionApi = { + get: getSingleDatasetData, + getIds: getAllDatasetIds, + async getAll(): Promise> { + const ids = await getAllDatasetIds(); + const data = await getDatasetData(ids); + return new Map(ids.map((id, i) => [id, data[i]])); + }, + + async update(updates: Iterable): Promise { + if (!(await fileExists(DATASET_DIR))) { + await mkdir(DATASET_DIR, { recursive: true }); + } + await Promise.all( + [...new Map(updates)].map(async ([id, value]) => { + await writeDatasetData(id, value); + console.warn(`Updated dataset data of ${id}`); + }) + ); + }, + async delete(ids: Iterable): Promise { + await Promise.all( + [...ids].map(async (id) => { + const file = getDatasetDataPath(id); + if (await fileExists(file)) { + await unlink(file); + console.warn(`Delete dataset data 
of ${id}`); + } else { + console.warn(`Dataset data of ${id} cannot be deleted because it doesn't exist`); + } + }) + ); + }, + async changeId(id: DatasetId, newId: DatasetId): Promise { + if (id === newId) return; + + const datasetIds = await getAllDatasetIds(); + if (!datasetIds.includes(id)) { + throw new Error(`Cannot change dataset id ${id} because it does not exist`); + } + if (datasetIds.includes(newId)) { + throw new Error(`Cannot change dataset id ${id} to ${newId} because ${newId} already exists`); + } + + const from = getDatasetDataPath(id); + const to = getDatasetDataPath(newId); + const temp = `${to}.tmp`; + await rename(from, temp); + await rename(temp, to); + }, +}; + function ofJsonFile( file: JsonFile>, { @@ -378,6 +476,7 @@ export const fileApi: DBApi = { tagCategories: wrapCollection(tagCategoryApi), architectures: wrapCollection(archApi), collections: wrapCollection(collectionApi), + datasets: wrapCollection(datasetApi), }; export function getFileApiMutationCounter(): Promise { @@ -389,6 +488,7 @@ export function getFileApiMutationCounterUnsynchronized(): number { const watcher = new FSWatcher({ persistent: false, ignorePermissionErrors: true, usePolling: true }); watcher.add(MODEL_DIR); +watcher.add(DATASET_DIR); watcher.on('add', addMutation); watcher.on('unlink', addMutation); watcher.on('change', addMutation); diff --git a/src/lib/util.ts b/src/lib/util.ts index 9608dabe..fba6a9f5 100644 --- a/src/lib/util.ts +++ b/src/lib/util.ts @@ -138,7 +138,7 @@ export function compareTagId(a: TagId, b: TagId): number { return compareString(getTagCategory(a) ?? '', getTagCategory(b) ?? 
'') || compareString(a, b); } export function isDerivedTag(id: TagId): boolean { - return id.includes(':'); + return id.includes(':') && !id.startsWith('dataset:'); } export function getColorMode(numberOfChannels: number) { diff --git a/src/lib/validate-dataset.ts b/src/lib/validate-dataset.ts new file mode 100644 index 00000000..35e64ccf --- /dev/null +++ b/src/lib/validate-dataset.ts @@ -0,0 +1,27 @@ +import { DBApi } from './data-api'; +import { Dataset, DatasetId } from './schema'; +import { canonicalizeDatasetId } from './schema-util'; +import { Report } from './validate-model'; + +export const validateDataset = (dataset: Dataset, datasetId: DatasetId, api: DBApi): Report[] => { + const errors: Report[] = []; + const report = (message: string, fix?: () => Promise) => + errors.push({ message: `Dataset ${datasetId}: ${message}`, fix }); + + const expected = canonicalizeDatasetId(datasetId); + if (expected !== datasetId) { + report(`Dataset ID should be ${expected}`, () => api.datasets.changeId(datasetId, expected)); + } + + if (dataset.images?.some((image) => image.thumbnail)) { + report(`Thumbnails are automatically generated and should not appear in the database`, async () => { + const dataset = await api.datasets.get(datasetId); + for (const image of dataset.images || []) { + delete image.thumbnail; + } + await api.datasets.update([[datasetId, dataset]]); + }); + } + + return errors; +}; diff --git a/src/lib/web-api.ts b/src/lib/web-api.ts index 3e0911bf..be270737 100644 --- a/src/lib/web-api.ts +++ b/src/lib/web-api.ts @@ -6,6 +6,8 @@ import { ArchId, Collection, CollectionId, + Dataset, + DatasetId, Model, ModelId, Tag, @@ -106,13 +108,14 @@ async function createMapCollection(path: string): Promise => { if (IS_DEPLOYED) { - const [models, users, tags, tagCategories, architectures, collections] = await Promise.all([ + const [models, users, tags, tagCategories, architectures, collections, datasets] = await Promise.all([ 
createMapCollection('/api/v1/models.json'), createMapCollection('/api/v1/users.json'), createMapCollection('/api/v1/tags.json'), createMapCollection('/api/v1/tagCategories.json'), createMapCollection('/api/v1/architectures.json'), createMapCollection('/api/v1/collections.json'), + createMapCollection('/api/v1/datasets.json'), ]); return { @@ -122,6 +125,7 @@ const getDbAPI = async (): Promise => { tagCategories, architectures, collections, + datasets, }; } return { @@ -131,6 +135,7 @@ const getDbAPI = async (): Promise => { tagCategories: createWebCollection('/api/tag-categories'), architectures: createWebCollection('/api/architectures'), collections: createWebCollection('/api/collections'), + datasets: createWebCollection('/api/datasets'), }; }; diff --git a/src/pages/add-dataset.tsx b/src/pages/add-dataset.tsx new file mode 100644 index 00000000..16b719e8 --- /dev/null +++ b/src/pages/add-dataset.tsx @@ -0,0 +1,280 @@ +import { useRouter } from 'next/router'; +import React, { useEffect, useMemo, useState } from 'react'; +import { TextLink } from '../elements/components/link'; +import { HeadCommon } from '../elements/head-common'; +import { PageContainer } from '../elements/page'; +import { useArchitectures } from '../lib/hooks/use-architectures'; +import { useDatasets } from '../lib/hooks/use-datasets'; +import { useModels } from '../lib/hooks/use-models'; +import { useTags } from '../lib/hooks/use-tags'; +import { useWebApi } from '../lib/hooks/use-web-api'; +import { withImpliedTags } from '../lib/implied-tags'; +import { ParseResult, parseDiscordMessage } from '../lib/parse-discord-message'; +import { Dataset, DatasetId, Tag, TagId } from '../lib/schema'; +import { canonicalizeDatasetId } from '../lib/schema-util'; +import { IS_DEPLOYED } from '../lib/site-data'; + +function guessDatasetTags(name: string, description: string, tagData: ReadonlyMap): TagId[] { + const tags = new Set(); + + if (/\b(?:realistic|photo|photography|real)\b/i.test(`${name} 
${description}`)) { + tags.add('dataset:realistic' as TagId); + } + if (/\b(?:anime|cartoon)\b/i.test(`${name} ${description}`)) { + tags.add('dataset:anime' as TagId); + } + if (/\b(?:manga)\b/i.test(`${name} ${description}`)) { + tags.add('dataset:manga' as TagId); + } + if (/\b(?:game[- ]textures?|textures?)\b/i.test(`${name} ${description}`)) { + tags.add('dataset:game-textures' as TagId); + } + + return withImpliedTags(tags, tagData); +} + +const EMPTY_PARSE_RESULT: ParseResult = { failed: [], parsed: {} }; + +const discordMessageTemplate = ` +**Name:** DatasetNameThatIsCreative +**License:** GNU GPL3 for example +**Link:** +**Description:** Your Description +`.trim(); + +function PageContent() { + const { datasetData } = useDatasets(); + const { modelData } = useModels(); + const { archData } = useArchitectures(); + const { tagData } = useTags(); + const router = useRouter(); + const { webApi, editMode } = useWebApi(IS_DEPLOYED); + + const [processing, setProcessing] = useState(false); + const [name, setName] = useState('Unknown'); + const [partialId, setPartialId] = useState(); + const [url, setUrl] = useState(''); + const [description, setDescription] = useState(''); + + let fullId = canonicalizeDatasetId(partialId ?? 
name); + const partialIdFromFull = fullId; + + const [parseMessageTemplate, setParseMessageTemplate] = useState(false); + const [messageTemplate, setMessageTemplate] = useState(''); + const parsedMessage = useMemo((): ParseResult => { + if (!parseMessageTemplate) { + return EMPTY_PARSE_RESULT; + } + return parseDiscordMessage(messageTemplate, modelData, archData); + }, [parseMessageTemplate, messageTemplate, modelData, archData]); + + useEffect(() => { + if (parsedMessage.parsed.name) { + setName(parsedMessage.parsed.name.replace(/[\s_\-]+/g, ' ')); + setPartialId(undefined); + } + }, [parsedMessage.parsed.name]); + + useEffect(() => { + if (parsedMessage.parsed.link) { + setUrl(parsedMessage.parsed.link); + } + }, [parsedMessage.parsed.link]); + + useEffect(() => { + if (parsedMessage.parsed.description) { + setDescription(parsedMessage.parsed.description); + } + }, [parsedMessage.parsed.description]); + + if (!editMode) return null; + + const addDataset = async () => { + if (datasetData.has(fullId)) { + alert(`Dataset ${fullId} already exists`); + return; + } + + const combinedDescription = [ + parsedMessage.parsed.purpose ? `Purpose: ${parsedMessage.parsed.purpose}` : '', + description || parsedMessage.parsed.description || '', + ] + .join('\n\n') + .trim(); + + const dataset: Dataset = { + name, + author: [], + license: parsedMessage.parsed.license ?? 
null, + tags: guessDatasetTags(name, combinedDescription, tagData), + description: combinedDescription, + date: new Date().toISOString().split('T')[0], + url, + images: [], + }; + + setProcessing(true); + + if (IS_DEPLOYED) { + sessionStorage.setItem('dummy-datasetId', fullId); + sessionStorage.setItem('dummy-dataset', JSON.stringify(dataset)); + fullId = 'OMDB_ADDDATASET_DUMMY' as DatasetId; + } + + await webApi.datasets.update([[fullId, dataset]]); + + // fetch before navigating to ensure the dataset page is available + const page = `/datasets/${fullId}`; + await fetch(page); + await router.push(`/datasets/${fullId}`); + }; + + let inputError; + if (name.trim() === '') { + inputError = 'Name cannot be empty'; + } else if (fullId.trim() === '') { + inputError = 'ID cannot be empty'; + } + + const canAddDataset = !inputError && !processing; + + return ( + <> +

Add Dataset

+
+ { + setParseMessageTemplate(e.target.checked); + }} + /> + + + {parseMessageTemplate && ( +
+

+ How to use: Paste a message from the{' '} + + model-releases + {' '} + channel (or any message following the message template).
+ To copy a message: Move your mouse over the message > click on the three dots + ("More") > Copy Text. +

+