From d3690280d8c494aadef38a6172c19e397df8689f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 11 Jun 2026 15:27:24 +0000 Subject: [PATCH 1/2] fix(security): treat OSV MAL-* malware advisories as CRITICAL, escalate UNKNOWN severity to ask OSV malware advisories (OpenSSF malicious-packages, MAL-* ids) usually carry no CVSS score. The severity chain collapsed them to UNKNOWN -> NONE -> allow, so packages known to be malware were installed with a reassuring "none are above LOW severity" message. - osv.ts: any advisory with a MAL- prefixed id or alias is CRITICAL - index.ts: UNKNOWN no longer maps to NONE - decision.ts: UNKNOWN severity triggers ask instead of allow Fixes PRO-373. https://claude.ai/code/session_01QTEvxicjPZXdx2hvmvnmzH --- src/decision.test.ts | 57 ++++++++++++++++++++++++++++++++++++++++++++ src/decision.ts | 10 ++++++-- src/index.ts | 6 +++-- src/osv.test.ts | 55 ++++++++++++++++++++++++++++++++++++++++++ src/osv.ts | 12 ++++++++++ 5 files changed, 136 insertions(+), 4 deletions(-) create mode 100644 src/decision.test.ts diff --git a/src/decision.test.ts b/src/decision.test.ts new file mode 100644 index 0000000..636d798 --- /dev/null +++ b/src/decision.test.ts @@ -0,0 +1,57 @@ +import { describe, it, expect } from 'vitest'; +import { makeDecision, makeFullDecision, type Vulnerability } from './decision.js'; + +function vuln(severity: Vulnerability['severity'], name = 'some-pkg'): Vulnerability { + return { name, severity, version: '1.0.0' }; +} + +describe('makeDecision', () => { + it('allows when there are no vulnerabilities', () => { + expect(makeDecision([]).decision).toBe('allow'); + }); + + it('denies on CRITICAL', () => { + expect(makeDecision([vuln('CRITICAL')]).decision).toBe('deny'); + }); + + it('denies on HIGH', () => { + expect(makeDecision([vuln('HIGH')]).decision).toBe('deny'); + }); + + it('asks on MODERATE', () => { + expect(makeDecision([vuln('MODERATE')]).decision).toBe('ask'); + }); + + it('asks on UNKNOWN severity instead of 
allowing', () => { + // Regression: advisories without a CVSS score (e.g. MAL-* malware + // entries before they are mapped to CRITICAL, or fresh unscored + // reports) must never silently pass. + const result = makeDecision([vuln('UNKNOWN')]); + expect(result.decision).toBe('ask'); + expect(result.reason).toContain('UNKNOWN severity'); + }); + + it('allows on LOW only', () => { + const result = makeDecision([vuln('LOW')]); + expect(result.decision).toBe('allow'); + }); + + it('deny takes priority over UNKNOWN in mixed results', () => { + expect(makeDecision([vuln('UNKNOWN'), vuln('CRITICAL')]).decision).toBe('deny'); + }); +}); + +describe('makeFullDecision', () => { + it('escalates UNKNOWN-severity vulnerabilities to ask without supply chain signals', () => { + const result = makeFullDecision([vuln('UNKNOWN')], []); + expect(result.decision).toBe('ask'); + }); + + it('keeps deny when supply chain signals are also present', () => { + const result = makeFullDecision([vuln('CRITICAL')], [ + { type: 'new-package', severity: 'HIGH', detail: 'Package created 2 days ago.' 
}, + ]); + expect(result.decision).toBe('deny'); + expect(result.reason).toContain('Package created 2 days ago.'); + }); +}); diff --git a/src/decision.ts b/src/decision.ts index 411632f..1651d7c 100644 --- a/src/decision.ts +++ b/src/decision.ts @@ -2,7 +2,7 @@ import type { SupplyChainSignal } from './registry.js'; export interface Vulnerability { name: string; - severity: 'CRITICAL' | 'HIGH' | 'MODERATE' | 'LOW' | 'NONE'; + severity: 'CRITICAL' | 'HIGH' | 'MODERATE' | 'LOW' | 'UNKNOWN' | 'NONE'; version: string; fixVersion?: string; } @@ -26,7 +26,9 @@ export function makeDecision(vulnerabilities: Vulnerability[]): DecisionResult { if (severities.includes('CRITICAL') || severities.includes('HIGH')) { decision = 'deny'; - } else if (severities.includes('MODERATE')) { + } else if (severities.includes('MODERATE') || severities.includes('UNKNOWN')) { + // UNKNOWN severity means the advisory exists but carries no score — + // never treat that as harmless; require user approval. decision = 'ask'; } @@ -34,6 +36,7 @@ export function makeDecision(vulnerabilities: Vulnerability[]): DecisionResult { const criticalCount = severities.filter(s => s === 'CRITICAL').length; const highCount = severities.filter(s => s === 'HIGH').length; const moderateCount = severities.filter(s => s === 'MODERATE').length; + const unknownCount = severities.filter(s => s === 'UNKNOWN').length; const parts = []; if (criticalCount > 0) { @@ -45,6 +48,9 @@ export function makeDecision(vulnerabilities: Vulnerability[]): DecisionResult { if (moderateCount > 0) { parts.push(`${moderateCount} MODERATE`); } + if (unknownCount > 0) { + parts.push(`${unknownCount} UNKNOWN severity`); + } const vuln = vulnerabilities[0]; const fixVersion = vuln.fixVersion ? 
`, recommended fix: ${vuln.fixVersion}` : ''; diff --git a/src/index.ts b/src/index.ts index cac7d40..4f730a3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -32,8 +32,10 @@ function mapSeverity(osvSeverity: OSVVulnerability['severity']): Vulnerability[' case 'HIGH': return 'HIGH'; case 'MEDIUM': return 'MODERATE'; case 'LOW': return 'LOW'; - case 'UNKNOWN': return 'NONE'; - default: return 'NONE'; + // UNKNOWN must not collapse to NONE: advisories without a CVSS score + // (common for malware and fresh reports) would silently pass as "allow". + case 'UNKNOWN': return 'UNKNOWN'; + default: return 'UNKNOWN'; } } diff --git a/src/osv.test.ts b/src/osv.test.ts index 8e5a7a6..ccdfb4f 100644 --- a/src/osv.test.ts +++ b/src/osv.test.ts @@ -35,6 +35,61 @@ function pypiResponse(version: string) { return { ok: true, status: 200, json: () => Promise.resolve({ info: { version } }) }; } +describe('checkPackageVulnerabilities — malware advisories (MAL-*)', () => { + function osvRawResponse(vulns: unknown[]) { + return { ok: true, status: 200, json: () => Promise.resolve({ vulns }) }; + } + + it('treats MAL-* advisories without any severity data as CRITICAL', async () => { + mockFetch.mockImplementation((url: string) => { + if (url === 'https://api.osv.dev/v1/query') { + // Real-world shape: MAL entries typically have no severity array + // and no database_specific.severity. 
+ return Promise.resolve( + osvRawResponse([{ id: 'MAL-2026-1234', summary: 'Malicious code in evil-pkg (npm)' }]) + ); + } + throw new Error(`unexpected fetch: ${url}`); + }); + + const result = await checkPackageVulnerabilities('evil-pkg', '1.0.0', 'npm'); + + expect(result.status).toBe('success'); + if (result.status !== 'success') return; + expect(result.vulnerabilities).toHaveLength(1); + expect(result.vulnerabilities[0].severity).toBe('CRITICAL'); + expect(result.vulnerabilities[0].id).toBe('MAL-2026-1234'); + }); + + it('treats advisories with a MAL-* alias as CRITICAL even when the id is a GHSA', async () => { + mockFetch.mockImplementation(() => + Promise.resolve( + osvRawResponse([ + { id: 'GHSA-xxxx-yyyy-zzzz', aliases: ['MAL-2026-9999'], summary: 'malware' }, + ]) + ) + ); + + const result = await checkPackageVulnerabilities('evil-pkg', '1.0.0', 'npm'); + + expect(result.status).toBe('success'); + if (result.status !== 'success') return; + expect(result.vulnerabilities[0].severity).toBe('CRITICAL'); + }); + + it('keeps non-MAL advisories without severity data as UNKNOWN (not CRITICAL)', async () => { + mockFetch.mockImplementation(() => + Promise.resolve(osvRawResponse([{ id: 'GHSA-aaaa-bbbb-cccc', summary: 'unscored' }])) + ); + + const result = await checkPackageVulnerabilities('some-pkg', '1.0.0', 'npm'); + + expect(result.status).toBe('success'); + if (result.status !== 'success') return; + expect(result.vulnerabilities[0].severity).toBe('UNKNOWN'); + }); +}); + describe('checkPackageVulnerabilities — version resolution', () => { it('resolves npm latest from registry when no version given, then queries OSV with that version', async () => { const calls: Array<{ url: string; body?: string }> = []; diff --git a/src/osv.ts b/src/osv.ts index 78565c4..b149f2c 100644 --- a/src/osv.ts +++ b/src/osv.ts @@ -50,7 +50,19 @@ function labelFromCvssScore(scoreNum: number): Vulnerability['severity'] { return 'UNKNOWN'; } +// OpenSSF malicious-package advisories 
(MAL-*) identify known malware but +// usually carry no CVSS score. Check the original id and aliases, not the +// CVE-preferring chooseId() result. +function isMalwareAdvisory(v: OSVVulnerability): boolean { + if (typeof v.id === 'string' && v.id.startsWith('MAL-')) return true; + const aliases = Array.isArray(v.aliases) ? v.aliases : []; + return aliases.some(a => typeof a === 'string' && a.startsWith('MAL-')); +} + function coerceSeverity(v: OSVVulnerability): Vulnerability['severity'] { + // Known malware is always critical, regardless of missing severity data. + if (isMalwareAdvisory(v)) return 'CRITICAL'; + if (Array.isArray(v.severity) && v.severity.length > 0) { let best: Vulnerability['severity'] = 'UNKNOWN'; const order: Record = { From 9deb89f787a0d8416d407ca1a2640cf3c26985c6 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 11 Jun 2026 15:35:12 +0000 Subject: [PATCH 2/2] feat(typosquat): detect near-miss names of popular npm/PyPI packages Typosquatting is the most common package fraud vector and had no direct detection. New offline check compares installed package names against an embedded curated list of popular npm/PyPI packages using bounded Damerau-Levenshtein distance (transpositions like 'lodahs' count as one edit). Near-misses raise a HIGH supply chain signal, escalating the decision to ask with a 'Did you mean ...?' hint. - distance 1 for names >= 4 chars, distance 2 only for >= 8 chars (keeps short names from false-positiving) - PyPI names are PEP 503-normalized before comparison - exact matches with popular packages are never flagged - runs fully offline: no extra latency, no new failure mode Implements PRO-374. 
https://claude.ai/code/session_01QTEvxicjPZXdx2hvmvnmzH --- src/data/popular-packages.ts | 138 +++++++++++++++++++++++++++++++++++ src/index.ts | 7 ++ src/registry.ts | 2 +- src/typosquat.test.ts | 97 ++++++++++++++++++++++++ src/typosquat.ts | 88 ++++++++++++++++++++++ 5 files changed, 331 insertions(+), 1 deletion(-) create mode 100644 src/data/popular-packages.ts create mode 100644 src/typosquat.test.ts create mode 100644 src/typosquat.ts diff --git a/src/data/popular-packages.ts b/src/data/popular-packages.ts new file mode 100644 index 0000000..99438ba --- /dev/null +++ b/src/data/popular-packages.ts @@ -0,0 +1,138 @@ +// Curated list of high-download packages — the targets typosquatters imitate. +// Names must be lowercase; PyPI names in PEP 503 normalized form (hyphens). +// Extend via PRs; a fetch-based update script is tracked separately. + +export const POPULAR_NPM_PACKAGES: ReadonlySet = new Set([ + // Core utilities + 'lodash', 'underscore', 'ramda', 'async', 'bluebird', 'tslib', 'core-js', + 'moment', 'dayjs', 'date-fns', 'luxon', 'uuid', 'nanoid', 'slugify', + 'semver', 'minimist', 'yargs', 'commander', 'inquirer', 'chalk', 'debug', + 'dotenv', 'classnames', 'prop-types', 'immer', 'qs', 'query-string', + 'picocolors', 'kleur', 'colors', 'color', 'ansi-colors', 'strip-ansi', + 'string-width', 'wrap-ansi', 'supports-color', 'ora', 'boxen', 'figlet', + 'execa', 'shelljs', 'cross-spawn', 'cross-env', 'concurrently', 'npm-run-all', + 'rimraf', 'mkdirp', 'glob', 'globby', 'fast-glob', 'minimatch', 'micromatch', + 'picomatch', 'ignore', 'chokidar', 'fs-extra', 'del', 'open', 'tar', + 'archiver', 'jszip', 'adm-zip', 'extract-zip', 'unzipper', + // Frameworks & view layers + 'react', 'react-dom', 'react-native', 'next', 'vue', 'nuxt', 'svelte', + 'angular', 'rxjs', 'redux', 'react-redux', 'zustand', 'mobx', 'reselect', + 'react-router', 'react-router-dom', 'expo', 'electron', 'ionic', 'cordova', + 'jquery', 'bootstrap', 'styled-components', 'tailwindcss', 
'postcss', + 'autoprefixer', 'sass', 'less', 'stylus', + // Servers & networking + 'express', 'koa', 'fastify', 'hapi', 'axios', 'node-fetch', 'got', + 'superagent', 'request', 'undici', 'ws', 'socket.io', 'cors', 'body-parser', + 'cookie-parser', 'express-session', 'multer', 'formidable', 'busboy', + 'form-data', 'mime', 'mime-types', 'file-type', 'http-server', 'serve', + 'json-server', 'nodemailer', 'helmet', 'morgan', 'compression', + // Auth & crypto + 'passport', 'jsonwebtoken', 'bcrypt', 'bcryptjs', 'crypto-js', 'argon2', + // Logging + 'winston', 'pino', 'bunyan', 'log4js', 'signale', 'consola', 'loglevel', + // Databases & ORMs + 'mongoose', 'mongodb', 'mysql', 'mysql2', 'pg', 'sqlite3', 'better-sqlite3', + 'redis', 'ioredis', 'sequelize', 'typeorm', 'prisma', 'knex', 'level', + // GraphQL & validation + 'graphql', 'apollo-server', 'ajv', 'joi', 'yup', 'zod', 'validator', + 'class-validator', + // Build tools & compilers + 'typescript', 'webpack', 'webpack-cli', 'webpack-dev-server', 'vite', + 'rollup', 'esbuild', 'parcel', 'babel-loader', 'ts-node', 'tsx', 'swc', + 'terser', 'uglify-js', 'source-map', 'source-map-support', 'acorn', + // Linting & formatting + 'eslint', 'prettier', 'stylelint', 'husky', 'lint-staged', 'standard', + 'eslint-plugin-react', 'eslint-plugin-import', 'eslint-config-prettier', + // Testing + 'jest', 'vitest', 'mocha', 'chai', 'sinon', 'supertest', 'cypress', + 'playwright', 'puppeteer', 'jsdom', 'cheerio', 'karma', 'jasmine', 'ava', + 'nyc', 'c8', 'nodemon', 'faker', + // Data & files + 'yaml', 'js-yaml', 'toml', 'ini', 'xml2js', 'fast-xml-parser', 'csv-parse', + 'csv-parser', 'papaparse', 'xlsx', 'exceljs', 'pdfkit', 'pdf-lib', 'sharp', + 'jimp', 'canvas', 'marked', 'markdown-it', 'js-beautify', + // Cloud & APIs + 'aws-sdk', 'firebase', 'firebase-admin', 'stripe', 'twilio', 'openai', + 'langchain', 'ethers', 'web3', 'discord.js', 'telegraf', 'octokit', + 'simple-git', 'pm2', + // Config & misc + 'cosmiconfig', 'configstore', 
'conf', 'rc', 'update-notifier', 'zx', + 'eventemitter3', 'readable-stream', 'safe-buffer', 'buffer', 'events', + 'node-gyp', 'bindings', 'node-addon-api', 'progress', 'cli-progress', + 'cli-table3', 'table', 'listr2', 'regenerator-runtime', 'whatwg-fetch', + 'isomorphic-fetch', 'abort-controller', 'path-to-regexp', 'url-parse', + 'big.js', 'decimal.js', 'mathjs', 'numeral', + // Popular scoped packages + '@babel/core', '@babel/cli', '@babel/preset-env', '@babel/preset-react', + '@babel/preset-typescript', '@babel/runtime', '@types/node', '@types/react', + '@types/react-dom', '@types/express', '@types/lodash', '@types/jest', + '@typescript-eslint/parser', '@typescript-eslint/eslint-plugin', + '@apollo/client', '@aws-sdk/client-s3', '@sendgrid/mail', '@slack/web-api', + '@octokit/rest', '@changesets/cli', '@anthropic-ai/sdk', +]); + +export const POPULAR_PYPI_PACKAGES: ReadonlySet = new Set([ + // Core / packaging + 'pip', 'setuptools', 'wheel', 'virtualenv', 'pipenv', 'poetry', 'build', + 'twine', 'packaging', 'typing-extensions', 'importlib-metadata', 'zipp', + 'filelock', 'platformdirs', 'six', 'cython', + // HTTP & networking + 'requests', 'urllib3', 'httpx', 'aiohttp', 'websockets', 'certifi', 'idna', + 'charset-normalizer', 'requests-oauthlib', 'oauthlib', + // Data science & ML + 'numpy', 'pandas', 'scipy', 'matplotlib', 'seaborn', 'scikit-learn', + 'scikit-image', 'tensorflow', 'torch', 'torchvision', 'keras', + 'transformers', 'datasets', 'tokenizers', 'huggingface-hub', 'openai', + 'anthropic', 'langchain', 'tiktoken', 'nltk', 'spacy', 'gensim', 'numba', + 'joblib', 'plotly', 'bokeh', 'streamlit', 'gradio', 'xgboost', 'lightgbm', + // Imaging & media + 'pillow', 'opencv-python', 'imageio', 'moviepy', 'yt-dlp', + // Web frameworks + 'flask', 'django', 'fastapi', 'starlette', 'uvicorn', 'gunicorn', + 'tornado', 'sanic', 'bottle', 'celery', 'jinja2', 'markupsafe', 'werkzeug', + 'itsdangerous', 'blinker', + // Databases + 'sqlalchemy', 'alembic', 'psycopg2', 
'psycopg2-binary', 'pymysql', + 'mysqlclient', 'pymongo', 'motor', 'redis', 'elasticsearch', 'peewee', + 'asyncpg', + // CLI & terminal + 'click', 'typer', 'rich', 'tqdm', 'colorama', 'termcolor', 'tabulate', + 'fire', 'prompt-toolkit', + // Parsing & scraping + 'beautifulsoup4', 'lxml', 'html5lib', 'soupsieve', 'scrapy', 'selenium', + 'playwright', 'feedparser', 'markdown', 'pyyaml', 'toml', 'tomli', + 'jsonschema', 'xmltodict', 'regex', 'chardet', + // Validation & serialization + 'pydantic', 'marshmallow', 'attrs', 'cattrs', 'orjson', 'ujson', + 'simplejson', 'msgpack', 'protobuf', 'grpcio', + // Auth & crypto + 'cryptography', 'pyjwt', 'pyopenssl', 'paramiko', 'bcrypt', 'passlib', + // Testing & QA + 'pytest', 'pytest-cov', 'pytest-asyncio', 'pytest-mock', 'tox', 'nox', + 'coverage', 'hypothesis', 'faker', 'factory-boy', 'mock', 'responses', + 'freezegun', + // Linting & formatting + 'black', 'flake8', 'pylint', 'isort', 'mypy', 'ruff', 'autopep8', 'yapf', + 'bandit', 'pre-commit', + // Docs + 'sphinx', 'mkdocs', 'mkdocs-material', + // Date & time + 'python-dateutil', 'pytz', 'tzdata', 'arrow', 'pendulum', 'dateparser', + 'humanize', 'croniter', + // Files & office + 'openpyxl', 'xlrd', 'xlsxwriter', 'python-docx', 'pypdf', 'pypdf2', + 'reportlab', 'pdfminer-six', + // Cloud & APIs + 'boto3', 'botocore', 'awscli', 's3transfer', 'azure-storage-blob', + 'google-cloud-storage', 'google-api-python-client', 'firebase-admin', + 'stripe', 'twilio', 'sendgrid', 'slack-sdk', 'discord-py', 'tweepy', + 'kafka-python', 'pika', 'paho-mqtt', + // Config & env + 'python-dotenv', 'environs', 'dynaconf', 'watchdog', 'schedule', + 'apscheduler', 'loguru', 'structlog', 'sentry-sdk', 'psutil', + // Text & misc + 'python-slugify', 'unidecode', 'validators', 'phonenumbers', + 'email-validator', 'rapidfuzz', 'fuzzywuzzy', 'more-itertools', + 'cachetools', 'diskcache', 'dill', 'cloudpickle', 'jupyter', 'notebook', + 'jupyterlab', 'ipython', 'ipykernel', 'nbconvert', +]); diff --git 
a/src/index.ts b/src/index.ts index 4f730a3..e2ec929 100644 --- a/src/index.ts +++ b/src/index.ts @@ -24,6 +24,7 @@ import { makeDecision, makeFullDecision, type Vulnerability } from './decision.j import { parseInstallCommand } from './parser.js'; import { checkPackageVulnerabilities, type Vulnerability as OSVVulnerability, type CheckResult } from './osv.js'; import { checkRegistryMetadata, type SupplyChainSignal } from './registry.js'; +import { checkTyposquat } from './typosquat.js'; // Map OSV severity to decision engine severity function mapSeverity(osvSeverity: OSVVulnerability['severity']): Vulnerability['severity'] { @@ -212,6 +213,12 @@ async function main() { } } + // Typosquat detection runs offline against the embedded popular-package list + for (const pkg of checkablePackages) { + const squat = checkTyposquat(pkg.name, pkg.ecosystem); + if (squat) allSignals.push(squat); + } + // Make decision based on CVE vulnerabilities + supply chain signals let { decision, reason } = makeFullDecision(allVulnerabilities, allSignals); diff --git a/src/registry.ts b/src/registry.ts index 229d358..59c15ee 100644 --- a/src/registry.ts +++ b/src/registry.ts @@ -7,7 +7,7 @@ const LOW_DOWNLOAD_THRESHOLD = 100; const REGISTRY_TIMEOUT_MS = 3000; export interface SupplyChainSignal { - type: 'version-quarantine' | 'new-package' | 'low-downloads'; + type: 'version-quarantine' | 'new-package' | 'low-downloads' | 'typosquat'; severity: 'HIGH' | 'MEDIUM'; detail: string; suggestion?: string; diff --git a/src/typosquat.test.ts b/src/typosquat.test.ts new file mode 100644 index 0000000..d07ded7 --- /dev/null +++ b/src/typosquat.test.ts @@ -0,0 +1,97 @@ +import { describe, it, expect } from 'vitest'; +import { checkTyposquat, boundedEditDistance } from './typosquat.js'; + +describe('boundedEditDistance', () => { + it('computes exact distances within the bound', () => { + expect(boundedEditDistance('lodash', 'lodash', 2)).toBe(0); + expect(boundedEditDistance('lodahs', 'lodash', 
2)).toBe(1); // transposition = 1 edit (Damerau) + expect(boundedEditDistance('lodas', 'lodash', 2)).toBe(1); + expect(boundedEditDistance('ldoahs', 'lodash', 2)).toBe(2); // two transpositions + }); + + it('returns max+1 when the distance exceeds the bound', () => { + expect(boundedEditDistance('completely', 'different', 2)).toBe(3); + expect(boundedEditDistance('ab', 'abcdef', 2)).toBe(3); // length diff shortcut + }); +}); + +describe('checkTyposquat — npm', () => { + it('flags a transposition typo of a popular package', () => { + const signal = checkTyposquat('lodahs', 'npm'); + expect(signal).not.toBeNull(); + expect(signal!.type).toBe('typosquat'); + expect(signal!.severity).toBe('HIGH'); + expect(signal!.detail).toContain('lodash'); + expect(signal!.suggestion).toContain('lodash'); + }); + + it('flags a single-character omission', () => { + const signal = checkTyposquat('expres', 'npm'); + expect(signal).not.toBeNull(); + expect(signal!.detail).toContain('express'); + }); + + it('does not flag the popular package itself', () => { + expect(checkTyposquat('lodash', 'npm')).toBeNull(); + expect(checkTyposquat('express', 'npm')).toBeNull(); + expect(checkTyposquat('react', 'npm')).toBeNull(); + }); + + it('does not flag popular scoped packages', () => { + expect(checkTyposquat('@types/node', 'npm')).toBeNull(); + expect(checkTyposquat('@babel/core', 'npm')).toBeNull(); + }); + + it('flags typos within a scoped package name', () => { + const signal = checkTyposquat('@typse/node', 'npm'); + expect(signal).not.toBeNull(); + expect(signal!.detail).toContain('@types/node'); + }); + + it('ignores names that are far from every popular package', () => { + expect(checkTyposquat('my-internal-company-toolkit', 'npm')).toBeNull(); + }); + + it('skips very short names entirely (too noisy)', () => { + expect(checkTyposquat('vu', 'npm')).toBeNull(); + expect(checkTyposquat('rxt', 'npm')).toBeNull(); + }); + + it('flags 1-edit variants of short popular names', () => { + // 
"reactt" (insertion) and "raect" (transposition) are both 1 edit from "react" + expect(checkTyposquat('reactt', 'npm')).not.toBeNull(); + expect(checkTyposquat('raect', 'npm')).not.toBeNull(); + }); + + it('does not flag 2-edit variants of names shorter than 8 chars', () => { + // distance 2 from "react", candidate too short for the 2-edit threshold + expect(checkTyposquat('raecht', 'npm')).toBeNull(); + }); +}); + +describe('checkTyposquat — pypi', () => { + it('flags a transposition typo of requests', () => { + const signal = checkTyposquat('reqeusts', 'pypi'); + expect(signal).not.toBeNull(); + expect(signal!.detail).toContain('requests'); + }); + + it('applies PEP 503 normalization before matching', () => { + // Django (capitalized) IS the popular package "django" + expect(checkTyposquat('Django', 'pypi')).toBeNull(); + // python_dateutil normalizes to python-dateutil (popular) — not a squat + expect(checkTyposquat('python_dateutil', 'pypi')).toBeNull(); + }); + + it('flags near-misses of normalized names', () => { + const signal = checkTyposquat('python-dateutils', 'pypi'); + expect(signal).not.toBeNull(); + expect(signal!.detail).toContain('python-dateutil'); + }); +}); + +describe('checkTyposquat — homebrew', () => { + it('is skipped for homebrew packages', () => { + expect(checkTyposquat('wgett', 'homebrew')).toBeNull(); + }); +}); diff --git a/src/typosquat.ts b/src/typosquat.ts new file mode 100644 index 0000000..2d39011 --- /dev/null +++ b/src/typosquat.ts @@ -0,0 +1,88 @@ +import type { Ecosystem } from './osv.js'; +import type { SupplyChainSignal } from './registry.js'; +import { POPULAR_NPM_PACKAGES, POPULAR_PYPI_PACKAGES } from './data/popular-packages.js'; + +// Names shorter than this are too noisy for edit-distance comparison. +const MIN_NAME_LENGTH = 4; +// Two edits are only meaningful on longer names; one edit on short names +// already produces false positives ("vue" vs "vie"). 
const TWO_EDIT_MIN_LENGTH = 8;

/**
 * Normalizes a PyPI project name in the PEP 503 style: lowercases it and
 * collapses every run of `-`, `_` and `.` into a single `-`.
 *
 * @param name - Package name as typed by the user.
 * @returns The normalized name used for comparison against the popular list.
 */
function normalizePypiName(name: string): string {
  return name.toLowerCase().replace(/[-_.]+/g, '-');
}

/**
 * Bounded Damerau-Levenshtein distance (optimal string alignment variant).
 *
 * Insertions, deletions, substitutions and transpositions of two adjacent
 * characters each count as one edit, so "lodahs" is 1 edit from "lodash".
 *
 * @param a - First string.
 * @param b - Second string.
 * @param max - Largest distance the caller cares about.
 * @returns The exact distance when it is `<= max`; otherwise exactly
 *   `max + 1`, so callers can rely on a single sentinel value.
 */
export function boundedEditDistance(a: string, b: string, max: number): number {
  if (a === b) return 0;
  // The length difference is a lower bound on the distance.
  if (Math.abs(a.length - b.length) > max) return max + 1;

  // Only three rows are ever needed: the current one, the previous one and
  // (for transpositions) the one before that.
  let prevPrev: number[] = [];
  let prev: number[] = Array.from({ length: b.length + 1 }, (_, i) => i);

  for (let i = 1; i <= a.length; i++) {
    const curr: number[] = new Array<number>(b.length + 1);
    curr[0] = i;
    let rowMin = i;
    for (let j = 1; j <= b.length; j++) {
      const cost = a[i - 1] === b[j - 1] ? 0 : 1;
      // deletion, insertion, substitution/match
      let d = Math.min(prev[j] + 1, curr[j - 1] + 1, prev[j - 1] + cost);
      // Adjacent transposition; prevPrev is populated whenever i > 1.
      if (i > 1 && j > 1 && a[i - 1] === b[j - 2] && a[i - 2] === b[j - 1]) {
        d = Math.min(d, prevPrev[j - 2] + 1);
      }
      curr[j] = d;
      if (d < rowMin) rowMin = d;
    }
    // Once every cell of a row exceeds the bound, no later cell can come
    // back under it, so stop scanning.
    if (rowMin > max) return max + 1;
    prevPrev = prev;
    prev = curr;
  }

  // The last row can contain a small value in some other column while the
  // final cell is larger than max + 1 (e.g. "abcd" vs "bcdxy" with max = 1
  // yields 3). Clamp so the "max + 1 when exceeded" contract always holds.
  return Math.min(prev[b.length], max + 1);
}

/**
 * Chooses the edit-distance threshold for a candidate name based on its
 * length.
 *
 * @param name - Normalized candidate package name.
 * @returns 2 for names of at least TWO_EDIT_MIN_LENGTH characters, 1 for
 *   names of at least MIN_NAME_LENGTH characters, and 0 (check disabled)
 *   for anything shorter.
 */
function maxEditDistance(name: string): number {
  if (name.length >= TWO_EDIT_MIN_LENGTH) return 2;
  if (name.length >= MIN_NAME_LENGTH) return 1;
  return 0;
}

/**
 * Offline typosquat check: is `name` a near-miss of a popular package it is
 * not identical to?
 *
 * @param name - Package name exactly as requested by the user.
 * @param ecosystem - Package ecosystem; `'homebrew'` is never checked,
 *   `'npm'` uses the npm list, anything else uses the PyPI list.
 * @returns A HIGH-severity `typosquat` signal naming the closest popular
 *   package, or `null` when the name is clean, is itself popular, or is too
 *   short to compare meaningfully.
 */
export function checkTyposquat(name: string, ecosystem: Ecosystem): SupplyChainSignal | null {
  if (ecosystem === 'homebrew') return null;

  const popular = ecosystem === 'npm' ? POPULAR_NPM_PACKAGES : POPULAR_PYPI_PACKAGES;
  const candidate = ecosystem === 'pypi' ? normalizePypiName(name) : name.toLowerCase();

  // The package IS a popular one — by definition not a squat of itself.
  if (popular.has(candidate)) return null;

  const maxDist = maxEditDistance(candidate);
  if (maxDist === 0) return null;

  let bestMatch: string | undefined;
  let bestDist = maxDist + 1;
  for (const popularName of popular) {
    // Very short popular names are skipped as comparison targets.
    if (popularName.length < MIN_NAME_LENGTH) continue;
    const dist = boundedEditDistance(candidate, popularName, maxDist);
    if (dist < bestDist) {
      bestDist = dist;
      bestMatch = popularName;
      if (dist === 1) break; // cannot get closer than 1 (0 is excluded above)
    }
  }

  if (!bestMatch || bestDist > maxDist) return null;

  const edits = bestDist === 1 ? '1 edit' : `${bestDist} edits`;
  return {
    type: 'typosquat',
    severity: 'HIGH',
    // Report the name as the user typed it, not the normalized form.
    detail: `"${name}" is ${edits} away from the popular package "${bestMatch}" — possible typosquat.`,
    suggestion: `Did you mean "${bestMatch}"?`,
  };
}