From d239d633b417ac6d917f6ed94b3a96213663a09a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 25 May 2026 17:56:51 +0000 Subject: [PATCH 1/4] feat: add secret engine core --- package-lock.json | 18 +++ package.json | 3 + src/cli.ts | 14 +- src/hooks/preCommit.ts | 8 +- src/hooks/scanWorker.ts | 4 +- src/rules/index.ts | 9 -- src/scan/aspects/code.ts | 11 +- src/scan/parseOptions.ts | 140 ++++++++++++++++++-- src/scan/runner.ts | 7 +- src/scan/secret/builtinRules.ts | 104 +++++++++++++++ src/scan/secret/cache.ts | 64 +++++++++ src/scan/secret/config.ts | 125 ++++++++++++++++++ src/scan/secret/engine.ts | 136 +++++++++++++++++++ src/scan/secret/entropy.ts | 66 ++++++++++ src/scan/secret/remoteRules.ts | 43 ++++++ src/scan/secret/ruleLoader.ts | 218 +++++++++++++++++++++++++++++++ src/scan/secret/types.ts | 42 ++++++ src/scan/types.ts | 6 +- src/scanner.ts | 59 +++++++-- src/types.ts | 7 + tests/cliStrings.test.ts | 2 + tests/scanOptions.test.ts | 60 ++++++++- tests/scanner.test.ts | 6 +- tests/secretEngine.cache.test.ts | 57 ++++++++ tests/secretEngine.rules.test.ts | 47 +++++++ tests/secretsExamples.test.ts | 26 +++- 26 files changed, 1224 insertions(+), 58 deletions(-) create mode 100644 src/scan/secret/builtinRules.ts create mode 100644 src/scan/secret/cache.ts create mode 100644 src/scan/secret/config.ts create mode 100644 src/scan/secret/engine.ts create mode 100644 src/scan/secret/entropy.ts create mode 100644 src/scan/secret/remoteRules.ts create mode 100644 src/scan/secret/ruleLoader.ts create mode 100644 src/scan/secret/types.ts create mode 100644 tests/secretEngine.cache.test.ts create mode 100644 tests/secretEngine.rules.test.ts diff --git a/package-lock.json b/package-lock.json index 030fe8c..7e2bda9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,6 +8,9 @@ "name": "codefence", "version": "0.1.0", "license": "ISC", + "dependencies": { + "yaml": "^2.9.0" + }, "bin": { "codefence": 
"dist/src/cli.js" }, @@ -49,6 +52,21 @@ "integrity": "sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg==", "dev": true, "license": "MIT" + }, + "node_modules/yaml": { + "version": "2.9.0", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.9.0.tgz", + "integrity": "sha512-2AvhNX3mb8zd6Zy7INTtSpl1F15HW6Wnqj0srWlkKLcpYl/gMIMJiyuGq2KeI2YFxUPjdlB+3Lc10seMLtL4cA==", + "license": "ISC", + "bin": { + "yaml": "bin.mjs" + }, + "engines": { + "node": ">= 14.6" + }, + "funding": { + "url": "https://github.com/sponsors/eemeli" + } } } } diff --git a/package.json b/package.json index 2bf5c27..83c12f7 100644 --- a/package.json +++ b/package.json @@ -53,5 +53,8 @@ }, "bin": { "codefence": "dist/src/cli.js" + }, + "dependencies": { + "yaml": "^2.9.0" } } diff --git a/src/cli.ts b/src/cli.ts index 48faad0..273a561 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -67,7 +67,7 @@ Run \`${cliInvocation("scan", "--help")}\` for scan options. `); } -function main() { +async function main() { const [command, ...rest] = process.argv.slice(2); if (command === "install") { @@ -79,7 +79,7 @@ function main() { } if (command === "pre-commit") { - process.exit(runPreCommit()); + process.exit(await runPreCommit()); } if (command === "background-scan") { @@ -97,7 +97,7 @@ function main() { console.error(`Usage: ${cliInvocation("scan-worker", "--target [--workspace ]")}`); process.exit(1); } - process.exit(runScanWorker(options)); + process.exit(await runScanWorker(options)); } if (command === "check-deps") { @@ -124,7 +124,11 @@ function main() { process.exit(0); } - process.exit(runScan(parsed)); + process.exit(await runScan(parsed)); } -main(); +void main().catch((error) => { + const message = error instanceof Error ? 
error.message : String(error); + console.error(message); + process.exit(1); +}); diff --git a/src/hooks/preCommit.ts b/src/hooks/preCommit.ts index f9360fb..6547637 100644 --- a/src/hooks/preCommit.ts +++ b/src/hooks/preCommit.ts @@ -1,5 +1,6 @@ import { getChangedFiles } from "../git"; import { runScan } from "../scan/runner"; +import { defaultSecretScanOptions } from "../scan/secret/config"; import { shouldScanFile } from "../scanner"; import { countCodeCacheHits } from "./cache"; @@ -10,7 +11,7 @@ function header(title: string): void { console.log(`\n=== ${title} ===\n`); } -export function runPreCommit(): number { +export async function runPreCommit(): Promise { const start = Date.now(); try { @@ -38,11 +39,12 @@ export function runPreCommit(): number { console.log(`Staged code files: ${codeFiles.length}`); } - const exitCode = runScan({ + const exitCode = await runScan({ staged: true, paths: [], only: null, - skip: [] + skip: [], + secret: defaultSecretScanOptions() }); const elapsed = ((Date.now() - start) / 1000).toFixed(1); diff --git a/src/hooks/scanWorker.ts b/src/hooks/scanWorker.ts index 37ae91a..a61c9c1 100644 --- a/src/hooks/scanWorker.ts +++ b/src/hooks/scanWorker.ts @@ -9,7 +9,7 @@ export interface ScanWorkerOptions { target: string; } -export function runScanWorker(options: ScanWorkerOptions): number { +export async function runScanWorker(options: ScanWorkerOptions): Promise { const workspace = path.resolve(options.workspace); const target = options.target; @@ -29,7 +29,7 @@ export function runScanWorker(options: ScanWorkerOptions): number { return 1; } - const findings = scanFile(absPath); + const findings = await scanFile(absPath, { workspace }); writeCodeCache(workspace, rel, findings); if (findings.length > 0) { diff --git a/src/rules/index.ts b/src/rules/index.ts index 58ea64f..dfb201f 100644 --- a/src/rules/index.ts +++ b/src/rules/index.ts @@ -1,15 +1,6 @@ import { Rule } from "../types"; -const secretRegex = 
/(api[_-]?key|secret|token|password)\s*[:=]\s*["'][^"']{8,}["']/i; - const baseRules: Rule[] = [ - { - id: "no-hardcoded-secret", - description: "Flags likely hardcoded secrets", - severity: "high", - test: (line) => secretRegex.test(line), - message: "Potential hardcoded secret detected" - }, { id: "no-eval", description: "Disallow dynamic code execution", diff --git a/src/scan/aspects/code.ts b/src/scan/aspects/code.ts index 923e425..94010d3 100644 --- a/src/scan/aspects/code.ts +++ b/src/scan/aspects/code.ts @@ -5,7 +5,7 @@ import { AspectOutcome, ScanAspect, ScanContext } from "../types"; export const codeAspect: ScanAspect = { id: "code", label: "Local secure-coding rules", - run(context: ScanContext): AspectOutcome { + async run(context: ScanContext): Promise { const sourceFiles = context.files .filter((file) => shouldScanFile(file, { cwd: context.cwd, allowIgnored: context.explicitPaths }) @@ -21,7 +21,10 @@ export const codeAspect: ScanAspect = { }; } - const findings = scanFiles(sourceFiles); + const findings = await scanFiles(sourceFiles, { + workspace: context.cwd, + secret: context.options.secret + }); if (findings.length === 0) { console.log(`[code] No findings in ${sourceFiles.length} file(s).`); @@ -30,8 +33,10 @@ export const codeAspect: ScanAspect = { console.error(`[code] ${findings.length} finding(s):`); for (const finding of findings) { + const confidence = finding.confidence ? ` confidence=${finding.confidence}` : ""; + const evidence = finding.evidence ? 
` evidence=${finding.evidence}` : ""; console.error( - ` ${finding.severity.toUpperCase()} ${finding.ruleId} ${finding.filePath}:${finding.line} - ${finding.message}` + ` ${finding.severity.toUpperCase()} ${finding.ruleId} ${finding.filePath}:${finding.line}${confidence} - ${finding.message}${evidence}` ); } diff --git a/src/scan/parseOptions.ts b/src/scan/parseOptions.ts index 8e99c4e..e2e32bb 100644 --- a/src/scan/parseOptions.ts +++ b/src/scan/parseOptions.ts @@ -1,5 +1,12 @@ import { cliInvocation } from "../cliName"; import { formatGitScanIgnoredPrefixes } from "./ignorePaths"; +import { + defaultSecretScanOptions, + parseConfidenceLevel, + parseDurationMs, + parsePositiveNumber +} from "./secret/config"; +import { SecretScanOptions } from "./secret/types"; import { AspectId, DEFAULT_ASPECTS, ScanOptions } from "./types"; const ASPECT_ALIASES: Record = { @@ -77,6 +84,40 @@ function collectPaths(argv: string[]): { paths: string[]; rest: string[] } { return { paths, rest }; } +function collectFlagValues(argv: string[], flag: string): { values: string[]; rest: string[] } { + const values: string[] = []; + const rest: string[] = []; + let i = 0; + + while (i < argv.length) { + const arg = argv[i]; + if (arg === flag) { + i++; + if (i >= argv.length || argv[i].startsWith("--")) { + throw new Error(`${flag} requires at least one value`); + } + while (i < argv.length && !argv[i].startsWith("--")) { + values.push(argv[i]); + i++; + } + continue; + } + if (arg.startsWith(`${flag}=`)) { + const eqValue = arg.slice(flag.length + 1).trim(); + if (!eqValue) { + throw new Error(`${flag} requires at least one value`); + } + values.push(eqValue); + i++; + continue; + } + rest.push(arg); + i++; + } + + return { values, rest }; +} + function envTrim(...keys: string[]): string | undefined { for (const key of keys) { const value = process.env[key]?.trim(); @@ -95,10 +136,68 @@ function defaultAspectsFromEnv(): AspectId[] { return parseAspectList(raw); } +function 
parseOnOff(value: string, flag: string): boolean { + const normalized = value.trim().toLowerCase(); + if (normalized === "on") { + return true; + } + if (normalized === "off") { + return false; + } + throw new Error(`${flag} must be on or off`); +} + +function parseSecretOptions(argv: string[]): { secret: SecretScanOptions; rest: string[] } { + const defaults = defaultSecretScanOptions(); + const secretRules = collectFlagValues(argv, "--secret-rules"); + const defaultRules = readFlagValue(secretRules.rest, "--secret-default-rules"); + const defaultRulesVersion = readFlagValue(defaultRules.rest, "--secret-default-rules-version"); + const updateUrl = readFlagValue(defaultRulesVersion.rest, "--secret-rules-update-url"); + const cacheTtl = readFlagValue(updateUrl.rest, "--secret-rules-cache-ttl"); + const entropyThreshold = readFlagValue(cacheTtl.rest, "--secret-entropy-threshold"); + const minLength = readFlagValue(entropyThreshold.rest, "--secret-min-length"); + const minConfidence = readFlagValue(minLength.rest, "--secret-min-confidence"); + + const secret: SecretScanOptions = { + ...defaults, + rulePaths: secretRules.values.length > 0 ? secretRules.values : defaults.rulePaths, + defaultRules: + defaultRules.value === null ? defaults.defaultRules : parseOnOff(defaultRules.value, "--secret-default-rules"), + defaultRulesVersion: + defaultRulesVersion.value === null ? defaults.defaultRulesVersion : defaultRulesVersion.value, + rulesUpdateUrl: updateUrl.value === null ? defaults.rulesUpdateUrl : updateUrl.value, + rulesRefresh: minConfidence.rest.includes("--secret-rules-refresh") || defaults.rulesRefresh, + rulesCacheTtlMs: + cacheTtl.value === null ? defaults.rulesCacheTtlMs : parseDurationMs(cacheTtl.value, defaults.rulesCacheTtlMs), + entropyThreshold: + entropyThreshold.value === null + ? 
defaults.entropyThreshold + : parsePositiveNumber( + entropyThreshold.value, + defaults.entropyThreshold, + "Secret entropy threshold" + ), + minLength: + minLength.value === null + ? defaults.minLength + : parsePositiveNumber(minLength.value, defaults.minLength, "Secret minimum length"), + minConfidence: + minConfidence.value === null + ? defaults.minConfidence + : parseConfidenceLevel(minConfidence.value, defaults.minConfidence) + }; + + return { + secret, + rest: minConfidence.rest.filter((arg) => arg !== "--secret-rules-refresh") + }; +} + export function parseScanArgv(argv: string[]): ParseScanResult { const { paths, rest: afterPaths } = collectPaths(argv); + const { secret, rest: afterSecret } = parseSecretOptions(afterPaths); - const onlyParsed = readFlagValue(afterPaths, "--only"); + const onlyParsed = readFlagValue(afterSecret, "--only"); const skipParsed = readFlagValue(onlyParsed.rest, "--skip"); if (skipParsed.rest.some((a) => a === "--help" || a === "-h")) { @@ -129,7 +228,8 @@ export function parseScanArgv(argv: string[]): ParseScanResult { staged: skipParsed.rest.includes("--staged"), paths, only, - skip + skip, + secret }; } @@ -155,11 +255,20 @@ export function printScanHelp(): void { Run local secure-coding guardrails on changed or explicit paths. 
Options: - --staged Use staged git files instead of unstaged changes - --paths Scan explicit paths (default: git-changed files) - --only Run only listed aspects (comma-separated; default: code) - --skip Skip aspects (applied after --only) - -h, --help Show this help + --staged Use staged git files instead of unstaged changes + --paths Scan explicit paths (default: git-changed files) + --only Run only listed aspects (comma-separated; default: code) + --skip Skip aspects (applied after --only) + --secret-rules Load Semgrep-style secret rules from YAML files or directories + --secret-default-rules Enable bundled secret rules (default: on) + --secret-default-rules-version Select bundled secret rules version + --secret-rules-update-url Download remote secret rule bundle + --secret-rules-refresh Force remote secret rule refresh + --secret-rules-cache-ttl Remote rule cache TTL (for example 24h) + --secret-entropy-threshold Entropy threshold for generic secret detection + --secret-min-length Minimum candidate length for entropy checks + --secret-min-confidence Filter secret findings below low|medium|high confidence + -h, --help Show this help Git-based scans skip: ${formatGitScanIgnoredPrefixes()} (explicit --paths still scans those files) @@ -168,12 +277,21 @@ Aspects (default: code): code Local secure-coding rules on changed source files Environment: - CODEFENCE_ASPECTS Default aspect list (comma-separated; DSEC_ASPECTS accepted) - CODEFENCE_ONLY Same as --only (DSEC_ONLY accepted) - CODEFENCE_SKIP Same as --skip (DSEC_SKIP accepted) + CODEFENCE_ASPECTS Default aspect list (comma-separated; DSEC_ASPECTS accepted) + CODEFENCE_ONLY Same as --only (DSEC_ONLY accepted) + CODEFENCE_SKIP Same as --skip (DSEC_SKIP accepted) + CODEFENCE_SECRET_RULES Default Semgrep-style secret rule paths + CODEFENCE_SECRET_DEFAULT_RULES Same as --secret-default-rules + CODEFENCE_SECRET_DEFAULT_RULES_VERSION Same as --secret-default-rules-version + CODEFENCE_SECRET_RULES_UPDATE_URL Same as 
--secret-rules-update-url + CODEFENCE_SECRET_RULES_CACHE_TTL Same as --secret-rules-cache-ttl + CODEFENCE_SECRET_ENTROPY_THRESHOLD Same as --secret-entropy-threshold + CODEFENCE_SECRET_MIN_LENGTH Same as --secret-min-length + CODEFENCE_SECRET_MIN_CONFIDENCE Same as --secret-min-confidence Examples: ${cliInvocation("scan", "--staged")} - ${cliInvocation("scan", "--paths src/app.ts")} + ${cliInvocation("scan", "--paths src/app.ts --secret-rules .codefence/rules")} + ${cliInvocation("scan", "--paths src config --secret-entropy-threshold 4.2 --secret-min-confidence medium")} `); } diff --git a/src/scan/runner.ts b/src/scan/runner.ts index c34e1c4..01931eb 100644 --- a/src/scan/runner.ts +++ b/src/scan/runner.ts @@ -20,11 +20,12 @@ export function buildScanContext(options: ScanOptions): ScanContext { cwd, files, staged: options.staged, - explicitPaths: options.paths.length > 0 + explicitPaths: options.paths.length > 0, + options }; } -export function runScan(options: ScanOptions): number { +export async function runScan(options: ScanOptions): Promise { const aspects = resolveAspects(options); if (aspects.length === 0) { @@ -42,7 +43,7 @@ export function runScan(options: ScanOptions): number { for (const aspectId of aspects) { const aspect = ASPECT_REGISTRY[aspectId]; console.log(`\n--- ${aspect.label} (${aspect.id}) ---`); - const outcome = aspect.run(context); + const outcome = await aspect.run(context); outcomes.push(outcome); const statusLabel = outcome.status.toUpperCase(); diff --git a/src/scan/secret/builtinRules.ts b/src/scan/secret/builtinRules.ts new file mode 100644 index 0000000..43cf0ab --- /dev/null +++ b/src/scan/secret/builtinRules.ts @@ -0,0 +1,104 @@ +import { BUILTIN_SECRET_RULES_VERSION, SecretRule } from "./types"; + +export const builtinSecretRules: SecretRule[] = [ + { + id: "secret-github-token", + description: "Detect GitHub personal access tokens", + message: "Potential GitHub token detected", + severity: "high", + confidence: "high", + 
remediation: "Remove the token, rotate it, and load credentials from environment or secret storage.", + patterns: [{ type: "regex", value: "\\bgh[pousr]_[A-Za-z0-9]{36,255}\\b" }], + source: "builtin", + sourceName: `builtin@${BUILTIN_SECRET_RULES_VERSION}` + }, + { + id: "secret-gitlab-token", + description: "Detect GitLab tokens", + message: "Potential GitLab token detected", + severity: "high", + confidence: "high", + remediation: "Remove the token, rotate it, and move it to a managed secret store.", + patterns: [{ type: "regex", value: "\\bglpat-[A-Za-z0-9_-]{20,255}\\b" }], + source: "builtin", + sourceName: `builtin@${BUILTIN_SECRET_RULES_VERSION}` + }, + { + id: "secret-stripe-key", + description: "Detect Stripe API keys", + message: "Potential Stripe API key detected", + severity: "high", + confidence: "high", + remediation: "Replace embedded Stripe keys with environment-based configuration and rotate exposed keys.", + patterns: [{ type: "regex", value: "\\bsk_(?:live|test)_[A-Za-z0-9]{16,}\\b" }], + source: "builtin", + sourceName: `builtin@${BUILTIN_SECRET_RULES_VERSION}` + }, + { + id: "secret-bearer-token", + description: "Detect bearer tokens", + message: "Potential bearer token detected", + severity: "high", + confidence: "medium", + remediation: "Avoid embedding bearer tokens in source files; inject them from runtime configuration.", + patterns: [{ type: "regex", value: "\\bBearer\\s+[A-Za-z0-9._\\-+/=]{16,}\\b" }], + source: "builtin", + sourceName: `builtin@${BUILTIN_SECRET_RULES_VERSION}` + }, + { + id: "secret-private-key", + description: "Detect PEM private key material", + message: "Potential private key material detected", + severity: "high", + confidence: "high", + remediation: "Remove private keys from source control immediately and rotate any exposed key material.", + patterns: [{ type: "regex", value: "-----BEGIN (?:RSA |DSA |EC |OPENSSH |PGP )?PRIVATE KEY-----" }], + source: "builtin", + sourceName: 
`builtin@${BUILTIN_SECRET_RULES_VERSION}` + }, + { + id: "secret-password-assignment", + description: "Detect password-like assignments", + message: "Potential hardcoded password detected", + severity: "high", + confidence: "medium", + remediation: "Do not commit passwords; use environment variables or a secret manager instead.", + patterns: [ + { + type: "regex", + value: + "(?:password|passwd|pwd)\\s*[:=]\\s*[\"'][^\"'\\n]{8,}[\"']" + } + ], + source: "builtin", + sourceName: `builtin@${BUILTIN_SECRET_RULES_VERSION}` + }, + { + id: "secret-uri-credentials", + description: "Detect credentials embedded in URIs", + message: "Potential credentials embedded in URI detected", + severity: "high", + confidence: "high", + remediation: "Move credentials out of URIs and into environment or dedicated secret configuration.", + patterns: [{ type: "regex", value: "\\b[a-z][a-z0-9+.-]*://[^\\s:@/]+:[^\\s:@/]+@" }], + source: "builtin", + sourceName: `builtin@${BUILTIN_SECRET_RULES_VERSION}` + }, + { + id: "no-hardcoded-secret", + description: "Detect generic token-style assignments", + message: "Potential hardcoded secret detected", + severity: "high", + confidence: "medium", + remediation: "Replace embedded credentials with runtime-configured secrets.", + patterns: [ + { + type: "regex", + value: + "(?:api[_-]?key|secret|token|access[_-]?token|client[_-]?secret)\\s*[:=]\\s*[\"'][A-Za-z0-9_\\-+/=]{12,}[\"']" + } + ], + source: "builtin", + sourceName: `builtin@${BUILTIN_SECRET_RULES_VERSION}` + } +]; diff --git a/src/scan/secret/cache.ts b/src/scan/secret/cache.ts new file mode 100644 index 0000000..57cc148 --- /dev/null +++ b/src/scan/secret/cache.ts @@ -0,0 +1,64 @@ +import crypto from "node:crypto"; +import fs from "node:fs"; +import path from "node:path"; +import { cacheDir, ensureDir } from "../../hooks/paths"; + +interface CachedSecretRules { + version: 1; + url: string; + fetchedAt: string; + ttlMs: number; + sha256: string; + body: string; +} + +function 
hashContent(content: string): string { + return crypto.createHash("sha256").update(content).digest("hex"); +} + +function cacheFilePath(workspace: string, url: string): string { + const key = crypto.createHash("sha256").update(url).digest("hex"); + return path.join(cacheDir(workspace), "secret-rules", `${key}.json`); +} + +export function readCachedSecretRules(workspace: string, url: string): CachedSecretRules | null { + const filePath = cacheFilePath(workspace, url); + if (!fs.existsSync(filePath)) { + return null; + } + + try { + const entry = JSON.parse(fs.readFileSync(filePath, "utf8")) as CachedSecretRules; + if (entry.version !== 1 || entry.url !== url || hashContent(entry.body) !== entry.sha256) { + return null; + } + return entry; + } catch { + return null; + } +} + +export function isSecretRulesCacheFresh(entry: CachedSecretRules, now = Date.now()): boolean { + return new Date(entry.fetchedAt).getTime() + entry.ttlMs > now; +} + +export function writeCachedSecretRules( + workspace: string, + url: string, + body: string, + ttlMs: number +): CachedSecretRules { + const entry: CachedSecretRules = { + version: 1, + url, + fetchedAt: new Date().toISOString(), + ttlMs, + sha256: hashContent(body), + body + }; + + const filePath = cacheFilePath(workspace, url); + ensureDir(path.dirname(filePath)); + fs.writeFileSync(filePath, JSON.stringify(entry, null, 2), "utf8"); + return entry; +} diff --git a/src/scan/secret/config.ts b/src/scan/secret/config.ts new file mode 100644 index 0000000..86fb412 --- /dev/null +++ b/src/scan/secret/config.ts @@ -0,0 +1,125 @@ +import path from "node:path"; +import { ConfidenceLevel } from "../../types"; +import { + DEFAULT_SECRET_ENTROPY_THRESHOLD, + DEFAULT_SECRET_MIN_LENGTH, + DEFAULT_SECRET_RULES_CACHE_TTL_MS, + SecretScanOptions +} from "./types"; + +function envTrim(key: string): string | undefined { + const value = process.env[key]?.trim(); + return value ? 
value : undefined; +} + +function parseBooleanSetting(value: string | undefined, defaultValue: boolean): boolean { + if (!value) { + return defaultValue; + } + + const normalized = value.trim().toLowerCase(); + if (["1", "true", "on", "yes"].includes(normalized)) { + return true; + } + if (["0", "false", "off", "no"].includes(normalized)) { + return false; + } + throw new Error(`Invalid boolean setting: ${value}`); +} + +export function parseConfidenceLevel(value: string | undefined, defaultValue: ConfidenceLevel): ConfidenceLevel { + if (!value) { + return defaultValue; + } + + const normalized = value.trim().toLowerCase(); + if (normalized === "low" || normalized === "medium" || normalized === "high") { + return normalized; + } + throw new Error(`Invalid confidence level: ${value}`); +} + +export function confidenceWeight(value: ConfidenceLevel): number { + if (value === "high") { + return 3; + } + if (value === "medium") { + return 2; + } + return 1; +} + +export function parsePositiveNumber( + value: string | undefined, + defaultValue: number, + label: string +): number { + if (!value) { + return defaultValue; + } + + const parsed = Number.parseFloat(value); + if (!Number.isFinite(parsed) || parsed <= 0) { + throw new Error(`${label} must be a positive number`); + } + return parsed; +} + +export function parseDurationMs(value: string | undefined, defaultValue: number): number { + if (!value) { + return defaultValue; + } + + const normalized = value.trim().toLowerCase(); + const match = normalized.match(/^(\d+(?:\.\d+)?)(ms|s|m|h|d)?$/); + if (!match) { + throw new Error(`Invalid duration: ${value}`); + } + + const amount = Number.parseFloat(match[1]); + const unit = match[2] ?? 
"ms"; + const multipliers: Record = { + ms: 1, + s: 1000, + m: 60_000, + h: 3_600_000, + d: 86_400_000 + }; + return amount * multipliers[unit]; +} + +export function parseSecretRulePaths(value: string | undefined): string[] { + if (!value) { + return []; + } + + return value + .split(new RegExp(`[${path.delimiter === ";" ? ";," : ",:"}]`)) + .map((part) => part.trim()) + .filter(Boolean); +} + +export function defaultSecretScanOptions(): SecretScanOptions { + return { + rulePaths: parseSecretRulePaths(envTrim("CODEFENCE_SECRET_RULES")), + defaultRules: parseBooleanSetting(envTrim("CODEFENCE_SECRET_DEFAULT_RULES"), true), + defaultRulesVersion: envTrim("CODEFENCE_SECRET_DEFAULT_RULES_VERSION") ?? null, + rulesUpdateUrl: envTrim("CODEFENCE_SECRET_RULES_UPDATE_URL") ?? null, + rulesRefresh: parseBooleanSetting(envTrim("CODEFENCE_SECRET_RULES_REFRESH"), false), + rulesCacheTtlMs: parseDurationMs( + envTrim("CODEFENCE_SECRET_RULES_CACHE_TTL"), + DEFAULT_SECRET_RULES_CACHE_TTL_MS + ), + entropyThreshold: parsePositiveNumber( + envTrim("CODEFENCE_SECRET_ENTROPY_THRESHOLD"), + DEFAULT_SECRET_ENTROPY_THRESHOLD, + "Secret entropy threshold" + ), + minLength: parsePositiveNumber( + envTrim("CODEFENCE_SECRET_MIN_LENGTH"), + DEFAULT_SECRET_MIN_LENGTH, + "Secret minimum length" + ), + minConfidence: parseConfidenceLevel(envTrim("CODEFENCE_SECRET_MIN_CONFIDENCE"), "low") + }; +} diff --git a/src/scan/secret/engine.ts b/src/scan/secret/engine.ts new file mode 100644 index 0000000..46bca5f --- /dev/null +++ b/src/scan/secret/engine.ts @@ -0,0 +1,136 @@ +import { ConfidenceLevel, Finding } from "../../types"; +import { confidenceWeight } from "./config"; +import { findEntropySecrets } from "./entropy"; +import { loadSecretRules } from "./ruleLoader"; +import { SecretEngineInput, SecretRule } from "./types"; + +function ruleRegex(pattern: string): RegExp { + return new RegExp(pattern, "g"); +} + +function summarizeMatch(match: string): string { + return `matched secret pattern 
(length=${match.length})`; +} + +function buildRuleFindings(filePath: string, lines: string[], rules: SecretRule[]): Finding[] { + const findings: Finding[] = []; + + lines.forEach((line, index) => { + for (const rule of rules) { + for (const pattern of rule.patterns) { + if (pattern.type === "literal") { + if (!line.includes(pattern.value)) { + continue; + } + findings.push({ + ruleId: rule.id, + message: rule.message, + filePath, + line: index + 1, + severity: rule.severity, + confidence: rule.confidence, + evidence: `matched literal secret rule from ${rule.sourceName}`, + remediation: rule.remediation, + kind: "secret", + detectionMethod: "rule" + }); + continue; + } + + const regex = ruleRegex(pattern.value); + const match = regex.exec(line); + if (!match) { + continue; + } + + findings.push({ + ruleId: rule.id, + message: rule.message, + filePath, + line: index + 1, + severity: rule.severity, + confidence: rule.confidence, + evidence: `${summarizeMatch(match[0])} via ${rule.sourceName}`, + remediation: rule.remediation, + kind: "secret", + detectionMethod: "rule" + }); + } + } + }); + + return findings; +} + +function findingKey(finding: Finding): string { + return `${finding.filePath}:${finding.line}:${finding.ruleId}:${finding.message}`; +} + +function strongerConfidence(a: ConfidenceLevel | undefined, b: ConfidenceLevel | undefined): ConfidenceLevel { + const left = a ?? "low"; + const right = b ?? "low"; + return confidenceWeight(left) >= confidenceWeight(right) ? left : right; +} + +function strongerSeverity( + left: Finding["severity"], + right: Finding["severity"] +): Finding["severity"] { + const weights: Record = { low: 1, medium: 2, high: 3 }; + return weights[left] >= weights[right] ? 
left : right; +} + +function mergeFindings(findings: Finding[]): Finding[] { + const merged = new Map(); + + for (const finding of findings) { + if ( + finding.ruleId === "secret-high-entropy" && + findings.some( + (other) => + other !== finding && + other.kind === "secret" && + other.ruleId !== "secret-high-entropy" && + other.filePath === finding.filePath && + other.line === finding.line + ) + ) { + continue; + } + + const key = findingKey(finding); + const existing = merged.get(key); + if (!existing) { + merged.set(key, finding); + continue; + } + + merged.set(key, { + ...existing, + severity: strongerSeverity(existing.severity, finding.severity), + confidence: strongerConfidence(existing.confidence, finding.confidence), + evidence: existing.evidence ?? finding.evidence, + remediation: existing.remediation ?? finding.remediation, + detectionMethod: + existing.detectionMethod === finding.detectionMethod + ? existing.detectionMethod + : "rule+entropy" + }); + } + + return [...merged.values()]; +} + +export async function scanSecretFindings(input: SecretEngineInput): Promise { + const rules = await loadSecretRules(input.workspace, input.options); + const lines = input.content.split(/\r?\n/); + const ruleFindings = buildRuleFindings(input.filePath, lines, rules); + const entropyFindings = findEntropySecrets(input.filePath, lines, input.options); + const merged = mergeFindings([...ruleFindings, ...entropyFindings]); + + return merged.filter( + (finding) => + finding.kind !== "secret" || + confidenceWeight(finding.confidence ?? 
"low") >= confidenceWeight(input.options.minConfidence) + ); +} diff --git a/src/scan/secret/entropy.ts b/src/scan/secret/entropy.ts new file mode 100644 index 0000000..bae8550 --- /dev/null +++ b/src/scan/secret/entropy.ts @@ -0,0 +1,66 @@ +import { Finding } from "../../types"; +import { SecretScanOptions } from "./types"; + +const GENERIC_ASSIGNMENT_REGEX = + /\b(?:api[_-]?key|secret|token|access[_-]?token|client[_-]?secret|password)\b\s*[:=]\s*["']([^"'\\\n]{1,})["']/gi; + +function shannonEntropy(input: string): number { + const counts = new Map(); + for (const char of input) { + counts.set(char, (counts.get(char) ?? 0) + 1); + } + + let entropy = 0; + for (const count of counts.values()) { + const p = count / input.length; + entropy -= p * Math.log2(p); + } + return entropy; +} + +function inferEntropyConfidence(entropy: number, threshold: number): "low" | "medium" | "high" { + if (entropy >= threshold + 0.8) { + return "high"; + } + if (entropy >= threshold + 0.3) { + return "medium"; + } + return "low"; +} + +export function findEntropySecrets( + filePath: string, + lines: string[], + options: SecretScanOptions +): Finding[] { + const findings: Finding[] = []; + + lines.forEach((line, index) => { + for (const match of line.matchAll(GENERIC_ASSIGNMENT_REGEX)) { + const value = match[1]?.trim(); + if (!value || value.length < options.minLength) { + continue; + } + + const entropy = shannonEntropy(value); + if (entropy < options.entropyThreshold) { + continue; + } + + findings.push({ + ruleId: "secret-high-entropy", + message: "Potential hardcoded secret detected via entropy heuristic", + filePath, + line: index + 1, + severity: entropy >= options.entropyThreshold + 0.6 ? 
"high" : "medium", + confidence: inferEntropyConfidence(entropy, options.entropyThreshold), + evidence: `token-like string length=${value.length} entropy=${entropy.toFixed(2)}`, + remediation: "Move secret-like values into environment variables or a secret manager.", + kind: "secret", + detectionMethod: "entropy" + }); + } + }); + + return findings; +} diff --git a/src/scan/secret/remoteRules.ts b/src/scan/secret/remoteRules.ts new file mode 100644 index 0000000..093c3c2 --- /dev/null +++ b/src/scan/secret/remoteRules.ts @@ -0,0 +1,43 @@ +import { isSecretRulesCacheFresh, readCachedSecretRules, writeCachedSecretRules } from "./cache"; + +function validateRulesUrl(url: string): void { + const parsed = new URL(url); + if (parsed.protocol === "https:") { + return; + } + + if ( + parsed.protocol === "http:" && + (parsed.hostname === "127.0.0.1" || parsed.hostname === "localhost") + ) { + return; + } + + throw new Error("Remote secret rules must use https (http is allowed only for localhost)"); +} + +export async function loadRemoteRuleBundle( + workspace: string, + url: string, + ttlMs: number, + refresh: boolean +): Promise { + validateRulesUrl(url); + + const cached = readCachedSecretRules(workspace, url); + if (!refresh && cached && isSecretRulesCacheFresh(cached)) { + return cached.body; + } + + const response = await fetch(url); + if (!response.ok) { + if (cached) { + return cached.body; + } + throw new Error(`Failed to download remote secret rules: ${response.status} ${response.statusText}`); + } + + const body = await response.text(); + writeCachedSecretRules(workspace, url, body, ttlMs); + return body; +} diff --git a/src/scan/secret/ruleLoader.ts b/src/scan/secret/ruleLoader.ts new file mode 100644 index 0000000..284dbf2 --- /dev/null +++ b/src/scan/secret/ruleLoader.ts @@ -0,0 +1,218 @@ +import fs from "node:fs"; +import path from "node:path"; +import { parseAllDocuments } from "yaml"; +import { ConfidenceLevel } from "../../types"; +import { 
builtinSecretRules } from "./builtinRules"; +import { loadRemoteRuleBundle } from "./remoteRules"; +import { + BUILTIN_SECRET_RULES_VERSION, + SecretRule, + SecretRulePattern, + SecretScanOptions +} from "./types"; + +function normalizeSeverity(value: unknown): "low" | "medium" | "high" { + const normalized = typeof value === "string" ? value.trim().toLowerCase() : "medium"; + if (normalized === "low" || normalized === "medium" || normalized === "high") { + return normalized; + } + if (normalized === "warning") { + return "medium"; + } + if (normalized === "error") { + return "high"; + } + return "medium"; +} + +function normalizeConfidence(value: unknown): ConfidenceLevel { + const normalized = typeof value === "string" ? value.trim().toLowerCase() : "medium"; + if (normalized === "low" || normalized === "medium" || normalized === "high") { + return normalized; + } + return "medium"; +} + +function collectPatterns(node: unknown): SecretRulePattern[] { + if (!node || typeof node !== "object") { + return []; + } + + const entry = node as Record; + const patterns: SecretRulePattern[] = []; + + if (typeof entry["pattern-regex"] === "string") { + patterns.push({ type: "regex", value: entry["pattern-regex"] }); + } + + if (typeof entry.pattern === "string") { + patterns.push({ type: "literal", value: entry.pattern }); + } + + if (Array.isArray(entry.patterns)) { + for (const child of entry.patterns) { + patterns.push(...collectPatterns(child)); + } + } + + if (Array.isArray(entry["pattern-either"])) { + for (const child of entry["pattern-either"]) { + patterns.push(...collectPatterns(child)); + } + } + + return patterns; +} + +function parseRuleObject( + rawRule: unknown, + sourceName: string, + source: SecretRule["source"] +): SecretRule | null { + if (!rawRule || typeof rawRule !== "object") { + return null; + } + + const rule = rawRule as Record; + const id = typeof rule.id === "string" ? rule.id.trim() : ""; + const message = typeof rule.message === "string" ? 
rule.message.trim() : ""; + if (!id || !message) { + return null; + } + + const metadata = (rule.metadata ?? {}) as Record; + const patterns = collectPatterns(rule); + if (patterns.length === 0) { + return null; + } + + for (const pattern of patterns) { + if (pattern.type === "regex") { + try { + new RegExp(pattern.value); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + throw new Error(`Invalid regex in secret rule ${id} from ${sourceName}: ${message}`); + } + } + } + + return { + id, + description: typeof rule.description === "string" ? rule.description : id, + message, + severity: normalizeSeverity(rule.severity), + confidence: normalizeConfidence(metadata.confidence), + remediation: + typeof metadata.remediation === "string" + ? metadata.remediation + : typeof metadata["remediation-guidance"] === "string" + ? metadata["remediation-guidance"] + : undefined, + patterns, + source, + sourceName + }; +} + +function parseRuleBundle( + yamlContent: string, + sourceName: string, + source: SecretRule["source"] +): SecretRule[] { + const rules: SecretRule[] = []; + + for (const doc of parseAllDocuments(yamlContent)) { + const value = doc.toJSON() as Record | null; + if (!value || !Array.isArray(value.rules)) { + continue; + } + + for (const rawRule of value.rules) { + const parsed = parseRuleObject(rawRule, sourceName, source); + if (parsed) { + rules.push(parsed); + } + } + } + + return rules; +} + +function collectYamlFiles(entryPath: string, out: string[]): void { + const stat = fs.statSync(entryPath); + if (stat.isFile()) { + if (/\.(?:ya?ml)$/i.test(entryPath)) { + out.push(entryPath); + } + return; + } + + for (const entry of fs.readdirSync(entryPath, { withFileTypes: true })) { + collectYamlFiles(path.join(entryPath, entry.name), out); + } +} + +function loadRulesFromPaths(rulePaths: string[], workspace: string): SecretRule[] { + const yamlFiles: string[] = []; + for (const raw of rulePaths) { + const absolute = 
path.isAbsolute(raw) ? raw : path.resolve(workspace, raw); + if (!fs.existsSync(absolute)) { + throw new Error(`Secret rule path not found: ${raw}`); + } + collectYamlFiles(absolute, yamlFiles); + } + + const loaded: SecretRule[] = []; + for (const filePath of yamlFiles.sort()) { + loaded.push(...parseRuleBundle(fs.readFileSync(filePath, "utf8"), filePath, "custom")); + } + return loaded; +} + +function dedupeRules(rules: SecretRule[]): SecretRule[] { + const seen = new Set(); + return rules.filter((rule) => { + const key = `${rule.id}:${rule.sourceName}`; + if (seen.has(key)) { + return false; + } + seen.add(key); + return true; + }); +} + +export async function loadSecretRules( + workspace: string, + options: SecretScanOptions +): Promise { + const loaded: SecretRule[] = []; + + if (options.defaultRules) { + if ( + options.defaultRulesVersion && + options.defaultRulesVersion !== BUILTIN_SECRET_RULES_VERSION + ) { + throw new Error( + `Unknown built-in secret rule version: ${options.defaultRulesVersion} (available: ${BUILTIN_SECRET_RULES_VERSION})` + ); + } + loaded.push(...builtinSecretRules); + } + + if (options.rulePaths.length > 0) { + loaded.push(...loadRulesFromPaths(options.rulePaths, workspace)); + } + + if (options.rulesUpdateUrl) { + const bundle = await loadRemoteRuleBundle( + workspace, + options.rulesUpdateUrl, + options.rulesCacheTtlMs, + options.rulesRefresh + ); + loaded.push(...parseRuleBundle(bundle, options.rulesUpdateUrl, "remote")); + } + + return dedupeRules(loaded); +} diff --git a/src/scan/secret/types.ts b/src/scan/secret/types.ts new file mode 100644 index 0000000..078c8d3 --- /dev/null +++ b/src/scan/secret/types.ts @@ -0,0 +1,42 @@ +import { ConfidenceLevel, Finding } from "../../types"; + +export const BUILTIN_SECRET_RULES_VERSION = "2026-05-25"; +export const DEFAULT_SECRET_ENTROPY_THRESHOLD = 4.2; +export const DEFAULT_SECRET_MIN_LENGTH = 12; +export const DEFAULT_SECRET_RULES_CACHE_TTL_MS = 24 * 60 * 60 * 1000; + +export interface 
SecretScanOptions { + rulePaths: string[]; + defaultRules: boolean; + defaultRulesVersion: string | null; + rulesUpdateUrl: string | null; + rulesRefresh: boolean; + rulesCacheTtlMs: number; + entropyThreshold: number; + minLength: number; + minConfidence: ConfidenceLevel; +} + +export interface SecretRulePattern { + type: "regex" | "literal"; + value: string; +} + +export interface SecretRule { + id: string; + description: string; + message: string; + severity: Finding["severity"]; + confidence: ConfidenceLevel; + remediation?: string; + patterns: SecretRulePattern[]; + source: "builtin" | "custom" | "remote"; + sourceName: string; +} + +export interface SecretEngineInput { + filePath: string; + content: string; + workspace: string; + options: SecretScanOptions; +} diff --git a/src/scan/types.ts b/src/scan/types.ts index 8398eae..a83b828 100644 --- a/src/scan/types.ts +++ b/src/scan/types.ts @@ -1,3 +1,5 @@ +import { SecretScanOptions } from "./secret/types"; + export const ASPECT_IDS = ["code"] as const; export type AspectId = (typeof ASPECT_IDS)[number]; @@ -12,6 +14,7 @@ export interface ScanContext { staged: boolean; /** True when the user passed explicit --paths (demo/test ignore lists are bypassed). 
*/ explicitPaths: boolean; + options: ScanOptions; } export interface ScanOptions { @@ -19,6 +22,7 @@ export interface ScanOptions { paths: string[]; only: AspectId[] | null; skip: AspectId[]; + secret: SecretScanOptions; } export interface AspectOutcome { @@ -31,5 +35,5 @@ export interface AspectOutcome { export interface ScanAspect { id: AspectId; label: string; - run(context: ScanContext): AspectOutcome; + run(context: ScanContext): Promise | AspectOutcome; } diff --git a/src/scanner.ts b/src/scanner.ts index 198a623..8c8173a 100644 --- a/src/scanner.ts +++ b/src/scanner.ts @@ -2,6 +2,9 @@ import fs from "node:fs"; import path from "node:path"; import { rules } from "./rules"; import { GIT_SCAN_IGNORED_PREFIXES } from "./scan/ignorePaths"; +import { defaultSecretScanOptions } from "./scan/secret/config"; +import { scanSecretFindings } from "./scan/secret/engine"; +import { SecretScanOptions } from "./scan/secret/types"; import { Finding, LineScanContext, Rule } from "./types"; const DEFAULT_PRIOR_WINDOW = 15; @@ -84,7 +87,13 @@ const supportedExtensions = new Set([ ".java", ".cs", ".go", - ".rb" + ".rb", + ".json", + ".yaml", + ".yml", + ".env", + ".ini", + ".conf" ]); function normalizeRelativePath(filePath: string, cwd: string): string { @@ -98,7 +107,10 @@ export function isIgnoredScanPath(filePath: string, cwd: string): boolean { } export function shouldScanFile(filePath: string, options?: { cwd?: string; allowIgnored?: boolean }): boolean { - if (!supportedExtensions.has(path.extname(filePath).toLowerCase())) { + const ext = path.extname(filePath).toLowerCase(); + const baseName = path.basename(filePath).toLowerCase(); + const hasSupportedType = supportedExtensions.has(ext) || baseName === ".env"; + if (!hasSupportedType) { return false; } @@ -146,14 +158,8 @@ export function expandScanPaths(paths: string[], cwd: string): string[] { return [...resolved].sort(); } -export function scanFile(filePath: string): Finding[] { - if (!shouldScanFile(filePath) || 
!fs.existsSync(filePath)) { - return []; - } - +function scanLegacyRules(filePath: string, lines: string[]): Finding[] { const findings: Finding[] = []; - const lines = fs.readFileSync(filePath, "utf8").split(/\r?\n/); - const windowedRules: Rule[] = []; const plainRules: Rule[] = []; @@ -178,7 +184,8 @@ export function scanFile(filePath: string): Finding[] { message: rule.message, filePath, line: index + 1, - severity: rule.severity + severity: rule.severity, + kind: "code" }); } } @@ -194,7 +201,8 @@ export function scanFile(filePath: string): Finding[] { message: rule.message, filePath, line: index + 1, - severity: rule.severity + severity: rule.severity, + kind: "code" }); } } @@ -203,6 +211,31 @@ export function scanFile(filePath: string): Finding[] { return findings; } -export function scanFiles(filePaths: string[]): Finding[] { - return filePaths.flatMap((filePath) => scanFile(filePath)); +export interface ScanFileOptions { + workspace?: string; + secret?: SecretScanOptions; +} + +export async function scanFile(filePath: string, options: ScanFileOptions = {}): Promise { + if (!shouldScanFile(filePath) || !fs.existsSync(filePath)) { + return []; + } + + const content = fs.readFileSync(filePath, "utf8"); + const lines = content.split(/\r?\n/); + const workspace = path.resolve(options.workspace ?? process.cwd()); + const secretOptions = options.secret ?? 
defaultSecretScanOptions(); + + const secretFindings = await scanSecretFindings({ + filePath, + content, + workspace, + options: secretOptions + }); + return [...scanLegacyRules(filePath, lines), ...secretFindings]; +} + +export async function scanFiles(filePaths: string[], options: ScanFileOptions = {}): Promise { + const findings = await Promise.all(filePaths.map((filePath) => scanFile(filePath, options))); + return findings.flat(); } diff --git a/src/types.ts b/src/types.ts index e0f6577..101ecec 100644 --- a/src/types.ts +++ b/src/types.ts @@ -4,8 +4,15 @@ export interface Finding { filePath: string; line: number; severity: "low" | "medium" | "high"; + confidence?: ConfidenceLevel; + evidence?: string; + remediation?: string; + kind?: "code" | "secret"; + detectionMethod?: "rule" | "entropy" | "rule+entropy"; } +export type ConfidenceLevel = "low" | "medium" | "high"; + export interface LineScanContext { priorLines: string[]; followingLines: string[]; diff --git a/tests/cliStrings.test.ts b/tests/cliStrings.test.ts index c70a0dd..00de959 100644 --- a/tests/cliStrings.test.ts +++ b/tests/cliStrings.test.ts @@ -23,4 +23,6 @@ test("compiled CLI help output describes local code scan only", () => { const output = `${result.stdout}\n${result.stderr}`; assert.match(output, /codefence scan/); assert.match(output, /Local secure-coding/); + assert.match(output, /--secret-rules/); + assert.match(output, /--secret-min-confidence/); }); diff --git a/tests/scanOptions.test.ts b/tests/scanOptions.test.ts index 3c30f7e..15f4857 100644 --- a/tests/scanOptions.test.ts +++ b/tests/scanOptions.test.ts @@ -1,6 +1,7 @@ import assert from "node:assert/strict"; import test from "node:test"; import { parseAspectList, parseScanArgv, resolveAspects } from "../src/scan/parseOptions"; +import { BUILTIN_SECRET_RULES_VERSION } from "../src/scan/secret/types"; test("parseScanArgv errors when value-taking flags are missing a value", () => { assert.throws(() => parseScanArgv(["--only"]), 
/--only requires a value/); @@ -8,6 +9,7 @@ test("parseScanArgv errors when value-taking flags are missing a value", () => { assert.throws(() => parseScanArgv(["--only="]), /--only requires a value/); assert.throws(() => parseScanArgv(["--skip"]), /--skip requires a value/); assert.throws(() => parseScanArgv(["--skip", "-h"]), /--skip requires a value/); + assert.throws(() => parseScanArgv(["--secret-rules"]), /--secret-rules requires at least one value/); }); test("parseScanArgv recognizes -h and --help", () => { @@ -25,7 +27,18 @@ test("resolveAspects defaults to code", () => { staged: false, paths: [], only: null, - skip: [] + skip: [], + secret: { + rulePaths: [], + defaultRules: true, + defaultRulesVersion: null, + rulesUpdateUrl: null, + rulesRefresh: false, + rulesCacheTtlMs: 86400000, + entropyThreshold: 4.2, + minLength: 12, + minConfidence: "low" + } }); assert.deepEqual(aspects, ["code"]); }); @@ -35,7 +48,50 @@ test("resolveAspects honors --only and --skip", () => { staged: false, paths: [], only: ["code"], - skip: ["code"] + skip: ["code"], + secret: { + rulePaths: [], + defaultRules: true, + defaultRulesVersion: null, + rulesUpdateUrl: null, + rulesRefresh: false, + rulesCacheTtlMs: 86400000, + entropyThreshold: 4.2, + minLength: 12, + minConfidence: "low" + } }); assert.deepEqual(aspects, []); }); + +test("parseScanArgv parses secret engine flags", () => { + const parsed = parseScanArgv([ + "--staged", + "--secret-rules", + "rules/a.yml", + "rules/b.yaml", + "--secret-default-rules-version", + BUILTIN_SECRET_RULES_VERSION, + "--secret-rules-update-url", + "https://example.com/rules.yml", + "--secret-rules-refresh", + "--secret-rules-cache-ttl", + "12h", + "--secret-entropy-threshold", + "4.5", + "--secret-min-length", + "18", + "--secret-min-confidence", + "medium" + ]); + + assert.ok(!("help" in parsed)); + assert.deepEqual(parsed.secret.rulePaths, ["rules/a.yml", "rules/b.yaml"]); + assert.equal(parsed.secret.defaultRulesVersion, 
BUILTIN_SECRET_RULES_VERSION); + assert.equal(parsed.secret.rulesUpdateUrl, "https://example.com/rules.yml"); + assert.equal(parsed.secret.rulesRefresh, true); + assert.equal(parsed.secret.rulesCacheTtlMs, 12 * 60 * 60 * 1000); + assert.equal(parsed.secret.entropyThreshold, 4.5); + assert.equal(parsed.secret.minLength, 18); + assert.equal(parsed.secret.minConfidence, "medium"); +}); diff --git a/tests/scanner.test.ts b/tests/scanner.test.ts index 62ba25d..97d58a8 100644 --- a/tests/scanner.test.ts +++ b/tests/scanner.test.ts @@ -11,7 +11,7 @@ test("isIgnoredScanPath skips examples fixture trees", () => { assert.equal(isIgnoredScanPath("src/app.ts", cwd), false); }); -test("scanFile finds hardcoded secret and eval", () => { +test("scanFile finds hardcoded secret and eval", async () => { const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "codefence-")); const file = path.join(tempDir, "bad.ts"); fs.writeFileSync( @@ -22,8 +22,10 @@ eval("console.log(1)"); "utf8" ); - const findings = scanFile(file); + const findings = await scanFile(file); assert.ok(findings.length >= 2); + assert.ok(findings.some((finding) => finding.ruleId === "no-hardcoded-secret")); + assert.ok(findings.some((finding) => finding.ruleId === "no-eval")); fs.rmSync(tempDir, { recursive: true, force: true }); }); diff --git a/tests/secretEngine.cache.test.ts b/tests/secretEngine.cache.test.ts new file mode 100644 index 0000000..998231d --- /dev/null +++ b/tests/secretEngine.cache.test.ts @@ -0,0 +1,57 @@ +import assert from "node:assert/strict"; +import fs from "node:fs"; +import http from "node:http"; +import os from "node:os"; +import path from "node:path"; +import test from "node:test"; +import { loadSecretRules } from "../src/scan/secret/ruleLoader"; + +test("loadSecretRules refreshes remote bundles and falls back to cache", async () => { + const workspace = fs.mkdtempSync(path.join(os.tmpdir(), "codefence-remote-rules-")); + let requests = 0; + const yaml = `rules: + - id: remote-secret + 
message: Remote secret detected + severity: high + metadata: + confidence: medium + pattern-regex: "\\\\bremote_[A-Za-z0-9]{12}\\\\b" +`; + + const server = http.createServer((_, res) => { + requests++; + res.writeHead(200, { "content-type": "application/x-yaml" }); + res.end(yaml); + }); + + await new Promise((resolve) => server.listen(0, "127.0.0.1", () => resolve())); + const address = server.address(); + if (!address || typeof address === "string") { + throw new Error("failed to start test server"); + } + + const url = `http://127.0.0.1:${address.port}/rules.yml`; + const options = { + rulePaths: [], + defaultRules: false, + defaultRulesVersion: null, + rulesUpdateUrl: url, + rulesRefresh: false, + rulesCacheTtlMs: 60_000, + entropyThreshold: 4.2, + minLength: 12, + minConfidence: "low" as const + }; + + const first = await loadSecretRules(workspace, options); + assert.equal(first[0]?.id, "remote-secret"); + assert.equal(requests, 1); + + await new Promise((resolve, reject) => server.close((error) => (error ? 
reject(error) : resolve()))); + + const second = await loadSecretRules(workspace, options); + assert.equal(second[0]?.id, "remote-secret"); + assert.equal(requests, 1); + + fs.rmSync(workspace, { recursive: true, force: true }); +}); diff --git a/tests/secretEngine.rules.test.ts b/tests/secretEngine.rules.test.ts new file mode 100644 index 0000000..823d4a5 --- /dev/null +++ b/tests/secretEngine.rules.test.ts @@ -0,0 +1,47 @@ +import assert from "node:assert/strict"; +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import test from "node:test"; +import { loadSecretRules } from "../src/scan/secret/ruleLoader"; + +test("loadSecretRules parses Semgrep-style YAML rules", async () => { + const workspace = fs.mkdtempSync(path.join(os.tmpdir(), "codefence-rules-")); + const rulesDir = path.join(workspace, "rules"); + fs.mkdirSync(rulesDir); + const ruleFile = path.join(rulesDir, "custom.yml"); + fs.writeFileSync( + ruleFile, + `rules: + - id: custom-secret + message: Custom secret detected + severity: high + metadata: + confidence: high + remediation: Rotate the credential + pattern-either: + - pattern-regex: "\\\\bzz_[A-Za-z0-9]{10}\\\\b" + - pattern: "BEGIN CUSTOM SECRET" +`, + "utf8" + ); + + const rules = await loadSecretRules(workspace, { + rulePaths: [rulesDir], + defaultRules: false, + defaultRulesVersion: null, + rulesUpdateUrl: null, + rulesRefresh: false, + rulesCacheTtlMs: 1000, + entropyThreshold: 4.2, + minLength: 12, + minConfidence: "low" + }); + + assert.equal(rules.length, 1); + assert.equal(rules[0].id, "custom-secret"); + assert.equal(rules[0].confidence, "high"); + assert.equal(rules[0].patterns.length, 2); + + fs.rmSync(workspace, { recursive: true, force: true }); +}); diff --git a/tests/secretsExamples.test.ts b/tests/secretsExamples.test.ts index 8ad9a47..e736f1a 100644 --- a/tests/secretsExamples.test.ts +++ b/tests/secretsExamples.test.ts @@ -5,7 +5,7 @@ import path from "node:path"; import test from 
"node:test"; import { scanFiles } from "../src/scanner"; -test("secret scanning flags likely hardcoded secrets", () => { +test("secret scanning flags likely hardcoded secrets", async () => { const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "codefence-secrets-")); const file = path.join(tmpDir, "secrets-sample.ts"); fs.writeFileSync( @@ -14,19 +14,37 @@ test("secret scanning flags likely hardcoded secrets", () => { "utf8" ); - const findings = scanFiles([file]); + const findings = await scanFiles([file]); assert.ok(findings.some((f) => f.ruleId === "no-hardcoded-secret")); fs.rmSync(tmpDir, { recursive: true, force: true }); }); -test("scan output includes secrets-focused rule ids only", () => { +test("scan output includes secrets-focused rule ids only", async () => { const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "codefence-secrets-")); const file = path.join(tmpDir, "sample.ts"); fs.writeFileSync(file, "const token = \"abcdef1234567890\";\n", "utf8"); - const findings = scanFiles([file]); + const findings = await scanFiles([file]); assert.equal(findings.some((f) => f.ruleId === "no-hardcoded-secret"), true); fs.rmSync(tmpDir, { recursive: true, force: true }); }); + +test("entropy scanning reports confidence and evidence for unknown secret formats", async () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "codefence-secrets-")); + const file = path.join(tmpDir, "entropy-sample.ts"); + fs.writeFileSync( + file, + `const client_secret = "Q4z8vB2nLp9sTw7xYk3mHc6rJd1f";\n`, + "utf8" + ); + + const findings = await scanFiles([file]); + const entropyFinding = findings.find((f) => f.ruleId === "secret-high-entropy"); + assert.ok(entropyFinding); + assert.equal(entropyFinding?.confidence, "medium"); + assert.match(entropyFinding?.evidence ?? 
"", /entropy=/); + + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); From 876df709b1d1608d4a7d62379fe7856acb4a4400 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 25 May 2026 17:58:31 +0000 Subject: [PATCH 2/4] feat: wire secret scan options --- README.md | 28 +++++++++++++++++++++++++--- docs/HOOKS.md | 3 +++ docs/README.md | 2 +- src/hooks/scanWorker.ts | 4 +++- src/scan/secret/engine.ts | 2 +- src/scan/secret/entropy.ts | 24 ++++++++++++++++++++---- tests/secretsExamples.test.ts | 4 ++-- 7 files changed, 55 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 0a4b979..2a287f0 100644 --- a/README.md +++ b/README.md @@ -118,12 +118,34 @@ codefence scan --help | ------ | ----------- | | `--staged` | Scan staged git files instead of unstaged changes | | `--paths ` | Scan explicit paths (bypasses git-changed discovery) | -| `--only secrets,deps` | Run only listed aspects (for example secrets, deps) | -| `--skip secrets` | Skip selected aspects | +| `--only code` | Run only listed aspects (currently `code`) | +| `--skip code` | Skip selected aspects | +| `--secret-rules ` | Load Semgrep-style YAML secret rules from files or directories | +| `--secret-default-rules ` | Enable or disable bundled secret rules | +| `--secret-rules-update-url ` | Download and cache a remote YAML rule bundle | +| `--secret-rules-refresh` | Force remote rule refresh before scanning | +| `--secret-entropy-threshold ` | Tune entropy-based secret detection sensitivity | +| `--secret-min-length ` | Ignore short candidates during entropy analysis | +| `--secret-min-confidence ` | Filter lower-confidence secret findings | Git-based scans skip fixture trees such as `examples/` (see `codefence scan --help`). Explicit `--paths` still scans those files. -**Environment:** `CODEFENCE_ASPECTS`, `CODEFENCE_ONLY`, `CODEFENCE_SKIP` (legacy `DSEC_*` names accepted). 
+Built-in secret scanning now combines: + +- bundled versioned rules for common tokens, private keys, password-like assignments, and URI credentials +- Semgrep-style YAML rule loading from local files or directories +- entropy-based detection for unknown secret formats +- deduplicated findings with confidence and evidence summaries + +```bash +codefence scan --staged --secret-rules .codefence/rules/secrets +codefence scan --paths src config --secret-entropy-threshold 4.2 --secret-min-confidence medium +codefence scan --staged --secret-rules-update-url https://example.com/codefence/secrets-rules.yml --secret-rules-refresh +``` + +Remote rule bundles are cached under `.codefence/cache/secret-rules/` for offline and low-latency scans. + +**Environment:** `CODEFENCE_ASPECTS`, `CODEFENCE_ONLY`, `CODEFENCE_SKIP`, `CODEFENCE_SECRET_RULES`, `CODEFENCE_SECRET_DEFAULT_RULES`, `CODEFENCE_SECRET_DEFAULT_RULES_VERSION`, `CODEFENCE_SECRET_RULES_UPDATE_URL`, `CODEFENCE_SECRET_RULES_CACHE_TTL`, `CODEFENCE_SECRET_ENTROPY_THRESHOLD`, `CODEFENCE_SECRET_MIN_LENGTH`, `CODEFENCE_SECRET_MIN_CONFIDENCE` (legacy `DSEC_*` names still accepted for aspect selection). ## Git pre-commit and background scanning diff --git a/docs/HOOKS.md b/docs/HOOKS.md index 2673ddc..770216f 100644 --- a/docs/HOOKS.md +++ b/docs/HOOKS.md @@ -5,6 +5,8 @@ Codefence guardrails provides: 1. A **Git pre-commit hook** that runs `codefence scan --staged` and blocks the commit on failure. 2. A **TypeScript background scanner** that scans on save with debouncing and fills `.codefence/cache/` so pre-commit is faster. +Both flows use the same secret engine as `codefence scan`, including bundled rules, optional Semgrep-style YAML rules, and entropy-based secret detection. + These are **not** Kiro-specific for commits — only the optional `afterFileEdit` integration uses Kiro or Cursor hook config. 
Hooks are **Node.js scripts** (`.cjs`), not Bash-only — they run on **Windows, macOS, and Linux** as long as Node is on `PATH` (same requirement as `codefence`). Shell scripts (`.sh`) are optional wrappers for Git Bash. @@ -129,6 +131,7 @@ codefence background-scan --check-pending ```text .codefence/ cache/code/.json # per-file code scan results (mtime-checked) + cache/secret-rules/*.json # cached remote secret rule bundles debounce.json # pending background scans ``` diff --git a/docs/README.md b/docs/README.md index e126be2..8cd57d9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -5,4 +5,4 @@ | [AI-ASSISTANTS.md](AI-ASSISTANTS.md) | App teams using Cursor, Claude, Copilot | `codefence install`, `codefence install-hooks`, automatic local scan loop | | [HOOKS.md](HOOKS.md) | App teams using Git / IDE hooks | Pre-commit, background scanner, cache | -Main package reference: [README.md](../README.md) (install, `codefence scan`, publishing). +Main package reference: [README.md](../README.md) (install, `codefence scan`, Semgrep-compatible secret rules, publishing). diff --git a/src/hooks/scanWorker.ts b/src/hooks/scanWorker.ts index a61c9c1..bda08e1 100644 --- a/src/hooks/scanWorker.ts +++ b/src/hooks/scanWorker.ts @@ -35,7 +35,9 @@ export async function runScanWorker(options: ScanWorkerOptions): Promise if (findings.length > 0) { console.error(`[scan-worker] code ${rel}: ${findings.length} finding(s)`); for (const f of findings) { - console.error(` ${f.severity.toUpperCase()} ${f.ruleId} ${f.filePath}:${f.line}`); + const confidence = f.confidence ? ` confidence=${f.confidence}` : ""; + const evidence = f.evidence ? 
` evidence=${f.evidence}` : ""; + console.error(` ${f.severity.toUpperCase()} ${f.ruleId} ${f.filePath}:${f.line}${confidence}${evidence}`); } return 1; } diff --git a/src/scan/secret/engine.ts b/src/scan/secret/engine.ts index 46bca5f..1e1ee52 100644 --- a/src/scan/secret/engine.ts +++ b/src/scan/secret/engine.ts @@ -5,7 +5,7 @@ import { loadSecretRules } from "./ruleLoader"; import { SecretEngineInput, SecretRule } from "./types"; function ruleRegex(pattern: string): RegExp { - return new RegExp(pattern, "g"); + return new RegExp(pattern, "gi"); } function summarizeMatch(match: string): string { diff --git a/src/scan/secret/entropy.ts b/src/scan/secret/entropy.ts index bae8550..ff4a062 100644 --- a/src/scan/secret/entropy.ts +++ b/src/scan/secret/entropy.ts @@ -2,7 +2,19 @@ import { Finding } from "../../types"; import { SecretScanOptions } from "./types"; const GENERIC_ASSIGNMENT_REGEX = - /\b(?:api[_-]?key|secret|token|access[_-]?token|client[_-]?secret|password)\b\s*[:=]\s*["']([^"'\\\n]{1,})["']/gi; + /\b([A-Za-z_][A-Za-z0-9_-]{1,64})\b\s*[:=]\s*["']([^"'\\\n]{1,})["']/g; +const BENIGN_ASSIGNMENT_KEYS = new Set([ + "name", + "version", + "path", + "url", + "host", + "port", + "image", + "sha", + "digest", + "color" +]); function shannonEntropy(input: string): number { const counts = new Map(); @@ -19,10 +31,10 @@ function shannonEntropy(input: string): number { } function inferEntropyConfidence(entropy: number, threshold: number): "low" | "medium" | "high" { - if (entropy >= threshold + 0.8) { + if (entropy >= threshold + 0.6) { return "high"; } - if (entropy >= threshold + 0.3) { + if (entropy >= threshold) { return "medium"; } return "low"; @@ -37,10 +49,14 @@ export function findEntropySecrets( lines.forEach((line, index) => { for (const match of line.matchAll(GENERIC_ASSIGNMENT_REGEX)) { - const value = match[1]?.trim(); + const key = match[1]?.trim().toLowerCase(); + const value = match[2]?.trim(); if (!value || value.length < options.minLength) { 
continue; } + if (key && BENIGN_ASSIGNMENT_KEYS.has(key)) { + continue; + } const entropy = shannonEntropy(value); if (entropy < options.entropyThreshold) { diff --git a/tests/secretsExamples.test.ts b/tests/secretsExamples.test.ts index e736f1a..8074c5b 100644 --- a/tests/secretsExamples.test.ts +++ b/tests/secretsExamples.test.ts @@ -36,14 +36,14 @@ test("entropy scanning reports confidence and evidence for unknown secret format const file = path.join(tmpDir, "entropy-sample.ts"); fs.writeFileSync( file, - `const client_secret = "Q4z8vB2nLp9sTw7xYk3mHc6rJd1f";\n`, + `const credentialBlob = "Q4z8vB2nLp9sTw7xYk3mHc6rJd1f";\n`, "utf8" ); const findings = await scanFiles([file]); const entropyFinding = findings.find((f) => f.ruleId === "secret-high-entropy"); assert.ok(entropyFinding); - assert.equal(entropyFinding?.confidence, "medium"); + assert.ok(entropyFinding?.confidence === "medium" || entropyFinding?.confidence === "high"); assert.match(entropyFinding?.evidence ?? "", /entropy=/); fs.rmSync(tmpDir, { recursive: true, force: true }); From c88c614640c3cb6526918649cafba1548b04a376 Mon Sep 17 00:00:00 2001 From: kadraman Date: Mon, 25 May 2026 19:45:29 +0100 Subject: [PATCH 3/4] chore: added fake secrets for testing --- README.md | 4 ++-- examples/README.md | 26 +++++++++++++++++++++++ examples/pre-commit-no-npm.sh | 9 -------- examples/secrets/fake-private-key.pem | 4 ++++ examples/secrets/fake-secrets.ts | 12 +++++++++++ examples/secrets/fake-uri-credentials.txt | 4 ++++ tests/secretsExamples.test.ts | 13 ++++++++++++ 7 files changed, 61 insertions(+), 11 deletions(-) create mode 100644 examples/README.md delete mode 100644 examples/pre-commit-no-npm.sh create mode 100644 examples/secrets/fake-private-key.pem create mode 100644 examples/secrets/fake-secrets.ts create mode 100644 examples/secrets/fake-uri-credentials.txt diff --git a/README.md b/README.md index 2a287f0..6c407e0 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ # Codefence -**Codefence** — 
guardrails for AI-assisted coding. +**Codefence** - guardrails for AI-assisted coding. - **npm:** [`codefence`](https://www.npmjs.com/package/codefence) - **CLI:** `codefence` @@ -215,4 +215,4 @@ npm publish --access public ## License -ISC — see [LICENSE](LICENSE). +See [LICENSE](LICENSE). diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..fc3eb0c --- /dev/null +++ b/examples/README.md @@ -0,0 +1,26 @@ +# Examples directory + +This directory contains sample fixtures for exercising codefence behavior in local development. + +## Secret-scanning fixtures + +Path: `examples/secrets/` + +All values in these files are intentionally fake test strings. They are designed to trigger secret-detection rules and are not real credentials. + +To reduce GitHub push-protection friction, examples avoid provider-specific token signatures (for example real-looking `ghp_`, `glpat-`, or `sk_live_` forms). + +Run against the fixture set: + +```bash +npm run build +node dist/src/cli.js scan --paths examples/secrets +``` + +You can also target a single fixture file: + +```bash +node dist/src/cli.js scan --paths examples/secrets/fake-secrets.ts +``` + +Note: git-changed scans ignore `examples/` by default. Explicit `--paths` includes these files. diff --git a/examples/pre-commit-no-npm.sh b/examples/pre-commit-no-npm.sh deleted file mode 100644 index 4c2b596..0000000 --- a/examples/pre-commit-no-npm.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash -# Legacy Bash wrapper — prefer: codefence install-hooks (installs a Node pre-commit hook). -# Manual install: cp hooks/git/pre-commit.cjs .git/hooks/pre-commit && cp hooks/lib/run-codefence-hook.cjs .git/hooks/codefence-run-hook.cjs -# Or on Windows: codefence install-hooks - -set -euo pipefail -ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" -cd "$(git rev-parse --show-toplevel)" -exec node "$ROOT/hooks/git/pre-commit.cjs" "$@" diff --git a/examples/secrets/fake-private-key.pem b/examples/secrets/fake-private-key.pem new file mode 100644 index 0000000..ee00fb0 --- /dev/null +++ b/examples/secrets/fake-private-key.pem @@ -0,0 +1,4 @@ +-----BEGIN EXAMPLE KEY MATERIAL----- +This file intentionally avoids real private-key headers to reduce push-protection blocks. +Use it only as a placeholder fixture in local development. +-----END EXAMPLE KEY MATERIAL----- diff --git a/examples/secrets/fake-secrets.ts b/examples/secrets/fake-secrets.ts new file mode 100644 index 0000000..f3a0d48 --- /dev/null +++ b/examples/secrets/fake-secrets.ts @@ -0,0 +1,12 @@ +// Intentional fake secrets for scanner validation. +// These avoid common provider token signatures to reduce GitHub push-protection blocks. +const accessToken = "access_token = \"exampledevtoken1234567890\""; +const clientSecret = "client_secret = \"devclientsecretvalue123456\""; +const bearer = "Bearer exampledevbearertoken1234567890"; +const password = "password = \"P@ssword123456\""; +const apiKey = "apiKey = \"testapikey1234567890\""; +const entropyBlob = "Q4z8vB2nLp9sTw7xYk3mHc6rJd1f"; + +export function sample() { + return [accessToken, clientSecret, bearer, password, apiKey, entropyBlob].length; +} diff --git a/examples/secrets/fake-uri-credentials.txt b/examples/secrets/fake-uri-credentials.txt new file mode 100644 index 0000000..a02bad9 --- /dev/null +++ b/examples/secrets/fake-uri-credentials.txt @@ -0,0 +1,4 @@ +# Intentional fake URI credential examples for scanner validation. 
+postgres://demo_user:demo_password@db.example.local:5432/app +https://ci-user:token-1234567890@internal.example.local/artifacts +redis://cache_user:cache_password@cache.example.local:6379/0 diff --git a/tests/secretsExamples.test.ts b/tests/secretsExamples.test.ts index 8074c5b..e7e007a 100644 --- a/tests/secretsExamples.test.ts +++ b/tests/secretsExamples.test.ts @@ -48,3 +48,16 @@ test("entropy scanning reports confidence and evidence for unknown secret format fs.rmSync(tmpDir, { recursive: true, force: true }); }); + +test("examples secret fixtures produce secret findings", async () => { + const workspace = process.cwd(); + const fixtureDir = path.join(workspace, "examples", "secrets"); + const fixtureFiles = [ + path.join(fixtureDir, "fake-secrets.ts"), + path.join(fixtureDir, "fake-uri-credentials.txt") + ]; + + const findings = await scanFiles(fixtureFiles, { workspace }); + assert.ok(findings.length > 0); + assert.ok(findings.some((f) => f.ruleId.startsWith("secret-") || f.ruleId === "no-hardcoded-secret")); +}); From dc7cc27eec6d559e4c7ef65e4f93696cabb6c92c Mon Sep 17 00:00:00 2001 From: kadraman Date: Mon, 25 May 2026 20:04:24 +0100 Subject: [PATCH 4/4] chore: replaced built-in rules with semgrep-style examples and removed old DSEC and FGR variables --- README.md | 15 +- examples/README.md | 17 ++- examples/rules/README.md | 48 ++++++ examples/rules/extra-secrets-bundle.yml | 25 ++++ examples/secrets/fake-private-key-block.conf | 5 + examples/secrets/fake-secrets.ts | 7 +- ...dentials.txt => fake-uri-credentials.conf} | 0 package.json | 1 + rules/secret/builtin.yml | 74 ++++++++++ src/cli.ts | 6 +- src/hooks/backgroundScanner.ts | 5 +- src/hooks/paths.ts | 7 - src/hooks/preCommit.ts | 4 +- src/index.ts | 2 +- src/scan/parseOptions.ts | 23 ++- src/scan/runner.ts | 2 +- src/scan/secret/builtinRules.ts | 139 +++++------------- src/scan/secret/remoteRules.ts | 54 ++++++- src/scan/secret/ruleLoader.ts | 136 +---------------- src/scan/secret/yamlRuleParser.ts |
131 +++++++++++++++++ tests/packageMetadata.test.ts | 1 - tests/secretEngine.builtinRules.test.ts | 53 +++++++ .../secretEngine.remoteExampleBundle.test.ts | 48 ++++++ tests/secretsExamples.test.ts | 7 +- tests/templates.test.ts | 8 - 25 files changed, 534 insertions(+), 284 deletions(-) create mode 100644 examples/rules/README.md create mode 100644 examples/rules/extra-secrets-bundle.yml create mode 100644 examples/secrets/fake-private-key-block.conf rename examples/secrets/{fake-uri-credentials.txt => fake-uri-credentials.conf} (100%) create mode 100644 rules/secret/builtin.yml create mode 100644 src/scan/secret/yamlRuleParser.ts create mode 100644 tests/secretEngine.builtinRules.test.ts create mode 100644 tests/secretEngine.remoteExampleBundle.test.ts diff --git a/README.md b/README.md index 6c407e0..c45e26c 100644 --- a/README.md +++ b/README.md @@ -132,20 +132,29 @@ Git-based scans skip fixture trees such as `examples/` (see `codefence scan --he Built-in secret scanning now combines: -- bundled versioned rules for common tokens, private keys, password-like assignments, and URI credentials +- a bundled Semgrep-style YAML pack at `rules/secret/builtin.yml` (version `2026-05-25`) for common tokens, private keys, password-like assignments, and URI credentials - Semgrep-style YAML rule loading from local files or directories - entropy-based detection for unknown secret formats - deduplicated findings with confidence and evidence summaries +Sample fixtures and a downloadable example rule bundle are in [`examples/`](examples/README.md). 
+ ```bash codefence scan --staged --secret-rules .codefence/rules/secrets +codefence scan --paths examples/secrets codefence scan --paths src config --secret-entropy-threshold 4.2 --secret-min-confidence medium -codefence scan --staged --secret-rules-update-url https://example.com/codefence/secrets-rules.yml --secret-rules-refresh +codefence scan --paths examples/secrets --secret-rules-update-url http://127.0.0.1:8765/extra-secrets-bundle.yml --secret-rules-refresh +``` + +Serve the example remote bundle locally (`examples/rules/README.md`): + +```bash +npx --yes serve examples/rules -l 8765 ``` Remote rule bundles are cached under `.codefence/cache/secret-rules/` for offline and low-latency scans. -**Environment:** `CODEFENCE_ASPECTS`, `CODEFENCE_ONLY`, `CODEFENCE_SKIP`, `CODEFENCE_SECRET_RULES`, `CODEFENCE_SECRET_DEFAULT_RULES`, `CODEFENCE_SECRET_DEFAULT_RULES_VERSION`, `CODEFENCE_SECRET_RULES_UPDATE_URL`, `CODEFENCE_SECRET_RULES_CACHE_TTL`, `CODEFENCE_SECRET_ENTROPY_THRESHOLD`, `CODEFENCE_SECRET_MIN_LENGTH`, `CODEFENCE_SECRET_MIN_CONFIDENCE` (legacy `DSEC_*` names still accepted for aspect selection). +**Environment:** `CODEFENCE_ASPECTS`, `CODEFENCE_ONLY`, `CODEFENCE_SKIP`, `CODEFENCE_SECRET_RULES`, `CODEFENCE_SECRET_DEFAULT_RULES`, `CODEFENCE_SECRET_DEFAULT_RULES_VERSION`, `CODEFENCE_SECRET_RULES_UPDATE_URL`, `CODEFENCE_SECRET_RULES_CACHE_TTL`, `CODEFENCE_SECRET_ENTROPY_THRESHOLD`, `CODEFENCE_SECRET_MIN_LENGTH`, `CODEFENCE_SECRET_MIN_CONFIDENCE`. ## Git pre-commit and background scanning diff --git a/examples/README.md b/examples/README.md index fc3eb0c..185dc80 100644 --- a/examples/README.md +++ b/examples/README.md @@ -8,7 +8,14 @@ Path: `examples/secrets/` All values in these files are intentionally fake test strings. They are designed to trigger secret-detection rules and are not real credentials. -To reduce GitHub push-protection friction, examples avoid provider-specific token signatures (for example real-looking `ghp_`, `glpat-`, or `sk_live_` forms). 
+To reduce GitHub push-protection friction, most examples avoid provider-specific token signatures (for example real-looking `ghp_`, `glpat-`, or `sk_live_` forms). The private-key block fixture uses an obviously fake PEM block for the built-in `secret-private-key` rule. + +| Fixture | Typical built-in rule IDs | +| ------- | ------------------------- | +| `fake-secrets.ts` | `no-hardcoded-secret`, `secret-bearer-token`, `secret-password-assignment`, `secret-high-entropy` | +| `fake-uri-credentials.conf` | `secret-uri-credentials` | +| `fake-private-key-block.conf` | `secret-private-key` | +| `fake-private-key.pem` | Placeholder only (no PEM header) | Run against the fixture set: @@ -24,3 +31,11 @@ node dist/src/cli.js scan --paths examples/secrets/fake-secrets.ts ``` Note: git-changed scans ignore `examples/` by default. Explicit `--paths` includes these files. + +Scans against these fixtures are expected to **exit with code 1** (findings are intentional). Use them to verify rules, not as a clean baseline. + +## Secret rule bundles + +Built-in Semgrep-style rules live at [`rules/secret/builtin.yml`](../rules/secret/builtin.yml). + +An extra downloadable bundle for remote-rule demos is under [`examples/rules/`](rules/README.md) (serve locally or fetch via `https://raw.githubusercontent.com/...`). diff --git a/examples/rules/README.md b/examples/rules/README.md new file mode 100644 index 0000000..9001c85 --- /dev/null +++ b/examples/rules/README.md @@ -0,0 +1,48 @@ +# Example secret rule bundles + +Semgrep-style YAML bundles used to demonstrate local and remote rule loading. + +## Built-in bundle (shipped with Codefence) + +Path in the repository: [`rules/secret/builtin.yml`](../../rules/secret/builtin.yml) + +This file is the source of truth for default secret rules. The scanner loads it automatically unless `--secret-default-rules off` is set. 
+ +## Extra bundle (remote download demo) + +[`extra-secrets-bundle.yml`](extra-secrets-bundle.yml) adds example-only rules that match strings in [`../secrets/`](../secrets/) fixtures. + +### Serve over HTTP(S) locally + +From the repository root: + +```bash +npx --yes serve examples/rules -l 8765 +``` + +Scan fixtures with the remote bundle (refresh cache on first run): + +```bash +npm run build +node dist/src/cli.js scan --paths examples/secrets \ + --secret-rules-update-url http://127.0.0.1:8765/extra-secrets-bundle.yml \ + --secret-rules-refresh +``` + +Expect findings from both built-in rules and remote rules such as `example-ci-deploy-token`. Exit code **1** is normal for these fixtures. + +### Published raw URL (when this repo is on GitHub) + +Replace `ORG/REPO` with your fork: + +```text +https://raw.githubusercontent.com/ORG/REPO/main/examples/rules/extra-secrets-bundle.yml +``` + +```bash +codefence scan --paths examples/secrets \ + --secret-rules-update-url https://raw.githubusercontent.com/ORG/REPO/main/examples/rules/extra-secrets-bundle.yml \ + --secret-rules-refresh +``` + +Remote bundles are cached under `.codefence/cache/secret-rules/` in the target workspace. diff --git a/examples/rules/extra-secrets-bundle.yml b/examples/rules/extra-secrets-bundle.yml new file mode 100644 index 0000000..efe7eb5 --- /dev/null +++ b/examples/rules/extra-secrets-bundle.yml @@ -0,0 +1,25 @@ +# Example remote rule bundle for Codefence secret scanning. +# Serve locally, then point --secret-rules-update-url at this file. 
+# +# npx --yes serve examples/rules -l 8765 +# codefence scan --paths examples/secrets \ +# --secret-rules-update-url http://127.0.0.1:8765/extra-secrets-bundle.yml \ +# --secret-rules-refresh +rules: + - id: example-ci-deploy-token + description: Example rule from a downloadable bundle (CI deploy tokens) + message: Example CI deploy token detected (remote demo bundle) + severity: ERROR + metadata: + confidence: high + remediation: Store deploy tokens in your CI secret store, not in source files. + pattern-regex: '\bdeploy_[A-Za-z0-9]{20,}\b' + + - id: example-internal-api-header + description: Example custom header secret from remote bundle + message: Example internal API header value detected (remote demo bundle) + severity: WARNING + metadata: + confidence: medium + remediation: Load internal API keys from environment configuration. + pattern-regex: '\bX-Internal-Api-Key:\s*[A-Za-z0-9._\-]{16,}\b' diff --git a/examples/secrets/fake-private-key-block.conf b/examples/secrets/fake-private-key-block.conf new file mode 100644 index 0000000..1c632f4 --- /dev/null +++ b/examples/secrets/fake-private-key-block.conf @@ -0,0 +1,5 @@ +# FAKE private-key fixture for built-in secret-private-key rule validation. +# Not real cryptographic material — do not use outside local scanner tests. +-----BEGIN RSA PRIVATE KEY----- +MIIBogIBADIBFAKEBASE64PLACEHOLDERONLYNOTAREALKEYMATERIAL== +-----END RSA PRIVATE KEY----- diff --git a/examples/secrets/fake-secrets.ts b/examples/secrets/fake-secrets.ts index f3a0d48..e058ca5 100644 --- a/examples/secrets/fake-secrets.ts +++ b/examples/secrets/fake-secrets.ts @@ -6,7 +6,12 @@ const bearer = "Bearer exampledevbearertoken1234567890"; const password = "password = \"P@ssword123456\""; const apiKey = "apiKey = \"testapikey1234567890\""; const entropyBlob = "Q4z8vB2nLp9sTw7xYk3mHc6rJd1f"; +// Matches example-ci-deploy-token in examples/rules/extra-secrets-bundle.yml (remote demo bundle). 
+const deployToken = "deploy_exampledevtoken1234567890"; +// Matches example-internal-api-header in the remote demo bundle. +const internalHeader = "X-Internal-Api-Key: exampleinternalkey123456"; export function sample() { - return [accessToken, clientSecret, bearer, password, apiKey, entropyBlob].length; + return [accessToken, clientSecret, bearer, password, apiKey, entropyBlob, deployToken, internalHeader] + .length; } diff --git a/examples/secrets/fake-uri-credentials.txt b/examples/secrets/fake-uri-credentials.conf similarity index 100% rename from examples/secrets/fake-uri-credentials.txt rename to examples/secrets/fake-uri-credentials.conf diff --git a/package.json b/package.json index 83c12f7..7e8d5c3 100644 --- a/package.json +++ b/package.json @@ -5,6 +5,7 @@ "main": "dist/src/index.js", "files": [ "dist", + "rules", "templates", "hooks", "scripts/install-ai-rules.sh", diff --git a/rules/secret/builtin.yml b/rules/secret/builtin.yml new file mode 100644 index 0000000..d534fab --- /dev/null +++ b/rules/secret/builtin.yml @@ -0,0 +1,74 @@ +# Codefence built-in secret rules (Semgrep-style subset). +# Bundled with the package; version: 2026-05-25 +rules: + - id: secret-github-token + description: Detect GitHub personal access tokens + message: Potential GitHub token detected + severity: ERROR + metadata: + confidence: high + remediation: Remove the token, rotate it, and load credentials from environment or secret storage. + pattern-regex: '\bgh[pousr]_[A-Za-z0-9]{36,255}\b' + + - id: secret-gitlab-token + description: Detect GitLab tokens + message: Potential GitLab token detected + severity: ERROR + metadata: + confidence: high + remediation: Remove the token, rotate it, and move it to a managed secret store. 
+ pattern-regex: '\bglpat-[A-Za-z0-9_-]{20,255}\b' + + - id: secret-stripe-key + description: Detect Stripe API keys + message: Potential Stripe API key detected + severity: ERROR + metadata: + confidence: high + remediation: Replace embedded Stripe keys with environment-based configuration and rotate exposed keys. + pattern-regex: '\bsk_(?:live|test)_[A-Za-z0-9]{16,}\b' + + - id: secret-bearer-token + description: Detect bearer tokens + message: Potential bearer token detected + severity: ERROR + metadata: + confidence: medium + remediation: Avoid embedding bearer tokens in source files; inject them from runtime configuration. + pattern-regex: '\bBearer\s+[A-Za-z0-9._\-+/=]{16,}\b' + + - id: secret-private-key + description: Detect PEM private key material + message: Potential private key material detected + severity: ERROR + metadata: + confidence: high + remediation: Remove private keys from source control immediately and rotate any exposed key material. + pattern-regex: '-----BEGIN (?:RSA |DSA |EC |OPENSSH |PGP )?PRIVATE KEY-----' + + - id: secret-password-assignment + description: Detect password-like assignments + message: Potential hardcoded password detected + severity: ERROR + metadata: + confidence: medium + remediation: Do not commit passwords; use environment variables or a secret manager instead. + pattern-regex: '(?:password|passwd|pwd)\s*[:=]\s*["''][^"''\n]{8,}["'']' + + - id: secret-uri-credentials + description: Detect credentials embedded in URIs + message: Potential credentials embedded in URI detected + severity: ERROR + metadata: + confidence: high + remediation: Move credentials out of URIs and into environment or dedicated secret configuration. 
+ pattern-regex: '\b[a-z][a-z0-9+.-]*://[^\s:@/]+:[^\s:@/]+@' + + - id: no-hardcoded-secret + description: Detect generic token-style assignments + message: Potential hardcoded secret detected + severity: ERROR + metadata: + confidence: medium + remediation: Replace embedded credentials with runtime-configured secrets. + pattern-regex: '(?:api[_-]?key|secret|token|access[_-]?token|client[_-]?secret)\s*[:=]\s*["''][A-Za-z0-9_\-+/=]{12,}["'']' diff --git a/src/cli.ts b/src/cli.ts index 273a561..dd91be7 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -124,11 +124,13 @@ async function main() { process.exit(0); } - process.exit(await runScan(parsed)); + // Avoid process.exit() after network I/O — on Windows, abrupt exit can trip libuv + // (UV_HANDLE_CLOSING) while undici/http handles are still closing. + process.exitCode = await runScan(parsed); } void main().catch((error) => { const message = error instanceof Error ? error.message : String(error); console.error(message); - process.exit(1); + process.exitCode = 1; }); diff --git a/src/hooks/backgroundScanner.ts b/src/hooks/backgroundScanner.ts index 3f346c0..9e86102 100644 --- a/src/hooks/backgroundScanner.ts +++ b/src/hooks/backgroundScanner.ts @@ -11,8 +11,7 @@ const FILE_ENV_KEYS = [ "KIRO_FILE_PATH", "FILE_PATH", "EDITED_FILE", - "CODEFENCE_EDITED_FILE", - "DSEC_EDITED_FILE" + "CODEFENCE_EDITED_FILE" ]; export interface BackgroundScanOptions { @@ -124,7 +123,7 @@ function resolveEditedFile(options: BackgroundScanOptions, workspace: string): s export function runBackgroundScan(options: BackgroundScanOptions = {}): number { const workspace = path.resolve(options.workspace ?? process.cwd()); const parsedDebounce = Number.parseFloat( - process.env.CODEFENCE_DEBOUNCE_SECONDS ?? process.env.DSEC_DEBOUNCE_SECONDS ?? "" + process.env.CODEFENCE_DEBOUNCE_SECONDS ?? "" ); const debounceSeconds = options.debounceSeconds ?? 
diff --git a/src/hooks/paths.ts b/src/hooks/paths.ts index 75b789e..bde481c 100644 --- a/src/hooks/paths.ts +++ b/src/hooks/paths.ts @@ -3,10 +3,6 @@ import fs from "node:fs"; import path from "node:path"; export const CODEFENCE_OUTPUT_DIR = ".codefence"; -/** @deprecated Use {@link CODEFENCE_OUTPUT_DIR}. */ -export const DSEC_OUTPUT_DIR = CODEFENCE_OUTPUT_DIR; -/** @deprecated Use {@link CODEFENCE_OUTPUT_DIR}. */ -export const FGR_OUTPUT_DIR = CODEFENCE_OUTPUT_DIR; export const CACHE_VERSION = 1; export const DEFAULT_DEBOUNCE_SECONDS = 2; @@ -19,9 +15,6 @@ export function codefenceDir(workspace: string): string { return path.join(path.resolve(workspace), CODEFENCE_OUTPUT_DIR); } -/** @deprecated Use {@link codefenceDir}. */ -export const dsecDir = codefenceDir; - export function cacheDir(workspace: string): string { return path.join(codefenceDir(workspace), "cache"); } diff --git a/src/hooks/preCommit.ts b/src/hooks/preCommit.ts index 6547637..8117c46 100644 --- a/src/hooks/preCommit.ts +++ b/src/hooks/preCommit.ts @@ -4,8 +4,8 @@ import { defaultSecretScanOptions } from "../scan/secret/config"; import { shouldScanFile } from "../scanner"; import { countCodeCacheHits } from "./cache"; -const DEBUG = process.env.CODEFENCE_HOOK_DEBUG === "1" || process.env.DSEC_HOOK_DEBUG === "1"; -const FAIL_OPEN = process.env.CODEFENCE_HOOK_FAIL_OPEN === "1" || process.env.DSEC_HOOK_FAIL_OPEN === "1"; +const DEBUG = process.env.CODEFENCE_HOOK_DEBUG === "1"; +const FAIL_OPEN = process.env.CODEFENCE_HOOK_FAIL_OPEN === "1"; function header(title: string): void { console.log(`\n=== ${title} ===\n`); diff --git a/src/index.ts b/src/index.ts index 5cc5413..be49119 100644 --- a/src/index.ts +++ b/src/index.ts @@ -8,7 +8,7 @@ export { isDependencyManifest, manifestBaseName } from "./manifests"; -export { CODEFENCE_OUTPUT_DIR, DSEC_OUTPUT_DIR, FGR_OUTPUT_DIR } from "./hooks/paths"; +export { CODEFENCE_OUTPUT_DIR } from "./hooks/paths"; export { ASPECT_IDS, DEFAULT_ASPECTS } from 
"./scan/types"; export type { AspectId, AspectOutcome, ScanContext, ScanOptions } from "./scan/types"; export { parseAspectList, resolveAspects } from "./scan/parseOptions"; diff --git a/src/scan/parseOptions.ts b/src/scan/parseOptions.ts index e2e32bb..f475b6f 100644 --- a/src/scan/parseOptions.ts +++ b/src/scan/parseOptions.ts @@ -118,18 +118,13 @@ function collectFlagValues(argv: string[], flag: string): { values: string[]; re return { values, rest }; } -function envTrim(...keys: string[]): string | undefined { - for (const key of keys) { - const value = process.env[key]?.trim(); - if (value) { - return value; - } - } - return undefined; +function envTrim(key: string): string | undefined { + const value = process.env[key]?.trim(); + return value ? value : undefined; } function defaultAspectsFromEnv(): AspectId[] { - const raw = envTrim("CODEFENCE_ASPECTS", "DSEC_ASPECTS"); + const raw = envTrim("CODEFENCE_ASPECTS"); if (!raw) { return [...DEFAULT_ASPECTS]; } @@ -205,7 +200,7 @@ export function parseScanArgv(argv: string[]): ParseScanResult { } const onlyParsedValue = onlyParsed.value; - const onlyEnv = envTrim("CODEFENCE_ONLY", "DSEC_ONLY"); + const onlyEnv = envTrim("CODEFENCE_ONLY"); let only: AspectId[] | null = null; if (onlyParsedValue !== null) { @@ -218,7 +213,7 @@ export function parseScanArgv(argv: string[]): ParseScanResult { if (skipParsed.value !== null) { skip = parseAspectList(skipParsed.value); } else { - const skipEnv = envTrim("CODEFENCE_SKIP", "DSEC_SKIP"); + const skipEnv = envTrim("CODEFENCE_SKIP"); if (skipEnv) { skip = parseAspectList(skipEnv); } @@ -277,9 +272,9 @@ Aspects (default: code): code Local secure-coding rules on changed source files Environment: - CODEFENCE_ASPECTS Default aspect list (comma-separated; DSEC_ASPECTS accepted) - CODEFENCE_ONLY Same as --only (DSEC_ONLY accepted) - CODEFENCE_SKIP Same as --skip (DSEC_SKIP accepted) + CODEFENCE_ASPECTS Default aspect list (comma-separated) + CODEFENCE_ONLY Same as --only + 
CODEFENCE_SKIP Same as --skip CODEFENCE_SECRET_RULES Default Semgrep-style secret rule paths CODEFENCE_SECRET_DEFAULT_RULES Same as --secret-default-rules CODEFENCE_SECRET_DEFAULT_RULES_VERSION Same as --secret-default-rules-version diff --git a/src/scan/runner.ts b/src/scan/runner.ts index 01931eb..6accac2 100644 --- a/src/scan/runner.ts +++ b/src/scan/runner.ts @@ -29,7 +29,7 @@ export async function runScan(options: ScanOptions): Promise { const aspects = resolveAspects(options); if (aspects.length === 0) { - console.error("No scan aspects selected. Use --only or adjust CODEFENCE_ASPECTS (or DSEC_ASPECTS) / --skip."); + console.error("No scan aspects selected. Use --only or adjust CODEFENCE_ASPECTS / --skip."); return 1; } diff --git a/src/scan/secret/builtinRules.ts b/src/scan/secret/builtinRules.ts index 43cf0ab..291cf51 100644 --- a/src/scan/secret/builtinRules.ts +++ b/src/scan/secret/builtinRules.ts @@ -1,104 +1,41 @@ +import fs from "node:fs"; +import path from "node:path"; +import { parseRuleBundle } from "./yamlRuleParser"; import { BUILTIN_SECRET_RULES_VERSION, SecretRule } from "./types"; -export const builtinSecretRules: SecretRule[] = [ - { - id: "secret-github-token", - description: "Detect GitHub personal access tokens", - message: "Potential GitHub token detected", - severity: "high", - confidence: "high", - remediation: "Remove the token, rotate it, and load credentials from environment or secret storage.", - patterns: [{ type: "regex", value: "\\bgh[pousr]_[A-Za-z0-9]{36,255}\\b" }], - source: "builtin", - sourceName: `builtin@${BUILTIN_SECRET_RULES_VERSION}` - }, - { - id: "secret-gitlab-token", - description: "Detect GitLab tokens", - message: "Potential GitLab token detected", - severity: "high", - confidence: "high", - remediation: "Remove the token, rotate it, and move it to a managed secret store.", - patterns: [{ type: "regex", value: "\\bglpat-[A-Za-z0-9_-]{20,255}\\b" }], - source: "builtin", - sourceName: 
`builtin@${BUILTIN_SECRET_RULES_VERSION}` - }, - { - id: "secret-stripe-key", - description: "Detect Stripe API keys", - message: "Potential Stripe API key detected", - severity: "high", - confidence: "high", - remediation: "Replace embedded Stripe keys with environment-based configuration and rotate exposed keys.", - patterns: [{ type: "regex", value: "\\bsk_(?:live|test)_[A-Za-z0-9]{16,}\\b" }], - source: "builtin", - sourceName: `builtin@${BUILTIN_SECRET_RULES_VERSION}` - }, - { - id: "secret-bearer-token", - description: "Detect bearer tokens", - message: "Potential bearer token detected", - severity: "high", - confidence: "medium", - remediation: "Avoid embedding bearer tokens in source files; inject them from runtime configuration.", - patterns: [{ type: "regex", value: "\\bBearer\\s+[A-Za-z0-9._\\-+/=]{16,}\\b" }], - source: "builtin", - sourceName: `builtin@${BUILTIN_SECRET_RULES_VERSION}` - }, - { - id: "secret-private-key", - description: "Detect PEM private key material", - message: "Potential private key material detected", - severity: "high", - confidence: "high", - remediation: "Remove private keys from source control immediately and rotate any exposed key material.", - patterns: [{ type: "regex", value: "-----BEGIN (?:RSA |DSA |EC |OPENSSH |PGP )?PRIVATE KEY-----" }], - source: "builtin", - sourceName: `builtin@${BUILTIN_SECRET_RULES_VERSION}` - }, - { - id: "secret-password-assignment", - description: "Detect password-like assignments", - message: "Potential hardcoded password detected", - severity: "high", - confidence: "medium", - remediation: "Do not commit passwords; use environment variables or a secret manager instead.", - patterns: [ - { - type: "regex", - value: - "(?:password|passwd|pwd)\\s*[:=]\\s*[\"'][^\"'\\n]{8,}[\"']" - } - ], - source: "builtin", - sourceName: `builtin@${BUILTIN_SECRET_RULES_VERSION}` - }, - { - id: "secret-uri-credentials", - description: "Detect credentials embedded in URIs", - message: "Potential credentials 
embedded in URI detected", - severity: "high", - confidence: "high", - remediation: "Move credentials out of URIs and into environment or dedicated secret configuration.", - patterns: [{ type: "regex", value: "\\b[a-z][a-z0-9+.-]*://[^\\s:@/]+:[^\\s:@/]+@" }], - source: "builtin", - sourceName: `builtin@${BUILTIN_SECRET_RULES_VERSION}` - }, - { - id: "no-hardcoded-secret", - description: "Detect generic token-style assignments", - message: "Potential hardcoded secret detected", - severity: "high", - confidence: "medium", - remediation: "Replace embedded credentials with runtime-configured secrets.", - patterns: [ - { - type: "regex", - value: - "(?:api[_-]?key|secret|token|access[_-]?token|client[_-]?secret)\\s*[:=]\\s*[\"'][A-Za-z0-9_\\-+/=]{12,}[\"']" - } - ], - source: "builtin", - sourceName: `builtin@${BUILTIN_SECRET_RULES_VERSION}` +export const BUILTIN_SECRET_RULES_BUNDLE = path.join("rules", "secret", "builtin.yml"); + +let cachedBuiltinRules: SecretRule[] | null = null; + +export function resolveBuiltinRulesBundlePath(startDir: string = __dirname): string { + let dir = startDir; + while (dir !== path.dirname(dir)) { + const candidate = path.join(dir, BUILTIN_SECRET_RULES_BUNDLE); + if (fs.existsSync(candidate)) { + return candidate; + } + dir = path.dirname(dir); + } + + throw new Error( + `Built-in secret rules bundle not found (expected ${BUILTIN_SECRET_RULES_BUNDLE} under package root)` + ); +} + +export function loadBuiltinSecretRules(): SecretRule[] { + if (cachedBuiltinRules) { + return cachedBuiltinRules; } -]; + + const bundlePath = resolveBuiltinRulesBundlePath(); + const yamlContent = fs.readFileSync(bundlePath, "utf8"); + const sourceName = `builtin@${BUILTIN_SECRET_RULES_VERSION}`; + const rules = parseRuleBundle(yamlContent, sourceName, "builtin"); + + if (rules.length === 0) { + throw new Error(`Built-in secret rules bundle is empty: ${bundlePath}`); + } + + cachedBuiltinRules = rules; + return rules; +} diff --git 
a/src/scan/secret/remoteRules.ts b/src/scan/secret/remoteRules.ts index 093c3c2..39ef06e 100644 --- a/src/scan/secret/remoteRules.ts +++ b/src/scan/secret/remoteRules.ts @@ -1,5 +1,10 @@ +import http from "node:http"; +import https from "node:https"; import { isSecretRulesCacheFresh, readCachedSecretRules, writeCachedSecretRules } from "./cache"; +const MAX_REDIRECTS = 5; +const DOWNLOAD_TIMEOUT_MS = 30_000; + function validateRulesUrl(url: string): void { const parsed = new URL(url); if (parsed.protocol === "https:") { @@ -16,6 +21,48 @@ function validateRulesUrl(url: string): void { throw new Error("Remote secret rules must use https (http is allowed only for localhost)"); } +function requestRuleBundle( + url: string, + redirectDepth = 0 +): Promise<{ statusCode: number; body: string }> { + if (redirectDepth > MAX_REDIRECTS) { + return Promise.reject(new Error("Too many redirects while downloading secret rules")); + } + + return new Promise((resolve, reject) => { + const parsed = new URL(url); + const lib = parsed.protocol === "https:" ? https : http; + + const req = lib.request( + url, + { method: "GET", timeout: DOWNLOAD_TIMEOUT_MS }, + (res) => { + const statusCode = res.statusCode ?? 
0; + + if (statusCode >= 300 && statusCode < 400 && res.headers.location) { + res.resume(); + const nextUrl = new URL(res.headers.location, url).href; + resolve(requestRuleBundle(nextUrl, redirectDepth + 1)); + return; + } + + const chunks: Buffer[] = []; + res.on("data", (chunk) => chunks.push(chunk)); + res.on("end", () => { + resolve({ statusCode, body: Buffer.concat(chunks).toString("utf8") }); + }); + res.on("error", reject); + } + ); + + req.on("timeout", () => { + req.destroy(new Error(`Timed out downloading secret rules from ${url}`)); + }); + req.on("error", reject); + req.end(); + }); +} + export async function loadRemoteRuleBundle( workspace: string, url: string, @@ -29,15 +76,14 @@ export async function loadRemoteRuleBundle( return cached.body; } - const response = await fetch(url); - if (!response.ok) { + const { statusCode, body } = await requestRuleBundle(url); + if (statusCode < 200 || statusCode >= 300) { if (cached) { return cached.body; } - throw new Error(`Failed to download remote secret rules: ${response.status} ${response.statusText}`); + throw new Error(`Failed to download remote secret rules: ${statusCode}`); } - const body = await response.text(); writeCachedSecretRules(workspace, url, body, ttlMs); return body; } diff --git a/src/scan/secret/ruleLoader.ts b/src/scan/secret/ruleLoader.ts index 284dbf2..5e3e276 100644 --- a/src/scan/secret/ruleLoader.ts +++ b/src/scan/secret/ruleLoader.ts @@ -1,143 +1,13 @@ import fs from "node:fs"; import path from "node:path"; -import { parseAllDocuments } from "yaml"; -import { ConfidenceLevel } from "../../types"; -import { builtinSecretRules } from "./builtinRules"; +import { loadBuiltinSecretRules } from "./builtinRules"; import { loadRemoteRuleBundle } from "./remoteRules"; import { BUILTIN_SECRET_RULES_VERSION, SecretRule, - SecretRulePattern, SecretScanOptions } from "./types"; - -function normalizeSeverity(value: unknown): "low" | "medium" | "high" { - const normalized = typeof value === "string" 
? value.trim().toLowerCase() : "medium"; - if (normalized === "low" || normalized === "medium" || normalized === "high") { - return normalized; - } - if (normalized === "warning") { - return "medium"; - } - if (normalized === "error") { - return "high"; - } - return "medium"; -} - -function normalizeConfidence(value: unknown): ConfidenceLevel { - const normalized = typeof value === "string" ? value.trim().toLowerCase() : "medium"; - if (normalized === "low" || normalized === "medium" || normalized === "high") { - return normalized; - } - return "medium"; -} - -function collectPatterns(node: unknown): SecretRulePattern[] { - if (!node || typeof node !== "object") { - return []; - } - - const entry = node as Record; - const patterns: SecretRulePattern[] = []; - - if (typeof entry["pattern-regex"] === "string") { - patterns.push({ type: "regex", value: entry["pattern-regex"] }); - } - - if (typeof entry.pattern === "string") { - patterns.push({ type: "literal", value: entry.pattern }); - } - - if (Array.isArray(entry.patterns)) { - for (const child of entry.patterns) { - patterns.push(...collectPatterns(child)); - } - } - - if (Array.isArray(entry["pattern-either"])) { - for (const child of entry["pattern-either"]) { - patterns.push(...collectPatterns(child)); - } - } - - return patterns; -} - -function parseRuleObject( - rawRule: unknown, - sourceName: string, - source: SecretRule["source"] -): SecretRule | null { - if (!rawRule || typeof rawRule !== "object") { - return null; - } - - const rule = rawRule as Record; - const id = typeof rule.id === "string" ? rule.id.trim() : ""; - const message = typeof rule.message === "string" ? rule.message.trim() : ""; - if (!id || !message) { - return null; - } - - const metadata = (rule.metadata ?? 
{}) as Record; - const patterns = collectPatterns(rule); - if (patterns.length === 0) { - return null; - } - - for (const pattern of patterns) { - if (pattern.type === "regex") { - try { - new RegExp(pattern.value); - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - throw new Error(`Invalid regex in secret rule ${id} from ${sourceName}: ${message}`); - } - } - } - - return { - id, - description: typeof rule.description === "string" ? rule.description : id, - message, - severity: normalizeSeverity(rule.severity), - confidence: normalizeConfidence(metadata.confidence), - remediation: - typeof metadata.remediation === "string" - ? metadata.remediation - : typeof metadata["remediation-guidance"] === "string" - ? metadata["remediation-guidance"] - : undefined, - patterns, - source, - sourceName - }; -} - -function parseRuleBundle( - yamlContent: string, - sourceName: string, - source: SecretRule["source"] -): SecretRule[] { - const rules: SecretRule[] = []; - - for (const doc of parseAllDocuments(yamlContent)) { - const value = doc.toJSON() as Record | null; - if (!value || !Array.isArray(value.rules)) { - continue; - } - - for (const rawRule of value.rules) { - const parsed = parseRuleObject(rawRule, sourceName, source); - if (parsed) { - rules.push(parsed); - } - } - } - - return rules; -} +import { parseRuleBundle } from "./yamlRuleParser"; function collectYamlFiles(entryPath: string, out: string[]): void { const stat = fs.statSync(entryPath); @@ -197,7 +67,7 @@ export async function loadSecretRules( `Unknown built-in secret rule version: ${options.defaultRulesVersion} (available: ${BUILTIN_SECRET_RULES_VERSION})` ); } - loaded.push(...builtinSecretRules); + loaded.push(...loadBuiltinSecretRules()); } if (options.rulePaths.length > 0) { diff --git a/src/scan/secret/yamlRuleParser.ts b/src/scan/secret/yamlRuleParser.ts new file mode 100644 index 0000000..0e79670 --- /dev/null +++ b/src/scan/secret/yamlRuleParser.ts @@ -0,0 
+1,131 @@ +import { parseAllDocuments } from "yaml"; +import { ConfidenceLevel } from "../../types"; +import { SecretRule, SecretRulePattern } from "./types"; + +function normalizeSeverity(value: unknown): "low" | "medium" | "high" { + const normalized = typeof value === "string" ? value.trim().toLowerCase() : "medium"; + if (normalized === "low" || normalized === "medium" || normalized === "high") { + return normalized; + } + if (normalized === "warning") { + return "medium"; + } + if (normalized === "error") { + return "high"; + } + return "medium"; +} + +function normalizeConfidence(value: unknown): ConfidenceLevel { + const normalized = typeof value === "string" ? value.trim().toLowerCase() : "medium"; + if (normalized === "low" || normalized === "medium" || normalized === "high") { + return normalized; + } + return "medium"; +} + +function collectPatterns(node: unknown): SecretRulePattern[] { + if (!node || typeof node !== "object") { + return []; + } + + const entry = node as Record; + const patterns: SecretRulePattern[] = []; + + if (typeof entry["pattern-regex"] === "string") { + patterns.push({ type: "regex", value: entry["pattern-regex"] }); + } + + if (typeof entry.pattern === "string") { + patterns.push({ type: "literal", value: entry.pattern }); + } + + if (Array.isArray(entry.patterns)) { + for (const child of entry.patterns) { + patterns.push(...collectPatterns(child)); + } + } + + if (Array.isArray(entry["pattern-either"])) { + for (const child of entry["pattern-either"]) { + patterns.push(...collectPatterns(child)); + } + } + + return patterns; +} + +function parseRuleObject( + rawRule: unknown, + sourceName: string, + source: SecretRule["source"] +): SecretRule | null { + if (!rawRule || typeof rawRule !== "object") { + return null; + } + + const rule = rawRule as Record; + const id = typeof rule.id === "string" ? rule.id.trim() : ""; + const message = typeof rule.message === "string" ? 
rule.message.trim() : ""; + if (!id || !message) { + return null; + } + + const metadata = (rule.metadata ?? {}) as Record; + const patterns = collectPatterns(rule); + if (patterns.length === 0) { + return null; + } + + for (const pattern of patterns) { + if (pattern.type === "regex") { + try { + new RegExp(pattern.value); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + throw new Error(`Invalid regex in secret rule ${id} from ${sourceName}: ${message}`); + } + } + } + + return { + id, + description: typeof rule.description === "string" ? rule.description : id, + message, + severity: normalizeSeverity(rule.severity), + confidence: normalizeConfidence(metadata.confidence), + remediation: + typeof metadata.remediation === "string" + ? metadata.remediation + : typeof metadata["remediation-guidance"] === "string" + ? metadata["remediation-guidance"] + : undefined, + patterns, + source, + sourceName + }; +} + +export function parseRuleBundle( + yamlContent: string, + sourceName: string, + source: SecretRule["source"] +): SecretRule[] { + const rules: SecretRule[] = []; + + for (const doc of parseAllDocuments(yamlContent)) { + const value = doc.toJSON() as Record | null; + if (!value || !Array.isArray(value.rules)) { + continue; + } + + for (const rawRule of value.rules) { + const parsed = parseRuleObject(rawRule, sourceName, source); + if (parsed) { + rules.push(parsed); + } + } + } + + return rules; +} diff --git a/tests/packageMetadata.test.ts b/tests/packageMetadata.test.ts index 098357d..b9f0127 100644 --- a/tests/packageMetadata.test.ts +++ b/tests/packageMetadata.test.ts @@ -14,7 +14,6 @@ const packageJson = JSON.parse( test("package.json publishes codefence with codefence binary", () => { assert.equal(packageJson.name, "codefence"); assert.deepEqual(packageJson.bin, { codefence: "dist/src/cli.js" }); - assert.equal("fgr" in (packageJson.bin ?? {}), false); assert.match(packageJson.scripts?.codefence ?? 
"", /scan --staged/); assert.equal(packageJson.scripts?.guardrails, undefined); }); diff --git a/tests/secretEngine.builtinRules.test.ts b/tests/secretEngine.builtinRules.test.ts new file mode 100644 index 0000000..9886a69 --- /dev/null +++ b/tests/secretEngine.builtinRules.test.ts @@ -0,0 +1,53 @@ +import assert from "node:assert/strict"; +import fs from "node:fs"; +import test from "node:test"; +import { + BUILTIN_SECRET_RULES_BUNDLE, + loadBuiltinSecretRules, + resolveBuiltinRulesBundlePath +} from "../src/scan/secret/builtinRules"; +import { loadSecretRules } from "../src/scan/secret/ruleLoader"; +import { BUILTIN_SECRET_RULES_VERSION } from "../src/scan/secret/types"; + +const EXPECTED_BUILTIN_RULE_IDS = [ + "secret-github-token", + "secret-gitlab-token", + "secret-stripe-key", + "secret-bearer-token", + "secret-private-key", + "secret-password-assignment", + "secret-uri-credentials", + "no-hardcoded-secret" +]; + +test("built-in rules load from bundled Semgrep-style YAML", () => { + const bundlePath = resolveBuiltinRulesBundlePath(); + assert.ok(bundlePath.endsWith("builtin.yml")); + assert.ok(fs.existsSync(bundlePath)); + + const rules = loadBuiltinSecretRules(); + assert.equal(rules.length, EXPECTED_BUILTIN_RULE_IDS.length); + assert.deepEqual( + rules.map((rule) => rule.id).sort(), + [...EXPECTED_BUILTIN_RULE_IDS].sort() + ); + assert.ok(rules.every((rule) => rule.source === "builtin")); + assert.ok(rules.every((rule) => rule.sourceName === `builtin@${BUILTIN_SECRET_RULES_VERSION}`)); + assert.ok(rules.every((rule) => rule.patterns.length > 0)); +}); + +test("loadSecretRules uses YAML built-ins when default rules are enabled", async () => { + const rules = await loadSecretRules(process.cwd(), { + rulePaths: [], + defaultRules: true, + defaultRulesVersion: null, + rulesUpdateUrl: null, + rulesRefresh: false, + rulesCacheTtlMs: 1000, + entropyThreshold: 4.2, + minLength: 12, + minConfidence: "low" + }); + + assert.equal(rules.length, 
EXPECTED_BUILTIN_RULE_IDS.length); +}); diff --git a/tests/secretEngine.remoteExampleBundle.test.ts b/tests/secretEngine.remoteExampleBundle.test.ts new file mode 100644 index 0000000..1ac308a --- /dev/null +++ b/tests/secretEngine.remoteExampleBundle.test.ts @@ -0,0 +1,48 @@ +import assert from "node:assert/strict"; +import fs from "node:fs"; +import http from "node:http"; +import os from "node:os"; +import path from "node:path"; +import test from "node:test"; +import { scanFiles } from "../src/scanner"; + +test("examples remote bundle adds deploy-token and header findings", async () => { + const workspace = fs.mkdtempSync(path.join(os.tmpdir(), "codefence-remote-example-")); + const bundlePath = path.join(process.cwd(), "examples", "rules", "extra-secrets-bundle.yml"); + const fixture = path.join(process.cwd(), "examples", "secrets", "fake-secrets.ts"); + const bundleBody = fs.readFileSync(bundlePath, "utf8"); + + const server = http.createServer((_, res) => { + res.writeHead(200, { "content-type": "application/x-yaml" }); + res.end(bundleBody); + }); + + await new Promise((resolve) => server.listen(0, "127.0.0.1", () => resolve())); + const address = server.address(); + if (!address || typeof address === "string") { + throw new Error("failed to start test server"); + } + + const url = `http://127.0.0.1:${address.port}/extra-secrets-bundle.yml`; + const findings = await scanFiles([fixture], { + workspace, + secret: { + rulePaths: [], + defaultRules: true, + defaultRulesVersion: null, + rulesUpdateUrl: url, + rulesRefresh: true, + rulesCacheTtlMs: 60_000, + entropyThreshold: 4.2, + minLength: 12, + minConfidence: "low" + } + }); + + await new Promise((resolve, reject) => server.close((error) => (error ? 
reject(error) : resolve()))); + + assert.ok(findings.some((f) => f.ruleId === "example-ci-deploy-token")); + assert.ok(findings.some((f) => f.ruleId === "example-internal-api-header")); + + fs.rmSync(workspace, { recursive: true, force: true }); +}); diff --git a/tests/secretsExamples.test.ts b/tests/secretsExamples.test.ts index e7e007a..1f1b944 100644 --- a/tests/secretsExamples.test.ts +++ b/tests/secretsExamples.test.ts @@ -54,10 +54,13 @@ test("examples secret fixtures produce secret findings", async () => { const fixtureDir = path.join(workspace, "examples", "secrets"); const fixtureFiles = [ path.join(fixtureDir, "fake-secrets.ts"), - path.join(fixtureDir, "fake-uri-credentials.txt") + path.join(fixtureDir, "fake-uri-credentials.conf"), + path.join(fixtureDir, "fake-private-key-block.conf") ]; const findings = await scanFiles(fixtureFiles, { workspace }); assert.ok(findings.length > 0); - assert.ok(findings.some((f) => f.ruleId.startsWith("secret-") || f.ruleId === "no-hardcoded-secret")); + assert.ok(findings.some((f) => f.ruleId === "secret-uri-credentials")); + assert.ok(findings.some((f) => f.ruleId === "secret-private-key")); + assert.ok(findings.some((f) => f.ruleId === "no-hardcoded-secret" || f.ruleId === "secret-bearer-token")); }); diff --git a/tests/templates.test.ts b/tests/templates.test.ts index f0d08c1..b25db87 100644 --- a/tests/templates.test.ts +++ b/tests/templates.test.ts @@ -6,14 +6,6 @@ import test from "node:test"; const repoRoot = path.resolve(__dirname, "..", ".."); const templatesDir = path.join(repoRoot, "templates", "ai"); -const LEGACY_PATTERNS = [ - /\bfgr\b/, - /\.fgr\//, - /npm run codefence/, -]; - -const REQUIRED_PATTERNS = [/codefence scan/, /npm i codefence|`codefence`/]; - test("codefence-guardrails.mdc matches installed Cursor rule template", () => { const template = fs.readFileSync(path.join(templatesDir, "codefence-guardrails.mdc"), "utf8"); const installed = fs.readFileSync(