Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 191 additions & 0 deletions pkg/common/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,21 @@ import (
"bufio"
"errors"
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
"syscall"

"github.com/trufflesecurity/trufflehog/v3/pkg/context"
)

// IgnoreFileName is the name of the ignore file that trufflehog looks for
// directly inside each scan root, in the spirit of .gitignore /
// .gitleaksignore. The file holds one gitignore-style glob per line; blank
// lines and lines starting with '#' are skipped. Each glob is compiled to a
// regex and appended to the exclude rules used by the scan filter.
// Re-include patterns ('!' prefix) and character classes ('[...]') are
// rejected with an error rather than silently ignored.
//
// See https://github.com/trufflesecurity/trufflehog/issues/2687.
const IgnoreFileName = ".trufflehogignore"

type Filter struct {
include *FilterRuleSet
exclude *FilterRuleSet
Expand Down Expand Up @@ -121,3 +131,184 @@ func (rules *FilterRuleSet) Matches(object string) bool {
// ShouldExclude reports whether path matches the filter's exclude rule set.
func (filter *Filter) ShouldExclude(path string) bool {
	excluded := filter.exclude.Matches(path)
	return excluded
}

// AddTrufflehogIgnoreFiles looks for a .trufflehogignore file directly inside
// every supplied scan root and appends the rules it contains to the filter's
// exclude set.
//
// Empty roots are skipped, and a root that resolves to an ignore-file path
// already visited in this call is processed only once. A root without an
// ignore file contributes nothing. Patterns are gitignore-style globs (one
// per line, '#' for comments) compiled to regexes by filterRulesFromGlobFile.
//
// The returned slice lists the ignore-file paths that were read, in the order
// they were encountered, so callers can log them. On the first failure the
// paths read so far are returned together with the error; rules merged before
// the failure stay in the exclude set. A nil receiver is a no-op.
//
// See https://github.com/trufflesecurity/trufflehog/issues/2687.
func (filter *Filter) AddTrufflehogIgnoreFiles(scanRoots ...string) ([]string, error) {
	if filter == nil {
		return nil, nil
	}
	// Make sure there is a rule set to append to.
	if filter.exclude == nil {
		filter.exclude = &FilterRuleSet{}
	}

	var (
		loadedPaths []string
		visited     = make(map[string]struct{})
	)
	for _, scanRoot := range scanRoots {
		if scanRoot == "" {
			continue
		}

		candidate := filepath.Join(scanRoot, IgnoreFileName)
		if _, dup := visited[candidate]; dup {
			continue
		}
		visited[candidate] = struct{}{}

		extra, err := filterRulesFromGlobFile(candidate)
		if err != nil {
			return loadedPaths, err
		}
		// A nil rule set means "no ignore file here"; only record files
		// that were actually read.
		if extra != nil {
			*filter.exclude = append(*filter.exclude, *extra...)
			loadedPaths = append(loadedPaths, candidate)
		}
	}
	return loadedPaths, nil
}

// filterRulesFromGlobFile reads the gitignore-style file at path and compiles
// every entry into a regex with globToRegex.
//
// Blank lines and lines starting with '#' (after trimming surrounding
// whitespace) are skipped.
//
// Results:
//   - (nil, nil) when path is empty, the file does not exist, or a component
//     of path is not a directory. The last case happens when the scan root
//     is itself a regular file (e.g. a filesystem scan invoked on a single
//     file), so "<root>/.trufflehogignore" cannot exist; it is treated as
//     "no ignore file" so auto-discovery stays out of the way.
//   - (rules, nil) when the file was read; rules is non-nil even if the file
//     held no patterns.
//   - (nil, err) when the file cannot be opened for any other reason, when
//     reading fails, or when a line uses unsupported syntax ('!' re-includes
//     here, anything globToRegex rejects).
func filterRulesFromGlobFile(path string) (*FilterRuleSet, error) {
	if path == "" {
		return nil, nil
	}
	file, err := os.Open(path)
	if err != nil {
		// Classify by error value, not by message text: the message embeds
		// the path, so a substring test would also swallow unrelated
		// failures (e.g. permission denied) on any path that happens to
		// contain the phrase, and the wording is OS-specific.
		if errors.Is(err, os.ErrNotExist) || errors.Is(err, syscall.ENOTDIR) {
			return nil, nil
		}
		return nil, fmt.Errorf("unable to open %s: %w", path, err)
	}
	defer file.Close()

	rules := FilterRuleSet{}
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		// "!"-prefixed re-include patterns are not supported in this initial
		// implementation; surface a clear error so users don't silently get
		// wrong behavior. Plain patterns work as expected.
		if strings.HasPrefix(line, "!") {
			return nil, fmt.Errorf(
				"%s: re-include patterns (lines starting with '!') are not yet supported (offending line: %q)",
				path, line,
			)
		}
		pattern, err := globToRegex(line)
		if err != nil {
			return nil, fmt.Errorf("%s: %w", path, err)
		}
		rules = append(rules, *pattern)
	}
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("%s: %w", path, err)
	}
	return &rules, nil
}

// globToRegex converts a gitignore-style glob pattern into a compiled
// regular expression suitable for the existing FilterRuleSet matching.
//
// Supported syntax:
//   - "*" matches any run of characters that are not "/"
//   - "**/" at the start matches zero or more leading path segments
//   - "/**/" matches zero or more path segments (so "a/**/b" matches "a/b"
//     and "a/x/b")
//   - "/**" at the end matches everything under the prefix
//   - any other "**" matches any run of characters, "/" included
//   - "?" matches any single character that is not "/"
//   - "/" at the start anchors the pattern with "^"; without it the pattern
//     may begin at the start of the path or right after any "/"
//   - "/" at the end is accepted and stripped
//
// Every pattern is terminated with "(?:/|$)", so it matches the named entry
// itself as well as anything beneath it. Because only path strings are
// matched, a trailing "/" cannot restrict a pattern to directories.
//
// NOTE(review): an anchored pattern must match at offset 0 of whatever string
// is handed to the matcher. A reviewer flagged that the filesystem source may
// pass paths that still carry the scan root (e.g. "/home/user/project/..."),
// in which case leading-"/" patterns would never match. Confirm how callers
// build the path before relying on anchoring.
//
// Unsupported (returns an error so the user is not silently fooled):
//   - character classes ("[...]")
//   - patterns with no path component ("" or "/"), which would otherwise
//     compile to a regex matching every absolute path
//   - "!"-prefixed re-includes (handled by the caller)
func globToRegex(glob string) (*regexp.Regexp, error) {
	if strings.ContainsAny(glob, "[]") {
		return nil, fmt.Errorf("character class globs ('[...]') are not yet supported (offending line: %q)", glob)
	}

	anchored := strings.HasPrefix(glob, "/")
	body := strings.TrimSuffix(strings.TrimPrefix(glob, "/"), "/")
	if body == "" {
		// "/" would become `^(?:/|$)` and exclude every absolute path.
		return nil, fmt.Errorf("glob pattern has no path component (offending line: %q)", glob)
	}

	var b strings.Builder
	if anchored {
		b.WriteString("^")
	} else {
		// non-anchored entries match anywhere in the path, just like .gitignore.
		b.WriteString("(?:^|/)")
	}

	// Walk the body byte by byte. The multi-byte "**" forms are tried first,
	// in a fixed order, before falling back to single-character handling.
	for i := 0; i < len(body); {
		rest := body[i:]
		switch {
		case strings.HasPrefix(rest, "/**/"):
			// "/**/" → zero or more path segments + "/"
			b.WriteString("(?:/|/.*/)")
			i += 4
			continue
		case i == 0 && strings.HasPrefix(rest, "**/"):
			// "**/" at start of body → zero or more path segments
			b.WriteString("(?:|.*/)")
			i += 3
			continue
		case rest == "/**":
			// trailing "/**" → everything under the prefix
			b.WriteString("/.*")
			i += 3
			continue
		}

		c := body[i]
		switch c {
		case '*':
			if strings.HasPrefix(rest, "**") {
				b.WriteString(".*")
				i += 2
				continue
			}
			b.WriteString("[^/]*")
		case '?':
			b.WriteString("[^/]")
		case '.', '+', '(', ')', '|', '{', '}', '$', '^', '\\':
			// ASCII regex metacharacters are taken literally.
			b.WriteString(regexp.QuoteMeta(string(c)))
		default:
			// Copy the raw byte so multi-byte UTF-8 sequences pass through
			// unchanged.
			b.WriteByte(c)
		}
		i++
	}

	// Match the entry itself or anything below it.
	b.WriteString("(?:/|$)")

	return regexp.Compile(b.String())
}
126 changes: 126 additions & 0 deletions pkg/common/filter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ package common

import (
"os"
"path/filepath"
"regexp"
"strings"
"testing"
)

Expand Down Expand Up @@ -176,3 +178,127 @@ func testFilterWriteFile(filename string, content []byte) error {
}
return f.Close()
}

// TestAddTrufflehogIgnoreFiles writes a .trufflehogignore into a temporary
// scan root, loads it through AddTrufflehogIgnoreFiles, and checks which
// relative paths ShouldExclude then reports as excluded. It covers the
// auto-discovery added for
// https://github.com/trufflesecurity/trufflehog/issues/2687.
func TestAddTrufflehogIgnoreFiles(t *testing.T) {
	dir := t.TempDir()
	ignoreFile := filepath.Join(dir, IgnoreFileName)
	lines := []string{
		"# .gitignore-style ignore file",
		"",
		"vendor/",
		"*.lock",
		"/secrets/known.json",
		"src/**/*.test.go",
	}
	data := []byte(strings.Join(lines, "\n"))
	if err := os.WriteFile(ignoreFile, data, 0o644); err != nil {
		t.Fatalf("write ignore file: %v", err)
	}

	f := FilterEmpty()
	got, err := f.AddTrufflehogIgnoreFiles(dir)
	if err != nil {
		t.Fatalf("AddTrufflehogIgnoreFiles: %v", err)
	}
	if len(got) != 1 || got[0] != ignoreFile {
		t.Fatalf("expected loaded=[%q], got %v", ignoreFile, got)
	}

	type expectation struct {
		path     string
		excluded bool
	}
	table := []expectation{
		{path: "vendor/foo.go", excluded: true},
		{path: "src/vendor/foo.go", excluded: true},
		{path: "main.go", excluded: false},
		{path: "yarn.lock", excluded: true},
		{path: "deeply/nested/yarn.lock", excluded: true},
		{path: "secrets/known.json", excluded: true},
		{path: "other/secrets/known.json", excluded: false},
		{path: "src/foo/bar/baz.test.go", excluded: true},
		{path: "src/baz.test.go", excluded: true},
	}
	for _, want := range table {
		actual := f.ShouldExclude(want.path)
		if actual == want.excluded {
			continue
		}
		t.Errorf("path=%q: expected excluded=%v, got %v", want.path, want.excluded, actual)
	}
}

// TestAddTrufflehogIgnoreFiles_NoFile checks that a scan root without an
// ignore file yields neither an error nor any loaded paths.
func TestAddTrufflehogIgnoreFiles_NoFile(t *testing.T) {
	emptyRoot := t.TempDir()
	f := FilterEmpty()

	got, err := f.AddTrufflehogIgnoreFiles(emptyRoot)
	if err != nil {
		t.Fatalf("expected no error for missing ignore file, got %v", err)
	}
	if len(got) > 0 {
		t.Errorf("expected no files loaded, got %v", got)
	}
}

// TestAddTrufflehogIgnoreFiles_DedupeRoots passes the same scan root three
// times and expects its ignore file to be reported as loaded exactly once.
func TestAddTrufflehogIgnoreFiles_DedupeRoots(t *testing.T) {
	dir := t.TempDir()
	ignoreFile := filepath.Join(dir, IgnoreFileName)
	writeErr := os.WriteFile(ignoreFile, []byte("vendor/\n"), 0o644)
	if writeErr != nil {
		t.Fatalf("write ignore: %v", writeErr)
	}

	f := FilterEmpty()
	got, err := f.AddTrufflehogIgnoreFiles(dir, dir, dir)
	if err != nil {
		t.Fatalf("AddTrufflehogIgnoreFiles: %v", err)
	}
	if n := len(got); n != 1 {
		t.Errorf("expected 1 loaded file (deduped), got %d", n)
	}
}

// TestAddTrufflehogIgnoreFiles_RejectsNegation checks that an ignore file
// containing a "!" re-include line produces an error mentioning "re-include".
func TestAddTrufflehogIgnoreFiles_RejectsNegation(t *testing.T) {
	dir := t.TempDir()
	ignoreFile := filepath.Join(dir, IgnoreFileName)
	if err := os.WriteFile(ignoreFile, []byte("!keep_me.go\n"), 0o644); err != nil {
		t.Fatalf("write ignore: %v", err)
	}

	f := FilterEmpty()
	_, err := f.AddTrufflehogIgnoreFiles(dir)
	mentionsReinclude := err != nil && strings.Contains(err.Error(), "re-include")
	if !mentionsReinclude {
		t.Errorf("expected re-include error, got %v", err)
	}
}

// TestGlobToRegex compiles each supported glob form with globToRegex and
// checks, per glob, a list of paths that must match and a list that must not.
func TestGlobToRegex(t *testing.T) {
	type globCase struct {
		glob  string
		match []string
		miss  []string
	}
	table := []globCase{
		{
			glob:  "*.lock",
			match: []string{"yarn.lock", "deep/yarn.lock"},
			miss:  []string{"yarnlock"},
		},
		{
			glob:  "vendor/",
			match: []string{"vendor/foo.go", "src/vendor/foo.go"},
			miss:  []string{"vendorish.go"},
		},
		{
			glob:  "/secrets/key.txt",
			match: []string{"secrets/key.txt"},
			miss:  []string{"src/secrets/key.txt"},
		},
		{
			glob:  "src/**/*.go",
			match: []string{"src/main.go", "src/a/b/c.go"},
			miss:  []string{"main.go"},
		},
		{
			glob:  "foo?bar",
			match: []string{"foo1bar", "fooXbar"},
			miss:  []string{"foo/bar", "foobar"},
		},
		{
			glob:  "build/**",
			match: []string{"build/foo", "build/foo/bar/baz", "build/x.txt"},
			miss:  []string{"build", "src/build.go"},
		},
		{
			glob:  "**/test",
			match: []string{"test", "src/test", "a/b/c/test"},
			miss:  []string{"testless", "test.go"},
		},
	}

	for _, entry := range table {
		t.Run(entry.glob, func(t *testing.T) {
			re, err := globToRegex(entry.glob)
			if err != nil {
				t.Fatalf("globToRegex(%q): %v", entry.glob, err)
			}
			for _, candidate := range entry.match {
				if re.MatchString(candidate) {
					continue
				}
				t.Errorf("glob %q expected match for %q (regex=%s)", entry.glob, candidate, re.String())
			}
			for _, candidate := range entry.miss {
				if !re.MatchString(candidate) {
					continue
				}
				t.Errorf("glob %q expected NO match for %q (regex=%s)", entry.glob, candidate, re.String())
			}
		})
	}
}
10 changes: 10 additions & 0 deletions pkg/sources/filesystem/filesystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,16 @@ func (s *Source) Init(aCtx trContext.Context, name string, jobId sources.JobID,
if err != nil {
return fmt.Errorf("unable to create filter: %w", err)
}
// Auto-discover .trufflehogignore in each scan root and merge its
// patterns into the filter's exclude set. See
// https://github.com/trufflesecurity/trufflehog/issues/2687.
if loaded, err := filter.AddTrufflehogIgnoreFiles(s.paths...); err != nil {
return fmt.Errorf("unable to load .trufflehogignore: %w", err)
} else {
for _, path := range loaded {
s.log.V(2).Info("loaded ignore file", "file", path)
}
}
s.filter = filter
err = s.setMaxSymlinkDepth(&conn)
if err != nil {
Expand Down