From ff241efb3aa773cdca9239e6b10f0fbf3e621ce0 Mon Sep 17 00:00:00 2001 From: Sascha Egerer Date: Tue, 16 Jun 2026 11:42:56 +0200 Subject: [PATCH] Update docs for 0.6: with()/ConfigLayer, companion OWASP, preset pages, trusted-bot wiring --- docs/.vitepress/config.ts | 3 + docs/advanced/config-composition.md | 28 +++--- docs/advanced/portable-config.md | 12 +-- docs/advanced/presets.md | 14 +-- docs/common-attacks.md | 27 ++++-- docs/examples.md | 81 ++++++++++++------ docs/faq.md | 8 +- docs/features/bad-ip-preset.md | 49 +++++++++++ docs/features/bot-detection.md | 50 +++++++---- docs/features/bot-presets.md | 48 +++++++++++ docs/features/owasp-crs.md | 119 ++++++++++++++++++-------- docs/features/safelists-blocklists.md | 45 +++++++--- docs/features/trusted-bots.md | 108 +++++++++++++++++++++++ docs/getting-started.md | 18 ++-- 14 files changed, 474 insertions(+), 136 deletions(-) create mode 100644 docs/features/bad-ip-preset.md create mode 100644 docs/features/bot-presets.md create mode 100644 docs/features/trusted-bots.md diff --git a/docs/.vitepress/config.ts b/docs/.vitepress/config.ts index 3d023b6..5e71836 100644 --- a/docs/.vitepress/config.ts +++ b/docs/.vitepress/config.ts @@ -40,7 +40,10 @@ export default defineConfig({ { text: 'Rate Limiting', link: '/features/rate-limiting' }, { text: 'Fail2Ban & Allow2Ban', link: '/features/fail2ban' }, { text: 'Bot Detection & Matchers', link: '/features/bot-detection' }, + { text: 'Trusted Bots', link: '/features/trusted-bots' }, { text: 'OWASP Core Rule Set', link: '/features/owasp-crs' }, + { text: 'Bot & AI Crawler Presets', link: '/features/bot-presets' }, + { text: 'Bad-IP Blocklist Preset', link: '/features/bad-ip-preset' }, { text: 'Storage Backends', link: '/features/storage' }, ] }, diff --git a/docs/advanced/config-composition.md b/docs/advanced/config-composition.md index 2421f3d..6b1c926 100644 --- a/docs/advanced/config-composition.md +++ b/docs/advanced/config-composition.md @@ -4,7 +4,7 @@ outline: 
deep # Config Composition -Real deployments rarely have a single source of firewall rules. A vendor ships a baseline, an environment (staging vs. production) adds its own rules, a tenant overrides a few, and a single deployment applies a last-minute tweak. `Config::compose()` (and the fluent `Config::mergedWith()`) merges these layers into one effective `Config` (**without mutating any input**) so each layer can be owned, versioned, and shipped independently, often as a [`PortableConfig`](/advanced/portable-config). +Real deployments rarely have a single source of firewall rules. A vendor ships a baseline, an environment (staging vs. production) adds its own rules, a tenant overrides a few, and a single deployment applies a last-minute tweak. `Config::with()` merges these layers into one effective `Config` (**without mutating any input**) so each layer can be owned, versioned, and shipped independently. A layer is any `ConfigLayer` - a live `Config` or a [`PortableConfig`](/advanced/portable-config). ## Usage @@ -12,26 +12,20 @@ Real deployments rarely have a single source of firewall rules. A vendor ships a use Flowd\Phirewall\Config; // Each layer is owned and versioned independently, usually as a PortableConfig. -// Materialize them onto your cache with Config::combine(); later layers win. +// Apply them onto your cache with Config::with(); later layers win. // The cache lives only on Config; the portable layers never carry one. -$effective = (new Config($cache))->combine( +$effective = (new Config($cache))->with( $vendorPortable, // shared product defaults $environmentPortable, // staging vs. production $tenantPortable, // per-customer policy ); -// Already holding Config instances? 
compose() / mergedWith() layer those directly +// A Config is itself a ConfigLayer, so configs apply directly through the same call // (same precedence; later layers win): -$effective = $vendorConfig->mergedWith($environmentConfig, $tenantConfig); -$effective = Config::compose($vendorConfig, $environmentConfig, $tenantConfig); +$effective = $base->with($vendorConfig, $environmentConfig, $tenantConfig); ``` -`compose()` is static and reads as "base first, overlays after"; `mergedWith()` is the instance form for when you already hold the base. Both return a fresh `Config`; the base and every overlay are left untouched. - -| Form | Signature | Reads as | -|------|-----------|----------| -| `Config::compose(...$configs)` | static, variadic | base first, overlays after | -| `$base->mergedWith(...$overlays)` | instance, variadic | overlays applied onto `$base` | +`with()` is the one instance method for composition: it takes variadic `ConfigLayer`s and returns a fresh `Config`; the base and every overlay are left untouched. ## Merge semantics @@ -53,13 +47,13 @@ A `Config` does not track which options were *set* versus *left at their default Because "default-valued" is read as "no opinion", an overlay **cannot turn a toggle back off** once an earlier layer turned it on. If the vendor baseline calls `enableResponseHeaders()` (changing the toggle from its `false` default to `true`), a tenant overlay that leaves the toggle at `false` will *not* switch it back off; its `false` is indistinguishable from "unspecified", so the baseline's explicit `true` wins. The same applies to `failOpen` and the other boolean toggles. (`enabled` is the deliberate exception: as its row above notes, it uses last-layer-wins, so a later layer *can* re-assert it.) -If you need a later layer to *force* a non-default option back to the default, do not rely on composition: build the final `Config` and set the option explicitly after composing, e.g. `Config::compose(...)->setFailOpen(true)`. 
+If you need a later layer to *force* a non-default option back to the default, do not rely on composition: build the final `Config` and set the option explicitly after composing, e.g. `(new Config($cache))->with(...)->setFailOpen(true)`. -### Limitation: composing the IP resolver does not rewrite IP-aware matchers +### IP resolver: autowired matchers compose, an explicit resolver is fixed -IP-aware matchers (`IpMatcher`, the file/snapshot IP blocklists, `TrustedBotMatcher`) capture their IP resolver **when the rule is constructed**. Because composition copies already-built rule objects, composing a layer with a different IP resolver only affects rules added *after* it; it does **not** retroactively change how earlier layers' IP rules resolve the client IP. Set the resolver on each source `Config` (via `setIpResolver()`) **before** adding its IP rules, rather than expecting a later layer to override it. +IP-aware matchers (`IpMatcher`, the file/snapshot IP blocklists, `TrustedBotMatcher`) **autowire** the client-IP resolver. Constructed without an explicit resolver, they resolve the client IP through the `Config` they run under, at request time. So a composed `Config` applies its own merged IP resolver to these matchers no matter which layer defined them, the same way keyless counter rules (throttle, fail2ban, allow2ban, track) resolve their default IP key against the `Config` they run under. -This limitation does **not** apply to counter rules (throttle, fail2ban, allow2ban, track) added **without** an explicit `key`. Their default IP key is resolved per request against the `Config` they run under, so a composed `Config` correctly applies its own merged IP resolver to such rules no matter which layer defined them. +The exception is a matcher **given an explicit resolver in its constructor**: it keeps that resolver and ignores the composed `Config`'s. Composition copies already-built rule objects, so it cannot rewrite a resolver baked into a matcher. 
If you want every layer's IP rules to follow one resolver, leave the matchers' resolver unset and set it once on the final `Config` with `setIpResolver()`; reserve an explicit per-matcher resolver for the rare rule that must resolve the client IP differently from the rest. ## Example @@ -67,7 +61,7 @@ This limitation does **not** apply to counter rules (throttle, fail2ban, allow2b use Flowd\Phirewall\Config; use Flowd\Phirewall\Http\Firewall; -$effective = $vendorBaseline->mergedWith($environmentOverlay, $tenantOverlay, $deploymentTweak); +$effective = $vendorBaseline->with($environmentOverlay, $tenantOverlay, $deploymentTweak); // Rules unioned by name, base ordering preserved: $effective->blocklists->rules(); // ['scanners' (tenant wins), 'bad-net', 'admin-probe', ...] diff --git a/docs/advanced/portable-config.md b/docs/advanced/portable-config.md index c665992..7b0861a 100644 --- a/docs/advanced/portable-config.md +++ b/docs/advanced/portable-config.md @@ -11,11 +11,11 @@ outline: deep - **diff and review it in git**, or - **share one ruleset across many apps, processes, or languages** -…and then materialize a live [`Config`](/getting-started) from it with [`Config::combine()`](/advanced/config-composition); the schema is pure data and never carries a cache. Closures are never serialized, so the surface is intentionally a safe, declarative subset (see [Not portable by design](#not-portable-by-design)). +…and then apply it onto a live [`Config`](/getting-started) with [`Config::with()`](/advanced/config-composition); the schema is pure data and never carries a cache. A `PortableConfig` is a [`ConfigLayer`](/advanced/config-composition), so it composes through the same `with()` call as any other layer. Closures are never serialized, so the surface is intentionally a safe, declarative subset (see [Not portable by design](#not-portable-by-design)). 
## Building and round-tripping -Build a ruleset fluently, export it with `toArray()` (or `json_encode()` the result), and rebuild it with `fromArray()`, then materialize it onto a `Config` with `Config::combine()`: +Build a ruleset fluently, export it with `toArray()` (or `json_encode()` the result), and rebuild it with `fromArray()`, then apply it onto a `Config` with `Config::with()`: ```php use Flowd\Phirewall\Config; @@ -43,7 +43,7 @@ $portable = PortableConfig::create() $json = json_encode($portable->toArray(), JSON_THROW_ON_ERROR); // … and rebuild a live Config somewhere else. -$config = (new Config($cache))->combine(PortableConfig::fromArray(json_decode($json, true, 512, JSON_THROW_ON_ERROR))); +$config = (new Config($cache))->with(PortableConfig::fromArray(json_decode($json, true, 512, JSON_THROW_ON_ERROR))); $firewall = new Firewall($config); ``` @@ -142,7 +142,7 @@ use Flowd\Phirewall\Portable\PortableConfig; // $store->load() returns the signed blob from your DB / cache / config service. $portable = PortableConfig::loadSigned($store->load(), $secret); -$firewall = new Firewall((new Config($cache))->combine($portable)); +$firewall = new Firewall((new Config($cache))->with($portable)); ``` Under classic PHP-FPM userland state does not carry over between requests, so this runs once per request and always reflects the current rules. To avoid querying the database on every request, put a shared cache (APCu, for example) in front of the store. 
@@ -156,7 +156,7 @@ Under a long-running worker runtime (Swoole, RoadRunner, FrankenPHP worker mode, $row = $store->load(); if ($loadedVersion !== $row['version']) { $portable = PortableConfig::loadSigned($row['blob'], $secret); - $firewall = new Firewall((new Config($cache))->combine($portable)); + $firewall = new Firewall((new Config($cache))->with($portable)); $loadedVersion = $row['version']; } ``` @@ -191,7 +191,7 @@ A few capabilities cannot be represented as pure data and are intentionally **ex | Excluded | Why | |----------|-----| | Trusted-bot reverse-DNS safelisting (`TrustedBotMatcher`) | needs live DNS resolution and an optional cache at request time | -| OWASP Core Rule Set (`blocklists->owasp()`) | a ruleset is parsed `SecRule` objects / rule files, not a small data blob | +| OWASP Core Rule Set (`CoreRuleSetMatcher`, in `flowd/phirewall-preset-owasp-crs`) | a ruleset is parsed `SecRule` objects / rule files, not a small data blob | | File-backed lists (`fileIp`, `filePatternBackend`) | filesystem paths are environment-specific; the in-memory pattern backend is the portable equivalent | | Closure-driven dynamic throttle limits/periods, `$config->throttles->multi()` | limits/periods can be arbitrary PHP closures and cannot be serialized (express the multi-window case as several `throttles->add()` entries; `sliding` is supported) | | Response factories, `ipResolver`, `discriminatorNormalizer` | these are closures / objects, not declarative data | diff --git a/docs/advanced/presets.md b/docs/advanced/presets.md index 9a590e5..f038ec1 100644 --- a/docs/advanced/presets.md +++ b/docs/advanced/presets.md @@ -4,9 +4,9 @@ outline: deep # Presets -Presets are ready-to-use rule bundles for recurring scenarios, so you don't have to hand-write the same rules each time. Each preset is a [`PortableConfig`](/advanced/portable-config) returned by an accessor (e.g. `Presets::scannerBlocking()`): plain, inspectable, serializable data you can diff, sign, or layer. 
+Presets are ready-to-use rule bundles for recurring scenarios, so you don't have to hand-write the same rules each time. Each preset is a [`PortableConfig`](/advanced/portable-config) returned by an accessor (e.g. `Presets::scannerBlocking()`): plain, inspectable, serializable data you can diff, sign, or layer. Every preset is a `ConfigLayer`, so it composes through the same `Config::with()` call as any other layer. -Materialize one or several onto your own cache with [`Config::combine()`](/advanced/config-composition); presets are pure data and never receive a cache. Every rule is namespaced `preset.<name>.*`, so a later layer that redefines it by name overrides predictably. +Apply one or several onto your own cache with [`Config::with()`](/advanced/config-composition); presets are pure data and never receive a cache. Every rule is namespaced `preset.<name>.*`, so a later layer that redefines it by name overrides predictably. ## Usage @@ -14,21 +14,21 @@ Materialize one or several onto your own cache with [`Config::combine()`](/advan use Flowd\Phirewall\Config; use Flowd\Phirewall\Preset\Presets; -// A preset on its own; combine it onto a Config you build with your cache: -$config = (new Config($cache))->combine(Presets::scannerBlocking()); +// A preset on its own; apply it onto a Config you build with your cache: +$config = (new Config($cache))->with(Presets::scannerBlocking()); // Inspect / serialize the underlying portable schema: $schema = Presets::scannerBlocking()->toArray(); // Stack several presets, then your own rules last (later layers win by name): -$config = (new Config($cache))->combine( +$config = (new Config($cache))->with( Presets::scannerBlocking(), Presets::sensitivePathBlocking(), $myPortable, ); ``` -Preset rules emit the same [observability events](/advanced/observability) as hand-written ones; wire your PSR-14 dispatcher into the `Config` you combine onto (`new Config($cache, $dispatcher)`). 
+Preset rules emit the same [observability events](/advanced/observability) as hand-written ones; wire your PSR-14 dispatcher into the `Config` you apply onto (`new Config($cache, $dispatcher)`). ## Shipped presets @@ -42,7 +42,7 @@ Resolve any preset by name with `Presets::get($name)` (a `PortableConfig`), pass ## Conventions and overrides - The shipped presets target signals that are universal across applications (scanner User-Agents, missing browser headers, well-known sensitive paths), so they assume nothing about your routing. A preset you build yourself is just a `PortableConfig`, so it can key on whatever fits your environment, including routes your own apps standardize. -- Override any rule by combining the preset with your own portable rules that redefine the rule by the same name (later layer wins), or by rebuilding the preset's schema. +- Override any rule by applying the preset with your own portable rules that redefine the rule by the same name (later layer wins), or by rebuilding the preset's schema. > **Note:** `scannerBlocking()`'s `suspicious-headers` rule is the more aggressive of the two: some legitimate API clients, privacy tools, and embedded browsers also omit `Accept-*` headers. Drop or override it by name if your traffic includes non-browser clients. diff --git a/docs/common-attacks.md b/docs/common-attacks.md index 4f1b775..c411ba6 100644 --- a/docs/common-attacks.md +++ b/docs/common-attacks.md @@ -83,8 +83,15 @@ $config->throttles->add('account-throttle', Block common SQL injection patterns using OWASP CRS rules. +::: tip OWASP CRS is a separate package +The SecRule engine and CRS presets ship in the companion package. Install it first: +`composer require flowd/phirewall-preset-owasp-crs`. See [OWASP CRS](/features/owasp-crs) for details. 
+::: + ```php -use Flowd\Phirewall\Owasp\SecRuleLoader; +use Flowd\Phirewall\Config\Rule\BlocklistRule; +use Flowd\PhirewallPresetOwaspCrs\Engine\CoreRuleSetMatcher; +use Flowd\PhirewallPresetOwaspCrs\Engine\SecRuleLoader; $rules = SecRuleLoader::fromString(<<<'CRS' # UNION SELECT attacks @@ -120,7 +127,7 @@ SecRule ARGS "@rx (?i)information_schema" \ "id:942170,phase:2,deny,msg:'SQL Injection: DB enumeration'" CRS); -$config->blocklists->owasp('sqli', $rules); +$config->blocklists->addRule(new BlocklistRule('sqli', new CoreRuleSetMatcher($rules))); ``` ::: tip @@ -158,7 +165,7 @@ SecRule ARGS "@rx (?i)<(object|embed|applet)[^>]*>" \ "id:941150,phase:2,deny,msg:'XSS: Object/embed tag'" CRS); -$config->blocklists->owasp('xss', $rules); +$config->blocklists->addRule(new BlocklistRule('xss', new CoreRuleSetMatcher($rules))); ``` ## Remote Code Execution (RCE) @@ -180,7 +187,7 @@ SecRule ARGS "@rx `[^`]+`" \ "id:933120,phase:2,deny,msg:'RCE: Backtick execution'" CRS); -$config->blocklists->owasp('rce', $rules); +$config->blocklists->addRule(new BlocklistRule('rce', new CoreRuleSetMatcher($rules))); ``` ## Path Traversal @@ -202,7 +209,7 @@ SecRule ARGS "@rx \.\.[\\/]" \ "id:930120,phase:2,deny,msg:'Path Traversal in parameter'" CRS); -$config->blocklists->owasp('path-traversal', $rules); +$config->blocklists->addRule(new BlocklistRule('path-traversal', new CoreRuleSetMatcher($rules))); ``` Or use a simple blocklist closure: @@ -407,10 +414,14 @@ Combine all layers into a production-ready configuration: ```php use Flowd\Phirewall\Config; +use Flowd\Phirewall\Config\Rule\SafelistRule; use Flowd\Phirewall\Http\TrustedProxyResolver; use Flowd\Phirewall\KeyExtractors; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; use Flowd\Phirewall\Middleware; -use Flowd\Phirewall\Owasp\SecRuleLoader; +use Flowd\Phirewall\Config\Rule\BlocklistRule; +use Flowd\PhirewallPresetOwaspCrs\Engine\CoreRuleSetMatcher; +use Flowd\PhirewallPresetOwaspCrs\Engine\SecRuleLoader; use 
Flowd\Phirewall\Store\RedisCache; use Psr\Http\Message\ServerRequestInterface; use Nyholm\Psr7\Factory\Psr17Factory; @@ -427,7 +438,7 @@ $config->setIpResolver(KeyExtractors::clientIp($proxy)); $config->safelists->add('health', fn($req): bool => $req->getUri()->getPath() === '/health' ); -$config->safelists->trustedBots(cache: new RedisCache($redis)); +$config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher(ipResolver: $config->getIpResolver(), cache: new RedisCache($redis)))); $config->safelists->ip('office', ['203.0.113.0/24']); // ── Layer 2: Blocklists ──────────────────────────────────────────────── @@ -450,7 +461,7 @@ SecRule ARGS "@rx (?i)\bon(load|error|click)\s*=" "id:941110,phase:2,deny,msg:'X SecRule ARGS "@rx (?i)(eval|exec|system|shell_exec)\s*\(" "id:933100,phase:2,deny,msg:'RCE'" SecRule REQUEST_URI "@rx \.\.[\\/]" "id:930100,phase:2,deny,msg:'Path Traversal'" CRS); -$config->blocklists->owasp('owasp', $rules); +$config->blocklists->addRule(new BlocklistRule('owasp', new CoreRuleSetMatcher($rules))); // ── Layer 4: Fail2Ban ───────────────────────────────────────────────── // The filter never matches pre-handler; your login handler signals each diff --git a/docs/examples.md b/docs/examples.md index 956375c..665f67c 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -57,6 +57,11 @@ php examples/01-basic-setup.php Production-ready integration examples for popular PHP frameworks. Each example includes storage, safelists, blocklists, rate limiting, brute-force protection, OWASP rules, and observability. Copy, paste, adapt. +::: tip OWASP CRS is a separate package +The OWASP rules in these examples use the companion package. Install it first: +`composer require flowd/phirewall-preset-owasp-crs`. See [OWASP CRS](/features/owasp-crs) for the full preset and engine API. +::: + ### PSR-15 (Generic / Plain PHP) Works with any PSR-15 compatible stack (custom dispatchers, runtimes, etc.; Mezzio has its own section below). 
Requires `nyholm/psr7`. @@ -69,10 +74,14 @@ declare(strict_types=1); require __DIR__ . '/vendor/autoload.php'; use Flowd\Phirewall\Config; +use Flowd\Phirewall\Config\Rule\SafelistRule; use Flowd\Phirewall\Http\TrustedProxyResolver; use Flowd\Phirewall\KeyExtractors; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; use Flowd\Phirewall\Middleware; -use Flowd\Phirewall\Owasp\SecRuleLoader; +use Flowd\Phirewall\Config\Rule\BlocklistRule; +use Flowd\PhirewallPresetOwaspCrs\Engine\CoreRuleSetMatcher; +use Flowd\PhirewallPresetOwaspCrs\Engine\SecRuleLoader; use Flowd\Phirewall\Store\ApcuCache; use Nyholm\Psr7\Factory\Psr17Factory; use Nyholm\Psr7\Response; @@ -110,7 +119,7 @@ $config->safelists->add('metrics', $req->getUri()->getPath() === '/metrics' ); $config->safelists->ip('office', ['10.0.0.0/8', '192.168.1.0/24']); -$config->safelists->trustedBots(cache: $cache); +$config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher(ipResolver: $config->getIpResolver(), cache: $cache))); // ── Blocklists ─────────────────────────────────────────────────────── $config->blocklists->knownScanners(); @@ -126,7 +135,7 @@ SecRule ARGS "@rx (?i)\bunion\b.*\bselect\b" "id:942100,phase:2,deny,msg:'SQLi'" SecRule ARGS "@rx (?i)<script[^>]*>" "id:941100,phase:2,deny,msg:'XSS'" SecRule ARGS "@rx (?i)(eval|exec|system|shell_exec|passthru)\s*\(" "id:933100,phase:2,deny,msg:'RCE'" CRS); -$config->blocklists->owasp('owasp', $owaspRules); +$config->blocklists->addRule(new BlocklistRule('owasp', new CoreRuleSetMatcher($owaspRules))); // ── Fail2Ban ───────────────────────────────────────────────────────── $config->fail2ban->add('login-abuse', @@ -203,10 +212,14 @@ declare(strict_types=1); namespace App\Factory; use Flowd\Phirewall\Config; +use Flowd\Phirewall\Config\Rule\SafelistRule; use Flowd\Phirewall\Http\TrustedProxyResolver; use Flowd\Phirewall\KeyExtractors; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; use Flowd\Phirewall\Middleware as PhirewallMiddleware; -use 
Flowd\Phirewall\Owasp\SecRuleLoader; +use Flowd\Phirewall\Config\Rule\BlocklistRule; +use Flowd\PhirewallPresetOwaspCrs\Engine\CoreRuleSetMatcher; +use Flowd\PhirewallPresetOwaspCrs\Engine\SecRuleLoader; use Flowd\Phirewall\Store\ApcuCache; use Nyholm\Psr7\Factory\Psr17Factory; use Psr\Http\Message\ServerRequestInterface; @@ -251,7 +264,7 @@ class PhirewallFactory fn(ServerRequestInterface $req): bool => str_starts_with($req->getUri()->getPath(), '/_profiler') ); - $config->safelists->trustedBots(cache: $cache); + $config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher(ipResolver: $config->getIpResolver(), cache: $cache))); // ── Blocklists ─────────────────────────────────────────── $config->blocklists->knownScanners(); @@ -263,7 +276,7 @@ class PhirewallFactory SecRule ARGS "@rx (?i)<script[^>]*>" "id:941100,phase:2,deny,msg:'XSS'" SecRule ARGS "@rx (?i)(eval|exec|system|shell_exec|passthru)\s*\(" "id:933100,phase:2,deny,msg:'RCE'" CRS); - $config->blocklists->owasp('owasp', $owaspRules); + $config->blocklists->addRule(new BlocklistRule('owasp', new CoreRuleSetMatcher($owaspRules))); // ── Fail2Ban ───────────────────────────────────────────── // No key: these rules default to the client IP from the @@ -428,10 +441,14 @@ declare(strict_types=1); namespace App\Providers; use Flowd\Phirewall\Config; +use Flowd\Phirewall\Config\Rule\SafelistRule; use Flowd\Phirewall\Http\TrustedProxyResolver; use Flowd\Phirewall\KeyExtractors; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; use Flowd\Phirewall\Middleware as PhirewallMiddleware; -use Flowd\Phirewall\Owasp\SecRuleLoader; +use Flowd\Phirewall\Config\Rule\BlocklistRule; +use Flowd\PhirewallPresetOwaspCrs\Engine\CoreRuleSetMatcher; +use Flowd\PhirewallPresetOwaspCrs\Engine\SecRuleLoader; use Flowd\Phirewall\Store\ApcuCache; use Illuminate\Support\ServiceProvider; use Nyholm\Psr7\Factory\Psr17Factory; @@ -485,7 +502,7 @@ class PhirewallServiceProvider extends ServiceProvider fn(ServerRequestInterface 
$req): bool => str_starts_with($req->getUri()->getPath(), '/horizon') ); - $config->safelists->trustedBots(cache: $cache); + $config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher(ipResolver: $config->getIpResolver(), cache: $cache))); // ── Blocklists ─────────────────────────────────────── $config->blocklists->knownScanners(); @@ -501,7 +518,7 @@ class PhirewallServiceProvider extends ServiceProvider SecRule ARGS "@rx (?i)<script[^>]*>" "id:941100,phase:2,deny,msg:'XSS'" SecRule ARGS "@rx (?i)(eval|exec|system|shell_exec|passthru)\s*\(" "id:933100,phase:2,deny,msg:'RCE'" CRS); - $config->blocklists->owasp('owasp', $owaspRules); + $config->blocklists->addRule(new BlocklistRule('owasp', new CoreRuleSetMatcher($owaspRules))); // ── Fail2Ban ───────────────────────────────────────── // No key: these rules default to the client IP from the @@ -661,10 +678,14 @@ declare(strict_types=1); require __DIR__ . '/vendor/autoload.php'; use Flowd\Phirewall\Config; +use Flowd\Phirewall\Config\Rule\SafelistRule; use Flowd\Phirewall\Http\TrustedProxyResolver; use Flowd\Phirewall\KeyExtractors; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; use Flowd\Phirewall\Middleware as PhirewallMiddleware; -use Flowd\Phirewall\Owasp\SecRuleLoader; +use Flowd\Phirewall\Config\Rule\BlocklistRule; +use Flowd\PhirewallPresetOwaspCrs\Engine\CoreRuleSetMatcher; +use Flowd\PhirewallPresetOwaspCrs\Engine\SecRuleLoader; use Flowd\Phirewall\Store\ApcuCache; use Psr\Http\Message\ResponseInterface; use Psr\Http\Message\ServerRequestInterface; @@ -690,7 +711,7 @@ $config->safelists->add('health', fn(ServerRequestInterface $req): bool => $req->getUri()->getPath() === '/health' ); -$config->safelists->trustedBots(cache: $cache); +$config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher(ipResolver: $config->getIpResolver(), cache: $cache))); // ── Blocklists ─────────────────────────────────────────────────────── $config->blocklists->knownScanners(); @@ -702,7 
+723,7 @@ SecRule ARGS "@rx (?i)\bunion\b.*\bselect\b" "id:942100,phase:2,deny,msg:'SQLi'" SecRule ARGS "@rx (?i)<script[^>]*>" "id:941100,phase:2,deny,msg:'XSS'" SecRule ARGS "@rx (?i)(eval|exec|system|shell_exec|passthru)\s*\(" "id:933100,phase:2,deny,msg:'RCE'" CRS); -$config->blocklists->owasp('owasp', $owaspRules); +$config->blocklists->addRule(new BlocklistRule('owasp', new CoreRuleSetMatcher($owaspRules))); // ── Fail2Ban ───────────────────────────────────────────────────────── $config->fail2ban->add('login-abuse', @@ -764,10 +785,14 @@ declare(strict_types=1); namespace App\Factory; use Flowd\Phirewall\Config; +use Flowd\Phirewall\Config\Rule\SafelistRule; use Flowd\Phirewall\Http\TrustedProxyResolver; use Flowd\Phirewall\KeyExtractors; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; use Flowd\Phirewall\Middleware as PhirewallMiddleware; -use Flowd\Phirewall\Owasp\SecRuleLoader; +use Flowd\Phirewall\Config\Rule\BlocklistRule; +use Flowd\PhirewallPresetOwaspCrs\Engine\CoreRuleSetMatcher; +use Flowd\PhirewallPresetOwaspCrs\Engine\SecRuleLoader; use Flowd\Phirewall\Store\ApcuCache; use Nyholm\Psr7\Factory\Psr17Factory; use Psr\Container\ContainerInterface; @@ -802,7 +827,7 @@ class PhirewallMiddlewareFactory fn(ServerRequestInterface $req): bool => $req->getUri()->getPath() === '/health' ); - $config->safelists->trustedBots(cache: $cache); + $config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher(ipResolver: $config->getIpResolver(), cache: $cache))); // ── Blocklists ─────────────────────────────────────────── $config->blocklists->knownScanners(); @@ -814,7 +839,7 @@ class PhirewallMiddlewareFactory SecRule ARGS "@rx (?i)<script[^>]*>" "id:941100,phase:2,deny,msg:'XSS'" SecRule ARGS "@rx (?i)(eval|exec|system|shell_exec|passthru)\s*\(" "id:933100,phase:2,deny,msg:'RCE'" CRS); - $config->blocklists->owasp('owasp', $owaspRules); + $config->blocklists->addRule(new BlocklistRule('owasp', new CoreRuleSetMatcher($owaspRules))); // ── Fail2Ban 
───────────────────────────────────────────── // No key: these rules default to the client IP from the @@ -1237,6 +1262,8 @@ Safelist verified search engine bots using reverse DNS (RDNS) verification. Only ```php use Flowd\Phirewall\Config; +use Flowd\Phirewall\Config\Rule\SafelistRule; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; use Flowd\Phirewall\Store\InMemoryCache; $cache = new InMemoryCache(); @@ -1244,12 +1271,12 @@ $config = new Config($cache); // Safelist known bots (Googlebot, Bingbot, Baidu, etc.) via RDNS // Pass a PSR-16 cache to avoid repeated DNS lookups -$config->safelists->trustedBots(cache: $cache); +$config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher(ipResolver: $config->getIpResolver(), cache: $cache))); // Safelist a custom internal bot -$config->safelists->trustedBots('custom-bots', [ +$config->safelists->addRule(new SafelistRule('custom-bots', new TrustedBotMatcher([ ['ua' => 'mycompany-crawler', 'hostname' => '.crawler.mycompany.com'], -], cache: $cache); +], ipResolver: $config->getIpResolver(), cache: $cache))); ``` See [Bot Detection](/features/bot-detection) for details. 
@@ -1413,10 +1440,14 @@ A production-ready configuration combining safelists, blocklists, OWASP rules, b ```php use Flowd\Phirewall\Config; +use Flowd\Phirewall\Config\Rule\SafelistRule; use Flowd\Phirewall\Http\TrustedProxyResolver; use Flowd\Phirewall\KeyExtractors; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; use Flowd\Phirewall\Middleware; -use Flowd\Phirewall\Owasp\SecRuleLoader; +use Flowd\Phirewall\Config\Rule\BlocklistRule; +use Flowd\PhirewallPresetOwaspCrs\Engine\CoreRuleSetMatcher; +use Flowd\PhirewallPresetOwaspCrs\Engine\SecRuleLoader; use Flowd\Phirewall\Pattern\PatternEntry; use Flowd\Phirewall\Pattern\PatternKind; use Flowd\Phirewall\Store\RedisCache; @@ -1452,7 +1483,7 @@ $config->safelists->add('health', $config->safelists->add('metrics', fn($req) => $req->getUri()->getPath() === '/metrics' ); -$config->safelists->trustedBots(cache: $cache); +$config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher(ipResolver: $config->getIpResolver(), cache: $cache))); // === BLOCKLISTS === @@ -1486,7 +1517,7 @@ SecRule ARGS "@rx (?i)]*>" "id:941100,phase:2,deny,msg:'XSS'" SecRule ARGS "@rx (?i)(eval|exec|system|shell_exec|passthru)\s*\(" "id:933100,phase:2,deny,msg:'RCE'" SecRule REQUEST_URI "@rx \.\.\/" "id:930100,phase:2,deny,msg:'Path Traversal'" CRS); -$config->blocklists->owasp('owasp', $owaspRules); +$config->blocklists->addRule(new BlocklistRule('owasp', new CoreRuleSetMatcher($owaspRules))); // === FAIL2BAN === $config->fail2ban->add('login-abuse', @@ -1572,11 +1603,13 @@ $middleware = new Middleware($config); ## Production: OWASP Protection Suite -SQL injection (SQLi), XSS (Cross-Site Scripting), PHP injection, and path traversal detection: +SQL injection (SQLi), XSS (Cross-Site Scripting), PHP injection, and path traversal detection. Requires the companion package: `composer require flowd/phirewall-preset-owasp-crs`. 
```php use Flowd\Phirewall\Config; -use Flowd\Phirewall\Owasp\SecRuleLoader; +use Flowd\Phirewall\Config\Rule\BlocklistRule; +use Flowd\PhirewallPresetOwaspCrs\Engine\CoreRuleSetMatcher; +use Flowd\PhirewallPresetOwaspCrs\Engine\SecRuleLoader; use Flowd\Phirewall\Store\RedisCache; use Predis\Client as PredisClient; @@ -1611,7 +1644,7 @@ SecRule REQUEST_URI "@rx (?i)(%2e%2e%2f|%2e%2e/)" \ "id:930110,phase:2,deny,msg:'Encoded Path Traversal'" CRS); -$config->blocklists->owasp('owasp-suite', $rules); +$config->blocklists->addRule(new BlocklistRule('owasp-suite', new CoreRuleSetMatcher($rules))); // Optionally disable specific rules that cause false positives // $rules->disable(941110); // XSS Event Handler might be too aggressive diff --git a/docs/faq.md b/docs/faq.md index 9dc7fc4..af4d27e 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -294,8 +294,12 @@ For comprehensive OWASP CRS coverage, use a dedicated WAF (like ModSecurity) alo ### How do I load custom OWASP rules? +OWASP CRS support ships in the companion package `flowd/phirewall-preset-owasp-crs` (`composer require flowd/phirewall-preset-owasp-crs`): + ```php -use Flowd\Phirewall\Owasp\SecRuleLoader; +use Flowd\Phirewall\Config\Rule\BlocklistRule; +use Flowd\PhirewallPresetOwaspCrs\Engine\CoreRuleSetMatcher; +use Flowd\PhirewallPresetOwaspCrs\Engine\SecRuleLoader; // From a string of rules $crs = SecRuleLoader::fromString($rulesText); @@ -306,7 +310,7 @@ $crs = SecRuleLoader::fromFile('/path/to/rules.conf'); // From a directory (all .conf files) $crs = SecRuleLoader::fromDirectory('/path/to/rules/'); -$config->blocklists->owasp('owasp', $crs); +$config->blocklists->addRule(new BlocklistRule('owasp', new CoreRuleSetMatcher($crs))); ``` ### Can I disable specific OWASP rules? 
diff --git a/docs/features/bad-ip-preset.md b/docs/features/bad-ip-preset.md new file mode 100644 index 0000000..149e3d0 --- /dev/null +++ b/docs/features/bad-ip-preset.md @@ -0,0 +1,49 @@ +--- +outline: deep +--- + +# Bad-IP Blocklist Preset + +Block requests from known malicious IP addresses. Shipped as the companion package +[`flowd/phirewall-preset-bad-ips`](https://github.com/flowd/phirewall-preset-bad-ips), which +bundles a public-domain snapshot of the [stamparm/ipsum](https://github.com/stamparm/ipsum) +threat feed and exposes it as a `PortableConfig` blocklist. + +## Installation + +```bash +composer require flowd/phirewall-preset-bad-ips +``` + +## Usage + +```php +use Flowd\Phirewall\Config; +use Flowd\PhirewallPresetBadIps\Presets; + +$config = (new Config($cache))->with(Presets::blocklist()); +``` + +| Preset | Effect | +| --- | --- | +| `Presets::blocklist()` | Blocks requests whose client IP is in the bundled snapshot. | +| `Presets::track(period)` | Counts matches without blocking, to measure false positives first. | + +## Updating the list + +The snapshot is stamparm/ipsum `levels/3.txt` (addresses on at least three source blacklists), +public domain under The Unlicense. Refresh it with `bin/badip-import` (or the scheduled +`Bad-IP Update` workflow, which opens a pull request). A higher level means fewer false +positives: + +```bash +bin/badip-import --level=4 +``` + +## Limits + +- **The blocklist keys on `REMOTE_ADDR`.** Behind a proxy or CDN, configure a trusted client-IP + resolver on the `Config`, or it sees the proxy instead of the client. +- **A bundled snapshot goes stale** between refreshes, and a shared host or CGNAT address can be + listed for one offender. Prefer a higher level, try `track()` first, and combine with your own + allowlist by overriding the rule by name. 
diff --git a/docs/features/bot-detection.md b/docs/features/bot-detection.md index b331683..d74134b 100644 --- a/docs/features/bot-detection.md +++ b/docs/features/bot-detection.md @@ -4,7 +4,7 @@ outline: deep # Bot Detection -Phirewall provides three specialized matchers for bot and scanner detection: **Known Scanner Blocking**, **Suspicious Headers Detection**, and **Trusted Bot Verification**. Each is available as a one-liner convenience method on the blocklist or safelist section. +Phirewall provides three specialized matchers for bot and scanner detection: **Known Scanner Blocking**, **Suspicious Headers Detection**, and **Trusted Bot Verification**. Known scanners and suspicious headers are available as one-liner convenience methods on the blocklist section; trusted bot verification is wired by adding a `SafelistRule` with a `TrustedBotMatcher` to the safelist. ## Known Scanner Blocking @@ -156,31 +156,41 @@ Some legitimate clients may not send all standard headers: API clients, embedded ## Trusted Bot Verification (rDNS) -The `trustedBots()` method safelists verified search engine bots using **reverse DNS (rDNS) verification**. This prevents fake bots: anyone can send `Googlebot` as a User-Agent, but only Google's real crawlers have IPs that resolve to `*.googlebot.com`. +Adding a `TrustedBotMatcher` rule to the safelist lets verified search engine bots bypass the firewall, using **reverse DNS (rDNS) verification** to confirm each one. This prevents fake bots: anyone can send `Googlebot` as a User-Agent, but only Google's real crawlers have IPs that resolve to `*.googlebot.com`. + +For rate-limiting verified bots instead of fully safelisting them, see the dedicated [Trusted Bots](/features/trusted-bots) page.
### Quick Setup ```php +use Flowd\Phirewall\Config\Rule\SafelistRule; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; + // Safelist verified search engine bots -$config->safelists->trustedBots(cache: $cache); +$config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher( + ipResolver: $config->getIpResolver(), + cache: $cache, +))); ``` +Pass `ipResolver: $config->getIpResolver()` so verification uses the correct client IP behind a proxy. Omit it only if you deliberately want to verify against `REMOTE_ADDR`. + ### Configuration +The matcher accepts these constructor arguments: + ```php -$config->safelists->trustedBots( - string $name = 'trusted-bots', +new TrustedBotMatcher( array $additionalBots = [], ?callable $ipResolver = null, ?CacheInterface $cache = null -): SafelistSection +) ``` | Parameter | Type | Description | |-----------|------|-------------| -| `$name` | `string` | Unique rule identifier (default: `'trusted-bots'`) | | `$additionalBots` | `list` | Extra bots to recognize | -| `$ipResolver` | `callable\|null` | Custom IP resolver. Defaults to config's global IP resolver. | +| `$ipResolver` | `callable\|null` | IP resolver. Pass `$config->getIpResolver()` to use the config's global resolver (correct client IP behind a proxy). | | `$cache` | `CacheInterface\|null` | PSR-16 cache for DNS results (highly recommended) | ### Verification Flow @@ -226,10 +236,10 @@ Both IPv4 and IPv6 are supported. 
Forward confirmation uses both `gethostbynamel Add your organization's internal crawlers: ```php -$config->safelists->trustedBots('custom-bots', [ +$config->safelists->addRule(new SafelistRule('custom-bots', new TrustedBotMatcher([ ['ua' => 'mycompany-crawler', 'hostname' => '.crawler.mycompany.com'], ['ua' => 'internal-monitor', 'hostname' => '.monitoring.mycompany.com'], -], cache: $cache); +], ipResolver: $config->getIpResolver(), cache: $cache))); ``` ::: danger @@ -241,7 +251,10 @@ The hostname suffix **must** start with a dot (e.g., `.googlebot.com`, not `goog DNS lookups are blocking I/O operations. **Always provide a PSR-16 cache in production** to avoid latency: ```php -$config->safelists->trustedBots(cache: $cache); +$config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher( + ipResolver: $config->getIpResolver(), + cache: $cache, +))); ``` | Cache Behavior | TTL | @@ -296,10 +309,15 @@ For more comprehensive attack pattern detection beyond path matching, consider u Use all three matchers together for comprehensive bot management: ```php +use Flowd\Phirewall\Config\Rule\SafelistRule; use Flowd\Phirewall\KeyExtractors; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; // 1. Safelist verified search engine bots (they bypass all other rules) -$config->safelists->trustedBots(cache: $cache); +$config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher( + ipResolver: $config->getIpResolver(), + cache: $cache, +))); // 2. Block known attack tools $config->blocklists->knownScanners(); @@ -344,7 +362,7 @@ This layered approach ensures: Request | v -Safelists (trustedBots) --> match? --> ALLOW immediately +Safelists (verified bots) --> match? --> ALLOW immediately | No match | @@ -376,11 +394,11 @@ ALLOW (pass to handler) 1. **Layer your defenses.** Use multiple strategies together: User-Agent blocking, path blocking, header analysis, and Fail2Ban for persistence. -2. 
**Verify before you safelist.** Never safelist bots based solely on their User-Agent. Always verify with rDNS using `trustedBots()` for search engine bots. +2. **Verify before you safelist.** Never safelist bots based solely on their User-Agent. Always verify with rDNS by wiring `TrustedBotMatcher` on the safelist for search engine bots. -3. **Always cache DNS lookups.** Pass a PSR-16 cache to `trustedBots()` to avoid blocking DNS calls on every request. +3. **Always cache DNS lookups.** Pass a PSR-16 cache to `TrustedBotMatcher` to avoid blocking DNS calls on every request. -4. **Safelist before blocklist.** Place `trustedBots()` on the safelist so verified search engine bots pass through before `knownScanners()` or `suspiciousHeaders()` can block them. +4. **Safelist before blocklist.** Place the `TrustedBotMatcher` safelist rule first so verified search engine bots pass through before `knownScanners()` or `suspiciousHeaders()` can block them. 5. **Don't rely on User-Agent alone.** Sophisticated attackers rotate User-Agents to look like browsers. Combine UA-based detection with header analysis, rate limiting, and [Fail2Ban](/features/fail2ban) for defense in depth. diff --git a/docs/features/bot-presets.md b/docs/features/bot-presets.md new file mode 100644 index 0000000..e81637d --- /dev/null +++ b/docs/features/bot-presets.md @@ -0,0 +1,48 @@ +--- +outline: deep +--- + +# Bot & AI Crawler Presets + +Block AI crawlers and rate-limit aggressive SEO crawlers. Shipped as the companion package +[`flowd/phirewall-preset-bots`](https://github.com/flowd/phirewall-preset-bots) - `PortableConfig` +presets you materialize with `Config::with()`. 
+ +## Installation + +```bash +composer require flowd/phirewall-preset-bots +``` + +## Usage + +```php +use Flowd\Phirewall\Config; +use Flowd\PhirewallPresetBots\Presets; + +$config = (new Config($cache))->with( + Presets::blockAiCrawlers(), // 403 for AI/LLM crawlers + Presets::throttleSeoCrawlers(limit: 60, period: 60), // rate-limit SEO crawlers per IP +); +``` + +| Preset | Effect | +| --- | --- | +| `Presets::blockAiCrawlers()` | Blocks requests whose `User-Agent` matches a known AI/LLM crawler. | +| `Presets::throttleAiCrawlers(limit, period)` | Rate-limits AI crawlers per client IP; keeps the site indexable. | +| `Presets::throttleSeoCrawlers(limit, period)` | Rate-limits aggressive SEO/marketing crawlers per client IP. | + +The matched tokens are curated in `CrawlerCatalog` (GPTBot, ClaudeBot, CCBot, PerplexityBot, +Bytespider, Meta-ExternalAgent, and more for AI; AhrefsBot, SemrushBot, DotBot, and more for +SEO). General search and link-preview agents (Googlebot, bingbot, Applebot, facebookexternalhit) +are deliberately excluded, as are robots.txt-only opt-out tokens like `Google-Extended`. + +## Limits + +- **User-Agent matching is policy enforcement, not a security control.** It stops crawlers that + send a truthful `User-Agent`; a hostile scraper can send anything. Use the + [OWASP CRS](/features/owasp-crs) and [rate limiting](/features/rate-limiting) presets for + hostile traffic. +- **Throttles key on `REMOTE_ADDR`.** Behind a proxy or CDN, configure a trusted client-IP + resolver on the `Config` or every client buckets together. +- The catalogue is opinionated; override a rule by name to keep a crawler you value. 
diff --git a/docs/features/owasp-crs.md b/docs/features/owasp-crs.md index 4e9862a..e31e30c 100644 --- a/docs/features/owasp-crs.md +++ b/docs/features/owasp-crs.md @@ -4,13 +4,71 @@ outline: deep # OWASP Core Rule Set -Phirewall includes a built-in OWASP CRS (Core Rule Set) engine that parses and evaluates ModSecurity-compatible `SecRule` directives. This provides web application firewall (WAF) capabilities for detecting SQL injection, XSS, remote code execution, path traversal, and other common attack vectors. +OWASP CRS support lives in a separate companion package, +[`flowd/phirewall-preset-owasp-crs`](https://github.com/flowd/phirewall-preset-owasp-crs). +It provides a ModSecurity-compatible `SecRule` engine - parsing and evaluating +`SecRule` directives for detecting SQL injection, XSS, remote code execution, path +traversal, and other common attack vectors - plus ready-made, per-paranoia-level CRS +presets you can drop into a `Config`. + +::: info Extracted in 0.6 +The SecRule engine used to ship inside the core `flowd/phirewall` package under the +`Flowd\Phirewall\Owasp\` namespace. As of 0.6 it lives in the companion package under +`Flowd\PhirewallPresetOwaspCrs\Engine\`, and the `$config->blocklists->owasp()` +shortcut was removed - register a `CoreRuleSetMatcher` as a normal blocklist rule +instead (shown throughout this page). +::: + +## Installation + +```bash +composer require flowd/phirewall-preset-owasp-crs +``` ## Quick Start +The fastest way to get CRS protection is the bundled presets, which ship a +pre-filtered, per-paranoia-level snapshot of the OWASP CRS rules: + ```php use Flowd\Phirewall\Config; -use Flowd\Phirewall\Owasp\SecRuleLoader; +use Flowd\PhirewallPresetOwaspCrs\ParanoiaLevel; +use Flowd\PhirewallPresetOwaspCrs\Presets; +use Flowd\Phirewall\Store\InMemoryCache; + +$config = new Config(new InMemoryCache()); + +// Block requests matching any active CRS rule at paranoia level 1. 
+$config = $config->with(Presets::blocklist(ParanoiaLevel::Level1)); +``` + +Prefer to ban repeat offenders rather than block every single match? Use the +fail2ban preset instead: + +```php +use Flowd\PhirewallPresetOwaspCrs\ParanoiaLevel; +use Flowd\PhirewallPresetOwaspCrs\Presets; + +$config = $config->with( + Presets::fail2ban(ParanoiaLevel::Level1, threshold: 5, period: 600, ban: 3600), +); +``` + +See the [package README](https://github.com/flowd/phirewall-preset-owasp-crs) for the +preset API, paranoia-level guidance, and how the bundled rules are imported and kept +up to date. + +## Writing Your Own Rules + +You are not limited to the bundled CRS snapshot - the SecRule engine can parse and +evaluate any ModSecurity-style ruleset you provide. Load rules and register them as a +blocklist rule via a `CoreRuleSetMatcher`: + +```php +use Flowd\Phirewall\Config; +use Flowd\Phirewall\Config\Rule\BlocklistRule; +use Flowd\PhirewallPresetOwaspCrs\Engine\CoreRuleSetMatcher; +use Flowd\PhirewallPresetOwaspCrs\Engine\SecRuleLoader; use Flowd\Phirewall\Store\InMemoryCache; $config = new Config(new InMemoryCache()); @@ -20,7 +78,7 @@ SecRule ARGS "@rx (?i)\bunion\b.*\bselect\b" "id:942100,phase:2,deny,msg:'SQL In SecRule ARGS "@rx (?i)<script[^>]*>" "id:941100,phase:2,deny,msg:'XSS'" CRS); -$config->blocklists->owasp('owasp', $rules); +$config->blocklists->addRule(new BlocklistRule('owasp', new CoreRuleSetMatcher($rules))); ``` ## Loading Rules @@ -30,7 +88,7 @@ $config->blocklists->owasp('owasp', $rules); Inline rules for simple configurations: ```php -use Flowd\Phirewall\Owasp\SecRuleLoader; +use Flowd\PhirewallPresetOwaspCrs\Engine\SecRuleLoader; $rules = SecRuleLoader::fromString(<<<'CRS' SecRule ARGS "@rx (?i)\bunion\b.*\bselect\b" "id:942100,phase:2,deny,msg:'SQL Injection'" @@ -87,27 +145,11 @@ $skipped = $report['skipped']; // int - Lines that were skipped | Method | Parameters | Description | |--------|-----------|-------------| -| `fromString()` | `string $rulesText,
?string $contextFolder = null, ?int $maxValuesPerCrsVariable = null` | Parse rules from a string | -| `fromFile()` | `string $filePath, ?int $maxValuesPerCrsVariable = null` | Load rules from a single file | -| `fromFiles()` | `list $paths, ?int $maxValuesPerCrsVariable = null` | Load and merge multiple files | -| `fromDirectory()` | `string $dir, ?callable $filter = null, ?int $maxValuesPerCrsVariable = null` | Load all files in a directory | -| `fromStringWithReport()` | `string $rulesText, ?int $maxValuesPerCrsVariable = null` | Parse with statistics | - -## Per-Variable Value Cap (CPU-DoS Guard) - -Every `SecRuleLoader` factory accepts an optional trailing `?int $maxValuesPerCrsVariable`. It bounds how many collected values a single CRS variable (such as `ARGS`) may contribute to evaluation, so an attacker cannot drive up per-request WAF cost by submitting thousands of parameters, headers, or cookies. The cap is **per variable**, not aggregate. - -```php -use Flowd\Phirewall\Owasp\SecRuleLoader; - -// Cap each CRS variable at 5000 collected values. -$rules = SecRuleLoader::fromFile('/etc/phirewall/owasp.conf', maxValuesPerCrsVariable: 5000); -$rules = SecRuleLoader::fromString($rulesText, maxValuesPerCrsVariable: 5000); -``` - -- **Default (`null`):** twice PHP's `max_input_vars`, falling back to `2000` (`RequestVariableValues::DEFAULT_MAX_VALUES_PER_CRS_VARIABLE`) when `max_input_vars` is unset or non-positive. Doubling `max_input_vars` sizes the budget to the parameter count the runtime actually accepts (variables such as `ARGS` emit both a name and a value per parameter), so a request PHP can fully parse is never falsely truncated. -- **Fail-closed:** when a variable's values are truncated at the cap, the affected deny rules fail **closed** (the request is blocked) rather than evaluating a partial value set. An attacker therefore cannot pad a payload past the cap to slip past a deny rule. 
-- **Explicit non-positive value throws:** passing an explicit cap below `1` raises `\InvalidArgumentException`, because a non-positive cap would fail every deny rule closed and block all traffic. +| `fromString()` | `string $rulesText, ?string $contextFolder` | Parse rules from a string | +| `fromFile()` | `string $filePath` | Load rules from a single file | +| `fromFiles()` | `list<string> $paths` | Load and merge multiple files | +| `fromDirectory()` | `string $dir, ?callable $filter` | Load all files in a directory | +| `fromStringWithReport()` | `string $rulesText` | Parse with statistics | ## Supported SecRule Syntax @@ -122,7 +164,7 @@ Phirewall supports a subset of the ModSecurity SecRule language: | `REQUEST_URI` | Full request URI including query string | | `REQUEST_METHOD` | HTTP method (GET, POST, etc.) | | `QUERY_STRING` | Raw query string | -| `REQUEST_FILENAME` | Basename (final path segment), without query string | +| `REQUEST_FILENAME` | Request path without query string | | `REQUEST_HEADERS` | All request header values | | `REQUEST_HEADERS_NAMES` | Names of all request headers | | `REQUEST_COOKIES` | All cookie values | @@ -138,7 +180,7 @@ Phirewall supports a subset of the ModSecurity SecRule language: | `@startswith` | `@startswith text` | Case-insensitive prefix match | | `@beginswith` | `@beginswith text` | Alias for `@startswith` | | `@endswith` | `@endswith text` | Case-insensitive suffix match | -| `@pm` | `@pm word1 word2` | Phrase match (case-insensitive, any of the listed words) | +| `@pm` | `@pm word1 word2` | Phrase match (case-insensitive substring match against any of the listed phrases) | | `@pmFromFile` | `@pmFromFile file.txt` | Phrase match from a file (one phrase per line) | ### Actions @@ -148,7 +190,7 @@ Phirewall supports a subset of the ModSecurity SecRule language: | `id:N` | Rule ID (required, must be unique) | | `phase:N` | Processing phase (currently informational) | | `deny` | Block the request (required for the rule to trigger
blocking) | -| `block` | Alias for `deny`, both trigger blocking | +| `block` | Alias for `deny` -- both trigger blocking | | `msg:'text'` | Human-readable description for logging | ### Line Continuation @@ -181,7 +223,7 @@ $rules = SecRuleLoader::fromString(/* ... */); // Disable a specific rule by ID $rules->disable(941110); // XSS Event Handler (too aggressive for some apps) -$config->blocklists->owasp('owasp', $rules); +$config->blocklists->addRule(new BlocklistRule('owasp', new CoreRuleSetMatcher($rules))); ``` ### Re-enabling Rules @@ -283,7 +325,9 @@ A comprehensive rule set for production: ```php use Flowd\Phirewall\Config; -use Flowd\Phirewall\Owasp\SecRuleLoader; +use Flowd\Phirewall\Config\Rule\BlocklistRule; +use Flowd\PhirewallPresetOwaspCrs\Engine\CoreRuleSetMatcher; +use Flowd\PhirewallPresetOwaspCrs\Engine\SecRuleLoader; use Flowd\Phirewall\Store\RedisCache; use Predis\Client as PredisClient; @@ -321,7 +365,7 @@ CRS); // Disable rules that cause false positives in your application // $rules->disable(941110); // XSS Event Handler -$config->blocklists->owasp('owasp', $rules); +$config->blocklists->addRule(new BlocklistRule('owasp', new CoreRuleSetMatcher($rules))); ``` ## File-Based Rule Management @@ -358,7 +402,7 @@ insert into ``` ::: warning -`@pmFromFile` paths are resolved relative to the rule file's directory, and `..` traversal segments are rejected. Treat SecRule files as trusted operator configuration; never build rule text from untrusted input, since the operand selects which file is read. +`@pmFromFile` includes path traversal protection. Paths containing `..` are rejected to prevent loading files outside the rules directory. 
::: ## Architecture @@ -389,7 +433,7 @@ Each CRS variable maps to a `VariableCollectorInterface` implementation: | `REQUEST_URI` | `RequestUriCollector` | Full URI including query string | | `REQUEST_METHOD` | `RequestMethodCollector` | HTTP method | | `QUERY_STRING` | `QueryStringCollector` | Raw query string | -| `REQUEST_FILENAME` | `RequestFilenameCollector` | Basename (final path segment), without query string | +| `REQUEST_FILENAME` | `RequestFilenameCollector` | URI path without query string | | `REQUEST_HEADERS` | `RequestHeadersCollector` | All header values | | `REQUEST_HEADERS_NAMES` | `RequestHeadersNamesCollector` | Header names | | `REQUEST_COOKIES` | `RequestCookiesCollector` | All cookie values | @@ -412,7 +456,7 @@ Each CRS operator maps to an `OperatorEvaluatorInterface` implementation: Unsupported operators resolve to `UnsupportedOperatorEvaluator`, which never matches (safe no-op). ::: warning ReDoS protection: 8 KiB length guard on `@rx` -`RegexEvaluator` skips any value whose byte length exceeds 8,192 bytes; the value is treated as non-matching. This is an intentional trade-off: running PCRE on unbounded attacker-controlled input risks catastrophic backtracking that can freeze the PHP process (ReDoS). Skipping overlength values mirrors the behavior of standard WAFs such as ModSecurity's `SecRequestBodyLimit`. +`RegexEvaluator` skips any value whose byte length exceeds 8,192 bytes — the value is treated as non-matching. This is an intentional trade-off: running PCRE on unbounded attacker-controlled input risks catastrophic backtracking that can freeze the PHP process (ReDoS). Skipping overlength values mirrors the behavior of standard WAFs such as ModSecurity's `SecRequestBodyLimit`. In practice, legitimate request values (query parameters, header values, cookie values) are rarely larger than a few kilobytes. If you are matching multi-megabyte request bodies via `@rx`, consider pre-processing them before passing to the firewall. 
::: @@ -422,7 +466,7 @@ In practice, legitimate request values (query parameters, header values, cookie Implement `OperatorEvaluatorInterface` and register it in `OperatorEvaluatorFactory`: ```php -namespace Flowd\Phirewall\Owasp\Operator; +namespace Flowd\PhirewallPresetOwaspCrs\Engine\Operator; final readonly class IpMatchEvaluator implements OperatorEvaluatorInterface { @@ -453,7 +497,7 @@ final readonly class IpMatchEvaluator implements OperatorEvaluatorInterface Implement `VariableCollectorInterface` and register it in `VariableCollectorFactory`: ```php -namespace Flowd\Phirewall\Owasp\Variable; +namespace Flowd\PhirewallPresetOwaspCrs\Engine\Variable; use Psr\Http\Message\ServerRequestInterface; @@ -497,10 +541,11 @@ Use `@pm` for simple keyword matching and `@rx` for complex patterns. `@pm` is s 3. **Combine with fail2ban.** Use OWASP rules to detect attacks and fail2ban to ban repeat offenders: ```php - $config->blocklists->owasp('owasp', $rules); + $config->blocklists->addRule(new BlocklistRule('owasp', new CoreRuleSetMatcher($rules))); $config->fail2ban->add('persistent-attacker', threshold: 5, period: 60, ban: 86400, filter: fn($req) => true, + key: KeyExtractors::ip() ); ``` diff --git a/docs/features/safelists-blocklists.md b/docs/features/safelists-blocklists.md index 3ee9c3d..5c159d8 100644 --- a/docs/features/safelists-blocklists.md +++ b/docs/features/safelists-blocklists.md @@ -95,32 +95,47 @@ $config->safelists->ip('ipv6-loopback', '::1'); ### Trusted Bot Verification -Safelist verified search engine bots via reverse DNS verification. See [Bot Detection](/features/bot-detection) for full details. +Safelist verified search engine bots via reverse DNS verification. Wire `TrustedBotMatcher` onto the safelist with `addRule()`. See [Bot Detection](/features/bot-detection) for full details, and [Trusted Bots](/features/trusted-bots) for the matcher itself. 
```php -$config->safelists->trustedBots( - string $name = 'trusted-bots', - array $additionalBots = [], - ?callable $ipResolver = null, - ?CacheInterface $cache = null -): SafelistSection +use Flowd\Phirewall\Config\Rule\SafelistRule; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; + +$config->safelists->addRule(new SafelistRule($name, new TrustedBotMatcher( + additionalBots: [], + ipResolver: $config->getIpResolver(), + cache: null, +))); ``` +Pass `ipResolver: $config->getIpResolver()` so verification uses the real client IP behind a proxy, matching the [global IP resolver](#ip-resolution). + ```php +use Flowd\Phirewall\Config\Rule\SafelistRule; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; + // Safelist Google, Bing, Yahoo, Baidu, DuckDuckGo, Yandex, and Apple bots -$config->safelists->trustedBots(); +$config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher( + ipResolver: $config->getIpResolver(), +))); // Add custom bots on top of the built-in list -$config->safelists->trustedBots('bots', [ +$config->safelists->addRule(new SafelistRule('bots', new TrustedBotMatcher([ ['ua' => 'mypartnerbot', 'hostname' => '.partner.example.com'], -]); +], ipResolver: $config->getIpResolver()))); ``` ::: warning Without a PSR-16 cache, each request with a bot-like User-Agent triggers blocking DNS lookups. In production, always provide a cache: ```php -$config->safelists->trustedBots(cache: $cache); +use Flowd\Phirewall\Config\Rule\SafelistRule; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; + +$config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher( + ipResolver: $config->getIpResolver(), + cache: $cache, +))); ``` ::: @@ -322,17 +337,19 @@ This file holds live security state. Store it **outside your web document root** ### OWASP Core Rule Set -Register OWASP CRS rules as a blocklist to detect SQL injection, XSS, and other attacks: +Register OWASP CRS rules as a blocklist to detect SQL injection, XSS, and other attacks. 
The SecRule engine ships in the companion package `flowd/phirewall-preset-owasp-crs` (`composer require flowd/phirewall-preset-owasp-crs`): ```php -use Flowd\Phirewall\Owasp\SecRuleLoader; +use Flowd\Phirewall\Config\Rule\BlocklistRule; +use Flowd\PhirewallPresetOwaspCrs\Engine\CoreRuleSetMatcher; +use Flowd\PhirewallPresetOwaspCrs\Engine\SecRuleLoader; $rules = SecRuleLoader::fromString(<<<'CRS' SecRule ARGS "@rx (?i)\bunion\b.*\bselect\b" "id:942100,phase:2,deny,msg:'SQLi'" SecRule ARGS "@rx (?i)<script[^>]*>" "id:941100,phase:2,deny,msg:'XSS'" CRS); -$config->blocklists->owasp('owasp', $rules); +$config->blocklists->addRule(new BlocklistRule('owasp', new CoreRuleSetMatcher($rules))); ``` See [OWASP CRS](/features/owasp-crs) for full details on loading rules from files and directories. diff --git a/docs/features/trusted-bots.md b/docs/features/trusted-bots.md new file mode 100644 index 0000000..4eee1e9 --- /dev/null +++ b/docs/features/trusted-bots.md @@ -0,0 +1,108 @@ +--- +outline: deep +--- + +# Trusted Bots + +Trusted bots are the beneficial crawlers you *want* visiting your site: search-engine +indexers such as Googlebot, Bingbot, Baidu, DuckDuckBot, Yandex, Yahoo (Slurp), and +Applebot. Letting them through keeps your pages indexed and discoverable. They are the +opposite of the crawlers you curb (aggressive scrapers and AI training bots). + +The companion package [`flowd/phirewall-preset-bots`](/features/bot-presets) deliberately +**excludes** these search engines, because blocking them de-indexes your site. That package +does the opposite job: blocking AI crawlers and throttling aggressive SEO crawlers. Use it to +restrict unwanted traffic, and use the `TrustedBotMatcher` described here to recognise and +protect the wanted crawlers. + +## The spoofing problem + +A `User-Agent` header is forgeable. Any client can send `Googlebot` in its `User-Agent`, so +matching a trusted bot on the User-Agent alone is not a security control - it hands every +scraper a free pass.
+ +`TrustedBotMatcher` solves this with **reverse + forward DNS verification**. For a request +claiming to be Googlebot it performs a reverse DNS lookup on the client IP, checks that the +resulting hostname ends in a trusted suffix (`.googlebot.com`), then performs a forward lookup +on that hostname and confirms it resolves back to the same IP. A genuine Googlebot request +passes all three steps; a spoofed one fails the rDNS check. + +## Safelisting verified bots + +Wire `TrustedBotMatcher` on the safelist to let verified crawlers bypass the rest of the +firewall: + +```php +use Flowd\Phirewall\Config; +use Flowd\Phirewall\Config\Rule\SafelistRule; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; +use Flowd\Phirewall\Store\InMemoryCache; + +$cache = new InMemoryCache(); // use RedisCache in production +$config = new Config($cache); + +$config->safelists->addRule(new SafelistRule( + 'trusted-bots', + new TrustedBotMatcher(cache: $cache), +)); +``` + +## Rate-limiting verified bots + +Safelisting gives verified crawlers unlimited access. If a crawler is well-behaved but you +still want a ceiling on its request rate, throttle it instead of safelisting it. + +Build a throttle whose key closure returns a key only for verified bots and `null` otherwise. +A `null` key skips the rule, so the limit applies exclusively to real, DNS-verified crawlers +and never touches ordinary visitors: + +```php +$trustedBots = new TrustedBotMatcher(cache: $cache); + +$config->throttles->add( + 'trusted-bot-rate', + limit: 60, + period: 60, + key: fn($request): ?string => + $trustedBots->match($request)->isMatch() ? 'trusted-bot' : null, +); +``` + +`match()` returns a `MatchResult`; check it with `->isMatch()`. The key you return decides how +the limit is bucketed: + +- A constant like `'trusted-bot'` shares one bucket across every verified crawler (a global + cap on all trusted bots combined). 
+- The client IP - `KeyExtractors::ip()($request)` - gives each verified crawler IP its own + cap. +- A per-bot token gives each crawler family a separate cap. + +## Custom bots + +To recognise your own crawlers, pass additional entries as the first `TrustedBotMatcher` +argument. Each entry pairs a User-Agent token with the hostname suffix its IPs must resolve to: + +```php +new TrustedBotMatcher( + [['ua' => 'mycompany-crawler', 'hostname' => '.crawler.mycompany.com']], + cache: $cache, +); +``` + +The hostname suffix must start with a dot. The leading dot anchors the match to the domain +boundary, so a look-alike host such as `evil-crawler.mycompany.com` cannot pass as a match for +`.crawler.mycompany.com`. + +## DNS caching + +Every verification does a reverse and a forward DNS lookup. Pass a PSR-16 cache so those +lookups are memoised: successful results are cached for 24 hours by default and negative +results for 5 minutes. Without a cache, every request from a candidate bot triggers live DNS +calls on the request path. + +## Ordering + +Place the trusted-bot safelist **before** your blocklists and throttles. Safelist rules run +first, so a verified crawler passes through before a scanner or suspicious-header rule can +catch it. If the blocklist ran first, a legitimate crawler could be blocked before +verification ever happened. diff --git a/docs/getting-started.md b/docs/getting-started.md index 4833abe..8b3b129 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -115,7 +115,7 @@ $config->safelists->ip('office', ['10.0.0.0/8', '192.168.1.0/24']); // Safelist verified search engine bots (Googlebot, Bingbot, etc.). // Verified via reverse DNS; pass a cache to skip repeat lookups (see Bot Detection). 
-$config->safelists->trustedBots(); +$config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher())); ``` ### Blocklists (Deny Malicious Traffic) @@ -302,7 +302,9 @@ echo $response->getBody(); namespace App\Factory; use Flowd\Phirewall\Config; +use Flowd\Phirewall\Config\Rule\SafelistRule; use Flowd\Phirewall\KeyExtractors; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; use Flowd\Phirewall\Middleware; use Flowd\Phirewall\Store\ApcuCache; use Nyholm\Psr7\Factory\Psr17Factory; @@ -327,7 +329,7 @@ class PhirewallFactory fn(ServerRequestInterface $req): bool => str_starts_with($req->getUri()->getPath(), '/_profiler') ); - $config->safelists->trustedBots(cache: $cache); + $config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher(cache: $cache))); // Blocklists $config->blocklists->knownScanners(); @@ -453,7 +455,9 @@ final class PhirewallListener namespace App\Providers; use Flowd\Phirewall\Config; +use Flowd\Phirewall\Config\Rule\SafelistRule; use Flowd\Phirewall\KeyExtractors; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; use Flowd\Phirewall\Middleware as PhirewallMiddleware; use Flowd\Phirewall\Store\ApcuCache; use Illuminate\Support\ServiceProvider; @@ -487,7 +491,7 @@ class PhirewallServiceProvider extends ServiceProvider fn(ServerRequestInterface $req): bool => $req->getUri()->getPath() === '/health' ); - $config->safelists->trustedBots(cache: $cache); + $config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher(cache: $cache))); // Blocklists $config->blocklists->knownScanners(); @@ -594,7 +598,9 @@ final readonly class Phirewall // Add Phirewall LAST so it executes FIRST (outermost). 
use Flowd\Phirewall\Config; +use Flowd\Phirewall\Config\Rule\SafelistRule; use Flowd\Phirewall\KeyExtractors; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; use Flowd\Phirewall\Middleware as PhirewallMiddleware; use Flowd\Phirewall\Store\ApcuCache; use Psr\Http\Message\ServerRequestInterface; @@ -613,7 +619,7 @@ $config->safelists->add('health', fn(ServerRequestInterface $req): bool => $req->getUri()->getPath() === '/health' ); -$config->safelists->trustedBots(cache: $cache); +$config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher(cache: $cache))); // Blocklists $config->blocklists->knownScanners(); @@ -660,7 +666,9 @@ $app->run(); namespace App\Factory; use Flowd\Phirewall\Config; +use Flowd\Phirewall\Config\Rule\SafelistRule; use Flowd\Phirewall\KeyExtractors; +use Flowd\Phirewall\Matchers\TrustedBotMatcher; use Flowd\Phirewall\Middleware as PhirewallMiddleware; use Flowd\Phirewall\Store\ApcuCache; use Nyholm\Psr7\Factory\Psr17Factory; @@ -683,7 +691,7 @@ class PhirewallMiddlewareFactory fn(ServerRequestInterface $req): bool => $req->getUri()->getPath() === '/health' ); - $config->safelists->trustedBots(cache: $cache); + $config->safelists->addRule(new SafelistRule('trusted-bots', new TrustedBotMatcher(cache: $cache))); // Blocklists $config->blocklists->knownScanners();