diff --git a/Cargo.lock b/Cargo.lock index aa5a779..3db90d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2522,6 +2522,16 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" +[[package]] +name = "tree-sitter-php" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c17c3ab69052c5eeaa7ff5cd972dd1bc25d1b97ee779fec391ad3b5df5592" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-python" version = "0.25.0" @@ -3282,6 +3292,7 @@ dependencies = [ "tree-sitter-java", "tree-sitter-javascript", "tree-sitter-kotlin-ng", + "tree-sitter-php", "tree-sitter-python", "tree-sitter-ruby", "tree-sitter-typescript", diff --git a/Cargo.toml b/Cargo.toml index b30ffe4..e75f514 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ tree-sitter-go = "0.25" tree-sitter-c-sharp = "0.23" tree-sitter-kotlin-ng = "1.1" tree-sitter-ruby = "0.23" +tree-sitter-php = "0.24" ignore = "0.4" sha2 = "0.11" regex = "1" diff --git a/README.md b/README.md index a2a1e31..1643916 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Sift through your codebase for embedded authorization logic. Extract it into Policy as Code (PaC) — [Rego](https://www.openpolicyagent.org/docs/latest/policy-language/) for [OPA](https://www.openpolicyagent.org/), or [Cedar](https://www.cedarpolicy.com/) for [AWS Verified Permissions](https://aws.amazon.com/verified-permissions/), Arbiter, and other Cedar-compatible engines. -> **Status:** v0.2 — structural scanning ready for TypeScript, JavaScript, Java, Python, Go, C#, Kotlin, and Ruby. `--deep` (LLM-assisted) mode functional via any OpenAI-compatible endpoint or MCP-capable agent host. +> **Status:** v0.2 — structural scanning ready for TypeScript, JavaScript, Java, Python, Go, C#, Kotlin, Ruby, and PHP. 
`--deep` (LLM-assisted) mode functional via any OpenAI-compatible endpoint or MCP-capable agent host. ## What is zift? @@ -27,7 +27,7 @@ zift report . # detailed findings report 1. **Structural scan** (tree-sitter) — fast, deterministic, zero-cost. Finds known authorization patterns: role checks, permission guards, auth middleware, security annotations. -2. **Semantic scan** (`--deep`, opt-in) — sends candidate code regions to an LLM that classifies authorization logic the structural pass missed or misjudged. Useful for business rules that implicitly encode access control, and for languages where structural support hasn't shipped yet (PHP, etc.). +2. **Semantic scan** (`--deep`, opt-in) — sends candidate code regions to an LLM that classifies authorization logic the structural pass missed or misjudged. Useful for business rules that implicitly encode access control. ## Supported languages @@ -40,7 +40,7 @@ zift report . # detailed findings report | C# | yes (v0.2) | yes (v0.1) | ASP.NET Core | | Kotlin | yes (v0.2) | yes (v0.1) | Spring (Kotlin), Ktor | | Ruby | yes (v0.2) | yes (v0.1) | Rails, Pundit, CanCanCan, Devise | -| PHP | planned (v0.2) | yes (v0.1) | Laravel | +| PHP | yes (v0.2) | yes (v0.1) | Laravel, Symfony | Deep mode walks the full source tree by extension and detects auth-y function names with regex — so it produces useful results in any language well before structural support lands. diff --git a/docs/corpus/README.md b/docs/corpus/README.md index dd413c1..2c0e0f1 100644 --- a/docs/corpus/README.md +++ b/docs/corpus/README.md @@ -31,6 +31,7 @@ We are **not** shipping policies for these projects. The runs exist to stress-te | C# | [bitwarden/server](https://github.com/bitwarden/server) | 318 | 88 (AdminConsole subset) | ASP.NET Core resource authorization dominates structurally; deep surfaces generic `[Authorize]`, ownership checks, and helper gates. See [csharp.md](csharp.md). 
| | Kotlin | [ktorio/ktor-samples](https://github.com/ktorio/ktor-samples) | 13 | — | Ktor `install(Authentication)` + named `authenticate(...) { ... }` route guards account for every finding; Spring-Kotlin rules need a separate corpus target to calibrate. See [kotlin.md](kotlin.md). | | Ruby | [discourse/discourse](https://github.com/discourse/discourse) | 339 | — | Rails `before_action` filters (180) and `current_user.?` predicates (159) carry every finding; Discourse's `Guardian` call sites are the gap. Pundit/CanCanCan rules need a Pundit-flavored target to calibrate. See [ruby.md](ruby.md). | +| PHP | [monicahq/monica](https://github.com/monicahq/monica) + [symfony/demo](https://github.com/symfony/demo) | 41 + 4 | — | Laravel Gates + Policies + route-middleware carry every Monica finding (41); the Symfony demo exercises every Voter / `#[IsGranted]` rule with no overlap. See [php.md](php.md). | > The "deep" column is intentionally a **scoped subset** rather than the whole repo — running deep against 5,000+ files per language is neither cheap nor necessary to surface gaps. Each per-language doc explains the subset and why. > diff --git a/docs/corpus/php.md b/docs/corpus/php.md new file mode 100644 index 0000000..9d6711d --- /dev/null +++ b/docs/corpus/php.md @@ -0,0 +1,138 @@ +# PHP — Monica + Symfony demo + +Real-world results from running Zift against two open-source PHP codebases: [monicahq/monica](https://github.com/monicahq/monica) (Laravel) and [symfony/demo](https://github.com/symfony/demo) (Symfony). The pair is intentional — neither framework appears in the other's authz idioms, so they calibrate the Laravel and Symfony rule families independently. + +## Why these targets + +Monica is one of the largest mature open-source Laravel apps in active use (CRM/contact manager). 
It uses Laravel Gates *and* Policies *and* `Route::middleware('can:…')` simultaneously — so every Laravel rule has a real target to fire against, with no Spatie permissions plugin in the way to muddy the signal. The Symfony demo is the official example app maintained by the Symfony team; it's small but exercises the modern PHP 8 attribute syntax (`#[IsGranted]`), the docblock-free Voter idiom, and `denyAccessUnlessGranted` — every Symfony rule has a hit. + +The pair confirms two things: that the Laravel rules don't fire on Symfony code (and vice versa), and that the Symfony attribute rule survives the PHP 8 `name: 'value'` argument shape without false positives on neighbouring attributes (`#[Route]`, `#[Cache]`, …). + +## Target metadata + +### Monica (Laravel) + +| | | +|---|---| +| Repo | [monicahq/monica](https://github.com/monicahq/monica) | +| Commit | `e08e917` | +| PHP files (excl. `vendor/`) | 1,656 | +| LOC (`.php`, excl. `vendor/`) | 134,702 | +| Externalized PaC | None observed | +| Zift version | 0.2.2 | + +### Symfony demo + +| | | +|---|---| +| Repo | [symfony/demo](https://github.com/symfony/demo) | +| Commit | `83d4ac1` | +| PHP files (excl. `vendor/`) | 52 | +| LOC (`.php`, excl. 
`vendor/`) | 6,256 | +| Externalized PaC | None observed | +| Zift version | 0.2.2 | + +## Structural pass — Monica + +```bash +zift scan ~/zift-corpus/php/monica --language php --format json -o structural.json +``` + +| | | +|---|---| +| Wall time | 1.4s | +| Total findings | **41** | +| Files with findings | 13 | +| Externalized % | 0% (no policy-import enforcement points emitted) | + +**Findings per rule** + +| Rule | Count | +|------|------:| +| `php-laravel-route-middleware` | 13 | +| `php-laravel-gate-define` | 11 | +| `php-laravel-gate-allows-denies` | 7 | +| `php-laravel-authorize-helper` | 5 | +| `php-laravel-policy-class` | 5 | + +**Findings per category** + +| Category | Count | +|----------|------:| +| `rbac` | 28 | +| `middleware` | 13 | + +**Top findings (sample)** + +| File | Line | Snippet | +|------|-----:|---------| +| `app/Providers/AuthServiceProvider.php` | 32 | `Gate::define('administrator', function (User $user): bool { … })` | +| `app/Providers/AuthServiceProvider.php` | 42 | `Gate::define('vault-editor', function (User $user, $vault): bool { … })` | +| `app/Policies/VaultPolicy.php` | 22 | `public function view(User $user, Vault $vault): bool { … }` | +| `app/Policies/VaultPolicy.php` | 46 | `public function update(User $user, Vault $vault): bool { … }` | +| `app/Domains/Vault/ManageVault/Api/Controllers/VaultController.php` | 23 | `$this->middleware('abilities:read')` | +| `app/Domains/Contact/ManageContact/Web/Controllers/ContactController.php` | 57 | `Gate::authorize('vault-editor', $vault)` | +| `routes/web.php` | 199 | `Route::middleware('can:vault-viewer,vault')` | +| `routes/web.php` | 250 | `Route::middleware('can:contact-owner,vault,contact')` | + +The shape lines up exactly with how Monica's authz is laid out: a single `AuthServiceProvider::boot()` declares the ability vocabulary (11 `Gate::define`s — `administrator`, `vault-editor`, `vault-viewer`, `vault-manager`, `contact-owner`, …), the controllers call 
`Gate::authorize('vault-editor', …)` per request, and `routes/web.php` attaches `can:,` middleware to grouped route trees. `VaultPolicy` carries the CRUD method shape the policy-class rule was designed for. Five `php-laravel-authorize-helper` matches in `tests/Feature/Auth/*` are real `$token->can('read')` calls in test fixtures — they're authz state assertions in tests, which is the right behaviour for the rule. + +## Structural pass — Symfony demo + +```bash +zift scan ~/zift-corpus/php/demo --language php --format json -o structural.json +``` + +| | | +|---|---| +| Wall time | 0.08s | +| Total findings | **4** | +| Files with findings | 3 | +| Externalized % | 0% | + +**Findings per rule** + +| Rule | Count | +|------|------:| +| `php-symfony-is-granted-attribute` | 3 | +| `php-symfony-voter-class` | 1 | + +**All findings** + +| File | Line | Snippet | +|------|-----:|---------| +| `src/Controller/Admin/BlogController.php` | 138 | `IsGranted('edit', subject: 'post', message: 'Posts can only be edited by their authors.')` | +| `src/Controller/Admin/BlogController.php` | 161 | `IsGranted('delete', subject: 'post')` | +| `src/Controller/BlogController.php` | 107 | `IsGranted('IS_AUTHENTICATED')` | +| `src/Security/PostVoter.php` | 30 | `final class PostVoter extends Voter { … }` | + +Every Symfony idiom the demo uses gets exactly one hit per call site. `PostVoter` is the canonical Symfony Voter shape — class extends the framework `Voter` parent. The three `#[IsGranted]` attributes cover the PHP 8 attribute form including the named-argument syntax (`subject: 'post'`, `message: '…'`); the anchor in the rule's query keeps the captured `@role` pinned to the first positional argument so the downstream Rego template emits `IS_AUTHENTICATED` / `edit` / `delete` and not the message string. 
+ +## Zero-coverage rules (intentional) + +Six rules fired zero findings on at least one of the two targets — and that's correct: + +| Rule | Monica | Symfony demo | Why zero is expected | +|------|------:|------:|---------------------| +| `php-symfony-voter-class` | 0 | 1 | Laravel doesn't use Voters; Symfony does. | +| `php-symfony-is-granted` | 0 | 0 | Symfony demo uses `#[IsGranted]` attributes for its string-literal checks; `$this->denyAccessUnlessGranted(…)` does appear in `BlogController.php:127` but its first positional argument is `PostVoter::SHOW` (a class constant ref), which the rule deliberately doesn't capture — see [Gaps & follow-ups](#gaps--follow-ups). | +| `php-symfony-is-granted-attribute` | 0 | 3 | Laravel/Monica isn't on Symfony's attribute family. | +| `php-role-equals-check` | 0 | 0 | Neither codebase spells RBAC as `$user->role === 'admin'` — both use Gates/Voters. | +| `php-in-array-role-check` | 0 | 0 | Same — neither hand-rolls `in_array('manager', $user->roles)`. | +| `php-has-role-call` | 0 | 0 | Neither pulls in spatie/laravel-permission, so `$user->hasRole(…)` doesn't appear. | + +The three idiomatic rules (`role-equals-check`, `in-array-role-check`, `has-role-call`) are exercised by the inline rule tests (`cargo run -- rules test`) and need a hand-rolled-authz Laravel app or a Spatie-flavoured target for end-to-end calibration. + +## Gaps & follow-ups + +**Constant-ref first argument is intentionally dropped.** Symfony's `denyAccessUnlessGranted(PostVoter::SHOW, $post, …)` in `BlogController.php:127` *is* an authz call, but the first positional argument is a class constant (`PostVoter::SHOW`) — Zift can't resolve it to a literal at scan time, so the rule's anchor lets the call slip through structurally rather than fabricate a Rego template against the trailing message string. The trade-off here is a known FN on `::CONSTANT`-shaped calls in exchange for honest output on the calls that *do* expose a literal. 
A follow-up could either widen the capture (record the constant ref textually as `@attribute` and let the template TODO-out the value) or pair the structural miss with a deep-pass rule. Tracked as a future ruleset refinement. + +**`Gate::authorize` lives under the `allows`/`denies` rule.** Laravel ships `Gate::authorize($ability, $resource)` as a fourth verb alongside `allows`/`denies`/`check`. The rule's `method_name` regex includes it, so all seven `Gate::authorize('vault-editor', …)` calls in Monica land in `php-laravel-gate-allows-denies` rather than a dedicated rule. The category is correct (`rbac`); the verbosity is the only cost. Worth splitting only if downstream consumers need to bucket "decision boundaries" (`allows`/`denies`) separately from "imperative throws" (`authorize`) for reporting. + +**`$this->middleware(…)` works inside controller constructors.** Monica uses both `Route::middleware(…)` at the route-tree level and `$this->middleware('abilities:read')` inside `__construct` of API controllers (e.g. `VaultController.php:23`). Both surface here because the rule matches any call literally named `middleware` whose argument list looks like an auth alias — the receiver isn't constrained. This is the right call for Laravel where both shapes are idiomatic, but worth pinning if a Lumen/Slim-flavoured corpus target later turns up other libraries that expose a same-named no-op. + +**FP risk: low across the board.** Every match on both targets is a real authz surface. No false positives observed; the rule-level predicates (method-name regex, scope check, arg-shape constraints) carry the load. Of the four rules that survive both targets without firing, `php-role-equals-check` in particular would benefit from a corpus target that genuinely uses the `$user->role === 'admin'` shape to confirm the predicate breadth on the property name (`role|roles|user_role|account_type|user_type|permission|permissions`) is calibrated correctly. + +## Deep pass + +Not run for either target. 
Monica's structural pass yields a tight, high-signal slice of the privileged routes, gate definitions, and policy methods; Symfony demo is small enough that the structural pass essentially exhausts the visible authz surface. Deep would primarily add value on the parts of Monica's `Service` layer that wrap authz inside business logic without a Gate call — those are the gap to investigate first if a deep pass is run later. diff --git a/rules/php/has-role-call.toml b/rules/php/has-role-call.toml new file mode 100644 index 0000000..3f044d1 --- /dev/null +++ b/rules/php/has-role-call.toml @@ -0,0 +1,74 @@ +[rule] +id = "php-has-role-call" +languages = ["php"] +category = "rbac" +confidence = "high" +description = "hasRole / hasPermission / hasAnyRole call (PHP)" +# Matches `$user->hasRole('admin')`, `$user->hasPermission('posts.edit')`, +# `$user->hasAnyRole(['admin', 'editor'])` — the Spatie/laravel-permission +# and hand-rolled idiom. The method name is gated to the role/permission +# family so generic predicates (`hasMany`, `hasField`) don't fire. +query = """ +(member_call_expression + name: (name) @method_name + arguments: (arguments + . + (argument + [ + (string (string_content) @role_value) + (encapsed_string (string_content) @role_value) + ])) +) @match +""" + +[rule.predicates.method_name] +match = "^(hasRole|hasAnyRole|hasAllRoles|hasPermission|hasPermissionTo|hasAnyPermission|hasAllPermissions)$" + +[rule.rego_template] +template = """ +default allow := false + +# ->{{method_name}}('{{role_value}}'). 
+allow if { + "{{role_value}}" in input.user.roles +} +""" + +[rule.cedar_template] +template = """ +permit ( + principal, + action, + resource +) +when { + principal.roles.contains("{{role_value}}") +}; +""" + +[[rule.tests]] +input = """ +hasRole('admin'); +} +""" +expect_match = true + +[[rule.tests]] +input = """ +hasPermissionTo('posts.edit'); +} +""" +expect_match = true + +[[rule.tests]] +input = """ +hasMany('App\\Comment'); +} +""" +expect_match = false diff --git a/rules/php/in-array-role-check.toml b/rules/php/in-array-role-check.toml new file mode 100644 index 0000000..0bdf55b --- /dev/null +++ b/rules/php/in-array-role-check.toml @@ -0,0 +1,81 @@ +[rule] +id = "php-in-array-role-check" +languages = ["php"] +category = "rbac" +confidence = "high" +description = "in_array role/permission membership check (PHP)" +# Matches `in_array('manager', $user->roles)` — the canonical PHP shape for +# "is this principal in this role collection?". First arg is the role +# literal; second arg's trailing property name must be role-shaped +# (`roles`, `permissions`, `authorities`, ...) so unrelated `in_array($x, +# $widgets)` calls don't fire. +query = """ +(function_call_expression + function: (name) @fn + arguments: (arguments + . + (argument + [ + (string (string_content) @role_value) + (encapsed_string (string_content) @role_value) + ]) + . 
+ (argument + (member_access_expression + name: (name) @collection))) +) @match +""" + +[rule.predicates.fn] +eq = "in_array" + +[rule.predicates.collection] +match = "(?i)^(roles|permissions|authorities|scopes|granted_authorities|groups)$" + +[rule.rego_template] +template = """ +default allow := false + +allow if { + "{{role_value}}" in input.user.roles +} +""" + +[rule.cedar_template] +template = """ +permit ( + principal, + action, + resource +) +when { + principal.roles.contains("{{role_value}}") +}; +""" + +[[rule.tests]] +input = """ +roles); +} +""" +expect_match = true + +[[rule.tests]] +input = """ +permissions); +} +""" +expect_match = true + +[[rule.tests]] +input = """ +tags); +} +""" +expect_match = false diff --git a/rules/php/laravel-authorize-helper.toml b/rules/php/laravel-authorize-helper.toml new file mode 100644 index 0000000..988d0fa --- /dev/null +++ b/rules/php/laravel-authorize-helper.toml @@ -0,0 +1,147 @@ +[rule] +id = "php-laravel-authorize-helper" +languages = ["php"] +category = "rbac" +confidence = "high" +description = "Laravel $this->authorize / can / cannot policy helper (PHP)" +# Matches Laravel's controller-side helpers: `$this->authorize('update', $post)`, +# `$user->can('view', $post)`, and `$user->cannot(...)`. The shape is a +# `member_call_expression` (or its `?->` nullsafe variant — idiomatic in +# PHP 8) where the method name is a Laravel policy verb. The receiver is +# pinned to principal-shaped receivers — `$this`, a user-named variable +# (`$user`, `$currentUser`, `$me`, …), or a chain ending in `->user()` / +# `::user()` (the canonical `Auth::user()` / `auth()->user()` / +# `$request->user()` shapes) — so unrelated `->can(…)` / `->cannot(…)` +# calls on domain objects don't fire. `can`/`cannot` are common method +# names well outside authz; the receiver predicate is what keeps the +# rule's high-confidence promise honest. 
The first arg is the ability +# (string literal) — captured so downstream tooling can map it to a +# permission. +query = """ +[ + (member_call_expression + object: (_) @receiver + name: (name) @method_name + arguments: (arguments + . + (argument + [ + (string (string_content) @ability) + (encapsed_string (string_content) @ability) + ]))) + (nullsafe_member_call_expression + object: (_) @receiver + name: (name) @method_name + arguments: (arguments + . + (argument + [ + (string (string_content) @ability) + (encapsed_string (string_content) @ability) + ]))) +] @match +""" + +[rule.predicates.method_name] +match = "^(authorize|can|cannot)$" + +[rule.predicates.receiver] +match = "(?i)^(\\$this|\\$(user|currentUser|current_user|account|member|principal|viewer|actor|me|admin|customer|loggedInUser|authUser|authenticatedUser|requestUser)|.+(?:->|\\?->)user\\(\\)|.+::user\\(\\))$" + +[rule.rego_template] +template = """ +default allow := false + +# Laravel ->{{method_name}} {{ability}}. +allow if { + input.action == "{{ability}}" + input.user.permissions[_] == "{{ability}}" +} +""" + +[rule.cedar_template] +template = """ +permit ( + principal, + action == Action::"{{ability}}", + resource +); +""" + +[[rule.tests]] +input = """ +authorize('update', $post); + $post->save(); + } +} +""" +expect_match = true + +[[rule.tests]] +input = """ +user()->can('view', $post)) { + return view('post'); + } + } +} +""" +expect_match = true + +[[rule.tests]] +input = """ +can('view', $post)) { + return true; + } +} +""" +expect_match = true + +[[rule.tests]] +input = """ +user()?->can('view', $post)) { + return view('post'); + } + } +} +""" +expect_match = true + +[[rule.tests]] +input = """ +can('render'); +} +""" +expect_match = false + +[[rule.tests]] +input = """ +cannot('disconnect'); +} +""" +expect_match = false diff --git a/rules/php/laravel-gate-allows-denies.toml b/rules/php/laravel-gate-allows-denies.toml new file mode 100644 index 0000000..c4666cf --- /dev/null +++ 
b/rules/php/laravel-gate-allows-denies.toml @@ -0,0 +1,87 @@ +[rule] +id = "php-laravel-gate-allows-denies" +languages = ["php"] +category = "rbac" +confidence = "high" +description = "Laravel Gate::allows / Gate::denies / Gate::check call (PHP)" +# Matches `Gate::allows('update-post', $post)`, `Gate::denies(...)`, +# `Gate::check(...)` and the other verbs listed in the `method_name` +# predicate — Laravel's facade-style permission gate. The scope is `Gate`. +# The first argument is the ability name; only string-literal abilities +# match, and we capture the literal so downstream tooling can map it. +query = """ +(scoped_call_expression + scope: (name) @scope + name: (name) @method_name + arguments: (arguments + . + (argument + [ + (string (string_content) @ability) + (encapsed_string (string_content) @ability) + ])) +) @match +""" + +[rule.predicates.scope] +eq = "Gate" + +[rule.predicates.method_name] +match = "^(allows|denies|check|any|none|authorize|forUser)$" + +[rule.rego_template] +template = """ +default allow := false + +# Laravel Gate::{{method_name}} {{ability}}. 
+allow if { + input.action == "{{ability}}" + input.user.permissions[_] == "{{ability}}" +} +""" + +[rule.cedar_template] +template = """ +permit ( + principal, + action == Action::"{{ability}}", + resource +); +""" + +[[rule.tests]] +input = """ +id === $post->user_id; + }); + } +} +""" +expect_match = true + +[[rule.tests]] +input = """ +id === $post->user_id; + } + public function delete(User $user, Post $post) { + return $user->is_admin; + } +} +""" +expect_match = true + +[[rule.tests]] +input = """ +logger = $logger; + } +} +""" +expect_match = false diff --git a/rules/php/laravel-route-middleware.toml b/rules/php/laravel-route-middleware.toml new file mode 100644 index 0000000..728cd57 --- /dev/null +++ b/rules/php/laravel-route-middleware.toml @@ -0,0 +1,78 @@ +[rule] +id = "php-laravel-route-middleware" +languages = ["php"] +category = "middleware" +confidence = "high" +description = "Laravel Route::middleware / ->middleware auth filter (PHP)" +# Laravel routers attach authz via `Route::middleware('auth')`, +# `Route::middleware(['auth', 'can:update,post'])`, and chained +# `Route::get(...)->middleware('auth')`. The structural signature is a call +# to a method literally named `middleware`. We don't constrain the receiver +# (it can be the `Route` facade or a chained route builder), and we don't +# constrain arg shape (string, array, multiple) — instead a regex on the +# whole arg list filters to entries that look like an auth/role middleware +# alias (`auth`, `can:…`, `role:…`, `permission:…`, `verified`, …) so we +# don't flag `->middleware('throttle:60,1')` as authz. 
+query = """ +[ + (member_call_expression + name: (name) @method_name + arguments: (arguments) @args) + (scoped_call_expression + name: (name) @method_name + arguments: (arguments) @args) +] @match +""" + +[rule.predicates.method_name] +eq = "middleware" + +[rule.predicates.args] +match = "(?i)['\\\"]\\s*(auth|auth\\.[a-z]+|guest|verified|signed|can:|role:|permission:|abilities:|ability:|password\\.confirm)" + +[rule.rego_template] +template = """ +default allow := false + +# Laravel route middleware — translate the matched alias into an auth predicate. +allow if { + input.user.authenticated == true +} +""" + +[rule.cedar_template] +template = """ +permit ( + principal, + action, + resource +) +when { + principal has authenticated && principal.authenticated == true +}; +""" + +[[rule.tests]] +input = """ +group(function () { + Route::get('/dashboard', [DashboardController::class, 'index']); +}); +""" +expect_match = true + +[[rule.tests]] +input = """ +middleware(['auth', 'can:admin']); +""" +expect_match = true + +[[rule.tests]] +input = """ +group(function () { + Route::get('/api/foo', [FooController::class, 'index']); +}); +""" +expect_match = false diff --git a/rules/php/role-equals-check.toml b/rules/php/role-equals-check.toml new file mode 100644 index 0000000..7ab159a --- /dev/null +++ b/rules/php/role-equals-check.toml @@ -0,0 +1,92 @@ +[rule] +id = "php-role-equals-check" +languages = ["php"] +category = "rbac" +confidence = "high" +description = "Inline role/permission equality check (PHP)" +# Matches `$user->role === 'admin'` and friends — the idiomatic PHP RBAC +# shape. The receiver is gated to a role-shaped property name and the +# operator is one of PHP's two equality operators (`==`, `===`). Loose `==` +# is included because real-world Laravel/Symfony code uses both even though +# the strict form is recommended. `!=` / `!==` are intentionally excluded — +# their semantics invert relative to the allow-style template. 
+query = """ +(binary_expression + left: (member_access_expression + name: (name) @prop) + operator: [ + "===" + "==" + ] + right: [ + (string (string_content) @role_value) + (encapsed_string (string_content) @role_value) + ] +) @match +""" + +[rule.predicates.prop] +match = "(?i)^(role|roles|user_role|account_type|user_type|permission|permissions)$" + +[rule.rego_template] +template = """ +default allow := false + +allow if { + input.user.role == "{{role_value}}" +} +""" + +[rule.cedar_template] +template = """ +permit ( + principal, + action, + resource +) +when { + principal.role == "{{role_value}}" +}; +""" + +[[rule.tests]] +input = """ +role === 'admin') { + return true; + } +} +""" +expect_match = true + +[[rule.tests]] +input = """ +account_type == "enterprise"; +} +""" +expect_match = true + +[[rule.tests]] +input = """ +name === 'admin') { + echo "hi admin"; + } +} +""" +expect_match = false + +[[rule.tests]] +input = """ +role !== 'admin') { + return false; + } +} +""" +expect_match = false diff --git a/rules/php/symfony-is-granted-attribute.toml b/rules/php/symfony-is-granted-attribute.toml new file mode 100644 index 0000000..9a9d62b --- /dev/null +++ b/rules/php/symfony-is-granted-attribute.toml @@ -0,0 +1,92 @@ +[rule] +id = "php-symfony-is-granted-attribute" +languages = ["php"] +category = "rbac" +confidence = "high" +description = "Symfony #[IsGranted] attribute (PHP 8+)" +# Matches Symfony's PHP-8-attribute-based authorization: +# `#[IsGranted('ROLE_ADMIN')]`, `#[IsGranted('EDIT', subject: 'post')]`. +# The structure is `attribute_list > attribute_group > attribute` with the +# attribute name and the first string argument captured. The docblock-style +# `@IsGranted` form (a comment) isn't structurally parseable and is out of +# scope here; it can be picked up by the deep pass when it appears. +query = """ +(attribute + [ + (name) @attr_name + (qualified_name (name) @attr_name) + ] + parameters: (arguments + . 
+ (argument + [ + (string (string_content) @role) + (encapsed_string (string_content) @role) + ])) +) @match +""" + +[rule.predicates.attr_name] +eq = "IsGranted" + +[rule.rego_template] +template = """ +default allow := false + +# Symfony #[IsGranted('{{role}}')]. +allow if { + input.user.roles[_] == "{{role}}" +} +""" + +[rule.cedar_template] +template = """ +permit ( + principal, + action, + resource +) +when { + principal.roles.contains("{{role}}") +}; +""" + +[[rule.tests]] +input = """ +isGranted / denyAccessUnlessGranted call (PHP)" +# Matches Symfony's controller authz call sites: `$this->isGranted('ROLE_ADMIN', $subject)` +# and `$this->denyAccessUnlessGranted('EDIT', $post)`. The first argument is +# the attribute (role string or ability slug) — captured so downstream +# tooling can translate the check to a permission predicate. +query = """ +(member_call_expression + name: (name) @method_name + arguments: (arguments + . + (argument + [ + (string (string_content) @attribute) + (encapsed_string (string_content) @attribute) + ])) +) @match +""" + +[rule.predicates.method_name] +match = "^(isGranted|denyAccessUnlessGranted)$" + +[rule.rego_template] +template = """ +default allow := false + +# Symfony ->{{method_name}} {{attribute}}. 
+allow if { + input.user.roles[_] == "{{attribute}}" +} +""" + +[rule.cedar_template] +template = """ +permit ( + principal, + action, + resource +) +when { + principal.roles.contains("{{attribute}}") +}; +""" + +[[rule.tests]] +input = """ +denyAccessUnlessGranted('EDIT', $post); + return $this->render('post/edit.html.twig'); + } +} +""" +expect_match = true + +[[rule.tests]] +input = """ +isGranted('ROLE_ADMIN')) { + return $this->render('admin.html.twig'); + } + } +} +""" +expect_match = true + +[[rule.tests]] +input = """ + Result> { diff --git a/src/scanner/discovery.rs b/src/scanner/discovery.rs index 95629da..2a37afe 100644 --- a/src/scanner/discovery.rs +++ b/src/scanner/discovery.rs @@ -30,6 +30,7 @@ pub fn detect_language(path: &Path) -> Option<(Language, bool)> { "cs" => Some((Language::CSharp, false)), "kt" | "kts" => Some((Language::Kotlin, false)), "rb" | "rake" => Some((Language::Ruby, false)), + "php" | "phtml" => Some((Language::Php, false)), _ => None, } } @@ -233,6 +234,19 @@ mod tests { ); } + #[test] + fn detect_php_extensions() { + assert_eq!( + detect_language(Path::new("foo.php")), + Some((Language::Php, false)) + ); + // Laravel/legacy templating extension. + assert_eq!( + detect_language(Path::new("foo.phtml")), + Some((Language::Php, false)) + ); + } + #[test] fn detect_unknown_extension() { assert_eq!(detect_language(Path::new("foo.rs")), None); @@ -306,16 +320,6 @@ mod tests { assert_eq!(detect_language_for_deep(Path::new("foo.txt")), None); } - #[test] - fn structural_detect_language_does_not_pick_up_unsupported_languages() { - // Sanity: the structural detector must NOT include languages without - // a wired-up tree-sitter grammar — otherwise the structural pass - // would try to parse files it can't handle. The deep detector picks - // them up; the structural one doesn't. (Ruby / Kotlin / C# were here - // before their structural support landed.) 
- assert_eq!(detect_language(Path::new("foo.php")), None); - } - #[test] fn discover_for_deep_picks_up_extra_languages() { use std::collections::HashSet; @@ -327,6 +331,7 @@ mod tests { fs::write(dir.path().join("d.cs"), "class C {}").unwrap(); fs::write(dir.path().join("e.kt"), "class K\n").unwrap(); fs::write(dir.path().join("f.rb"), "class R\nend\n").unwrap(); + fs::write(dir.path().join("g.php"), " = structural.iter().map(|f| f.language).collect(); @@ -339,8 +344,9 @@ mod tests { Language::CSharp, Language::Kotlin, Language::Ruby, + Language::Php, ]), - "structural should include TS + Python + Go + C# + Kotlin + Ruby", + "structural should include TS + Python + Go + C# + Kotlin + Ruby + PHP", ); let deep = discover_files_for_deep(dir.path(), &[], &[]); @@ -354,8 +360,9 @@ mod tests { Language::CSharp, Language::Kotlin, Language::Ruby, + Language::Php, ]), - "deep should include TS + Python + Go + C# + Kotlin + Ruby", + "deep should include TS + Python + Go + C# + Kotlin + Ruby + PHP", ); } } diff --git a/src/scanner/imports.rs b/src/scanner/imports.rs index a88d682..01dac6f 100644 --- a/src/scanner/imports.rs +++ b/src/scanner/imports.rs @@ -165,6 +165,7 @@ pub fn find_policy_imports( Language::Python => find_py_policy_imports(tree, source), Language::Java => find_java_policy_imports(tree, source), Language::CSharp => find_csharp_policy_imports(tree, source), + Language::Php => find_php_policy_imports(tree, source), // Other languages: no import detection yet. 
_ => HashSet::new(), }; @@ -577,6 +578,146 @@ fn csharp_node_can_be_policy_path(node: tree_sitter::Node) -> bool { ) } +fn find_php_policy_imports(tree: &tree_sitter::Tree, source: &[u8]) -> HashSet { + let mut policy_names = HashSet::new(); + + iter_named_descendants(tree.root_node(), |node| { + if node.kind() != "namespace_use_declaration" { + return; + } + + // PHP `use` comes in two structural shapes: + // A) `use Company\Policy\Authorize [as A];` — one or more direct + // `namespace_use_clause` children, each carrying the full + // `qualified_name`. + // B) `use Company\Policy\{Authorize, Engine as E};` — a single + // `namespace_name` prefix child plus a `namespace_use_group` + // whose children are leaf `namespace_use_clause`s. + // `use function …` / `use const …` carry a leading `type` field on + // the clause; we don't filter on it — the binding shape is identical + // and any of those names can legitimately point at a policy module. + let mut prefix: Option<&str> = None; + let mut group_node: Option = None; + let mut top_clauses: Vec = Vec::new(); + + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + match child.kind() { + "namespace_name" => { + if let Ok(t) = child.utf8_text(source) { + prefix = Some(t); + } + } + "namespace_use_group" => { + group_node = Some(child); + } + "namespace_use_clause" => { + top_clauses.push(child); + } + _ => {} + } + } + + if let Some(group) = group_node { + let pfx = prefix.unwrap_or(""); + let mut g_cursor = group.walk(); + for clause in group.named_children(&mut g_cursor) { + if clause.kind() == "namespace_use_clause" { + process_php_use_clause(clause, source, pfx, &mut policy_names); + } + } + } else { + for clause in top_clauses { + process_php_use_clause(clause, source, "", &mut policy_names); + } + } + }); + + policy_names +} + +/// Resolve one `namespace_use_clause` to its (full path, binding) pair and +/// insert the binding when the path looks policy-y. 
`prefix` is non-empty +/// only inside a group import (`use Company\Policy\{Authorize, Engine};`); +/// in the simple-form path the clause's own `qualified_name` already carries +/// the full namespace. +fn process_php_use_clause( + clause: tree_sitter::Node, + source: &[u8], + prefix: &str, + out: &mut HashSet, +) { + let alias_node = clause.child_by_field_name("alias"); + let alias = alias_node.and_then(|n| n.utf8_text(source).ok()); + + // Find the qualified_name (simple form) and the leaf `name` (either form). + // We skip the alias `name` child by node id so it isn't mistaken for the + // imported leaf. + let alias_id = alias_node.map(|n| n.id()); + let mut qname: Option = None; + let mut leaf_name: Option<&str> = None; + + let mut cursor = clause.walk(); + for child in clause.named_children(&mut cursor) { + match child.kind() { + "qualified_name" => qname = Some(child), + "name" => { + if Some(child.id()) != alias_id + && let Ok(t) = child.utf8_text(source) + { + leaf_name = Some(t); + } + } + _ => {} + } + } + + // Full path used to decide policy-ness. Group form concatenates + // prefix + "\" + leaf; simple form already has it on the qualified_name. + let full_path = if let Some(qn) = qname { + qn.utf8_text(source).unwrap_or("").to_string() + } else if let Some(leaf) = leaf_name { + if prefix.is_empty() { + leaf.to_string() + } else { + format!("{prefix}\\{leaf}") + } + } else { + return; + }; + + if !is_policy_path(&full_path) { + return; + } + + // Binding actually used in code: alias if present, otherwise the leaf + // name (the last identifier of the qualified path for the simple form, + // or the clause's bare name for the group form). 
+ let binding = if let Some(a) = alias { + a.to_string() + } else if let Some(qn) = qname { + let mut last: Option<&str> = None; + let mut c = qn.walk(); + for child in qn.named_children(&mut c) { + if child.kind() == "name" + && let Ok(t) = child.utf8_text(source) + { + last = Some(t); + } + } + match last { + Some(t) => t.to_string(), + None => return, + } + } else if let Some(leaf) = leaf_name { + leaf.to_string() + } else { + return; + }; + + out.insert(binding); +} + /// Walk the tree once, collecting `(lhs_name, rhs_source_text)` edges from /// assignment-shaped nodes. The propagation step then checks each RHS for /// any current binding and adds the LHS if it matches. @@ -599,6 +740,7 @@ fn extract_propagation_edges( Language::Python => visit_py_edge(node, source, &mut edges), Language::Java => visit_java_edge(node, source, &mut edges), Language::CSharp => visit_csharp_edge(node, source, &mut edges), + Language::Php => visit_php_edge(node, source, &mut edges), _ => {} }); @@ -975,6 +1117,87 @@ fn visit_csharp_edge(node: tree_sitter::Node, source: &[u8], edges: &mut Vec<(St } } +/// Pull a binding-shaped name out of the LHS of a PHP edge. `$x` exposes its +/// trailing identifier (`x`); `$this->guard` propagates the field name +/// (`guard`) so later `$anything->guard(...)` calls match via `\bguard\b`, +/// mirroring the Java `field_access` / TS `member_expression` treatment. 
+fn php_lhs_name(node: tree_sitter::Node, source: &[u8]) -> Option { + match node.kind() { + "variable_name" => { + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + if child.kind() == "name" { + return child.utf8_text(source).ok().map(str::to_string); + } + } + None + } + "member_access_expression" => node + .child_by_field_name("name") + .and_then(|n| n.utf8_text(source).ok()) + .map(str::to_string), + _ => None, + } +} + +fn visit_php_edge(node: tree_sitter::Node, source: &[u8], edges: &mut Vec<(String, String)>) { + match node.kind() { + // `$x = expr;`, `$this->guard = $guard;`. PHP doesn't separate + // declaration from assignment for locals — every binding update + // comes through this node. + "assignment_expression" => { + let (Some(left), Some(right)) = ( + node.child_by_field_name("left"), + node.child_by_field_name("right"), + ) else { + return; + }; + if let Some(lhs) = php_lhs_name(left, source) { + push_edge(&lhs, right.utf8_text(source).unwrap_or(""), edges); + } + } + // Constructor/method DI: `function __construct(Authorize $guard)`. + // Bind `$guard`'s identifier to the type so that an `Authorize` + // policy binding propagates into method bodies via the parameter. + "simple_parameter" => { + let (Some(name), Some(ty)) = ( + node.child_by_field_name("name"), + node.child_by_field_name("type"), + ) else { + return; + }; + if name.kind() != "variable_name" { + return; + } + let Some(lhs) = php_lhs_name(name, source) else { + return; + }; + let rhs = ty.utf8_text(source).unwrap_or(""); + push_edge(&lhs, rhs, edges); + } + // Class property with an initializer: `private $guard = Foo::DEFAULT;`. + // PHP grammar puts the initializer on the `property_element` under + // field `default_value`; the `variable_name` is field `name`. 
+ "property_element" => { + let (Some(name), Some(value)) = ( + node.child_by_field_name("name"), + node.child_by_field_name("default_value"), + ) else { + return; + }; + if name.kind() != "variable_name" { + return; + } + let Some(lhs) = php_lhs_name(name, source) else { + return; + }; + let rhs = value.utf8_text(source).unwrap_or(""); + push_edge(&lhs, rhs, edges); + } + _ => {} + } +} + fn visit_ts_js_edge(node: tree_sitter::Node, source: &[u8], edges: &mut Vec<(String, String)>) { match node.kind() { // `const x = ...`, `let x = ...`, `var x = ...`. @@ -1532,6 +1755,148 @@ using PolicyAlias = Company.Policy.Authorizer; assert!(!is_enforcement_point("User.IsInRole(\"Admin\")", &imports)); } + // ---------- PHP ---------- + + #[test] + fn php_detects_simple_class_use() { + let source = r#"role === 'admin'", &imports)); + } + + #[test] + fn php_propagates_through_constructor_di_and_field() { + // Symfony-flavoured DI: a controller injects a policy service via + // the constructor and stashes it on a private field. Both `$authz` + // (parameter) and `authz` (field assigned via `$this->authz =`) + // should propagate so later `$this->authz->isGranted(...)` calls + // count as enforcement points. + let source = r#"authz = $authz; + } + + public function check($user, $resource) { + return $this->authz->isGranted($user, $resource); + } +} +"#; + let tree = parse_lang(source, Language::Php); + let imports = find_policy_imports(&tree, source.as_bytes(), Language::Php); + assert!(imports.contains("Authorize")); + assert!( + imports.contains("authz"), + "parameter + field assignment should propagate to `authz`; got: {imports:?}", + ); + assert!(is_enforcement_point( + "$this->authz->isGranted($user, $resource)", + &imports, + )); + } + + #[test] + fn php_propagates_through_property_default_value() { + // `private $guard = Authorize::INSTANCE;` — the property element's + // default_value flows into the property's binding. 
+ let source = r#"helper = $helper; + } +} +"#; + let tree = parse_lang(source, Language::Php); + let imports = find_policy_imports(&tree, source.as_bytes(), Language::Php); + assert!(imports.is_empty(), "got: {imports:?}"); + } + // ---------- Local data-flow propagation (option #2) ---------- #[test] diff --git a/src/scanner/matcher.rs b/src/scanner/matcher.rs index d22b922..4861a41 100644 --- a/src/scanner/matcher.rs +++ b/src/scanner/matcher.rs @@ -1880,6 +1880,452 @@ end ); } + // -- PHP rule tests -- + + fn parse_and_match_php(source: &str, rule_toml: &str) -> Vec { + let rule = rules::parse_rule_for_test(rule_toml); + let mut ts_parser = tree_sitter::Parser::new(); + let lang = Language::Php; + let ts_lang = parser::get_language(lang, false).unwrap(); + let tree = parser::parse_source(&mut ts_parser, source.as_bytes(), lang, false).unwrap(); + let compiled = compile_rule(&rule, &ts_lang).unwrap(); + execute_query( + &compiled, + &tree, + source.as_bytes(), + Path::new("test.php"), + lang, + ) + .unwrap() + } + + #[test] + fn php_laravel_gate_allows_matches() { + let findings = parse_and_match_php( + r#"id === $post->user_id; }); + } +} +"#, + include_str!("../../rules/php/laravel-gate-define.toml"), + ); + assert!(!findings.is_empty(), "should match Gate::define"); + } + + #[test] + fn php_laravel_authorize_helper_matches() { + let findings = parse_and_match_php( + r#"authorize('update', $post); + $post->save(); + } +} +"#, + include_str!("../../rules/php/laravel-authorize-helper.toml"), + ); + assert!(!findings.is_empty(), "should match $this->authorize"); + assert_eq!(findings[0].category, crate::types::AuthCategory::Rbac); + } + + #[test] + fn php_laravel_can_helper_matches() { + let findings = parse_and_match_php( + r#"user()->can('view', $post)) { return view('post'); } } } +"#, + include_str!("../../rules/php/laravel-authorize-helper.toml"), + ); + assert!(!findings.is_empty(), "should match ->can()"); + } + + #[test] + fn 
php_laravel_authorize_helper_excludes_non_principal_receiver() { + // `can`/`cannot` are common method names well outside authz (network + // clients, render gates, feature toggles…). The receiver predicate is + // exactly what keeps this rule's high-confidence promise honest — a + // bare `$widget->can(...)` or `$client->cannot(...)` must NOT fire. + let findings = parse_and_match_php( + r#"can('render'); + $client->cannot('disconnect'); +} +"#, + include_str!("../../rules/php/laravel-authorize-helper.toml"), + ); + assert!( + findings.is_empty(), + "must not match non-principal receivers; got: {findings:?}", + ); + } + + #[test] + fn php_laravel_can_helper_matches_facade_chain() { + // `Auth::user()->can(...)` and `auth()->user()->can(...)` are the + // canonical Laravel non-controller idioms. The `.+->user\(\)` / + // `.+::user\(\)` receiver alternatives are what catch them. + let findings = parse_and_match_php( + r#"can('view', $post)) { return true; } + if (auth()->user()->cannot('delete', $post)) { abort(403); } +} +"#, + include_str!("../../rules/php/laravel-authorize-helper.toml"), + ); + assert_eq!( + findings.len(), + 2, + "should match both Auth::user()->can and auth()->user()->cannot; got: {findings:?}", + ); + } + + #[test] + fn php_laravel_can_helper_matches_nullsafe() { + // PHP 8 nullsafe call (`$user?->can(...)`) takes a separate grammar + // node (`nullsafe_member_call_expression`). The query alternation + // covers it explicitly so the modern idiom matches the same way. + let findings = parse_and_match_php( + r#"can('view', $post); +} +"#, + include_str!("../../rules/php/laravel-authorize-helper.toml"), + ); + assert!( + !findings.is_empty(), + "should match $user?->can(...) 
nullsafe call; got: {findings:?}", + ); + } + + #[test] + fn php_laravel_policy_class_matches() { + let findings = parse_and_match_php( + r#"id === $post->user_id; } + public function delete(User $user, Post $post) { return $user->is_admin; } +} +"#, + include_str!("../../rules/php/laravel-policy-class.toml"), + ); + // Two policy-verb methods on a *Policy class — each becomes its own + // ability finding. + assert_eq!( + findings.len(), + 2, + "policy class with two ability methods should produce two findings" + ); + } + + #[test] + fn php_laravel_policy_class_excludes_non_policy_class() { + let findings = parse_and_match_php( + r#"group(function () { + Route::get('/dashboard', [DashboardController::class, 'index']); +}); +"#, + include_str!("../../rules/php/laravel-route-middleware.toml"), + ); + assert!( + !findings.is_empty(), + "should match Route::middleware('auth')" + ); + assert_eq!(findings[0].category, crate::types::AuthCategory::Middleware); + } + + #[test] + fn php_laravel_route_middleware_matches_can_alias() { + let findings = parse_and_match_php( + r#"middleware(['auth', 'can:update,post']); +"#, + include_str!("../../rules/php/laravel-route-middleware.toml"), + ); + assert!( + !findings.is_empty(), + "should match chained ->middleware([...]) with auth/can aliases" + ); + } + + #[test] + fn php_laravel_route_middleware_excludes_throttle() { + // Throttle isn't authz — it's rate-limiting. The arg-regex gate is + // what keeps the rule from claiming every `->middleware(...)` call. 
+ let findings = parse_and_match_php( + r#"group(function () {}); +"#, + include_str!("../../rules/php/laravel-route-middleware.toml"), + ); + assert!( + findings.is_empty(), + "must not match throttle/non-authz middleware aliases" + ); + } + + #[test] + fn php_symfony_voter_class_matches() { + let findings = parse_and_match_php( + r#"denyAccessUnlessGranted('EDIT', $post); + } +} +"#, + include_str!("../../rules/php/symfony-is-granted.toml"), + ); + assert!( + !findings.is_empty(), + "should match $this->denyAccessUnlessGranted(...)" + ); + } + + #[test] + fn php_symfony_is_granted_direct_call_matches() { + let findings = parse_and_match_php( + r#"isGranted('EDIT', $post)) { + return; + } + } +} +"#, + include_str!("../../rules/php/symfony-is-granted.toml"), + ); + assert!(!findings.is_empty(), "should match $this->isGranted(...)"); + } + + #[test] + fn php_symfony_is_granted_attribute_matches() { + let findings = parse_and_match_php( + r#"role === 'admin') { return true; } } +"#, + include_str!("../../rules/php/role-equals-check.toml"), + ); + assert!( + !findings.is_empty(), + "should match `$user->role === 'admin'`" + ); + assert_eq!(findings[0].category, crate::types::AuthCategory::Rbac); + } + + #[test] + fn php_role_equals_check_loose_matches() { + // Loose `==` is also widely used in real PHP code; the alternation + // covers both forms. 
+ let findings = parse_and_match_php( + r#"account_type == "enterprise"; } +"#, + include_str!("../../rules/php/role-equals-check.toml"), + ); + assert!( + !findings.is_empty(), + "should match loose `==` role comparison" + ); + } + + #[test] + fn php_role_equals_check_excludes_non_role_property() { + let findings = parse_and_match_php( + r#"name === 'admin') { echo "hi"; } } +"#, + include_str!("../../rules/php/role-equals-check.toml"), + ); + assert!( + findings.is_empty(), + "must not match property comparisons whose name isn't role-shaped" + ); + } + + #[test] + fn php_in_array_role_check_matches() { + let findings = parse_and_match_php( + r#"roles); } +"#, + include_str!("../../rules/php/in-array-role-check.toml"), + ); + assert!( + !findings.is_empty(), + "should match `in_array('manager', $user->roles)`" + ); + } + + #[test] + fn php_in_array_role_check_excludes_unrelated_collection() { + let findings = parse_and_match_php( + r#"tags); } +"#, + include_str!("../../rules/php/in-array-role-check.toml"), + ); + assert!( + findings.is_empty(), + "must not match in_array against unrelated collections" + ); + } + + #[test] + fn php_has_role_call_matches() { + let findings = parse_and_match_php( + r#"hasRole('admin'); } +"#, + include_str!("../../rules/php/has-role-call.toml"), + ); + assert!(!findings.is_empty(), "should match `->hasRole('admin')`"); + } + + #[test] + fn php_has_role_call_excludes_unrelated_predicate() { + // `hasMany` is an Eloquent relation, not authz; the method-name + // predicate is exactly what keeps this rule from claiming + // `->hasMany('Comment')`. 
+ let findings = parse_and_match_php( + r#"hasMany('App\Comment'); } +"#, + include_str!("../../rules/php/has-role-call.toml"), + ); + assert!( + findings.is_empty(), + "must not match `->hasMany` or other unrelated predicates" + ); + } + // -- cross_predicates tests (synthetic rules) -- /// A synthetic rule shaped like ownership-check: two getters in an diff --git a/src/scanner/parser.rs b/src/scanner/parser.rs index 07729d4..616dbbd 100644 --- a/src/scanner/parser.rs +++ b/src/scanner/parser.rs @@ -19,7 +19,7 @@ pub fn get_language(lang: Language, is_tsx_jsx: bool) -> Result Ok(tree_sitter_c_sharp::LANGUAGE.into()), (Language::Kotlin, _) => Ok(tree_sitter_kotlin_ng::LANGUAGE.into()), (Language::Ruby, _) => Ok(tree_sitter_ruby::LANGUAGE.into()), - _ => Err(ZiftError::UnsupportedLanguage(lang)), + (Language::Php, _) => Ok(tree_sitter_php::LANGUAGE_PHP.into()), } } @@ -42,7 +42,6 @@ pub fn parse_source( #[cfg(test)] mod tests { use super::*; - use crate::error::ZiftError; #[test] fn parse_typescript() { @@ -154,13 +153,15 @@ public class AdminController : ControllerBase { } #[test] - fn unsupported_language_returns_error() { - // PHP has no structural grammar wired up yet — kept as the canary - // that `unsupported_language_returns_error` keeps testing what its - // name says it does. (Was Ruby before Ruby structural support; - // Kotlin before that; C# before that.) - let err = get_language(Language::Php, false).unwrap_err(); - assert!(matches!(err, ZiftError::UnsupportedLanguage(Language::Php))); - assert!(!is_language_supported(Language::Php)); + fn parse_php() { + let mut parser = tree_sitter::Parser::new(); + let source = b"authz->isGranted(...)`. + // The PHP import tracker should seed `Authorize` and propagate through + // the parameter (`authz`) and the field (`authz` again) so the + // `isGranted` call gets rerouted into the enforcement-point counter + // rather than firing the structural Symfony rule. 
+ let result = scan_fixture( + "PostController.php", + r#"authz = $authz; + } + + public function edit($post) { + return $this->authz->isGranted('EDIT', $post); + } +} +"#, + ); + + assert!( + result.enforcement_points >= 1, + "expected the PHP policy DI isGranted call to count as an enforcement point; \ + got {} (findings: {:?})", + result.enforcement_points, + result + .findings + .iter() + .map(|f| (f.pattern_rule.clone(), f.line_start)) + .collect::>(), + ); + assert!( + !result + .findings + .iter() + .any(|f| f.pattern_rule.as_deref() == Some("php-symfony-is-granted")), + "policy-routed PHP isGranted call leaked into findings: {:?}", + result.findings, + ); +} + #[test] fn in_package_policy_implementation_file_is_skipped() { // OCP case: `internal/authz/authz_test.go` lives in `package authz` and