From 357ccf797a4590bdbaacdc2b3e73ebec5d22512c Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Tue, 16 Jun 2026 09:07:36 +1000 Subject: [PATCH 1/3] feat: add security analysis support for internal composite GitHub Actions Signed-off-by: behnazh-w --- .../dataflow_analysis/analysis.py | 37 +++- .../code_analyzer/dataflow_analysis/github.py | 189 +++++++++++++++++- .../gha_security_analysis/detect_injection.py | 4 +- .../github_actions/github_actions_ci.py | 35 +++- .../ci_service/test_github_actions.py | 101 +++++++++- 5 files changed, 350 insertions(+), 16 deletions(-) diff --git a/src/macaron/code_analyzer/dataflow_analysis/analysis.py b/src/macaron/code_analyzer/dataflow_analysis/analysis.py index 1fed33070..6d729157c 100644 --- a/src/macaron/code_analyzer/dataflow_analysis/analysis.py +++ b/src/macaron/code_analyzer/dataflow_analysis/analysis.py @@ -5,7 +5,9 @@ from __future__ import annotations +import os from collections.abc import Iterable +from typing import cast from macaron.code_analyzer.dataflow_analysis import bash, core, evaluation, facts, github, printing from macaron.errors import CallGraphError @@ -51,7 +53,11 @@ def analyse_github_workflow_file(workflow_path: str, repo_path: str | None, dump def analyse_github_workflow( - workflow: github_workflow_model.Workflow, workflow_source_path: str, repo_path: str | None, dump_debug: bool = False + workflow: github_workflow_model.Workflow, + workflow_source_path: str, + repo_path: str | None, + dump_debug: bool = False, + local_action_stack: tuple[str, ...] = (), ) -> core.Node: """Perform dataflow analysis for GitHub Actions Workflow. 
@@ -75,7 +81,9 @@ def analyse_github_workflow( analysis_context = core.OwningContextRef(core.AnalysisContext(repo_path)) core.reset_debug_sequence_number() - raw_workflow_node = github.RawGitHubActionsWorkflowNode.create(workflow, analysis_context, workflow_source_path) + raw_workflow_node = github.RawGitHubActionsWorkflowNode.create( + workflow, analysis_context, workflow_source_path, local_action_stack + ) core.increment_debug_sequence_number() raw_workflow_node.analyse() @@ -89,6 +97,31 @@ def analyse_github_workflow( return raw_workflow_node +def analyse_github_composite_action_file( + action_path: str, repo_path: str | None, dump_debug: bool = False +) -> core.Node: + """Perform dataflow analysis for a standalone local composite GitHub Action metadata file. + + The action is wrapped in a synthetic workflow so existing GitHub Actions analyses can traverse + its nested ``run`` and ``uses`` steps without a separate root node type. + """ + steps = github.parse_composite_action_steps(action_path) + workflow = cast( + github_workflow_model.Workflow, + { + "name": action_path, + "on": "workflow_call", + "jobs": { + "composite-action": { + "runs-on": "ubuntu-latest", + "steps": steps, + } + }, + }, + ) + return analyse_github_workflow(workflow, action_path, repo_path, dump_debug, (os.path.abspath(action_path),)) + + def analyse_bash_script( bash_content: str, source_path: str, repo_path: str | None, dump_debug: bool = False ) -> core.Node: diff --git a/src/macaron/code_analyzer/dataflow_analysis/github.py b/src/macaron/code_analyzer/dataflow_analysis/github.py index 7ad01ab20..6962e771f 100644 --- a/src/macaron/code_analyzer/dataflow_analysis/github.py +++ b/src/macaron/code_analyzer/dataflow_analysis/github.py @@ -5,10 +5,14 @@ from __future__ import annotations +import os from collections import defaultdict from collections.abc import Callable, Iterator from dataclasses import dataclass from graphlib import TopologicalSorter +from typing import cast + +import yamale 
from macaron.code_analyzer.dataflow_analysis import ( # pylint: disable=cyclic-import bash, @@ -19,10 +23,75 @@ models, printing, ) -from macaron.errors import CallGraphError +from macaron.errors import CallGraphError, ParseError from macaron.parsers import github_workflow_model +def parse_composite_action_steps(action_path: str) -> list[github_workflow_model.Step]: + """Parse the steps from a local composite GitHub Action metadata file. + + Parameters + ---------- + action_path: str + Path to an ``action.yml`` or ``action.yaml`` metadata file. + + Returns + ------- + list[github_workflow_model.Step] + The composite action steps, or an empty list when the action is not a composite action. + + Raises + ------ + ParseError + When the action metadata file cannot be parsed. + """ + try: + parse_result = yamale.make_data(action_path, parser="ruamel") + except OSError as error: + raise ParseError("Cannot parse GitHub Action metadata: " + action_path) from error + + if len(parse_result) != 1 or not isinstance(parse_result[0][0], dict): + raise ParseError("Cannot parse GitHub Action metadata: " + action_path) + + action = parse_result[0][0] + runs = action.get("runs") + if not isinstance(runs, dict) or runs.get("using") != "composite": + return [] + + steps = runs.get("steps", []) + if not isinstance(steps, list): + raise ParseError("Cannot parse composite GitHub Action steps: " + action_path) + + return cast(list[github_workflow_model.Step], steps) + + +def resolve_local_action_metadata_path(repo_path: str | None, uses_name: str) -> str | None: + """Resolve a local ``uses: ./...`` action reference to its metadata file path.""" + if repo_path is None or not uses_name.startswith("./"): + return None + + action_dir = os.path.normpath(os.path.join(repo_path, uses_name)) + repo_root = os.path.abspath(repo_path) + abs_action_dir = os.path.abspath(action_dir) + if os.path.commonpath([repo_root, abs_action_dir]) != repo_root: + return None + + for metadata_name in ("action.yml", 
"action.yaml"): + candidate = os.path.join(action_dir, metadata_name) + if os.path.isfile(candidate): + return candidate + + return None + + +def split_uses_name_version(uses: str) -> tuple[str, str | None]: + """Split a GitHub Actions ``uses`` value into name and optional version.""" + uses_name, separator, uses_version = uses.rpartition("@") + if not separator: + return uses, None + return uses_name, uses_version + + @dataclass(frozen=True) class GitHubActionsWorkflowContext(core.Context): """Context for the top-level scope of a GitHub Actions Workflow.""" @@ -41,10 +110,14 @@ class GitHubActionsWorkflowContext(core.Context): console: core.ContextRef[facts.Scope] #: Filepath of workflow file. source_filepath: str + #: Local composite action metadata paths currently being expanded. + local_action_stack: tuple[str, ...] @staticmethod def create( - analysis_context: core.ContextRef[core.AnalysisContext], source_filepath: str + analysis_context: core.ContextRef[core.AnalysisContext], + source_filepath: str, + local_action_stack: tuple[str, ...] = (), ) -> GitHubActionsWorkflowContext: """Create a new workflow context and its associated scopes. @@ -68,6 +141,7 @@ def create( workflow_variables=core.OwningContextRef(facts.Scope("workflow_vars")), console=core.OwningContextRef(facts.Scope("console")), source_filepath=source_filepath, + local_action_stack=local_action_stack, ) def direct_refs(self) -> Iterator[core.ContextRef[core.Context] | core.ContextRef[facts.Scope]]: @@ -135,9 +209,15 @@ class GitHubActionsStepContext(core.Context): #: Name prefix for step output variables (stored in the job variables) #: belonging to this step (e.g. "steps.step_id.outputs.") output_var_prefix: str | None + #: Local composite action metadata paths currently being expanded. + local_action_stack: tuple[str, ...] 
@staticmethod - def create(job_context: core.ContextRef[GitHubActionsJobContext], step_id: str | None) -> GitHubActionsStepContext: + def create( + job_context: core.ContextRef[GitHubActionsJobContext], + step_id: str | None, + local_action_stack: tuple[str, ...] = (), + ) -> GitHubActionsStepContext: """Create a new step context and its associated scopes. Env scope inherits from outer context. Output var prefix is derived from step_id. @@ -158,6 +238,7 @@ def create(job_context: core.ContextRef[GitHubActionsJobContext], step_id: str | job_context=job_context.get_non_owned(), env=core.OwningContextRef(facts.Scope("env", job_context.ref.env.ref)), output_var_prefix=("steps." + step_id + ".outputs.") if step_id is not None else None, + local_action_stack=local_action_stack, ) def direct_refs(self) -> Iterator[core.ContextRef[core.Context] | core.ContextRef[facts.Scope]]: @@ -216,6 +297,7 @@ def create( workflow: github_workflow_model.Workflow, analysis_context: core.ContextRef[core.AnalysisContext], source_filepath: str, + local_action_stack: tuple[str, ...] = (), ) -> RawGitHubActionsWorkflowNode: """Create workflow node and its associated context. @@ -233,7 +315,7 @@ def create( RawGitHubActionsWorkflowNode The new workflow node. 
""" - workflow_context = GitHubActionsWorkflowContext.create(analysis_context, source_filepath) + workflow_context = GitHubActionsWorkflowContext.create(analysis_context, source_filepath, local_action_stack) return RawGitHubActionsWorkflowNode(workflow, core.OwningContextRef(workflow_context)) @@ -435,7 +517,7 @@ def build_normal_job() -> core.Node: if isinstance(raw_with_params, dict): def build_reusable_workflow_call_job() -> core.Node: - uses_name, _, uses_version = call_def["uses"].rpartition("@") + uses_name, uses_version = split_uses_name_version(call_def["uses"]) with_parameters: dict[str, facts.Value] = {} for key, val in raw_with_params.items(): @@ -455,7 +537,7 @@ def build_reusable_workflow_call_job() -> core.Node: self.job_id, self.context.get_non_owned(), uses_name, - uses_version if uses_version != "" else None, + uses_version, with_parameters, ) @@ -614,7 +696,14 @@ def create( steps = [ RawGitHubActionsStepNode( - step, core.OwningContextRef(GitHubActionsStepContext.create(context, step.get("id"))) + step, + core.OwningContextRef( + GitHubActionsStepContext.create( + context, + step.get("id"), + context.ref.workflow_context.ref.local_action_stack, + ) + ), ) for step in job.get("steps", []) ] @@ -793,7 +882,7 @@ def identify_interpretations(self, state: core.State) -> dict[core.Interpretatio if isinstance(raw_with_params, dict): def build_action() -> core.Node: - uses_name, _, uses_version = self.definition["uses"].rpartition("@") + uses_name, uses_version = split_uses_name_version(self.definition["uses"]) with_parameters: dict[str, facts.Value] = {} for key, val in raw_with_params.items(): @@ -812,7 +901,7 @@ def build_action() -> core.Node: self.definition, self.context.get_non_owned(), uses_name, - uses_version if uses_version != "" else None, + uses_version, with_parameters, ) @@ -893,6 +982,33 @@ def __init__( def identify_interpretations(self, state: core.State) -> dict[core.InterpretationKey, Callable[[], core.Node]]: """Intepret the 
semantics of the different supported actions.""" + if self.uses_name.startswith("./"): + action_path = resolve_local_action_metadata_path( + self.context.ref.job_context.ref.workflow_context.ref.analysis_context.ref.repo_path, + self.uses_name, + ) + if action_path: + abs_action_path = os.path.abspath(action_path) + if abs_action_path in self.context.ref.local_action_stack: + + def build_noop_for_recursive_local_action() -> core.Node: + return core.NoOpStatementNode() + + return {"default": build_noop_for_recursive_local_action} + + def build_composite_action() -> core.Node: + steps = parse_composite_action_steps(action_path) + if not steps: + return core.NoOpStatementNode() + return GitHubActionsCompositeActionNode.create( + action_path, + steps, + self.context.ref.job_context.get_non_owned(), + self.context.ref.local_action_stack + (abs_action_path,), + ) + + return {"default": build_composite_action} + match self.uses_name: case "actions/checkout": @@ -1054,6 +1170,61 @@ def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str] return result +class GitHubActionsCompositeActionNode(core.ControlFlowGraphNode): + """Control-flow-graph node representing a local composite GitHub Action.""" + + #: Path to the composite action metadata file. + action_path: str + #: Nested action steps, in execution order. + steps: list[RawGitHubActionsStepNode] + #: Control flow graph. 
+ _cfg: core.ControlFlowGraph + + def __init__(self, action_path: str, steps: list[RawGitHubActionsStepNode]) -> None: + """Initialize a local composite action node.""" + super().__init__() + self.action_path = action_path + self.steps = steps + self._cfg = core.ControlFlowGraph.create_from_sequence(steps) + + def children(self) -> Iterator[core.Node]: + """Yield the nested composite action steps.""" + yield from self.steps + + def get_entry(self) -> core.Node: + """Return the entry node.""" + return self._cfg.get_entry() + + def get_successors(self, node: core.Node, exit_type: core.ExitType) -> set[core.Node | core.ExitType]: + """Return the successors for a particular exit of a particular node.""" + return self._cfg.get_successors(node, core.DEFAULT_EXIT) + + def get_printable_properties_table(self) -> dict[str, set[tuple[str | None, str]]]: + """Return a properties table containing the local action path.""" + return {"local action": {(None, self.action_path)}} + + @staticmethod + def create( + action_path: str, + steps: list[github_workflow_model.Step], + job_context: core.NonOwningContextRef[GitHubActionsJobContext], + local_action_stack: tuple[str, ...], + ) -> GitHubActionsCompositeActionNode: + """Create a composite action node from parsed metadata steps.""" + return GitHubActionsCompositeActionNode( + action_path, + [ + RawGitHubActionsStepNode( + step, + core.OwningContextRef( + GitHubActionsStepContext.create(job_context, step.get("id"), local_action_stack) + ), + ) + for step in steps + ], + ) + + class GitHubActionsRunStepNode(core.ControlFlowGraphNode): """Control-flow-graph node representing a GitHub Actions Run Step. 
diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py index 80364ea76..d80036b8f 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py +++ b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -310,10 +310,10 @@ def _extract_statement_words(statement_node: bash.BashStatementNode) -> list[str def _extract_call_words(call_expr: CallExpr) -> list[str]: """Extract literal word values from a call expression.""" - args = call_expr["Args"] + args = call_expr.get("Args", []) words: list[str] = [] for arg in args: - parts = arg["Parts"] + parts = arg.get("Parts", []) word = "".join(part.get("Value", "") for part in parts if is_lit(part)).strip() if not word: return [] diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py index d222ee011..d270906f0 100644 --- a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py +++ b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py @@ -11,8 +11,12 @@ import traceback from datetime import datetime, timedelta, timezone -from macaron.code_analyzer.dataflow_analysis.analysis import analyse_github_workflow_file -from macaron.code_analyzer.dataflow_analysis.core import Node, NodeForest +from macaron.code_analyzer.dataflow_analysis import github +from macaron.code_analyzer.dataflow_analysis.analysis import ( + analyse_github_composite_action_file, + analyse_github_workflow_file, +) +from macaron.code_analyzer.dataflow_analysis.core import Node, NodeForest, traverse_bfs from macaron.config.defaults import defaults from macaron.config.global_config import global_config from macaron.errors import CallGraphError, GitHubActionsValueError, ParseError @@ -608,6 +612,7 @@ def build_call_graph_for_files(self, files: list[str], repo_path: str) -> NodeFo Workflows that 
raise ``ParseError`` are skipped. """ nodes: list[Node] = [] + reachable_action_paths: set[str] = set() for workflow_path in files: try: workflow_node = analyse_github_workflow_file(workflow_path, repo_path) @@ -617,8 +622,34 @@ def build_call_graph_for_files(self, files: list[str], repo_path: str) -> NodeFo logger.debug("Reason: %s", traceback.format_exc()) continue nodes.append(workflow_node) + for node in traverse_bfs(workflow_node): + if isinstance(node, github.GitHubActionsCompositeActionNode): + reachable_action_paths.add(os.path.abspath(node.action_path)) + + for action_path in self._get_local_action_metadata_files(repo_path): + if os.path.abspath(action_path) in reachable_action_paths: + continue + try: + action_node = analyse_github_composite_action_file(action_path, repo_path) + except (ParseError, CallGraphError): + logger.debug("Skip adding local action at %s to the callgraph.", action_path) + logger.debug("Reason: %s", traceback.format_exc()) + continue + nodes.append(action_node) return NodeForest(nodes) + def _get_local_action_metadata_files(self, repo_path: str) -> list[str]: + """Get root and ``.github/actions`` local action metadata files in a repository.""" + action_files = [] + for metadata_name in ("action.yml", "action.yaml"): + root_action = os.path.join(repo_path, metadata_name) + if os.path.isfile(root_action): + action_files.append(root_action) + action_files.extend( + glob.glob(os.path.join(repo_path, ".github", "actions", "**", metadata_name), recursive=True) + ) + return sorted(set(action_files)) + def get_third_party_configurations(self) -> list[str]: """Get the list of third-party CI configuration files. 
diff --git a/tests/slsa_analyzer/ci_service/test_github_actions.py b/tests/slsa_analyzer/ci_service/test_github_actions.py index 4da4f7d2a..47cb506e6 100644 --- a/tests/slsa_analyzer/ci_service/test_github_actions.py +++ b/tests/slsa_analyzer/ci_service/test_github_actions.py @@ -1,13 +1,17 @@ -# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2026, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests GitHub Actions CI service.""" +import os from datetime import datetime, timedelta from pathlib import Path import pytest +from macaron.code_analyzer.dataflow_analysis import github +from macaron.code_analyzer.dataflow_analysis.core import traverse_bfs +from macaron.code_analyzer.gha_security_analysis.detect_injection import detect_github_actions_security_issues from macaron.slsa_analyzer.ci_service.github_actions.github_actions_ci import GitHubActions mock_repos = Path(__file__).parent.joinpath("mock_repos") @@ -52,6 +56,101 @@ def test_gh_get_workflows_fail_on_jenkins(github_actions: GitHubActions) -> None assert not github_actions.get_workflows(str(jenkins_build)) +def test_build_call_graph_expands_reachable_composite_actions(github_actions: GitHubActions, tmp_path: Path) -> None: + """Nested steps in reachable local composite actions are included in the callgraph.""" + repo_path = tmp_path + workflow_dir = repo_path.joinpath(".github", "workflows") + action_dir = repo_path.joinpath(".github", "actions", "setup") + workflow_dir.mkdir(parents=True) + action_dir.mkdir(parents=True) + workflow_path = workflow_dir.joinpath("ci.yml") + workflow_path.write_text( + """ +name: ci +on: push +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: ./.github/actions/setup +""", + encoding="utf-8", + ) + action_dir.joinpath("action.yml").write_text( + """ +name: setup +runs: + using: composite + 
steps: + - uses: actions/setup-node@v4 +""", + encoding="utf-8", + ) + + callgraph = github_actions.build_call_graph_for_files([str(workflow_path)], str(repo_path)) + + action_steps = [ + node.uses_name + for root in callgraph.root_nodes + for node in traverse_bfs(root) + if isinstance(node, github.GitHubActionsActionStepNode) + ] + assert action_steps == ["./.github/actions/setup", "actions/setup-node"] + assert [finding["workflow_name"] for finding in detect_github_actions_security_issues(callgraph)] == [ + os.path.relpath(workflow_path, Path.cwd()), + ] + + +def test_build_call_graph_adds_unreachable_composite_actions_as_roots( + github_actions: GitHubActions, tmp_path: Path +) -> None: + """Unreachable local composite actions are added as independent callgraph roots.""" + repo_path = tmp_path + workflow_dir = repo_path.joinpath(".github", "workflows") + action_dir = repo_path.joinpath(".github", "actions", "unused") + workflow_dir.mkdir(parents=True) + action_dir.mkdir(parents=True) + workflow_path = workflow_dir.joinpath("ci.yml") + workflow_path.write_text( + """ +name: ci +on: push +jobs: + build: + runs-on: ubuntu-latest + steps: + - run: echo ok +""", + encoding="utf-8", + ) + action_path = action_dir.joinpath("action.yaml") + action_path.write_text( + """ +name: unused +runs: + using: composite + steps: + - uses: actions/cache@v4 +""", + encoding="utf-8", + ) + + callgraph = github_actions.build_call_graph_for_files([str(workflow_path)], str(repo_path)) + + root_paths = [ + node.context.ref.source_filepath + for root in callgraph.root_nodes + for node in traverse_bfs(root) + if isinstance(node, github.GitHubActionsWorkflowNode) + ] + assert root_paths == [str(workflow_path), str(action_path)] + assert any( + isinstance(node, github.GitHubActionsActionStepNode) and node.uses_name == "actions/cache" + for root in callgraph.root_nodes + for node in traverse_bfs(root) + ) + + @pytest.mark.parametrize( ("started_at", "publish_date_time", "commit_date_time", 
"time_range", "expected"), [ From 975932743df7fa8e8a76433381461b380ff8079c Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Thu, 25 Jun 2026 14:35:13 +1000 Subject: [PATCH 2/3] feat: add composite action inputs to taint sources and support eval Signed-off-by: behnazh-w --- .../gha_security_analysis/detect_injection.py | 246 +++++++++++++++++- .../gha_security_analysis/recommendation.py | 4 + .../test_gha_security_analysis.py | 49 ++++ 3 files changed, 298 insertions(+), 1 deletion(-) diff --git a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py index d80036b8f..8ebdccb27 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py +++ b/src/macaron/code_analyzer/gha_security_analysis/detect_injection.py @@ -6,6 +6,7 @@ import json import os import re +import shlex from typing import TypedDict, cast from macaron.code_analyzer.dataflow_analysis import bash, core, facts @@ -232,7 +233,10 @@ def _append_action_step_findings( if "pull_request_target" in on_keys and ref in UNTRUSTED_PR_REFS: _add_finding( findings, - f"pr-target-untrusted-checkout: Workflow uses pull_request_target and checks out PR-controlled ref `{ref}`.", + ( + "pr-target-untrusted-checkout: Workflow uses pull_request_target and checks out " + f"PR-controlled ref `{ref}`." + ), PRIORITY_CRITICAL, ) @@ -241,6 +245,8 @@ def _append_run_step_findings( findings: list[PrioritizedIssue], run_step_node: GitHubActionsRunStepNode, nodes: NodeForest ) -> None: """Append findings derived from a run step node.""" + append_composite_action_script_injection_findings(findings, run_step_node) + # Traversing a run-step subgraph can reach semantically identical command nodes through # multiple CFG/AST paths (for example nested/compound command structures). Track emitted # injection findings by stable metadata to avoid duplicate reports for the same command line. 
@@ -256,6 +262,244 @@ def _append_run_step_findings( _append_remote_script_exec_findings(findings, node, run_step_node, nodes) +def append_composite_action_script_injection_findings( + findings: list[PrioritizedIssue], + run_step_node: GitHubActionsRunStepNode, +) -> None: + """Append findings for composite action inputs reaching ``eval`` in local scripts. + + Parameters + ---------- + findings : list[PrioritizedIssue] + The finding collection to update. + run_step_node : GitHubActionsRunStepNode + The composite action run step to inspect. + """ + tainted_env_vars = extract_env_vars_from_action_inputs(run_step_node.definition.get("env")) + if not tainted_env_vars: + return + + for script_path in extract_local_shell_scripts_from_run_step(run_step_node): + eval_findings = find_eval_of_tainted_shell_values(script_path, tainted_env_vars) + for script_line, command_text, tainted_values in eval_findings: + workflow_line = _extract_run_step_line(run_step_node) + issue_payload = { + "step_line": workflow_line, + "script_path": os.path.relpath(script_path, os.getcwd()), + "script_line": script_line, + "step": _extract_step_name(run_step_node), + "command": command_text, + "tainted_env": sorted(tainted_values), + } + _add_finding( + findings, + f"composite-action-script-injection: {json.dumps(issue_payload)}", + PRIORITY_CRITICAL, + ) + + +def extract_env_vars_from_action_inputs(env: object) -> set[str]: + """Return env var names whose values come from composite action inputs. + + Parameters + ---------- + env : object + A parsed GitHub Actions ``env`` block. + + Returns + ------- + set[str] + Environment variable names assigned from ``${{ inputs.* }}`` expressions. 
+ """ + if not isinstance(env, dict): + return set() + + result: set[str] = set() + for key, value in env.items(): + if not isinstance(key, str) or not isinstance(value, str): + continue + if re.search(r"\$\{\{\s*inputs\.[A-Za-z0-9_-]+", value): + result.add(key) + return result + + +def extract_local_shell_scripts_from_run_step(run_step_node: GitHubActionsRunStepNode) -> list[str]: + """Resolve local shell scripts invoked by a composite action run step. + + Parameters + ---------- + run_step_node : GitHubActionsRunStepNode + The run step whose shell command should be inspected. + + Returns + ------- + list[str] + Absolute paths to local ``.sh`` scripts invoked by the run step. + """ + run_script = run_step_node.definition["run"] + workflow_path = run_step_node.context.ref.job_context.ref.workflow_context.ref.source_filepath + if not workflow_path: + return [] + + action_dir = os.path.dirname(os.path.abspath(workflow_path)) + script_paths: list[str] = [] + for line in run_script.splitlines(): + try: + words = shlex.split(line, comments=True) + except ValueError: + continue + for word in words: + if not word.endswith(".sh"): + continue + resolved = resolve_local_action_script_path(word, action_dir) + if resolved and resolved not in script_paths: + script_paths.append(resolved) + return script_paths + + +def resolve_local_action_script_path(path: str, action_dir: str) -> str | None: + """Resolve a script path that may use ``GITHUB_ACTION_PATH``. + + Parameters + ---------- + path : str + The script path from the run step. + action_dir : str + Absolute path to the composite action directory. + + Returns + ------- + str | None + The absolute script path when it exists under ``action_dir``; otherwise ``None``. 
+ """ + expanded = path.replace("${GITHUB_ACTION_PATH}", action_dir).replace("$GITHUB_ACTION_PATH", action_dir) + if not os.path.isabs(expanded): + expanded = os.path.join(action_dir, expanded) + + normalized = os.path.abspath(os.path.normpath(expanded)) + if os.path.commonpath([action_dir, normalized]) != action_dir: + return None + if not os.path.isfile(normalized): + return None + return normalized + + +def find_eval_of_tainted_shell_values(script_path: str, tainted_env_vars: set[str]) -> list[tuple[int, str, set[str]]]: + """Find ``eval`` commands whose arguments can include tainted action inputs. + + Parameters + ---------- + script_path : str + Path to the shell script to inspect. + tainted_env_vars : set[str] + Environment variables assigned from composite action inputs. + + Returns + ------- + list[tuple[int, str, set[str]]] + Tuples of script line, command text, and tainted shell values reaching ``eval``. + """ + try: + with open(script_path, encoding="utf-8") as script_file: + lines = script_file.readlines() + except OSError: + return [] + + tainted_shell_vars: set[str] = set() + findings: list[tuple[int, str, set[str]]] = [] + for line_number, line in enumerate(lines, start=1): + stripped = line.strip() + if not stripped or stripped.startswith("#"): + continue + + assigned_var = extract_shell_assignment_name(stripped) + if assigned_var and shell_text_references_any_var(stripped, tainted_env_vars | tainted_shell_vars): + tainted_shell_vars.add(assigned_var) + + if not is_eval_command(stripped): + continue + + tainted_refs = referenced_vars(stripped, tainted_env_vars | tainted_shell_vars) + if tainted_refs: + findings.append((line_number, stripped, tainted_refs)) + return findings + + +def extract_shell_assignment_name(line: str) -> str | None: + """Extract the assigned variable name from a simple shell assignment. + + Parameters + ---------- + line : str + Shell source line. 
+ + Returns + ------- + str | None + The assigned variable name, or ``None`` when the line is not a simple assignment. + """ + match = re.match(r"^(?:local\s+|export\s+|readonly\s+)?([A-Za-z_][A-Za-z0-9_]*)\+?=", line) + if not match: + return None + return match.group(1) + + +def is_eval_command(line: str) -> bool: + """Return whether a shell line invokes ``eval`` as a command. + + Parameters + ---------- + line : str + Shell source line. + + Returns + ------- + bool + ``True`` when the line invokes ``eval``. + """ + return bool(re.search(r"(?:^|[;&|]\s*|\b(?:if|then|do)\s+)eval(?:\s|$)", line)) + + +def shell_text_references_any_var(text: str, variables: set[str]) -> bool: + """Return whether shell text references any variable. + + Parameters + ---------- + text : str + Shell text to inspect. + variables : set[str] + Variable names to search for. + + Returns + ------- + bool + ``True`` when ``text`` references at least one variable. + """ + return bool(referenced_vars(text, variables)) + + +def referenced_vars(text: str, variables: set[str]) -> set[str]: + """Return shell variables referenced as ``$VAR`` or ``${VAR}``. + + Parameters + ---------- + text : str + Shell text to inspect. + variables : set[str] + Variable names to search for. + + Returns + ------- + set[str] + Variables referenced in ``text``. 
+ """ + refs: set[str] = set() + for variable in variables: + if re.search(rf"\$(?:\{{{re.escape(variable)}(?::[-=?+][^}}]*)?\}}|{re.escape(variable)}\b)", text): + refs.add(variable) + return refs + + def _append_remote_script_exec_findings( findings: list[PrioritizedIssue], pipe_node: bash.BashPipeNode, diff --git a/src/macaron/code_analyzer/gha_security_analysis/recommendation.py b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py index 332add599..82dcf8d1d 100644 --- a/src/macaron/code_analyzer/gha_security_analysis/recommendation.py +++ b/src/macaron/code_analyzer/gha_security_analysis/recommendation.py @@ -192,6 +192,10 @@ def recommend_for_workflow_issue(issue: str) -> Recommendation: return Recommendation("Never combine pull_request_target with checkout of PR-controlled refs.") if issue.startswith("potential-injection:"): return Recommendation("Treat GitHub context data as untrusted input; quote/sanitize before shell execution.") + if issue.startswith("composite-action-script-injection:"): + return Recommendation( + "Do not pass composite action inputs into shell commands evaluated by eval; build commands as argv arrays." 
+ ) return Recommendation("Review this workflow finding and apply least-privilege hardening controls.") diff --git a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py index cf4990a16..a0cc3609f 100644 --- a/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py +++ b/tests/code_analyzer/gha_security_analysis/test_gha_security_analysis.py @@ -38,6 +38,55 @@ def test_detect_github_actions_security_issues( assert detect_github_actions_security_issues(callgraph) == snapshot +def test_detect_composite_action_input_eval_in_external_script(tmp_path: Path) -> None: + """Detect composite action inputs flowing through env into eval in a local shell script.""" + scripts_dir = tmp_path / "scripts" + scripts_dir.mkdir() + script_path = scripts_dir / "run.sh" + script_path.write_text( + "\n".join( + [ + "#!/usr/bin/env bash", + "CMD=\"tool --package-url ${PACKAGE_URL}\"", + "if eval \"$CMD\"; then", + " echo ok", + "fi", + ] + ), + encoding="utf-8", + ) + (tmp_path / "action.yaml").write_text( + "\n".join( + [ + "name: vulnerable composite action", + "inputs:", + " package_url:", + " description: Package URL", + "runs:", + " using: composite", + " steps:", + " - name: Run helper", + " run: bash \"$GITHUB_ACTION_PATH/scripts/run.sh\"", + " shell: bash", + " env:", + " PACKAGE_URL: ${{ inputs.package_url }}", + ] + ), + encoding="utf-8", + ) + + callgraph = GitHubActions().build_call_graph_for_files([], str(tmp_path)) + findings = detect_github_actions_security_issues(callgraph) + issues = [issue["issue"] for finding in findings for issue in finding["issues"]] + + assert any( + issue.startswith("composite-action-script-injection:") + and "scripts/run.sh" in issue + and 'if eval \\"$CMD\\"; then' in issue + for issue in issues + ) + + def test_extract_workflow_issue_line_from_potential_injection() -> None: """Extract the source line from a potential-injection issue 
payload.""" issue = ( From 416e7b14ae95ab3d81d1266a309984cbbd87a8f9 Mon Sep 17 00:00:00 2001 From: behnazh-w Date: Mon, 29 Jun 2026 09:22:44 +1000 Subject: [PATCH 3/3] test: trigger the internal action Signed-off-by: behnazh-w --- .github/workflows/test_macaron_action.yaml | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/.github/workflows/test_macaron_action.yaml b/.github/workflows/test_macaron_action.yaml index dd580d425..fb7b8d225 100644 --- a/.github/workflows/test_macaron_action.yaml +++ b/.github/workflows/test_macaron_action.yaml @@ -411,3 +411,44 @@ jobs: echo "Expected verify step to fail, but it did not." exit 1 fi + + test-detect-composite-injection: + name: Detect injection in composite GitHub Actions + runs-on: ubuntu-latest + env: + MACARON_IMAGE_TAG: ${{ inputs.macaron_image_tag }} + DOCKER_PULL: never + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Download test Docker image artifact + if: ${{ inputs.docker_image_artifact_name != '' }} + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: ${{ inputs.docker_image_artifact_name }} + path: /tmp + - name: Load test Docker image + if: ${{ inputs.docker_image_artifact_name != '' }} + run: docker load --input /tmp/macaron-test-image.tar + + - name: Run Macaron (analyze github_actions_vulns for https://github.com/oracle/macaron) + id: verify_github_actions_vulns_repo_test + # This integration target is intentionally vulnerable; failure is expected. 
+ continue-on-error: true + uses: ./ + with: + repo_path: https://github.com/oracle/macaron + digest: 59bfc67471b8c95afc816faf71760a51efff91a5 + policy_file: check-github-actions + policy_purl: pkg:github.com/oracle/macaron@.* + output_dir: macaron_output/detect_vulnerable_github_actions + upload_reports: 'false' + reports_artifact_name: macaron-injection-actions-fail-diagnosis + write_job_summary: 'true' + - name: Assert expected failure (github_actions_vulns for repo test) + if: ${{ always() }} + run: | + # Explicitly assert failure so regressions are visible in CI results. + if [ "${{ steps.verify_github_actions_vulns_repo_test.outcome }}" != "failure" ]; then + echo "Expected verify step to fail, but it did not." + exit 1 + fi