Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/skillspector/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

# Exposed for analyzers that need a final fallback symbol (e.g.,
# ``model = state.model or MODEL_CONFIG[ANALYZER_ID] or _SKILLSPECTOR_DEFAULT_MODEL``).
_SKILLSPECTOR_DEFAULT_MODEL = _provider.DEFAULT_MODEL # type: ignore[attr-defined]
_SKILLSPECTOR_DEFAULT_MODEL = _provider.DEFAULT_MODEL

_MODEL_SLOTS: tuple[str, ...] = (
"default",
Expand Down
25 changes: 15 additions & 10 deletions src/skillspector/nodes/analyzers/mcp_rug_pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,10 @@
# RP2: Manifest-permission pre-staging
_PERMISSION_EXPANSION_PATTERNS = [
(r'"permissions?"\s*:\s*\[[^\]]*\]', 0.60),
(r"(?:add|grant|request|require)\s+(?:new|additional|extra|more)\s+(?:permissions?|tools?|access)", 0.70),
(
r"(?:add|grant|request|require)\s+(?:new|additional|extra|more)\s+(?:permissions?|tools?|access)",
0.70,
),
]


Expand Down Expand Up @@ -121,7 +124,7 @@ def _check_rp1(manifest: dict, file_cache: dict[str, str]) -> list[Finding]:
line_end = content.find("\n", m.end())
if line_end == -1:
line_end = len(content)
line_remainder = content[m.end():line_end]
line_remainder = content[m.end() : line_end]
if _VERSION_PIN_RE.search(full_match + line_remainder):
continue
line_num = _find_line(content, m.start())
Expand Down Expand Up @@ -151,7 +154,7 @@ def _check_rp1(manifest: dict, file_cache: dict[str, str]) -> list[Finding]:
line_end = content.find("\n", m.end())
if line_end == -1:
line_end = len(content)
line_remainder = content[m.end():line_end]
line_remainder = content[m.end() : line_end]
if _VERSION_PIN_RE.search(full_match + line_remainder):
continue
line_num = _find_line(content, m.start())
Expand All @@ -167,8 +170,7 @@ def _check_rp1(manifest: dict, file_cache: dict[str, str]) -> list[Finding]:
tags=list(_TAGS),
matched_text=full_match[:200],
explanation=(
"uvx/uv tool run commands without ==version create "
"a rug-pull risk."
"uvx/uv tool run commands without ==version create a rug-pull risk."
),
remediation="Pin the version: uvx package-name==1.2.3",
)
Expand All @@ -180,7 +182,7 @@ def _check_rp1(manifest: dict, file_cache: dict[str, str]) -> list[Finding]:
line_end = content.find("\n", m.end())
if line_end == -1:
line_end = len(content)
line_remainder = content[m.end():line_end]
line_remainder = content[m.end() : line_end]
if _VERSION_PIN_RE.search(full_match + line_remainder):
continue
pkg = m.group(1)
Expand Down Expand Up @@ -239,8 +241,7 @@ def _check_rp1(manifest: dict, file_cache: dict[str, str]) -> list[Finding]:
Finding(
rule_id="RP1",
message=(
f"Manifest references MCP server without version pin: "
f"'{m.group(0).strip()}'."
f"Manifest references MCP server without version pin: '{m.group(0).strip()}'."
),
severity="MEDIUM",
confidence=0.70,
Expand Down Expand Up @@ -463,7 +464,9 @@ def node(state: SkillspectorState) -> AnalyzerNodeResponse:
prev_prop = prev_params[name]
prop_diffs = []
if curr_prop["type"] != prev_prop["type"]:
prop_diffs.append(f"type changed from {prev_prop['type']} to {curr_prop['type']}")
prop_diffs.append(
f"type changed from {prev_prop['type']} to {curr_prop['type']}"
)
if curr_prop["default"] != prev_prop["default"]:
prop_diffs.append(
f"default changed from {prev_prop['default']} to {curr_prop['default']}"
Expand All @@ -478,7 +481,9 @@ def node(state: SkillspectorState) -> AnalyzerNodeResponse:
if added_params:
changes.append(f"added: {', '.join(curr_params[p]['name'] for p in added_params)}")
if removed_params:
changes.append(f"removed: {', '.join(prev_params[p]['name'] for p in removed_params)}")
changes.append(
f"removed: {', '.join(prev_params[p]['name'] for p in removed_params)}"
)
if changed_params:
changes.append(f"modified: {', '.join(changed_params)}")

Expand Down
2 changes: 1 addition & 1 deletion src/skillspector/nodes/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,4 +681,4 @@ def report(state: SkillspectorState) -> dict[str, object]:
"filtered_findings": filtered_findings,
"suppressed_findings": suppressed,
}
return out
return out
8 changes: 7 additions & 1 deletion src/skillspector/providers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from __future__ import annotations

from typing import Protocol
from typing import ClassVar, Protocol

from langchain_core.language_models.chat_models import BaseChatModel

Expand All @@ -31,8 +31,14 @@ class ModelMetadataProvider(Protocol):
``resolve_model`` runs the per-provider waterfall:
``SKILLSPECTOR_MODEL`` env var → provider's slot-specific default →
provider's general default. Always returns a non-empty string.

``DEFAULT_MODEL`` is the provider's general default model label.
``SLOT_DEFAULTS`` maps specific slots to their preferred models.
"""

DEFAULT_MODEL: ClassVar[str]
SLOT_DEFAULTS: ClassVar[dict[str, str]]

def get_context_length(self, model: str) -> int | None: ...

def get_max_output_tokens(self, model: str) -> int | None: ...
Expand Down
29 changes: 29 additions & 0 deletions src/skillspector/providers/chat_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,38 @@

from __future__ import annotations

import logging
from urllib.parse import urlparse

from langchain_core.language_models.chat_models import BaseChatModel
from langchain_openai import ChatOpenAI
from pydantic import SecretStr

logger = logging.getLogger(__name__)


def validate_base_url(url: str | None) -> None:
"""Warn if *url* is not a well-formed http(s) URL.

Raises nothing — misconfigured URLs will still fail at the HTTP
layer, but an early warning helps operators catch typos.
"""
if url is None:
return
parsed = urlparse(url)
if parsed.scheme not in ("http", "https"):
logger.warning(
"Provider base_url %r has scheme %r — expected http or https. "
"Requests will likely fail.",
url,
parsed.scheme or "(empty)",
)
if not parsed.netloc:
logger.warning(
"Provider base_url %r has no host component. Requests will likely fail.",
url,
)


def create_openai_compatible_chat_model(
*,
Expand All @@ -35,6 +63,7 @@ def create_openai_compatible_chat_model(
return None

api_key, base_url = credentials
validate_base_url(base_url)
return ChatOpenAI(
model=model,
base_url=base_url,
Expand Down
1 change: 0 additions & 1 deletion tests/nodes/test_meta_analyzer_fallback.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ def test_low_severity_below_threshold_still_dropped(self) -> None:
result = _fallback_filtered(findings)
assert len(result) == 0


def test_none_severity_treated_as_low(self) -> None:
"""Finding with None severity does not crash — treated as LOW."""
findings = [_finding(confidence=0.8, severity=None)]
Expand Down
11 changes: 9 additions & 2 deletions tests/nodes/test_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,7 @@ def test_report_no_baseline_unchanged() -> None:
assert result["risk_score"] == 50
assert result["suppressed_findings"] == []


def test_report_executable_scripts_multiplier() -> None:
"""1.3x multiplier applied only to findings from executable files."""
# 2 HIGH findings in run.py = 2 × 25 × 1.3 = 65 (float-based accumulation)
Expand Down Expand Up @@ -674,8 +675,14 @@ def test_report_doc_findings_no_multiplier() -> None:
_finding("P2", "HIGH", file="SKILL.md"),
],
"component_metadata": [
{"path": "SKILL.md", "type": "markdown", "lines": 10, "executable": False, "size_bytes": 500},
{"path": "run.py", "type": "python", "lines": 5, "executable": True, "size_bytes": 200}
{
"path": "SKILL.md",
"type": "markdown",
"lines": 10,
"executable": False,
"size_bytes": 500,
},
{"path": "run.py", "type": "python", "lines": 5, "executable": True, "size_bytes": 200},
],
"has_executable_scripts": True,
"manifest": {},
Expand Down
84 changes: 51 additions & 33 deletions tests/test_mcp_rug_pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
from skillspector.state import SkillspectorState


def _state(manifest: dict | None = None, file_cache: dict[str, str] | None = None) -> SkillspectorState:
def _state(
manifest: dict | None = None, file_cache: dict[str, str] | None = None
) -> SkillspectorState:
state: SkillspectorState = {}
if manifest is not None:
state["manifest"] = manifest
Expand All @@ -32,78 +34,94 @@ def _state(manifest: dict | None = None, file_cache: dict[str, str] | None = Non

def test_rp1_npx_unpinned():
"""RP1 detects npx without @version suffix."""
result = node(_state(
manifest={"name": "test-skill"},
file_cache={"setup.sh": "npx @scope/mcp-server\n"},
))
result = node(
_state(
manifest={"name": "test-skill"},
file_cache={"setup.sh": "npx @scope/mcp-server\n"},
)
)
rp1 = [f for f in result["findings"] if f.rule_id == "RP1"]
assert len(rp1) == 1
assert "npx @scope/mcp-server" in rp1[0].matched_text


def test_rp1_npx_pinned_no_finding():
"""RP1 does not fire when npx has @version."""
result = node(_state(
manifest={"name": "test-skill"},
file_cache={"setup.sh": "npx @scope/mcp-server@1.2.3\n"},
))
result = node(
_state(
manifest={"name": "test-skill"},
file_cache={"setup.sh": "npx @scope/mcp-server@1.2.3\n"},
)
)
rp1 = [f for f in result["findings"] if f.rule_id == "RP1"]
assert len(rp1) == 0


def test_rp1_uvx_unpinned():
"""RP1 detects uvx without ==version."""
result = node(_state(
manifest={"name": "test-skill"},
file_cache={"install.sh": "uvx my-mcp-server\n"},
))
result = node(
_state(
manifest={"name": "test-skill"},
file_cache={"install.sh": "uvx my-mcp-server\n"},
)
)
rp1 = [f for f in result["findings"] if f.rule_id == "RP1"]
assert len(rp1) >= 1
assert any("uvx" in f.matched_text for f in rp1)


def test_rp1_docker_unpinned():
"""RP1 detects docker run without tag."""
result = node(_state(
manifest={"name": "test-skill"},
file_cache={"Dockerfile": "FROM org/mcp-server\n"},
))
node(
_state(
manifest={"name": "test-skill"},
file_cache={"Dockerfile": "FROM org/mcp-server\n"},
)
)
# RP1 docker pattern matches "docker pull|run|create"
# FROM in Dockerfile isn't matched by our regex, so update test
result2 = node(_state(
manifest={"name": "test-skill"},
file_cache={"setup.sh": "docker run org/mcp-server\n"},
))
result2 = node(
_state(
manifest={"name": "test-skill"},
file_cache={"setup.sh": "docker run org/mcp-server\n"},
)
)
rp1 = [f for f in result2["findings"] if f.rule_id == "RP1"]
assert len(rp1) >= 1


def test_rp1_multiple_patterns():
"""Multiple unpinned references produce multiple RP1 findings."""
result = node(_state(
manifest={"name": "test-skill"},
file_cache={
"setup.sh": "npx @scope/server-a\nnpx @org/server-b\n",
},
))
result = node(
_state(
manifest={"name": "test-skill"},
file_cache={
"setup.sh": "npx @scope/server-a\nnpx @org/server-b\n",
},
)
)
rp1 = [f for f in result["findings"] if f.rule_id == "RP1"]
assert len(rp1) == 2


def test_rp3_version_wildcard():
"""RP3 detects wildcard version."""
result = node(_state(
manifest={"version": "*", "name": "test"},
))
result = node(
_state(
manifest={"version": "*", "name": "test"},
)
)
rp3 = [f for f in result["findings"] if f.rule_id == "RP3"]
assert len(rp3) >= 1


def test_rp3_version_ok_no_finding():
"""RP3 does not fire on pinned version."""
result = node(_state(
manifest={"version": "1.2.3", "name": "test"},
))
result = node(
_state(
manifest={"version": "1.2.3", "name": "test"},
)
)
rp3 = [f for f in result["findings"] if f.rule_id == "RP3"]
assert len(rp3) == 0

Expand Down
16 changes: 4 additions & 12 deletions tests/unit/test_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,7 @@ def test_unset_slot_env_falls_through_to_provider(self) -> None:
# Without any slot override, the provider's resolve_model() runs.
assert mod.MODEL_CONFIG["default"] == mod._SKILLSPECTOR_DEFAULT_MODEL

def test_multiple_slots_independently_overridden(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
def test_multiple_slots_independently_overridden(self, monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setenv("SKILLSPECTOR_MODEL_META_ANALYZER", "model-a")
monkeypatch.setenv("SKILLSPECTOR_MODEL_SEMANTIC_DEVELOPER_INTENT", "model-b")
mod = _reload_constants()
Expand Down Expand Up @@ -118,9 +116,7 @@ def test_known_model_no_warning(
with caplog.at_level(logging.WARNING, logger="skillspector.constants"):
mod = _reload_constants()
default_model = mod._SKILLSPECTOR_DEFAULT_MODEL
warnings_for_default = [
r for r in caplog.records if default_model in r.message
]
warnings_for_default = [r for r in caplog.records if default_model in r.message]
assert len(warnings_for_default) == 0

def test_strict_validation_raises_on_unknown_model(
Expand All @@ -139,17 +135,13 @@ def test_strict_validation_passes_with_known_model(
mod = _reload_constants()
assert mod.MODEL_CONFIG["default"] == mod._SKILLSPECTOR_DEFAULT_MODEL

def test_strict_validation_disabled_by_default(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
def test_strict_validation_disabled_by_default(self, monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setenv("SKILLSPECTOR_MODEL", "nonexistent-model")
# No SKILLSPECTOR_STRICT_MODEL_VALIDATION set — should warn, not raise.
mod = _reload_constants()
assert mod.MODEL_CONFIG["default"] == "nonexistent-model"

def test_strict_validation_case_insensitive(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
def test_strict_validation_case_insensitive(self, monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setenv("SKILLSPECTOR_MODEL", "nonexistent-model")
monkeypatch.setenv("SKILLSPECTOR_STRICT_MODEL_VALIDATION", "True")
with pytest.raises(ValueError, match="Strict model validation enabled"):
Expand Down
Loading
Loading