Skip to content

Commit 7aa6224

Browse files
feat(curl): config-driven URL allowlist to bypass schema-mode rewrite
RTK rewrites every `curl URL` to `rtk curl URL`, which pipes the response through `rtk json --schema`; that produces field-type literals (`field: int`) and a `(N)` array-length suffix. That's a token-savings win for human-facing exploration of arbitrary third-party APIs, but it actively breaks downstream JSON parsing (jq, `python json.load`, agent-side filtering, anything that round-trips JSON) for private or internal APIs whose responses are consumed by parsers rather than read by humans.

Today the only escape hatch is `[hooks] exclude_commands = ["curl"]`, which disables curl rewriting entirely — losing the token savings on the third-party APIs that motivated the rewrite in the first place.

This change adds a narrow opt-in mechanism: a list of URL substring markers that opts specific URLs out of the rewrite while leaving everything else rewritten as before.

    [curl]
    bypass_url_markers = [
        "localhost:8080/api/",
        "//internal.example.com/v1/",
    ]

Each marker is matched as a substring against the full command segment (after env-prefix stripping), so it composes cleanly with curl's flag positioning (`-X POST`, `-H`, `-d`, etc). Bypass is per-segment for compound commands: in `a && curl <internal> | jq`, the curl segment is passed through unchanged, while `a` is still rewritten as normal and the pipe target `jq` stays raw, as before.

Default `bypass_url_markers = []` preserves historical behavior — users who don't configure anything see no change.

API surface
-----------
* New `crate::core::config::CurlConfig { bypass_url_markers: Vec<String> }`
* New `crate::discover::registry::RewriteOptions { curl_bypass_url_markers }`
* New `rewrite_command_with_options(cmd, excluded, &opts)` — takes options explicitly, useful for tests and callers that already have config in hand.
* Existing `rewrite_command(cmd, excluded)` keeps its signature for callers but now reads `Config.curl.bypass_url_markers` internally and forwards to `rewrite_command_with_options`.
Mirrors the existing #196 bypass shape for `gh --json/--jq/--template`: detect a structured-output consumer and skip schema-mode filtering.

Tests
-----
* 3 new config tests (default empty / `[curl]` deserialization / missing section).
* 9 new registry tests covering: localhost / loopback / hostname marker bypass; POST with headers + payload; multi-marker OR; default-empty preserves rewrite; unmatched URL still rewrites; port-specific narrowness; per-segment behavior in compound commands.
* Full suite: 1699 passed, 0 failed, 6 ignored.
1 parent 4338f02 commit 7aa6224

2 files changed

Lines changed: 283 additions & 10 deletions

File tree

src/core/config.rs

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ pub struct Config {
2121
pub hooks: HooksConfig,
2222
#[serde(default)]
2323
pub limits: LimitsConfig,
24+
#[serde(default)]
25+
pub curl: CurlConfig,
2426
}
2527

2628
#[derive(Debug, Serialize, Deserialize, Default)]
@@ -31,6 +33,32 @@ pub struct HooksConfig {
3133
pub exclude_commands: Vec<String>,
3234
}
3335

36+
/// Settings deserialized from the `[curl]` table of the RTK config file.
#[derive(Debug, Serialize, Deserialize, Default)]
37+
pub struct CurlConfig {
38+
/// URL substring markers that opt-out a `curl` command from RTK's
39+
/// `rtk curl ... --schema` rewrite. Use this for private/internal
40+
/// JSON APIs whose responses are consumed by parsers (jq, agents,
41+
/// downstream code) that need raw JSON rather than RTK's schema
42+
/// summary.
43+
///
44+
/// Each marker is matched as a plain substring against one command
45+
/// segment at a time; in a compound command only a segment that
46+
/// contains a marker bypasses the rewrite. Defaults to empty — RTK's
47+
/// default behavior of rewriting all `curl` invocations is unchanged
48+
/// unless you opt in.
///
/// NOTE(review): matching is a plain `contains` check, so an empty-string
/// marker would presumably match every `curl` segment — confirm whether
/// empty entries should be rejected or ignored.
49+
///
50+
/// Example:
51+
/// ```toml
52+
/// [curl]
53+
/// bypass_url_markers = [
54+
/// "localhost:8080/api/",
55+
/// "//internal.example.com/",
56+
/// ]
57+
/// ```
58+
#[serde(default)]
59+
pub bypass_url_markers: Vec<String>,
60+
}
61+
3462
#[derive(Debug, Serialize, Deserialize)]
3563
pub struct TrackingConfig {
3664
pub enabled: bool,
@@ -233,6 +261,39 @@ enabled = true
233261
assert!(config.telemetry.consent_given.is_none());
234262
}
235263

264+
#[test]
265+
fn test_curl_config_default_empty() {
// `Config::default()` must not pre-populate any bypass markers; an empty
// list means no `curl` rewrite is skipped.
266+
let config = Config::default();
267+
assert!(
268+
config.curl.bypass_url_markers.is_empty(),
269+
"default curl.bypass_url_markers must be empty (opt-in only)"
270+
);
271+
}
272+
273+
#[test]
274+
fn test_curl_config_deserialize() {
// A `[curl]` table in the TOML input must populate `bypass_url_markers`
// with the listed strings, in the order written.
275+
let toml = r#"
276+
[curl]
277+
bypass_url_markers = ["localhost:8080/", "//api.example.com/v1/"]
278+
"#;
279+
let config: Config = toml::from_str(toml).expect("valid toml");
280+
assert_eq!(
281+
config.curl.bypass_url_markers,
282+
vec!["localhost:8080/", "//api.example.com/v1/"]
283+
);
284+
}
285+
286+
#[test]
287+
fn test_curl_config_missing_section_is_valid() {
// The input has no `[curl]` table at all: the `#[serde(default)]` on
// `Config::curl` must supply an empty `CurlConfig` instead of failing to
// parse, so existing config files keep working.
288+
let toml = r#"
289+
[tracking]
290+
enabled = true
291+
history_days = 90
292+
"#;
293+
let config: Config = toml::from_str(toml).expect("valid toml");
294+
assert!(config.curl.bypass_url_markers.is_empty());
295+
}
296+
236297
#[test]
237298
fn test_telemetry_consent_roundtrip() {
238299
let toml = r#"

src/discover/registry.rs

Lines changed: 222 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -433,12 +433,44 @@ fn strip_trailing_redirects(cmd: &str) -> (&str, &str) {
433433
(cmd_part, redir_part)
434434
}
435435

436+
/// Optional knobs for `rewrite_command_with_options`. The `Default` value
437+
/// (no bypass markers) rewrites every `curl` segment, i.e. the behavior
/// `rewrite_command(cmd, excluded)` had before bypass markers existed.
438+
#[derive(Debug, Default, Clone)]
439+
pub struct RewriteOptions {
440+
/// URL substring markers that opt-out `curl` invocations from being
441+
/// rewritten to `rtk curl …` (whose output is an `rtk json --schema`
/// summary rather than raw JSON). An empty list bypasses nothing. See
442+
/// `crate::core::config::CurlConfig::bypass_url_markers`.
443+
pub curl_bypass_url_markers: Vec<String>,
444+
}
445+
436446
/// Returns `None` if the command is unsupported or ignored (hook should pass through).
437447
///
438448
/// Handles compound commands (`&&`, `||`, `;`) by rewriting each segment independently.
439449
/// For pipes (`|`), only rewrites the left-hand command (pipe targets stay raw),
440450
/// but continues rewriting segments after subsequent `&&`/`||`/`;` operators.
451+
///
452+
/// Reads `[curl] bypass_url_markers` from `config.toml` to decide whether a
453+
/// `curl` segment should be passed through unchanged. For explicit control
454+
/// over that list (e.g. in tests), use `rewrite_command_with_options`.
///
/// If the config cannot be loaded, the marker list is treated as empty (no
/// bypass) rather than reporting an error.
441455
pub fn rewrite_command(cmd: &str, excluded: &[String]) -> Option<String> {
// `Config::load()` runs on every call; a failed load is swallowed by
// `unwrap_or_default()`, which yields an empty marker list.
456+
let opts = RewriteOptions {
457+
curl_bypass_url_markers: crate::core::config::Config::load()
458+
.map(|c| c.curl.bypass_url_markers)
459+
.unwrap_or_default(),
460+
};
461+
rewrite_command_with_options(cmd, excluded, &opts)
462+
}
463+
464+
/// Same as `rewrite_command`, but lets the caller supply `RewriteOptions`
465+
/// directly instead of reading from the on-disk config. Tests use this to
466+
/// pin a specific `curl_bypass_url_markers` set without touching the user's
467+
/// config file; production callers that already have config in hand can
468+
/// avoid a second `Config::load()` round-trip.
469+
pub fn rewrite_command_with_options(
470+
cmd: &str,
471+
excluded: &[String],
472+
opts: &RewriteOptions,
473+
) -> Option<String> {
442474
let trimmed = cmd.trim();
443475
if trimmed.is_empty() {
444476
return None;
@@ -462,11 +494,15 @@ pub fn rewrite_command(cmd: &str, excluded: &[String]) -> Option<String> {
462494
return Some(trimmed.to_string());
463495
}
464496

465-
rewrite_compound(trimmed, &compiled)
497+
rewrite_compound(trimmed, &compiled, &opts.curl_bypass_url_markers)
466498
}
467499

468500
/// Rewrite a compound command (with `&&`, `||`, `;`, `|`) by rewriting each segment.
469-
fn rewrite_compound(cmd: &str, excluded: &[ExcludePattern]) -> Option<String> {
501+
fn rewrite_compound(
502+
cmd: &str,
503+
excluded: &[ExcludePattern],
504+
curl_bypass: &[String],
505+
) -> Option<String> {
470506
let tokens = tokenize(cmd);
471507
let mut result = String::with_capacity(cmd.len() + 32);
472508
let mut any_changed = false;
@@ -479,7 +515,8 @@ fn rewrite_compound(cmd: &str, excluded: &[ExcludePattern]) -> Option<String> {
479515
match tok.kind {
480516
TokenKind::Operator => {
481517
let seg = cmd[seg_start..tok.offset].trim();
482-
let rewritten = rewrite_segment(seg, excluded).unwrap_or_else(|| seg.to_string());
518+
let rewritten = rewrite_segment(seg, excluded, curl_bypass)
519+
.unwrap_or_else(|| seg.to_string());
483520
if rewritten != seg {
484521
any_changed = true;
485522
}
@@ -509,7 +546,8 @@ fn rewrite_compound(cmd: &str, excluded: &[ExcludePattern]) -> Option<String> {
509546
let rewritten = if is_pipe_incompatible {
510547
seg.to_string()
511548
} else {
512-
rewrite_segment(seg, excluded).unwrap_or_else(|| seg.to_string())
549+
rewrite_segment(seg, excluded, curl_bypass)
550+
.unwrap_or_else(|| seg.to_string())
513551
};
514552
if rewritten != seg {
515553
any_changed = true;
@@ -537,7 +575,8 @@ fn rewrite_compound(cmd: &str, excluded: &[ExcludePattern]) -> Option<String> {
537575
}
538576
TokenKind::Shellism if tok.value == "&" => {
539577
let seg = cmd[seg_start..tok.offset].trim();
540-
let rewritten = rewrite_segment(seg, excluded).unwrap_or_else(|| seg.to_string());
578+
let rewritten = rewrite_segment(seg, excluded, curl_bypass)
579+
.unwrap_or_else(|| seg.to_string());
541580
if rewritten != seg {
542581
any_changed = true;
543582
}
@@ -553,7 +592,8 @@ fn rewrite_compound(cmd: &str, excluded: &[ExcludePattern]) -> Option<String> {
553592
}
554593

555594
let seg = cmd[seg_start..].trim();
556-
let rewritten = rewrite_segment(seg, excluded).unwrap_or_else(|| seg.to_string());
595+
let rewritten =
596+
rewrite_segment(seg, excluded, curl_bypass).unwrap_or_else(|| seg.to_string());
557597
if rewritten != seg {
558598
any_changed = true;
559599
}
@@ -634,8 +674,12 @@ fn compile_exclude_patterns(patterns: &[String]) -> Vec<ExcludePattern> {
634674
.collect()
635675
}
636676

637-
fn rewrite_segment(seg: &str, excluded: &[ExcludePattern]) -> Option<String> {
638-
rewrite_segment_inner(seg, excluded, 0)
677+
/// Rewrites a single command segment by delegating to
/// `rewrite_segment_inner` with an initial `depth` of 0.
///
/// `excluded` and `curl_bypass` are forwarded unchanged. The callers in
/// `rewrite_compound` keep the original segment text when this returns `None`.
fn rewrite_segment(
678+
seg: &str,
679+
excluded: &[ExcludePattern],
680+
curl_bypass: &[String],
681+
) -> Option<String> {
682+
rewrite_segment_inner(seg, excluded, curl_bypass, 0)
639683
}
640684

641685
fn is_excluded(cmd: &str, excluded: &[ExcludePattern]) -> bool {
@@ -645,7 +689,12 @@ fn is_excluded(cmd: &str, excluded: &[ExcludePattern]) -> bool {
645689
})
646690
}
647691

648-
fn rewrite_segment_inner(seg: &str, excluded: &[ExcludePattern], depth: usize) -> Option<String> {
692+
fn rewrite_segment_inner(
693+
seg: &str,
694+
excluded: &[ExcludePattern],
695+
curl_bypass: &[String],
696+
depth: usize,
697+
) -> Option<String> {
649698
let trimmed = seg.trim();
650699
if trimmed.is_empty() {
651700
return None;
@@ -660,7 +709,7 @@ fn rewrite_segment_inner(seg: &str, excluded: &[ExcludePattern], depth: usize) -
660709
if rest.is_empty() {
661710
return None;
662711
}
663-
return match rewrite_segment_inner(rest, excluded, depth + 1) {
712+
return match rewrite_segment_inner(rest, excluded, curl_bypass, depth + 1) {
664713
Some(rewritten) => Some(format!("{} {}", prefix, rewritten)),
665714
None => None,
666715
};
@@ -746,6 +795,22 @@ fn rewrite_segment_inner(seg: &str, excluded: &[ExcludePattern], depth: usize) -
746795
}
747796
}
748797

798+
// `rtk curl` pipes responses through `rtk json --schema`, which produces
799+
// field-type literals (`field: int`, `field: string`) and a `(N)`
800+
// array-length suffix. That's a token-savings win for arbitrary
801+
// third-party APIs, but actively breaks downstream JSON parsing (jq,
802+
// python `json.load`, agent-side filtering) for private/internal APIs
803+
// whose responses are consumed as raw JSON.
804+
//
805+
// Skip the rewrite when the URL contains any user-configured marker.
806+
// Empty list = unchanged historical behavior (every curl gets rewritten).
807+
// Configure via `[curl] bypass_url_markers` in `~/.config/rtk/config.toml`.
808+
if rule.rtk_cmd == "rtk curl" && !curl_bypass.is_empty() {
809+
if curl_bypass.iter().any(|marker| cmd_clean.contains(marker)) {
810+
return None;
811+
}
812+
}
813+
749814
// Try each rewrite prefix (longest first) with word-boundary check
750815
for &prefix in rule.rewrite_prefixes {
751816
if let Some(rest) = strip_word_prefix(cmd_clean, prefix) {
@@ -2963,6 +3028,153 @@ mod tests {
29633028
assert!(rewrite_command("curl https://api.example.com", &excluded).is_some());
29643029
}
29653030

3031+
// `[curl] bypass_url_markers` lets users opt private / internal JSON APIs
3032+
// out of the `rtk curl … --schema` rewrite when the rewritten output would
3033+
// break a downstream parser. Tests below pass markers explicitly via
3034+
// `rewrite_command_with_options` so behavior is independent of the on-disk
3035+
// user config.
3036+
// Test helper: builds a `RewriteOptions` whose bypass list holds an owned
// `String` copy of each entry in `markers`, in the same order.
fn curl_bypass_opts(markers: &[&str]) -> RewriteOptions {
3037+
RewriteOptions {
3038+
curl_bypass_url_markers: markers.iter().map(|s| s.to_string()).collect(),
3039+
}
3040+
}
3041+
3042+
#[test]
3043+
fn test_rewrite_curl_bypasses_localhost_marker() {
3044+
// curl to a configured localhost API returns None (no rewrite), so the
3045+
// original curl runs verbatim and the caller gets real JSON instead of
3046+
// schema-mode literals.
// The marker omits the URL scheme; it matches here because it is a
// substring of the `http://localhost:3300/...` URL.
3047+
let opts = curl_bypass_opts(&["localhost:3300/"]);
3048+
assert_eq!(
3049+
rewrite_command_with_options(
3050+
"curl http://localhost:3300/api/change-requests",
3051+
&[],
3052+
&opts
3053+
),
3054+
None
3055+
);
3056+
}
3057+
3058+
#[test]
3058+
fn test_rewrite_curl_bypasses_loopback_marker() {
3059+
// Loopback-IP variant: `127.0.0.1` is a different string from `localhost`,
// so it needs its own marker entry. The `-s` flag ahead of the URL does
// not interfere with the substring match.
3060+
let opts = curl_bypass_opts(&["127.0.0.1:3300/"]);
3061+
assert_eq!(
3062+
rewrite_command_with_options(
3063+
"curl -s http://127.0.0.1:3300/api/health",
3064+
&[],
3065+
&opts
3066+
),
3067+
None
3068+
);
3069+
}
3071+
3072+
#[test]
3072+
fn test_rewrite_curl_bypasses_post_with_headers_and_payload() {
3073+
// Real-world POST variant: -X POST, -H header, -d body. Bypass must
3074+
// trigger regardless of curl flag positioning, since markers are a
3075+
// substring match against the full segment.
// NOTE(review): because the whole segment is searched, a marker that
// appears inside a `-H` or `-d` argument would presumably trigger the
// bypass too — confirm that is acceptable.
3076+
let opts = curl_bypass_opts(&["localhost:3300/"]);
3077+
assert_eq!(
3078+
rewrite_command_with_options(
3079+
"curl -s -X POST -H 'x-api-key: foo' -d '{}' http://localhost:3300/api/session-ack",
3080+
&[],
3081+
&opts
3082+
),
3083+
None
3084+
);
3085+
}
3087+
3088+
#[test]
3088+
fn test_rewrite_curl_bypasses_https_hostname_marker() {
3089+
// Hostname-based marker (e.g. an internal Tailscale or VPN-only API).
// The marker starts with `//`, so it only matches where the hostname
// directly follows `//`; the scheme (`https:` here) is not part of it.
3090+
let opts = curl_bypass_opts(&["//api.internal.example/"]);
3091+
assert_eq!(
3092+
rewrite_command_with_options(
3093+
"curl https://api.internal.example/v1/projects",
3094+
&[],
3095+
&opts
3096+
),
3097+
None
3098+
);
3099+
}
3101+
3102+
#[test]
3102+
fn test_rewrite_curl_bypasses_multiple_markers() {
3103+
// Multiple markers act as OR — any matching substring bypasses.
// Each URL below matches a different entry of the same two-marker list.
3104+
let opts = curl_bypass_opts(&["localhost:8090/", "localhost:11434/"]);
3105+
assert_eq!(
3106+
rewrite_command_with_options("curl http://localhost:8090/v1/models", &[], &opts),
3107+
None
3108+
);
3109+
assert_eq!(
3110+
rewrite_command_with_options("curl http://localhost:11434/api/tags", &[], &opts),
3111+
None
3112+
);
3113+
}
3115+
3116+
#[test]
3116+
fn test_rewrite_curl_default_empty_bypass_still_rewrites() {
3117+
// Default behavior — empty bypass markers — leaves the historical
3118+
// rewrite-everything semantics intact. This is the core upstream
3119+
// contract: opt-in only, no behavior change for users who haven't
3120+
// configured anything.
// `RewriteOptions` derives `Default`, so its marker `Vec` starts empty.
3121+
let opts = RewriteOptions::default();
3122+
assert_eq!(
3123+
rewrite_command_with_options(
3124+
"curl http://localhost:3300/api/change-requests",
3125+
&[],
3126+
&opts
3127+
),
3128+
Some("rtk curl http://localhost:3300/api/change-requests".into())
3129+
);
3130+
}
3132+
3133+
#[test]
3133+
fn test_rewrite_curl_still_rewrites_unmatched_url() {
3134+
// Third-party / unconfigured URL is not in the marker list, so the
3135+
// rtk curl rewrite still fires. Token-savings premise preserved for
3136+
// everyone except the user's own opt-in endpoints.
// Neither command below contains the substring `localhost:3300/`.
3137+
let opts = curl_bypass_opts(&["localhost:3300/"]);
3138+
assert_eq!(
3139+
rewrite_command_with_options("curl https://api.github.com/repos/foo/bar", &[], &opts),
3140+
Some("rtk curl https://api.github.com/repos/foo/bar".into())
3141+
);
3142+
assert_eq!(
3143+
rewrite_command_with_options("curl https://example.com/api/data", &[], &opts),
3144+
Some("rtk curl https://example.com/api/data".into())
3145+
);
3146+
}
3148+
3149+
#[test]
3149+
fn test_rewrite_curl_marker_is_port_specific() {
3150+
// Subtle: localhost on a port NOT in the marker list (e.g. someone
3151+
// running a third-party JSON server on :4000) STILL gets rewritten.
3152+
// The bypass is intentionally narrow — markers include port to keep
3153+
// collateral surface small.
// `localhost:3300/` is not a substring of `localhost:4000/...`, so the
// marker does not apply.
3154+
let opts = curl_bypass_opts(&["localhost:3300/"]);
3155+
assert_eq!(
3156+
rewrite_command_with_options("curl http://localhost:4000/api/foo", &[], &opts),
3157+
Some("rtk curl http://localhost:4000/api/foo".into())
3158+
);
3159+
}
3161+
3162+
#[test]
3162+
fn test_rewrite_compound_with_bypassed_curl_skips_only_that_segment() {
3163+
// In a compound command, the bypassed-curl segment passes through
3164+
// unchanged but other rewritable segments (git status) still get
3165+
// rewritten. Confirms the bypass is per-segment, not all-or-nothing.
// Expected output: `git status` becomes `rtk git status`, and the curl
// segment after `&&` is emitted verbatim.
3166+
let opts = curl_bypass_opts(&["localhost:3300/"]);
3167+
assert_eq!(
3168+
rewrite_command_with_options(
3169+
"git status && curl http://localhost:3300/api/change-requests",
3170+
&[],
3171+
&opts
3172+
),
3173+
Some("rtk git status && curl http://localhost:3300/api/change-requests".into())
3174+
);
3175+
}
3177+
29663178
#[test]
29673179
fn test_rewrite_compound_partial_exclude() {
29683180
// curl excluded but git still rewrites

0 commit comments

Comments
 (0)