diff --git a/src/bin/report.rs b/src/bin/report.rs
index 097d43c..b9480f1 100644
--- a/src/bin/report.rs
+++ b/src/bin/report.rs
@@ -45,6 +45,126 @@ fn main() {
             r.rounds
         );
     }
+
+    report_generated_corpus();
+}
+
+/// Profile order mirrors `scripts/gen-samples.js`: each is one javascript-obfuscator
+/// (obfuscator.io) technique. Unknown profiles are appended in sorted order.
+const PROFILE_ORDER: &[&str] = &[
+    "minimal",
+    "strarr_base64",
+    "strarr_rc4",
+    "controlflow",
+    "deadcode",
+    "numbers_keys",
+    "strong",
+];
+
+/// Aggregate of the obfuscator.io corpus (`samples/generated/`) by profile.
+/// Synthetic but obfuscator.io-faithful, so these rows track per-technique
+/// readability the same way the real-sample rows track per-target readability.
+fn report_generated_corpus() {
+    let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        .join("samples")
+        .join("generated");
+    let Ok(rd) = fs::read_dir(&dir) else {
+        return; // corpus not generated locally; nothing to add
+    };
+
+    let mut by_profile: std::collections::BTreeMap<String, ProfileAgg> = Default::default();
+    for entry in rd.flatten() { // directory entries that failed to read are skipped
+        let p = entry.path();
+        if p.extension().and_then(|s| s.to_str()) != Some("js") {
+            continue;
+        }
+        let name = p.file_name().unwrap().to_string_lossy().into_owned();
+        let Some(profile) = profile_of(&name) else {
+            continue; // no `__<profile>` suffix: not a generated fixture
+        };
+        let src = fs::read_to_string(&p).unwrap();
+        let r = deobfuscate(&src, &name);
+        let agg = by_profile.entry(profile).or_default();
+        agg.files += 1;
+        agg.in_bytes += src.len();
+        agg.out_bytes += r.code.len();
+        agg.brackets += count_bracket_access(&r.code);
+        agg.opaque_sum += opaque_name_ratio(&r.code);
+        agg.rounds_max = agg.rounds_max.max(r.rounds);
+        if !r.converged {
+            agg.not_converged += 1;
+        }
+    }
+    if by_profile.is_empty() {
+        return; // directory exists but holds no fixtures: print no table at all
+    }
+
+    println!("\nobfuscator.io corpus (samples/generated/) — aggregated by profile");
+    println!(
+        "{:<13} {:>5} {:>9} {:>9} {:>6} {:>8} {:>8} {:>6} {:>5}",
+        "profile", "files", "in_bytes", "out_bytes", "kept%", "brackets", "opaque%", "rounds", "conv"
+    );
+
+    let mut profiles: Vec<String> = by_profile.keys().cloned().collect();
+    profiles.sort_by_key(|p| { // stable sort: unknown profiles keep the map's sorted key order
+        PROFILE_ORDER
+            .iter()
+            .position(|x| x == p)
+            .unwrap_or(PROFILE_ORDER.len())
+    });
+
+    let mut total = ProfileAgg::default();
+    for profile in &profiles {
+        let a = &by_profile[profile];
+        print_agg_row(profile, a);
+        total.merge(a);
+    }
+    print_agg_row("ALL", &total);
+}
+
+#[derive(Default)]
+struct ProfileAgg {
+    files: usize, // number of `.js` fixtures folded into this row
+    in_bytes: usize, // sum of input source lengths
+    out_bytes: usize, // sum of deobfuscated output lengths
+    brackets: usize, // sum of `count_bracket_access` over the outputs
+    opaque_sum: usize, // sum of per-file `opaque_name_ratio`; divided by `files` when printed
+    rounds_max: usize, // largest `rounds` value seen for any file
+    not_converged: usize, // number of files whose result had `converged == false`
+}
+
+impl ProfileAgg {
+    fn merge(&mut self, o: &ProfileAgg) { // counters add up; `rounds_max` stays a maximum
+        self.files += o.files;
+        self.in_bytes += o.in_bytes;
+        self.out_bytes += o.out_bytes;
+        self.brackets += o.brackets;
+        self.opaque_sum += o.opaque_sum;
+        self.rounds_max = self.rounds_max.max(o.rounds_max);
+        self.not_converged += o.not_converged;
+    }
+}
+
+fn print_agg_row(label: &str, a: &ProfileAgg) {
+    let kept = a.out_bytes * 100 / a.in_bytes.max(1); // byte-weighted; max(1) avoids dividing by zero
+    let opaque = a.opaque_sum / a.files.max(1); // mean per-file ratio
+    let conv = if a.not_converged == 0 {
+        "ok".to_string()
+    } else {
+        format!("!{}", a.not_converged)
+    };
+    println!(
+        "{:<13} {:>5} {:>9} {:>9} {:>5}% {:>8} {:>7}% {:>6} {:>5}",
+        label, a.files, a.in_bytes, a.out_bytes, kept, a.brackets, opaque, a.rounds_max, conv
+    );
+}
+
+/// `seed_01_arithmetic__strong.js` -> `strong`. Profiles are the segment after
+/// the `__` separator (seed names use single `_`, the join uses `__`).
+fn profile_of(name: &str) -> Option<String> {
+    let stem = name.strip_suffix(".js")?; // name does not end in `.js` -> None
+    let (_, profile) = stem.rsplit_once("__")?; // split at the LAST `__`; none present -> None
+    Some(profile.to_string())
 }
 
 fn count_bracket_access(s: &str) -> usize {
diff --git a/tests/golden.rs b/tests/golden.rs
index 72ed387..328d79e 100644
--- a/tests/golden.rs
+++ b/tests/golden.rs
@@ -186,9 +186,134 @@ fn render_scoreboard() -> String {
             if r.converged { "yes" } else { "**no**" },
         ));
     }
+    render_generated_corpus(&mut s);
     s
 }
 
+/// Profile order mirrors `scripts/gen-samples.js` — each is one
+/// javascript-obfuscator (obfuscator.io) technique. Unknown profiles append last.
+const PROFILE_ORDER: &[&str] = &[
+    "minimal",
+    "strarr_base64",
+    "strarr_rc4",
+    "controlflow",
+    "deadcode",
+    "numbers_keys",
+    "strong",
+];
+
+/// Append a per-profile rollup of the obfuscator.io corpus (`samples/generated/`).
+/// These are synthetic but obfuscator.io-faithful, and they gate correctness via
+/// `manifest.json`; aggregating them here makes them count toward readability too,
+/// per technique. `kept%` is byte-weighted (Σout/Σin); `opaque%` is the mean of
+/// per-file ratios; `rounds` is the worst case; `converged` flags any non-fixpoint.
+fn render_generated_corpus(s: &mut String) {
+    let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        .join("samples")
+        .join("generated");
+    let mut by_profile: std::collections::BTreeMap<String, ProfileAgg> = Default::default();
+    let Ok(rd) = fs::read_dir(&dir) else {
+        return; // directory missing or unreadable: append nothing
+    };
+    for entry in rd.flatten() { // directory entries that failed to read are skipped
+        let p = entry.path();
+        if p.extension().and_then(|x| x.to_str()) != Some("js") {
+            continue;
+        }
+        let name = p.file_name().unwrap().to_string_lossy().into_owned();
+        let Some((_, profile)) = name.strip_suffix(".js").and_then(|st| st.rsplit_once("__")) else {
+            continue; // no `__<profile>` suffix: not a generated fixture
+        };
+        let src = fs::read_to_string(&p).unwrap();
+        let r = deobfuscate(&src, &name);
+        let a = by_profile.entry(profile.to_string()).or_default();
+        a.files += 1;
+        a.in_bytes += src.len();
+        a.out_bytes += r.code.len();
+        a.brackets += count_bracket_access(&r.code);
+        a.opaque_sum += opaque_name_ratio(&r.code);
+        a.rounds_max = a.rounds_max.max(r.rounds);
+        if !r.converged {
+            a.not_converged += 1;
+        }
+    }
+    if by_profile.is_empty() {
+        return; // no fixtures found: leave the scoreboard without this section
+    }
+
+    let mut profiles: Vec<String> = by_profile.keys().cloned().collect();
+    profiles.sort_by_key(|p| { // stable sort: unknown profiles keep the map's sorted key order
+        PROFILE_ORDER
+            .iter()
+            .position(|x| x == p)
+            .unwrap_or(PROFILE_ORDER.len())
+    });
+
+    s.push_str("\n## obfuscator.io corpus (`samples/generated/`), by profile\n\n");
+    s.push_str(
+        "Synthetic javascript-obfuscator fixtures (one row per technique, \
+         aggregated over all seeds). `kept%` is byte-weighted Σout/Σin; \
+         `opaque%` is the mean per-file ratio; `rounds` is the worst case; \
+         `converged` is `yes` only if every file reached a fixpoint.\n\n",
+    );
+    s.push_str(
+        "| profile | files | in_bytes | out_bytes | kept% | brackets | opaque% | rounds | converged |\n",
+    );
+    s.push_str(
+        "|---------|------:|---------:|----------:|------:|---------:|--------:|-------:|:---------:|\n",
+    );
+    let mut total = ProfileAgg::default();
+    for profile in &profiles {
+        push_agg_row(s, profile, &by_profile[profile]);
+        total.merge(&by_profile[profile]);
+    }
+    push_agg_row(s, "**all**", &total);
+}
+
+#[derive(Default)]
+struct ProfileAgg {
+    files: usize, // number of `.js` fixtures folded into this row
+    in_bytes: usize, // sum of input source lengths
+    out_bytes: usize, // sum of deobfuscated output lengths
+    brackets: usize, // sum of `count_bracket_access` over the outputs
+    opaque_sum: usize, // sum of per-file `opaque_name_ratio`; divided by `files` when rendered
+    rounds_max: usize, // largest `rounds` value seen for any file
+    not_converged: usize, // number of files whose result had `converged == false`
+}
+
+impl ProfileAgg {
+    fn merge(&mut self, o: &ProfileAgg) { // counters add up; `rounds_max` stays a maximum
+        self.files += o.files;
+        self.in_bytes += o.in_bytes;
+        self.out_bytes += o.out_bytes;
+        self.brackets += o.brackets;
+        self.opaque_sum += o.opaque_sum;
+        self.rounds_max = self.rounds_max.max(o.rounds_max);
+        self.not_converged += o.not_converged;
+    }
+}
+
+fn push_agg_row(s: &mut String, label: &str, a: &ProfileAgg) {
+    let kept = a.out_bytes * 100 / a.in_bytes.max(1); // byte-weighted; max(1) avoids dividing by zero
+    let opaque = a.opaque_sum / a.files.max(1); // mean of the per-file ratios
+    s.push_str(&format!(
+        "| {} | {} | {} | {} | {}% | {} | {}% | {} | {} |\n",
+        label,
+        a.files,
+        a.in_bytes,
+        a.out_bytes,
+        kept,
+        a.brackets,
+        opaque,
+        a.rounds_max,
+        if a.not_converged == 0 {
+            "yes".to_string()
+        } else {
+            format!("**{} no**", a.not_converged)
+        },
+    ));
+}
+
 fn count_bracket_access(s: &str) -> usize {
     s.matches("[\"").count()
 }
diff --git a/tests/snapshots/SCOREBOARD.md b/tests/snapshots/SCOREBOARD.md
index 79510fe..c584c71 100644
--- a/tests/snapshots/SCOREBOARD.md
+++ b/tests/snapshots/SCOREBOARD.md
@@ -19,3 +19,18 @@ Regenerated by `tests/golden.rs` (re-bless with `UPDATE_SNAPSHOTS=1 cargo test -
 | sample_7.js | 745944 | 609607 | 81% | 576 | 6% | 6 | yes |
 | sample_8.js | 401876 | 232934 | 57% | 4 | 1% | 7 | yes |
 | sample_9.js | 156692 | 157990 | 100% | 0 | 5% | 5 | yes |
+
+## obfuscator.io corpus (`samples/generated/`), by profile
+
+Synthetic javascript-obfuscator fixtures (one row per technique, aggregated over all seeds). `kept%` is byte-weighted Σout/Σin; `opaque%` is the mean per-file ratio; `rounds` is the worst case; `converged` is `yes` only if every file reached a fixpoint.
+
+| profile | files | in_bytes | out_bytes | kept% | brackets | opaque% | rounds | converged |
+|---------|------:|---------:|----------:|------:|---------:|--------:|-------:|:---------:|
+| minimal | 20 | 31067 | 19334 | 62% | 12 | 8% | 4 | yes |
+| strarr_base64 | 20 | 68836 | 47423 | 68% | 12 | 5% | 4 | yes |
+| strarr_rc4 | 20 | 107162 | 77393 | 72% | 12 | 4% | 4 | yes |
+| controlflow | 20 | 62944 | 20273 | 32% | 12 | 8% | 7 | yes |
+| deadcode | 20 | 56174 | 19709 | 35% | 12 | 8% | 5 | yes |
+| numbers_keys | 20 | 90297 | 48641 | 53% | 12 | 5% | 4 | yes |
+| strong | 20 | 164800 | 95481 | 57% | 12 | 5% | 5 | yes |
+| **all** | 140 | 581280 | 328254 | 56% | 84 | 6% | 7 | yes |