Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 34 additions & 4 deletions src/bin/report.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,16 @@ fn report_generated_corpus() {
println!("\nobfuscator.io corpus (samples/generated/) — aggregated by profile");
println!(
"{:<13} {:>5} {:>9} {:>9} {:>6} {:>8} {:>8} {:>8} {:>6} {:>5}",
"profile", "files", "in_bytes", "out_bytes", "kept%", "brackets", "opaque%", "hexrefs", "rounds", "conv"
"profile",
"files",
"in_bytes",
"out_bytes",
"kept%",
"brackets",
"opaque%",
"hexrefs",
"rounds",
"conv"
);

let mut profiles: Vec<String> = by_profile.keys().cloned().collect();
Expand Down Expand Up @@ -163,7 +172,16 @@ fn print_agg_row(label: &str, a: &ProfileAgg) {
};
println!(
"{:<13} {:>5} {:>9} {:>9} {:>5}% {:>8} {:>7}% {:>8} {:>6} {:>5}",
label, a.files, a.in_bytes, a.out_bytes, kept, a.brackets, opaque, a.hexrefs, a.rounds_max, conv
label,
a.files,
a.in_bytes,
a.out_bytes,
kept,
a.brackets,
opaque,
a.hexrefs,
a.rounds_max,
conv
);
}

Expand All @@ -176,8 +194,20 @@ fn profile_of(name: &str) -> Option<String> {
}

/// Counts string-keyed *member accesses* (`obj["x"]`) in `s`.
///
/// A `["` pair is counted only when the byte immediately before `[` can end
/// an expression (see [`is_member_lhs`]). Array/object-literal positions such
/// as `= ["x"]` or `, ["x"]`, where `[` follows an operator, punctuator or
/// space, are therefore excluded; those dominate a raw `["` count.
///
/// This is a byte-level heuristic, not a parse: a `["` at the very start of
/// the input has no preceding byte and is never counted, and inputs shorter
/// than three bytes yield `0`.
fn count_bracket_access(s: &str) -> usize {
    // Each window is (previous byte, `[`, `"`); sliding windows avoid manual
    // index arithmetic and handle short inputs without special cases.
    s.as_bytes()
        .windows(3)
        .filter(|w| is_member_lhs(w[0]) && w[1] == b'[' && w[2] == b'"')
        .count()
}

/// Returns `true` when byte `p` can be the last byte of the object expression
/// in a member access: an identifier character (ASCII alphanumeric, `_`, `$`),
/// a closing `)` or `]`, or a closing quote (`"` or `'`).
fn is_member_lhs(p: u8) -> bool {
    p.is_ascii_alphanumeric() || matches!(p, b'_' | b'$' | b')' | b']' | b'"' | b'\'')
}

/// Raw (non-distinct) occurrence count of `_0x…`-style hex-named identifiers
Expand Down
8 changes: 4 additions & 4 deletions src/passes/dce.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,10 @@ fn keep_statement(s: &Statement<'_>, scoping: &Scoping) -> bool {
match s {
Statement::EmptyStatement(_) => false,
Statement::VariableDeclaration(vd) => !vd.declarations.is_empty(),
Statement::FunctionDeclaration(f) => f
.id
.as_ref()
.is_none_or(|id| !fn_decl_is_dead(id.symbol_id(), f, scoping)),
Statement::FunctionDeclaration(f) => {
f.id.as_ref()
.is_none_or(|id| !fn_decl_is_dead(id.symbol_id(), f, scoping))
}
// A bare literal statement (`80214130;`, `true;`, `null;`) computes a
// value that is immediately discarded with no side effect — pure
// obfuscation noise (often a spent opaque-predicate constant). Drop it.
Expand Down
7 changes: 7 additions & 0 deletions src/passes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ mod math_fingerprint;
mod member_normalize;
mod proxy_inline;
mod pure_eval;
mod reconstruct_object;
mod rename;
mod sequence_split;
mod unflatten;
Expand All @@ -35,6 +36,7 @@ pub use math_fingerprint::MathFingerprint;
pub use member_normalize::MemberNormalize;
pub use proxy_inline::ProxyInline;
pub use pure_eval::PureEval;
pub use reconstruct_object::ReconstructObject;
pub use rename::RenameByRole;
pub use sequence_split::SequenceSplit;
pub use unflatten::Unflatten;
Expand Down Expand Up @@ -102,6 +104,11 @@ pub fn default_pipeline() -> Vec<Box<dyn Pass>> {
// lift, and the const folds below reach further. Runs before them so the
// exposed work is picked up in the same round. See `src/ir/inline.rs`.
Box::new(crate::ir::InlineSingleUse),
// Fold `X = {}; X.a = …; X.b = …;` runs back into the object literal
// they were lowered from (transformObjectKeys). Runs before ProxyInline
// so operator-proxy *tables* built in this split form are reconstituted
// into the `{ m: function… }` literal ProxyInline can recognize.
Box::new(ReconstructObject::default()),
// Collapse operator-proxy wrappers (a OP b) before lifting, so decoder
// calls hidden inside proxy expansions become visible.
Box::new(ProxyInline::default()),
Expand Down
250 changes: 250 additions & 0 deletions src/passes/reconstruct_object.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
//! Object-literal reconstruction.
//!
//! `transformObjectKeys` (and hand-written packers) lower an object literal into
//! an empty object plus a run of property writes:
//!
//! ```js
//! const O = {};
//! O.name = "P1";
//! O.price = 600;
//! ```
//!
//! This pass folds that contiguous run back into the literal it came from —
//! `const O = { name: "P1", price: 600 }`. That alone improves readability, but
//! the bigger payoff is downstream: obfuscators build their *operator-proxy
//! tables* the same way (`const t = {}; t.m = function(a,b){…}; …`), so
//! reconstruction is what lets `proxy_inline` recognize the table at all, which
//! in turn folds the opaque predicates guarding dead branches so DCE can remove
//! them.
//!
//! Soundness — we fold only when the rewrite cannot change observable behavior:
//! * The seed is a single declarator `X = {}` with an *empty* object literal.
//! * We consume only the *immediately following, contiguous* statements of the
//! form `X.<staticName> = <expr>` (plain `=`, non-computed key). Contiguity
//! guarantees no intervening statement reads the half-built object between the
//! writes; the written values themselves are vetted by the next rule.
//! * The value expression must not reference `X` at all (even inside a nested
//! function): in the literal, `X` is not yet bound, so a value that read `X`
//! would change meaning. This is conservative (a closure-only reference would
//! be safe) but always sound.
//! * `__proto__` is excluded (its literal form has special prototype-setting
//! semantics) and a repeated key stops the run (last-write-wins is preserved
//! by not merging across a duplicate).
//!
//! Property values keep their original order, so any side effects in them run in
//! the same sequence as the original writes.

use std::collections::HashSet;

use oxc_allocator::{Allocator, Vec as ArenaVec};
use oxc_ast::ast::{
AssignmentOperator, AssignmentTarget, Expression, ObjectPropertyKind, Program, PropertyKind,
Statement,
};
use oxc_ast::AstBuilder;
use oxc_ast_visit::Visit;
use oxc_semantic::Scoping;
use oxc_syntax::symbol::SymbolId;
use oxc_traverse::{traverse_mut, Traverse, TraverseCtx};

use crate::pass::Pass;
use crate::util::build_scoping;

/// Pass that folds `X = {}; X.a = …; X.b = …;` runs back into a single object
/// literal.
#[derive(Default)]
pub struct ReconstructObject {
    /// Set when at least one run was folded during the current `run` call;
    /// reset at the start of every `run` and returned as its result.
    changed: bool,
}

impl Pass for ReconstructObject {
    /// Name of this pass.
    fn name(&self) -> &'static str {
        "reconstruct-object"
    }

    /// Traverses `program` once, folding every eligible run, and reports
    /// whether anything was rewritten.
    fn run<'a>(&mut self, program: &mut Program<'a>, allocator: &'a Allocator) -> bool {
        // Clear the flag up front so the result reflects this run only.
        self.changed = false;
        let fresh_scoping = build_scoping(program);
        traverse_mut(self, allocator, program, fresh_scoping, ());
        self.changed
    }
}

/// Classifies `stmt` as a seed: a variable declaration with exactly one
/// declarator, a plain identifier binding, and an *empty* object-literal
/// initializer (`X = {}`).
///
/// Returns the symbol bound by that declarator, or `None` when `stmt` is not
/// a seed (or the binding carries no symbol id). This is read-only; the
/// literal itself is mutated later by the caller.
fn seed_symbol(stmt: &Statement<'_>) -> Option<SymbolId> {
    let Statement::VariableDeclaration(var_decl) = stmt else {
        return None;
    };
    // Multi-declarator statements (`let a = {}, b = 1`) are never seeds.
    if var_decl.declarations.len() != 1 {
        return None;
    }
    let declarator = var_decl.declarations.first()?;
    // Destructuring patterns have no single binding identifier.
    let binding = declarator.id.get_binding_identifier()?;
    let Some(Expression::ObjectExpression(literal)) = &declarator.init else {
        return None;
    };
    if literal.properties.is_empty() {
        binding.symbol_id.get()
    } else {
        None
    }
}

/// Recognizes a `seed.<name> = <expr>` statement and yields `<name>`.
///
/// The statement must be an expression statement whose expression is a plain
/// `=` assignment to a static (non-computed) member of an identifier that
/// resolves to `seed`. Anything else returns `None`. Read-only: used to
/// validate a run before any statement is consumed.
fn member_assign_key<'a>(
    stmt: &Statement<'a>,
    seed: SymbolId,
    scoping: &Scoping,
) -> Option<String> {
    let Statement::ExpressionStatement(expr_stmt) = stmt else {
        return None;
    };
    let Expression::AssignmentExpression(assign) = &expr_stmt.expression else {
        return None;
    };
    // Only plain `=` qualifies; compound operators are rejected.
    if !matches!(assign.operator, AssignmentOperator::Assign) {
        return None;
    }
    let AssignmentTarget::StaticMemberExpression(member) = &assign.left else {
        return None;
    };
    let Expression::Identifier(base) = &member.object else {
        return None;
    };
    // The base identifier must resolve to the seed's symbol, not merely
    // share its name.
    let resolved = scoping.get_reference(base.reference_id.get()?).symbol_id();
    (resolved == Some(seed)).then(|| member.property.name.to_string())
}

/// Reports whether any identifier reference visited inside `expr` resolves to
/// `seed`.
///
/// References without a reference id, or that resolve to no symbol, never
/// match.
fn references_symbol(expr: &Expression<'_>, seed: SymbolId, scoping: &Scoping) -> bool {
    /// Visitor that latches `hit` once a reference to `target` is seen.
    struct SeedProbe<'s> {
        target: SymbolId,
        scoping: &'s Scoping,
        hit: bool,
    }

    impl<'a, 's> Visit<'a> for SeedProbe<'s> {
        fn visit_identifier_reference(&mut self, ident: &oxc_ast::ast::IdentifierReference<'a>) {
            let resolves_to_target = ident
                .reference_id
                .get()
                .is_some_and(|rid| self.scoping.get_reference(rid).symbol_id() == Some(self.target));
            // Sticky: a later non-matching reference must not clear the flag.
            self.hit |= resolves_to_target;
        }
    }

    let mut probe = SeedProbe {
        target: seed,
        scoping,
        hit: false,
    };
    probe.visit_expression(expr);
    probe.hit
}

impl<'a> Traverse<'a, ()> for ReconstructObject {
    /// Rewrites one statement list: every `X = {}` seed absorbs the contiguous
    /// run of `X.<name> = <expr>` statements that directly follows it.
    ///
    /// A run ends at the first statement that is not such an assignment, that
    /// writes `__proto__`, that repeats a key already seen in the run, or
    /// whose value references `X`. Statements outside a run are kept in their
    /// original order.
    fn enter_statements(
        &mut self,
        stmts: &mut ArenaVec<'a, Statement<'a>>,
        ctx: &mut TraverseCtx<'a, ()>,
    ) {
        // Fast exit: lists without any empty-object seed are left untouched.
        if stmts.iter().all(|stmt| seed_symbol(stmt).is_none()) {
            return;
        }

        let total = stmts.len();
        // Move every statement into an owned slot so a run can be validated
        // through shared borrows first and consumed by value afterwards.
        let mut slots: Vec<Option<Statement<'a>>> = std::mem::replace(stmts, ctx.ast.vec())
            .into_iter()
            .map(Some)
            .collect();

        let mut cursor = 0;
        while cursor < total {
            // Exclusive end of the foldable run that follows `cursor`; stays
            // at `cursor + 1` when there is nothing to fold.
            let mut run_end = cursor + 1;
            if let Some(seed) = seed_symbol(slots[cursor].as_ref().unwrap()) {
                let scoping = ctx.scoping();
                let mut seen_keys = HashSet::new();
                while run_end < total {
                    let candidate = slots[run_end].as_ref().unwrap();
                    let Some(key) = member_assign_key(candidate, seed, scoping) else {
                        break;
                    };
                    if key == "__proto__" || !seen_keys.insert(key) {
                        break;
                    }
                    // The value must not observe the not-yet-bound seed object.
                    if references_symbol(assign_rhs(candidate), seed, scoping) {
                        break;
                    }
                    run_end += 1;
                }
            }

            let mut head = slots[cursor].take().unwrap();
            if run_end > cursor + 1 {
                // Turn the validated writes into properties, in source order.
                let mut props = ctx.ast.vec_with_capacity(run_end - cursor - 1);
                for slot in &mut slots[cursor + 1..run_end] {
                    props.push(take_member_property(slot.take().unwrap(), ctx.ast));
                }
                if let Statement::VariableDeclaration(decl) = &mut head {
                    if let Some(Expression::ObjectExpression(literal)) =
                        &mut decl.declarations[0].init
                    {
                        literal.properties = props;
                    }
                }
                self.changed = true;
            }
            stmts.push(head);
            cursor = run_end;
        }
    }
}

/// Borrows the right-hand side of an assignment expression statement.
///
/// Panics (via `unreachable!`) when `stmt` is not an expression statement
/// wrapping an assignment; callers validate the shape with
/// `member_assign_key` first.
fn assign_rhs<'b, 'a>(stmt: &'b Statement<'a>) -> &'b Expression<'a> {
    match stmt {
        Statement::ExpressionStatement(expr_stmt) => match &expr_stmt.expression {
            Expression::AssignmentExpression(assign) => &assign.right,
            _ => unreachable!(),
        },
        _ => unreachable!(),
    }
}

/// Converts an already-validated `seed.name = value` statement into the
/// object property `name: value`, taking ownership of the statement.
///
/// The property key reuses the member expression's property name, and both
/// the key and the property carry the member expression's span. Panics (via
/// `unreachable!`) if the statement does not have the validated shape.
fn take_member_property<'a>(stmt: Statement<'a>, ast: AstBuilder<'a>) -> ObjectPropertyKind<'a> {
    let expression = match stmt {
        Statement::ExpressionStatement(boxed) => boxed.unbox().expression,
        _ => unreachable!(),
    };
    let assignment = match expression {
        Expression::AssignmentExpression(boxed) => boxed.unbox(),
        _ => unreachable!(),
    };
    let member = match assignment.left {
        AssignmentTarget::StaticMemberExpression(boxed) => boxed.unbox(),
        _ => unreachable!(),
    };

    let span = member.span;
    let key = ast.property_key_static_identifier(span, member.property.name);
    // Plain `name: value` init property; the three trailing flags are all off.
    ast.object_property_kind_object_property(
        span,
        PropertyKind::Init,
        key,
        assignment.right,
        false,
        false,
        false,
    )
}
4 changes: 2 additions & 2 deletions src/passes/rename.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,8 @@ fn callback_param_roles(method: &str) -> Option<&'static [&'static str]> {
// (accumulator, currentValue, index, array)
"reduce" | "reduceRight" => &["acc", "value", "index", "arr"],
// (element, index, array)
"map" | "forEach" | "filter" | "find" | "findIndex" | "findLast"
| "findLastIndex" | "some" | "every" | "flatMap" => &["item", "index", "arr"],
"map" | "forEach" | "filter" | "find" | "findIndex" | "findLast" | "findLastIndex"
| "some" | "every" | "flatMap" => &["item", "index", "arr"],
// (a, b) comparator
"sort" => &["left", "right"],
_ => return None,
Expand Down
18 changes: 16 additions & 2 deletions tests/golden.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,8 @@ fn render_generated_corpus(s: &mut String) {
continue;
}
let name = p.file_name().unwrap().to_string_lossy().into_owned();
let Some((_, profile)) = name.strip_suffix(".js").and_then(|st| st.rsplit_once("__")) else {
let Some((_, profile)) = name.strip_suffix(".js").and_then(|st| st.rsplit_once("__"))
else {
continue;
};
let src = fs::read_to_string(&p).unwrap();
Expand Down Expand Up @@ -326,7 +327,20 @@ fn push_agg_row(s: &mut String, label: &str, a: &ProfileAgg) {
}

/// Counts string-keyed *member accesses* (`obj["x"]`) in `s`.
///
/// A `["` pair is counted only when the byte immediately before `[` is part
/// of the object expression: an identifier character, `)`, `]`, or a closing
/// quote, with no space in between. Array/object-literal positions are
/// therefore excluded; those dominate a raw `["` count. Mirrors
/// `src/bin/report.rs`.
///
/// This is a byte-level heuristic: a `["` at the very start of the input has
/// no preceding byte and is never counted, and inputs shorter than three
/// bytes yield `0`.
fn count_bracket_access(s: &str) -> usize {
    // Each window is (previous byte, `[`, `"`).
    s.as_bytes()
        .windows(3)
        .filter(|w| {
            let prev = w[0];
            let ends_expression = prev.is_ascii_alphanumeric()
                || matches!(prev, b'_' | b'$' | b')' | b']' | b'"' | b'\'');
            ends_expression && w[1] == b'[' && w[2] == b'"'
        })
        .count()
}

/// Fraction (%) of distinct identifier-ish tokens that look obfuscated: length
Expand Down
Loading
Loading