Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions Compilation/CompilationContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,19 @@ public IReadOnlyList<string> TakePendingLoopLabels()
return null;
}

/// <summary>
/// Looks up <paramref name="variableName"/> in the current locals and hands back its
/// <see cref="LocalBuilder"/> only when the slot's recorded CLR type is <c>StringBuilder</c>, i.e. the
/// local was declared by the string-accumulator promotion path (#857). The slot type is the sole
/// discriminator, so the answer follows whatever binding is currently visible for the name.
/// </summary>
/// <param name="variableName">Source-level name of the local to probe.</param>
/// <returns>The promoted slot, or null when the name is not a local or its slot is not a <c>StringBuilder</c>.</returns>
public LocalBuilder? TryGetPromotedStringAccumulator(string variableName)
{
    if (Locals.TryGetLocal(variableName, out var slot)
        && Locals.GetLocalType(variableName) == Types.StringBuilder)
    {
        return slot;
    }

    return null;
}

/// <summary>
/// Resolves the generated shape struct for a promoted object-literal local by its canonical shape
/// key (#862), or null if shapes are not threaded into this context / the key is unknown. Used at the
Expand Down
2 changes: 2 additions & 0 deletions Compilation/ILCompiler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ public void Compile(List<Stmt> statements, TypeMap typeMap, DeadCodeInfo? deadCo
Phase1_EmitRuntimeTypes();
Phase2_AnalyzeClosures(statements);
ArrayLocalPromotionAnalyzer.Analyze(statements, _typeMap, _closures.Analyzer);
StringAccumulatorPromotionAnalyzer.Analyze(statements, _typeMap, _closures.Analyzer);
NonEscapingArrowLocalAnalyzer.Analyze(statements, _closures.DirectCallArrowBindings, _closures.Analyzer);
ObjectLocalPromotionAnalyzer.Analyze(statements, _typeMap, _closures.Analyzer);
Phase3_CreateProgramType();
Expand Down Expand Up @@ -955,6 +956,7 @@ public void CompileModules(List<ParsedModule> modules, ModuleResolver resolver,
Phase1_EmitRuntimeTypes();
Phase2_AnalyzeClosures(allStatements);
ArrayLocalPromotionAnalyzer.Analyze(allStatements, _typeMap, _closures.Analyzer);
StringAccumulatorPromotionAnalyzer.Analyze(allStatements, _typeMap, _closures.Analyzer);
NonEscapingArrowLocalAnalyzer.Analyze(allStatements, _closures.DirectCallArrowBindings, _closures.Analyzer);
ObjectLocalPromotionAnalyzer.Analyze(allStatements, _typeMap, _closures.Analyzer);
Phase3_CreateProgramType();
Expand Down
10 changes: 10 additions & 0 deletions Compilation/ILEmitter.Calls.MethodDispatch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ private void EmitMethodCall(Expr.Get methodGet, List<Expr> arguments)
return;
}

// Promoted string-accumulator charCodeAt (#857): read the StringBuilder's UTF-16 code unit
// directly via its [int] indexer (== JS charCodeAt); out-of-range yields NaN (JS semantics).
// Must intercept before the string-method path, which would emit the slot as a string receiver.
if (methodName == "charCodeAt" && !methodGet.Optional && methodGet.Object is Expr.Variable ccVar
&& _ctx.TryGetPromotedStringAccumulator(ccVar.Name.Lexeme) is { } ccSb)
{
EmitPromotedStringCharCodeAt(ccSb, arguments);
return;
}

// Try direct dispatch for known class instance methods
TypeSystem.TypeInfo? objType = _ctx.TypeMap?.Get(methodGet.Object);
if (TryEmitDirectMethodCall(methodGet.Object, objType, methodName, arguments))
Expand Down
17 changes: 17 additions & 0 deletions Compilation/ILEmitter.Expressions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,23 @@ protected override void EmitAssign(Expr.Assign a)
// rest of the module body and ultimately trips PathStackDepth at the final ret.
if (TryEmitCjsAssign(a)) return;

// Promoted string-accumulator append (#857): `s = s + E` where `s` is a StringBuilder slot.
// Emit `sb.Append(E)` instead of evaluating `s + E` (String.Concat) and storing — turning the
// O(n²) accumulation into O(n). The analyzer promotes `s` only when every such append is in
// statement position, so the Append-returned builder left on the stack is the single value
// `Stmt.Expression` pops (the discarded assignment-expression result).
if (a.Value is Expr.Binary { Operator.Type: TokenType.PLUS, Left: Expr.Variable accLeft } accBin
&& accLeft.Name.Lexeme == a.Name.Lexeme
&& _ctx.TryGetPromotedStringAccumulator(a.Name.Lexeme) is { } accSb)
{
IL.Emit(OpCodes.Ldloc, accSb);
EmitExpression(accBin.Right);
EnsureString();
IL.Emit(OpCodes.Callvirt, _ctx.Types.StringBuilderAppendString);
SetStackUnknown();
return;
}

EmitExpression(a.Value);

// 0. Per-iteration loop-binding cell (#650): write through the StrongBox so the
Expand Down
14 changes: 14 additions & 0 deletions Compilation/ILEmitter.Operators.cs
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,20 @@ protected override void EmitUnary(Expr.Unary u)

protected override void EmitCompoundAssign(Expr.CompoundAssign ca)
{
// Promoted string-accumulator append (#857): `s += E` where `s` is a StringBuilder slot →
// `sb.Append(E)`. Promotion guarantees statement position, so the Append-returned builder on the
// stack is the value Stmt.Expression pops. See EmitAssign and StringAccumulatorPromotionAnalyzer.
if (ca.Operator.Type == TokenType.PLUS_EQUAL
&& _ctx.TryGetPromotedStringAccumulator(ca.Name.Lexeme) is { } accSb)
{
IL.Emit(OpCodes.Ldloc, accSb);
EmitExpression(ca.Value);
EnsureString();
IL.Emit(OpCodes.Callvirt, _ctx.Types.StringBuilderAppendString);
SetStackUnknown();
return;
}

var local = _ctx.Locals.GetLocal(ca.Name.Lexeme);
FieldBuilder? topLevelField = null;
_ctx.TopLevelStaticVars?.TryGetValue(ca.Name.Lexeme, out topLevelField);
Expand Down
53 changes: 53 additions & 0 deletions Compilation/ILEmitter.Properties.cs
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,18 @@ protected override void EmitGet(Expr.Get g)
return;
}

// Promoted string-accumulator `.length` (#857): direct StringBuilder.Length. .NET StringBuilder
// .Length is UTF-16 code units, identical to JS string .length — no materialization.
if (!g.Optional && g.Name.Lexeme == "length" && g.Object is Expr.Variable accLenVar
&& _ctx.TryGetPromotedStringAccumulator(accLenVar.Name.Lexeme) is { } accLenSb)
{
IL.Emit(OpCodes.Ldloc, accLenSb);
IL.Emit(OpCodes.Callvirt, _ctx.Types.GetProperty(_ctx.Types.StringBuilder, "Length").GetGetMethod()!);
IL.Emit(OpCodes.Conv_R8);
SetStackType(StackType.Double);
return;
}

// Try direct getter dispatch for known class instance types
TypeInfo? objType = _ctx.TypeMap?.Get(g.Object);
if (TryEmitDirectGetterCall(g.Object, objType, g.Name.Lexeme))
Expand Down Expand Up @@ -1234,6 +1246,47 @@ private void EmitPromotedArrayPush(LocalBuilder list, ArrayElementsDescriptor de
SetStackType(StackType.Double);
}

/// <summary>
/// Emits <c>s.charCodeAt(i)</c> for a promoted string-accumulator (StringBuilder slot): reads the
/// UTF-16 code unit directly via the <c>this[int]</c> indexer, with an out-of-range (incl. negative)
/// result of NaN. Leaves a boxed double on the stack, matching the string-method call convention.
/// <para>
/// The position is normalized in double space before it is narrowed to int32: a NaN position becomes 0
/// (JS <c>ToIntegerOrInfinity</c>), and positions at or below -1 or at or above 2^31 (including
/// ±Infinity) go straight to the NaN result. This keeps <c>conv.i4</c> away from inputs for which
/// ECMA-335 leaves its result unspecified (NaN / overflow), so the emitted code behaves the same on
/// every platform and runtime version.
/// </para>
/// </summary>
/// <param name="sb">The promoted local's <c>StringBuilder</c> slot.</param>
/// <param name="arguments">Call arguments; only the first is used, and a missing one reads position 0.</param>
private void EmitPromotedStringCharCodeAt(LocalBuilder sb, List<Expr> arguments)
{
    var getLength = _ctx.Types.GetProperty(_ctx.Types.StringBuilder, "Length").GetGetMethod()!;
    var getChars = _ctx.Types.GetMethod(_ctx.Types.StringBuilder, "get_Chars", _ctx.Types.Int32);

    var posLocal = IL.DeclareLocal(_ctx.Types.Double);
    var idxLocal = IL.DeclareLocal(_ctx.Types.Int32);

    if (arguments.Count > 0) EmitExpressionAsDouble(arguments[0]);
    else IL.Emit(OpCodes.Ldc_R8, 0.0);
    IL.Emit(OpCodes.Stloc, posLocal);

    var posReady = IL.DefineLabel();
    var oob = IL.DefineLabel();
    var end = IL.DefineLabel();

    // NaN -> 0: a floating-point beq is an ordered compare, so `pos == pos` fails only for NaN and
    // falls through to the store that replaces it with 0.
    IL.Emit(OpCodes.Ldloc, posLocal);
    IL.Emit(OpCodes.Ldloc, posLocal);
    IL.Emit(OpCodes.Beq, posReady);
    IL.Emit(OpCodes.Ldc_R8, 0.0);
    IL.Emit(OpCodes.Stloc, posLocal);
    IL.MarkLabel(posReady);

    // Anything that truncates below 0 or does not fit in int32 can never be a valid index, and must
    // not reach conv.i4 (whose result for overflowing inputs is unspecified).
    IL.Emit(OpCodes.Ldloc, posLocal);
    IL.Emit(OpCodes.Ldc_R8, -1.0);
    IL.Emit(OpCodes.Ble, oob);
    IL.Emit(OpCodes.Ldloc, posLocal);
    IL.Emit(OpCodes.Ldc_R8, 2147483648.0);
    IL.Emit(OpCodes.Bge, oob);

    // pos is now in (-1, 2^31): conv.i4 truncates toward zero to a well-defined value in [0, int.MaxValue].
    IL.Emit(OpCodes.Ldloc, posLocal);
    IL.Emit(OpCodes.Conv_I4);
    IL.Emit(OpCodes.Stloc, idxLocal);

    // if ((uint)idx >= (uint)sb.Length) -> NaN
    IL.Emit(OpCodes.Ldloc, idxLocal);
    IL.Emit(OpCodes.Ldloc, sb);
    IL.Emit(OpCodes.Callvirt, getLength);
    IL.Emit(OpCodes.Bge_Un, oob);

    // In range: sb[idx] widened to a boxed double.
    IL.Emit(OpCodes.Ldloc, sb);
    IL.Emit(OpCodes.Ldloc, idxLocal);
    IL.Emit(OpCodes.Callvirt, getChars);
    IL.Emit(OpCodes.Conv_R8);
    IL.Emit(OpCodes.Box, _ctx.Types.Double);
    IL.Emit(OpCodes.Br, end);

    IL.MarkLabel(oob);
    IL.Emit(OpCodes.Ldc_R8, double.NaN);
    IL.Emit(OpCodes.Box, _ctx.Types.Double);

    IL.MarkLabel(end);
    SetStackUnknown();
}

/// <summary>
/// Emits the common List&lt;object?&gt; / $Array set path with frozen checks and fallback.
/// Shared by all descriptor-driven SetIndex paths (typed miss fallthrough and object direct).
Expand Down
17 changes: 17 additions & 0 deletions Compilation/ILEmitter.Statements.cs
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,23 @@ protected override void EmitVarDeclaration(Stmt.Var v)
return;
}

// Append-only string-accumulator promotion (#857): a provably non-escaping `string` local with
// a string-literal initializer, used only via `s = s + str`/`s += str` (statement position),
// `s.length`, and `s.charCodeAt(i)`, is backed by a StringBuilder slot — turning O(n²) repeated
// String.Concat (each copies the whole accumulator) into amortized-O(1) Append. StringBuilder
// .Length and the [i] indexer are UTF-16 code units, identical to JS .length/charCodeAt, so those
// reads need no materialization. Reached only AFTER the capture branches above (the analyzer
// excludes captured names); the append/length/charCodeAt fast paths key off the slot's CLR type.
if (_ctx.TypeMap != null && _ctx.TypeMap.IsPromotableStringAccumulator(v.Name)
&& v.Initializer is Expr.Literal { Value: string seedStr })
{
var sbLocal = _ctx.Locals.DeclareLocal(v.Name.Lexeme, _ctx.Types.StringBuilder);
IL.Emit(OpCodes.Ldstr, seedStr);
IL.Emit(OpCodes.Newobj, _ctx.Types.StringBuilderStringCtor);
IL.Emit(OpCodes.Stloc, sbLocal);
return;
}

// Determine if this local can use unboxed double type
Type localType = CanUseUnboxedLocal(v) ? _ctx.Types.Double : _ctx.Types.Object;
var local = _ctx.Locals.DeclareLocal(v.Name.Lexeme, localType);
Expand Down
182 changes: 182 additions & 0 deletions Compilation/StringAccumulatorPromotionAnalyzer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
using SharpTS.Parsing;
using SharpTS.Parsing.Visitors;
using SharpTS.TypeSystem;

namespace SharpTS.Compilation;

/// <summary>
/// Whole-program analysis that flags <c>string</c> local declarations which can be promoted from the
/// default <c>object</c> slot to a concrete <c>StringBuilder</c> slot (#857). Repeated
/// <c>s = s + str</c> / <c>s += str</c> on an <c>object</c>/<c>string</c> slot lowers to
/// <c>String.Concat</c>, which copies the whole accumulator every iteration — O(n²). A StringBuilder
/// slot turns that into amortized-O(1) <c>Append</c> (O(n) total). <c>StringBuilder.Length</c> and the
/// <c>this[int]</c> indexer are UTF-16 code units, identical to JS <c>.length</c> and
/// <c>charCodeAt(i)</c>, so those reads need no materialization.
///
/// <para>Deliberately-conservative first cut (mirrors <see cref="ArrayLocalPromotionAnalyzer"/>): a local
/// is promoted only when provably non-escaping AND every use is one of a tiny permitted set, so the bare
/// <c>StringBuilder</c> (which is NOT a <c>string</c>) is never observed anywhere that expects a string.
/// A candidate <c>s</c> qualifies iff ALL hold:</para>
/// <list type="number">
/// <item>declared <c>const</c>/<c>let</c> with a string-literal initializer (<c>""</c> or any <c>"…"</c>);</item>
/// <item>the name is declared exactly once in the whole program (conservative shadowing guard);</item>
/// <item>the name is not captured by any closure (a captured local is routed to an <c>object</c>
/// display-class field, never a typed slot);</item>
/// <item>every use is one of: an <b>append in statement position</b> (<c>s = s + E</c> or <c>s += E</c>
/// as an expression statement, where <c>E</c> is statically <c>string</c> and does not reference
/// <c>s</c>), <c>s.length</c>, or <c>s.charCodeAt(i)</c>. Any other appearance — return, argument
/// pass, <c>s[i]</c>, other method/property, comparison, template literal, reassignment to a
/// non-append value, a non-string append, or an append used as a value — disqualifies it.</item>
/// </list>
///
/// <para>Append must be in statement position because <c>s = s + E</c> evaluates to the new string; with a
/// StringBuilder slot that result cannot be produced without an O(n) <c>ToString()</c>. As an expression
/// statement the result is discarded (<c>Stmt.Expression</c> pops it), so the emitter leaves the
/// <c>Append</c>-returned builder on the stack as the one popped value. Materialize-on-escape
/// (<c>return s</c>, pass, index, other methods) is a deliberate Phase-2 follow-up.</para>
/// </summary>
public static class StringAccumulatorPromotionAnalyzer
{
    /// <summary>
    /// Walks every statement of <paramref name="program"/> once, then marks on <paramref name="typeMap"/>
    /// each string-literal-seeded local that is declared exactly once in its function scope, has no
    /// disqualifying use, and is not reported as captured by <paramref name="closures"/>.
    /// Does nothing when <paramref name="typeMap"/> is null.
    /// </summary>
    public static void Analyze(List<Stmt> program, TypeMap? typeMap, ClosureAnalyzer? closures)
    {
        if (typeMap is null)
        {
            return;
        }

        var walker = new Visitor(typeMap);
        foreach (var statement in program)
        {
            walker.Visit(statement);
        }

        foreach (var candidate in walker.Candidates)
        {
            var slot = candidate.Key;

            // Guards run in this order: escaping use, redeclaration within the scope, then capture.
            // The capture query is by lexeme only (conservative): a captured local lives in an object
            // display-class field and can never be given a StringBuilder slot.
            if (walker.Disqualified.Contains(slot)
                || walker.DeclCount.GetValueOrDefault(slot) != 1
                || closures?.IsVariableCaptured(slot.Name) == true)
            {
                continue;
            }

            typeMap.MarkPromotableStringAccumulator(candidate.Value);
        }
    }

    /// <summary>
    /// AST walk that records candidate declarations and every use that rules a name out. All
    /// bookkeeping is keyed by (function scope id, lexeme) so that an escaping variable in one
    /// function does not poison an identically named accumulator in another; the module top level
    /// is scope 0 and each function/arrow body gets a fresh id.
    /// </summary>
    private sealed class Visitor(TypeMap typeMap) : AstVisitorBase
    {
        private readonly TypeMap _typeMap = typeMap;

        // Id of the scope currently being walked, and the last id handed out.
        private int _scope;
        private int _nextScope;

        /// <summary>First string-literal-initialized declaration seen for each (scope, name), by name token.</summary>
        public Dictionary<(int Scope, string Name), Token> Candidates { get; } = new();

        /// <summary>Number of declarations seen for each (scope, name).</summary>
        public Dictionary<(int Scope, string Name), int> DeclCount { get; } = new();

        /// <summary>(scope, name) pairs that have at least one non-permitted occurrence.</summary>
        public HashSet<(int Scope, string Name)> Disqualified { get; } = new();

        protected override void VisitFunction(Stmt.Function stmt)
        {
            var outer = EnterScope();
            base.VisitFunction(stmt);
            _scope = outer;
        }

        protected override void VisitArrowFunction(Expr.ArrowFunction expr)
        {
            var outer = EnterScope();
            base.VisitArrowFunction(expr);
            _scope = outer;
        }

        /// <summary>Switches to a freshly numbered scope and returns the id of the scope being left.</summary>
        private int EnterScope()
        {
            var outer = _scope;
            _nextScope++;
            _scope = _nextScope;
            return outer;
        }

        protected override void VisitVar(Stmt.Var stmt)
        {
            HandleDeclaration(stmt.Name, stmt.Initializer);
        }

        protected override void VisitConst(Stmt.Const stmt)
        {
            HandleDeclaration(stmt.Name, stmt.Initializer);
        }

        /// <summary>
        /// Counts the declaration, registers it as a candidate when it is seeded with a string literal
        /// (first one per key wins), and otherwise walks the initializer so uses inside it are seen.
        /// </summary>
        private void HandleDeclaration(Token name, Expr? initializer)
        {
            var slot = (_scope, name.Lexeme);

            DeclCount.TryGetValue(slot, out var seenSoFar);
            DeclCount[slot] = seenSoFar + 1;

            if (initializer is Expr.Literal { Value: string })
            {
                // A literal has no sub-expressions to walk.
                Candidates.TryAdd(slot, name);
                return;
            }

            if (initializer is not null)
            {
                Visit(initializer);
            }
        }

        protected override void VisitExpressionStmt(Stmt.Expression stmt)
        {
            // The two permitted append shapes, valid only as a whole expression statement (result
            // discarded) and only when the appended operand is statically a string. Only the operand
            // is walked: the assignment target and the inner self-read are consumed here. Walking the
            // operand still disqualifies a self-reference through the VisitVariable catch-all.

            // `s = s + E`
            if (stmt.Expr is Expr.Assign { Value: Expr.Binary { Operator.Type: TokenType.PLUS, Left: Expr.Variable self } sum } assign
                && self.Name.Lexeme == assign.Name.Lexeme
                && IsStaticString(sum.Right))
            {
                Visit(sum.Right);
                return;
            }

            // `s += E`
            if (stmt.Expr is Expr.CompoundAssign { Operator.Type: TokenType.PLUS_EQUAL } compound
                && IsStaticString(compound.Value))
            {
                Visit(compound.Value);
                return;
            }

            base.VisitExpressionStmt(stmt);
        }

        protected override void VisitGet(Expr.Get expr)
        {
            // A non-optional `.length` read directly on a variable is permitted, so its receiver is
            // not walked; every other property access walks its receiver.
            bool isPermittedLengthRead =
                !expr.Optional && expr.Object is Expr.Variable && expr.Name.Lexeme == "length";

            if (!isPermittedLengthRead)
            {
                Visit(expr.Object);
            }
        }

        protected override void VisitCall(Expr.Call expr)
        {
            if (expr.Callee is not Expr.Get { Object: Expr.Variable, Optional: false, Name.Lexeme: "charCodeAt" })
            {
                base.VisitCall(expr);
                return;
            }

            // Permitted `s.charCodeAt(i)`: the receiver variable is skipped, the arguments are walked.
            foreach (var argument in expr.Arguments)
            {
                Visit(argument);
            }
        }

        protected override void VisitAssign(Expr.Assign expr)
        {
            // Any assignment that reaches here was not consumed as a statement-position append:
            // a plain reassignment, a non-string or prepend form, or an append whose value is used.
            Disqualified.Add((_scope, expr.Name.Lexeme));
            base.VisitAssign(expr);
        }

        protected override void VisitCompoundAssign(Expr.CompoundAssign expr)
        {
            // Same reasoning as VisitAssign: a compound assignment seen here is not a permitted append.
            Disqualified.Add((_scope, expr.Name.Lexeme));
            base.VisitCompoundAssign(expr);
        }

        protected override void VisitVariable(Expr.Variable expr)
        {
            // Catch-all: a bare variable occurrence that no permitted-use override consumed counts
            // as an escape of that name in the current scope.
            Disqualified.Add((_scope, expr.Name.Lexeme));
        }

        /// <summary>True when the type map's recorded type for <paramref name="e"/> is string-only.</summary>
        private bool IsStaticString(Expr e)
        {
            return IsStringTypeInfo(_typeMap.Get(e));
        }

        /// <summary>
        /// True for the string type, a string-literal type, or a union whose flattened members all
        /// satisfy this same test; false for anything else, including a missing type.
        /// </summary>
        private static bool IsStringTypeInfo(TypeInfo? type)
        {
            if (type is TypeInfo.String or TypeInfo.StringLiteral)
            {
                return true;
            }

            return type is TypeInfo.Union union && union.FlattenedTypes.All(IsStringTypeInfo);
        }
    }
}
Loading
Loading