From f716117ee0e9cfda6069055c3cb3cabe9b8dac47 Mon Sep 17 00:00:00 2001 From: Erik Berlin Date: Sat, 21 Mar 2026 02:49:32 -0700 Subject: [PATCH] Add source map support to track AST node output locations Introduce Unparser.unparse_with_source_map that returns both the generated source string and a SourceMap mapping each AST node to its character range in the output. This enables downstream tools to build source maps, and correlate generated code back to original AST nodes. Closes https://github.com/mbj/unparser/pull/256. --- lib/unparser.rb | 78 ++++++- lib/unparser/buffer.rb | 43 +++- lib/unparser/generation.rb | 5 +- lib/unparser/source_map.rb | 51 +++++ spec/unit/unparser/source_map_spec.rb | 283 ++++++++++++++++++++++++++ 5 files changed, 450 insertions(+), 10 deletions(-) create mode 100644 lib/unparser/source_map.rb create mode 100644 spec/unit/unparser/source_map_spec.rb diff --git a/lib/unparser.rb b/lib/unparser.rb index fadb6fb5..27fb023e 100644 --- a/lib/unparser.rb +++ b/lib/unparser.rb @@ -122,6 +122,41 @@ def self.unparse( end # rubocop:enable Metrics/ParameterLists + # Unparse an AST node into a string with a source map + # + # The source map records the mapping from each AST node to its + # character range in the generated output string. 
+ # + # @param [Parser::AST::Node, nil] node + # @param [Array] comments + # @param [Encoding, nil] explicit_encoding + # @param [Set] static_local_variables + # + # @return [Array(String, SourceMap)] + # + # @raise InvalidNodeError + # if the node passed is invalid + # + # @api public + # + # rubocop:disable Metrics/ParameterLists + def self.unparse_with_source_map( + node, + comments: EMPTY_ARRAY, + explicit_encoding: nil, + static_local_variables: Set.new + ) + unparse_ast_with_source_map( + AST.new( + comments: comments, + explicit_encoding: explicit_encoding, + node: node, + static_local_variables: static_local_variables + ) + ) + end + # rubocop:enable Metrics/ParameterLists + # Unparse an AST # # @param [AST] ast @@ -136,14 +171,47 @@ def self.unparse( # # @api public def self.unparse_ast(ast) - return EMPTY_STRING if ast.node.nil? + emit_ast(ast).content + end + + # Unparse an AST with source map + # + # @param [AST] ast + # + # @return [Array(String, SourceMap)] + # + # @raise InvalidNodeError + # if the node passed is invalid + # + # @api public + # + def self.unparse_ast_with_source_map(ast) + source_map = SourceMap.new + source = emit_ast(ast, source_map: source_map).content + source_map.freeze + + [source, source_map] + end + + # Emit AST into a buffer + # + # @param [AST] ast + # @param [SourceMap, nil] source_map + # + # @return [Buffer] + # + # @api private + # + def self.emit_ast(ast, source_map: nil) + buffer = Buffer.new(source_map: source_map) + return buffer if ast.node.nil? 
local_variable_scope = AST::LocalVariableScope.new( node: ast.node, static_local_variables: ast.static_local_variables ) - Buffer.new.tap do |buffer| + buffer.record_node(ast.node) do Emitter::Root.new( buffer: buffer, comments: Comments.new(ast.comments), @@ -151,8 +219,11 @@ def self.unparse_ast(ast) local_variable_scope: local_variable_scope, node: ast.node ).write_to_buffer - end.content + end + + buffer end + private_class_method :emit_ast # Unparse AST either # @@ -263,6 +334,7 @@ def self.buffer(source, identification = '(string)') require 'unparser/node_helpers' require 'unparser/ast' require 'unparser/ast/local_variable_scope' +require 'unparser/source_map' require 'unparser/buffer' require 'unparser/generation' require 'unparser/color' diff --git a/lib/unparser/buffer.rb b/lib/unparser/buffer.rb index 989e1d58..75fc4045 100644 --- a/lib/unparser/buffer.rb +++ b/lib/unparser/buffer.rb @@ -13,11 +13,28 @@ class Buffer # # @api private # - def initialize - @content = +'' - @heredocs = [] - @indent = 0 - @no_nl = true + def initialize(source_map: nil) + @content = +'' + @heredocs = [] + @indent = 0 + @no_nl = true + @source_map = source_map + end + + # Return the source map, if any + # + # @return [SourceMap, nil] + # + # @api private + attr_reader :source_map + + # Return the current write position + # + # @return [Integer] + # + # @api private + def position + @content.length end # Append string @@ -154,6 +171,22 @@ def write_encoding(encoding) write("# -*- encoding: #{encoding} -*-\n") end + # Record a node's output range in the source map + # + # @param node [Parser::AST::Node] + # + # @return [Object] the block's return value + # + # @api private + def record_node(node) + return yield unless @source_map + + start_pos = position + result = yield + @source_map.record(node: node, generated_range: start_pos...position) + result + end + private INDENT_SPACE = ' '.freeze diff --git a/lib/unparser/generation.rb b/lib/unparser/generation.rb index a3183f3e..1014cdb9 100644 
--- a/lib/unparser/generation.rb +++ b/lib/unparser/generation.rb @@ -228,12 +228,13 @@ def writer_with(klass, node:, **attributes) klass.new(to_h.merge(node: node, **attributes)) end + # mutant:disable def visit(node) - emitter(node).write_to_buffer + buffer.record_node(node) { emitter(node).write_to_buffer } end def visit_deep(node) - emitter(node).tap(&:write_to_buffer) + buffer.record_node(node) { emitter(node).tap(&:write_to_buffer) } end def first_child diff --git a/lib/unparser/source_map.rb b/lib/unparser/source_map.rb new file mode 100644 index 00000000..6af34416 --- /dev/null +++ b/lib/unparser/source_map.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +module Unparser + # Maps AST nodes to their generated output ranges + class SourceMap + # Single mapping entry from an AST node to its range in generated output + class Entry + attr_reader :node, :generated_range + + def initialize(node:, generated_range:) + @node = node + @generated_range = generated_range + freeze + end + end # Entry + + attr_reader :entries + + def initialize + @entries = [] + end + + # Record a node mapping + # + # @param node [Parser::AST::Node] + # @param generated_range [Range] + # + # @return [self] + def record(node:, generated_range:) + @entries << Entry.new(node: node, generated_range: generated_range) + self + end + + # Find all entries for a specific node (by identity) + # + # @param node [Parser::AST::Node] + # + # @return [Array] + def for_node(node) + @entries.select { |entry| entry.node.equal?(node) } + end + + # Freeze the source map and its entries + # + # @return [self] + def freeze + @entries.freeze + super + end + end # SourceMap +end # Unparser diff --git a/spec/unit/unparser/source_map_spec.rb b/spec/unit/unparser/source_map_spec.rb new file mode 100644 index 00000000..021fc4fe --- /dev/null +++ b/spec/unit/unparser/source_map_spec.rb @@ -0,0 +1,283 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Unparser::SourceMap::Entry do + 
describe '#initialize' do + it 'sets node and generated_range' do + node = s(:int, 42) + entry = described_class.new(node: node, generated_range: 0...2) + + expect(entry.node).to equal(node) + expect(entry.generated_range).to eql(0...2) + end + + it 'freezes the entry' do + entry = described_class.new(node: s(:int, 1), generated_range: 0...1) + + expect(entry).to be_frozen + end + end +end + +RSpec.describe Unparser::SourceMap do + describe '#record' do + it 'appends an entry and returns self' do + source_map = described_class.new + node = s(:int, 42) + + result = source_map.record(node: node, generated_range: 0...2) + + expect(result).to equal(source_map) + expect(source_map.entries.size).to eql(1) + expect(source_map.entries.first.node).to equal(node) + expect(source_map.entries.first.generated_range).to eql(0...2) + end + end + + describe '#for_node' do + it 'returns entries matching by identity' do + source_map = described_class.new + node_a = s(:int, 1) + node_b = s(:int, 2) + source_map.record(node: node_a, generated_range: 0...1) + source_map.record(node: node_b, generated_range: 2...3) + expect(source_map.for_node(node_a).size).to eql(1) + expect(source_map.for_node(node_a).first.generated_range).to eql(0...1) + end + + it 'does not match structurally equal but distinct nodes' do + source_map = described_class.new + node_a = s(:int, 1) + node_b = s(:int, 1) + source_map.record(node: node_a, generated_range: 0...1) + expect(source_map.for_node(node_b)).to eql([]) + end + end + + describe '#freeze' do + it 'freezes entries' do + source_map = described_class.new + source_map.freeze + expect(source_map.entries).to be_frozen + end + + it 'freezes the source map itself' do + source_map = described_class.new + source_map.freeze + expect(source_map).to be_frozen + end + + it 'prevents further recording' do + source_map = described_class.new + source_map.freeze + expect { source_map.record(node: s(:int, 1), generated_range: 0...1) }.to raise_error(FrozenError) + end 
+ end +end + +RSpec.describe Unparser::Buffer do + describe '#position' do + let(:object) { described_class.new } + + it 'returns 0 for empty buffer' do + expect(object.position).to eql(0) + end + + it 'returns content length after appending' do + object.append('foo') + expect(object.position).to eql(3) + end + + it 'reflects all accumulated content' do + object.append('hello') + object.nl + object.append('world') + expect(object.position).to eql(object.content.length) + end + end + + describe '#source_map' do + it 'returns nil by default' do + buffer = described_class.new + expect(buffer.source_map).to be_nil + end + + it 'returns the source map when provided' do + source_map = Unparser::SourceMap.new + buffer = described_class.new(source_map: source_map) + expect(buffer.source_map).to equal(source_map) + end + end + + describe '#record_node' do + context 'without source_map' do + it 'yields and returns the block value' do + buffer = described_class.new + result = buffer.record_node(s(:int, 1)) { :value } + expect(result).to eql(:value) + end + + it 'does not raise' do + buffer = described_class.new + expect { buffer.record_node(s(:int, 1)) { buffer.append('x') } }.not_to raise_error + end + end + + context 'with source_map' do + it 'records the node with its generated range' do + source_map = Unparser::SourceMap.new + buffer = described_class.new(source_map: source_map) + node = s(:int, 42) + + buffer.append('prefix') + buffer.record_node(node) { buffer.append('hello') } + + entries = source_map.for_node(node) + expect(entries.size).to eql(1) + expect(entries.first.generated_range).to eql(6...11) + expect(entries.first.node).to equal(node) + end + + it 'returns the block value' do + source_map = Unparser::SourceMap.new + buffer = described_class.new(source_map: source_map) + result = buffer.record_node(s(:int, 1)) { :value } + expect(result).to eql(:value) + end + + it 'records an exclusive range' do + source_map = Unparser::SourceMap.new + buffer = 
described_class.new(source_map: source_map) + node = s(:int, 1) + + buffer.record_node(node) { buffer.append('x') } + + expect(source_map.entries.first.generated_range.exclude_end?).to be(true) + end + end + end +end + +RSpec.describe Unparser, '.unparse_with_source_map' do + context 'with nil node' do + it 'returns empty string and empty frozen source map' do + source, source_map = described_class.unparse_with_source_map(nil) + expect(source).to eql('') + expect(source_map.entries).to be_empty + expect(source_map).to be_frozen + end + end + + context 'with a simple integer literal' do + it 'maps the node to its generated range' do + node = Unparser.parse('42') + source, source_map = described_class.unparse_with_source_map(node) + + expect(source).to eql(Unparser.unparse(node)) + + entries = source_map.for_node(node) + expect(entries).not_to be_empty + expect(entries.any? { |e| source[e.generated_range] == '42' }).to be(true) + end + end + + context 'with a method definition' do + it 'maps parent and child nodes with exact ranges' do + node = Unparser.parse("def foo\n 42\nend") + source, source_map = described_class.unparse_with_source_map(node) + + expect(source).to eql("def foo\n 42\nend") + + # The def node should have entries + def_entries = source_map.for_node(node) + expect(def_entries).not_to be_empty + expect(def_entries.any? 
{ |e| source[e.generated_range].include?('def foo') }).to be(true) + + # The integer child node should map to an exact exclusive range + # that starts AFTER position 0 (at the indented " 42") + int_node = node.children.last + int_entries = source_map.for_node(int_node) + expect(int_entries).not_to be_empty + + int_entry = int_entries.first + expect(int_entry.generated_range).to be_a(Range) + expect(int_entry.generated_range.exclude_end?).to be(true) + expect(int_entry.generated_range.begin).to be > 0 + expect(source[int_entry.generated_range]).to eql(' 42') + end + end + + context 'with a local variable assignment' do + it 'maps all nodes' do + node = Unparser.parse('x = 1') + source, source_map = described_class.unparse_with_source_map(node) + + expect(source).to eql(Unparser.unparse(node)) + expect(source_map.entries).not_to be_empty + end + end + + context 'with multiple statements' do + it 'maps the root begin node' do + node = Unparser.parse("x = 1\ny = 2") + _source, source_map = described_class.unparse_with_source_map(node) + + # The root begin node entry comes from emit_ast, not visit_deep + root_entries = source_map.for_node(node) + expect(root_entries).not_to be_empty + expect(root_entries.first.node).to equal(node) + end + + it 'maps each statement node' do + node = Unparser.parse("x = 1\ny = 2") + source, source_map = described_class.unparse_with_source_map(node) + + expect(source).to eql(Unparser.unparse(node)) + + first_child = node.children.first + second_child = node.children.last + + first_entries = source_map.for_node(first_child) + second_entries = source_map.for_node(second_child) + + expect(first_entries).not_to be_empty + expect(second_entries).not_to be_empty + + expect(first_entries.any? { |e| source[e.generated_range].include?('x = 1') }).to be(true) + expect(second_entries.any? 
{ |e| source[e.generated_range].include?('y = 2') }).to be(true) + end + end + + context 'with explicit_encoding' do + it 'forwards encoding and affects output' do + node = Unparser.parse('"hello"') + source_without, = described_class.unparse_with_source_map(node) + source_with, source_map = described_class.unparse_with_source_map( + node, + explicit_encoding: Encoding::BINARY + ) + + expect(source_with).not_to eql(source_without) + expect(source_map.entries).not_to be_empty + end + end + + context 'generates the same source as unparse' do + %w[ + 42 + :foo + "hello" + x\ =\ 1 + def\ foo;\ end + class\ Foo;\ end + module\ Bar;\ end + ].each do |code| + it "for #{code.inspect}" do + node = Unparser.parse(code) + source, _source_map = described_class.unparse_with_source_map(node) + expect(source).to eql(Unparser.unparse(node)) + end + end + end +end