module Unparser
  # Unparse an AST node and report where each node landed in the output
  #
  # Wraps the arguments into an AST value and delegates to
  # .unparse_ast_with_source_map.
  #
  # @param [Parser::AST::Node, nil] node
  # @param [Array] comments
  # @param [Encoding, nil] explicit_encoding
  # @param [Set] static_local_variables
  #
  # @return [Array(String, SourceMap)]
  #   the generated source followed by the frozen source map
  #
  # @raise InvalidNodeError
  #   if the node passed is invalid
  #
  # @api public
  #
  # rubocop:disable Metrics/ParameterLists
  def self.unparse_with_source_map(
    node,
    comments: EMPTY_ARRAY,
    explicit_encoding: nil,
    static_local_variables: Set.new
  )
    ast = AST.new(
      comments: comments,
      explicit_encoding: explicit_encoding,
      node: node,
      static_local_variables: static_local_variables
    )

    unparse_ast_with_source_map(ast)
  end
  # rubocop:enable Metrics/ParameterLists

  # Unparse an AST
  #
  # @param [AST] ast
  #
  # @return [String]
  #   the content of the buffer the AST was emitted into
  #
  # @api public
  def self.unparse_ast(ast)
    buffer = emit_ast(ast)
    buffer.content
  end

  # Unparse an AST and collect a source map while emitting
  #
  # A fresh SourceMap is handed to the emitter, the generated text is read
  # from the resulting buffer, and the map is frozen before it is returned.
  #
  # @param [AST] ast
  #
  # @return [Array(String, SourceMap)]
  #
  # @raise InvalidNodeError
  #   if the node passed is invalid
  #
  # @api public
  #
  def self.unparse_ast_with_source_map(ast)
    mapping   = SourceMap.new
    generated = emit_ast(ast, source_map: mapping).content

    # Freeze only after emission: recording appends to the map.
    mapping.freeze

    [generated, mapping]
  end
end
module Unparser
  class Buffer
    # Initialize object
    #
    # @param [SourceMap, nil] source_map
    #   optional collector that #record_node reports node ranges to
    #
    # @return [undefined]
    #
    # @api private
    #
    def initialize(source_map: nil)
      @content    = +''
      @heredocs   = []
      @indent     = 0
      @no_nl      = true
      @source_map = source_map
    end

    # Return the source map, if any
    #
    # @return [SourceMap, nil]
    #
    # @api private
    attr_reader :source_map

    # Return the current write position
    #
    # This is the length of the content written so far, so it is also the
    # offset at which the next write will start.
    #
    # @return [Integer]
    #
    # @api private
    def position
      @content.length
    end

    # Record a node's output range in the source map
    #
    # Yields to the block, which is expected to write the node into this
    # buffer, and records the half-open range from the position before the
    # block ran up to the position after it. Without a source map the block
    # is simply yielded to.
    #
    # @param node [Parser::AST::Node]
    #
    # @yield emits the node
    #
    # @return [Object] the block's return value
    #
    # @api private
    def record_node(node)
      return yield unless @source_map

      start_pos = position
      result    = yield
      # Exclusive end: `position` now points one past the last written char.
      @source_map.record(node: node, generated_range: start_pos...position)
      result
    end
  end
end
# frozen_string_literal: true

# -- lib/unparser/generation.rb ------------------------------------------
# The two helpers below come from the generation code; their enclosing
# definition is outside this chunk, so they are shown without it.

# Emit a node through its emitter
#
# The write is wrapped in Buffer#record_node so the node's generated range
# is recorded when the buffer carries a source map.
#
# @param [Parser::AST::Node] node
#
# @return [Object] whatever the emitter's #write_to_buffer returns
#
# mutant:disable
def visit(node)
  buffer.record_node(node) { emitter(node).write_to_buffer }
end

# Emit a node through its emitter and hand the emitter back
#
# Same as #visit, but the block evaluates to the emitter itself (via #tap),
# which Buffer#record_node passes through to the caller.
#
# @param [Parser::AST::Node] node
#
# @return [Object] the emitter that wrote the node
def visit_deep(node)
  buffer.record_node(node) { emitter(node).tap(&:write_to_buffer) }
end

# -- lib/unparser/source_map.rb ------------------------------------------
module Unparser
  # Maps AST nodes to their generated output ranges
  class SourceMap
    # Single mapping entry from an AST node to its range in generated output
    #
    # Entries freeze themselves on construction.
    class Entry
      attr_reader :node, :generated_range

      # @param node [Parser::AST::Node]
      # @param generated_range [Range]
      def initialize(node:, generated_range:)
        @node            = node
        @generated_range = generated_range
        freeze
      end
    end # Entry

    # All entries in the order they were recorded
    #
    # @return [Array<Entry>]
    attr_reader :entries

    def initialize
      @entries = []
      # Identity-keyed index so #for_node does not have to scan every entry.
      # Structurally equal but distinct nodes must stay separate, hence
      # compare_by_identity rather than the nodes' own #eql?/#hash.
      @by_node = {}.compare_by_identity
    end

    # Record a node mapping
    #
    # @param node [Parser::AST::Node]
    # @param generated_range [Range]
    #
    # @return [self]
    #
    # @raise [FrozenError] when the source map has been frozen
    def record(node:, generated_range:)
      entry = Entry.new(node: node, generated_range: generated_range)
      # Append to @entries first: on a frozen map this raises before the
      # index is touched.
      @entries << entry
      (@by_node[node] ||= []) << entry
      self
    end

    # Find all entries for a specific node (by identity)
    #
    # @param node [Parser::AST::Node]
    #
    # @return [Array<Entry>]
    #   a new array, in recording order; empty when the node is unknown
    def for_node(node)
      matches = @by_node[node]
      # Copy so callers can never mutate (or see as frozen) the index.
      matches ? matches.dup : []
    end

    # Freeze the source map and its entries
    #
    # @return [self]
    def freeze
      @entries.freeze
      @by_node.each_value(&:freeze)
      @by_node.freeze
      super
    end
  end # SourceMap
end # Unparser
# frozen_string_literal: true

require 'spec_helper'

# NOTE(review): runs of spaces inside string literals in this spec look like
# they were collapsed by extraction (e.g. the indented method body below);
# the literals are reproduced exactly as they appear here — confirm against
# the original file.

RSpec.describe Unparser::SourceMap::Entry do
  describe '#initialize' do
    subject(:entry) { described_class.new(node: node, generated_range: range) }

    let(:node)  { s(:int, 42) }
    let(:range) { 0...2 }

    it 'sets node and generated_range' do
      expect(entry.node).to equal(node)
      expect(entry.generated_range).to eql(0...2)
    end

    it 'freezes the entry' do
      expect(entry).to be_frozen
    end
  end
end

RSpec.describe Unparser::SourceMap do
  subject(:source_map) { described_class.new }

  describe '#record' do
    it 'appends an entry and returns self' do
      node = s(:int, 42)

      returned = source_map.record(node: node, generated_range: 0...2)
      recorded = source_map.entries

      expect(returned).to equal(source_map)
      expect(recorded.size).to eql(1)
      expect(recorded.first.node).to equal(node)
      expect(recorded.first.generated_range).to eql(0...2)
    end
  end

  describe '#for_node' do
    let(:node_a) { s(:int, 1) }

    before { source_map.record(node: node_a, generated_range: 0...1) }

    it 'returns entries matching by identity' do
      source_map.record(node: s(:int, 2), generated_range: 2...3)

      matches = source_map.for_node(node_a)

      expect(matches.size).to eql(1)
      expect(matches.first.generated_range).to eql(0...1)
    end

    it 'does not match structurally equal but distinct nodes' do
      lookalike = s(:int, 1)

      expect(source_map.for_node(lookalike)).to eql([])
    end
  end

  describe '#freeze' do
    before { source_map.freeze }

    it 'freezes entries' do
      expect(source_map.entries).to be_frozen
    end

    it 'freezes the source map itself' do
      expect(source_map).to be_frozen
    end

    it 'prevents further recording' do
      expect { source_map.record(node: s(:int, 1), generated_range: 0...1) }
        .to raise_error(FrozenError)
    end
  end
end

RSpec.describe Unparser::Buffer do
  describe '#position' do
    subject(:buffer) { described_class.new }

    it 'returns 0 for empty buffer' do
      expect(buffer.position).to eql(0)
    end

    it 'returns content length after appending' do
      buffer.append('foo')

      expect(buffer.position).to eql(3)
    end

    it 'reflects all accumulated content' do
      buffer.append('hello')
      buffer.nl
      buffer.append('world')

      expect(buffer.position).to eql(buffer.content.length)
    end
  end

  describe '#source_map' do
    it 'returns nil by default' do
      expect(described_class.new.source_map).to be_nil
    end

    it 'returns the source map when provided' do
      source_map = Unparser::SourceMap.new
      buffer     = described_class.new(source_map: source_map)

      expect(buffer.source_map).to equal(source_map)
    end
  end

  describe '#record_node' do
    context 'without source_map' do
      subject(:buffer) { described_class.new }

      it 'yields and returns the block value' do
        returned = buffer.record_node(s(:int, 1)) { :value }

        expect(returned).to eql(:value)
      end

      it 'does not raise' do
        expect { buffer.record_node(s(:int, 1)) { buffer.append('x') } }.not_to raise_error
      end
    end

    context 'with source_map' do
      subject(:buffer) { described_class.new(source_map: source_map) }

      let(:source_map) { Unparser::SourceMap.new }

      it 'records the node with its generated range' do
        node = s(:int, 42)

        buffer.append('prefix')
        buffer.record_node(node) { buffer.append('hello') }

        matches = source_map.for_node(node)

        expect(matches.size).to eql(1)
        expect(matches.first.generated_range).to eql(6...11)
        expect(matches.first.node).to equal(node)
      end

      it 'returns the block value' do
        returned = buffer.record_node(s(:int, 1)) { :value }

        expect(returned).to eql(:value)
      end

      it 'records an exclusive range' do
        buffer.record_node(s(:int, 1)) { buffer.append('x') }

        expect(source_map.entries.first.generated_range.exclude_end?).to be(true)
      end
    end
  end
end

RSpec.describe Unparser, '.unparse_with_source_map' do
  context 'with nil node' do
    it 'returns empty string and empty frozen source map' do
      generated, map = described_class.unparse_with_source_map(nil)

      expect(generated).to eql('')
      expect(map.entries).to be_empty
      expect(map).to be_frozen
    end
  end

  context 'with a simple integer literal' do
    it 'maps the node to its generated range' do
      node = Unparser.parse('42')
      generated, map = described_class.unparse_with_source_map(node)

      expect(generated).to eql(Unparser.unparse(node))

      matches = map.for_node(node)

      expect(matches).not_to be_empty
      expect(matches.any? { |entry| generated[entry.generated_range] == '42' }).to be(true)
    end
  end

  context 'with a method definition' do
    it 'maps parent and child nodes with exact ranges' do
      node = Unparser.parse("def foo\n 42\nend")
      generated, map = described_class.unparse_with_source_map(node)

      expect(generated).to eql("def foo\n 42\nend")

      # The def node itself must be mapped to text containing its header.
      def_matches = map.for_node(node)

      expect(def_matches).not_to be_empty
      expect(def_matches.any? { |entry| generated[entry.generated_range].include?('def foo') }).to be(true)

      # The integer child must map to an exclusive range that begins after
      # offset 0, i.e. at the indented body line.
      int_matches = map.for_node(node.children.last)

      expect(int_matches).not_to be_empty

      int_range = int_matches.first.generated_range

      expect(int_range).to be_a(Range)
      expect(int_range.exclude_end?).to be(true)
      expect(int_range.begin).to be > 0
      expect(generated[int_range]).to eql(' 42')
    end
  end

  context 'with a local variable assignment' do
    it 'maps all nodes' do
      node = Unparser.parse('x = 1')
      generated, map = described_class.unparse_with_source_map(node)

      expect(generated).to eql(Unparser.unparse(node))
      expect(map.entries).not_to be_empty
    end
  end

  context 'with multiple statements' do
    let(:node) { Unparser.parse("x = 1\ny = 2") }

    it 'maps the root begin node' do
      _generated, map = described_class.unparse_with_source_map(node)

      # The root begin node entry comes from emit_ast, not visit_deep
      root_matches = map.for_node(node)

      expect(root_matches).not_to be_empty
      expect(root_matches.first.node).to equal(node)
    end

    it 'maps each statement node' do
      generated, map = described_class.unparse_with_source_map(node)

      expect(generated).to eql(Unparser.unparse(node))

      first_matches  = map.for_node(node.children.first)
      second_matches = map.for_node(node.children.last)

      expect(first_matches).not_to be_empty
      expect(second_matches).not_to be_empty

      expect(first_matches.any? { |entry| generated[entry.generated_range].include?('x = 1') }).to be(true)
      expect(second_matches.any? { |entry| generated[entry.generated_range].include?('y = 2') }).to be(true)
    end
  end

  context 'with explicit_encoding' do
    it 'forwards encoding and affects output' do
      node = Unparser.parse('"hello"')

      plain, = described_class.unparse_with_source_map(node)
      encoded, map = described_class.unparse_with_source_map(
        node,
        explicit_encoding: Encoding::BINARY
      )

      expect(encoded).not_to eql(plain)
      expect(map.entries).not_to be_empty
    end
  end

  context 'generates the same source as unparse' do
    [
      '42',
      ':foo',
      '"hello"',
      'x = 1',
      'def foo; end',
      'class Foo; end',
      'module Bar; end'
    ].each do |code|
      it "for #{code.inspect}" do
        node = Unparser.parse(code)
        generated, _map = described_class.unparse_with_source_map(node)

        expect(generated).to eql(Unparser.unparse(node))
      end
    end
  end
end