Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmark/xpath.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ contexts:
prelude: |
require 'rexml/document'

DEPTH = 30
DEPTH = 40
xml = '<a>' * DEPTH + '</a>' * DEPTH
doc = REXML::Document.new(xml)

WIDTH = 200
WIDTH = 1000
xml_wide = '<root>' + '<child/>' * WIDTH + '</root>'
doc_wide = REXML::Document.new(xml_wide)
first_child = doc_wide.root.children.first
Expand Down
63 changes: 38 additions & 25 deletions lib/rexml/xpath_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -652,35 +652,48 @@ def leave(tag, *args)
trace(:leave, tag, *args)
end

# Reorders an array of nodes so that they are in document order.
#
# Each node reachable from the roots of the given nodes is assigned an
# integer document-order position by document_order_positions (one
# depth-first pass per distinct root), and the input is then sorted by the
# position of each node's anchor (see sort_anchor). Positions are computed
# afresh on every call; nothing is cached on the nodes themselves.
#
# array_of_nodes:: the nodes to order. The array itself is not modified.
# order:: +:forward+ for document order; any other value yields reverse
#         document order.
#
# Returns a new, sorted array, or +array_of_nodes+ itself when it holds at
# most one node (there is nothing to reorder, so the traversal is skipped).
def sort(array_of_nodes, order)
  return array_of_nodes if array_of_nodes.size <= 1

  positions = document_order_positions(array_of_nodes)
  if order == :forward
    array_of_nodes.sort_by { |node| positions[sort_anchor(node)] }
  else
    # Negating the integer position reverses the ordering without a second
    # pass over the result.
    array_of_nodes.sort_by { |node| -positions[sort_anchor(node)] }
  end
end

# Returns the node used to look up a document-order position for +node+:
# an attribute is represented by its owning element, every other node by
# itself.
def sort_anchor(node)
  return node.element if node.node_type == :attribute

  node
end

# Builds a table mapping nodes to integer document-order positions.
#
# For each given node, its anchor (see sort_anchor) is followed through
# +parent+ links up to the topmost ancestor. Every distinct topmost
# ancestor is traversed exactly once, depth-first in pre-order, and each
# node reached through +children+ receives the next position, starting
# at 1. The counter is shared across roots, so nodes under a root that is
# encountered later are numbered after all nodes of earlier roots.
#
# Only nodes reached via +children+ get an entry; attributes are looked up
# through their anchor element instead.
#
# nodes:: the nodes whose trees should be numbered.
#
# Returns a Hash keyed by object identity (node => Integer position).
def document_order_positions(nodes)
  # Identity comparison: two distinct nodes must never collapse into one
  # key, regardless of how they implement == or hash.
  positions = {}.compare_by_identity
  visited_roots = {}.compare_by_identity
  counter = 0

  nodes.each do |node|
    root = sort_anchor(node)
    while (parent = root.parent)
      root = parent
    end
    next if visited_roots.key?(root)

    visited_roots[root] = true

    # Iterative pre-order walk with an explicit stack instead of recursion.
    stack = [root]
    until stack.empty?
      current = stack.pop
      positions[current] = (counter += 1)
      type = current.node_type
      next unless type == :element || type == :document

      # Push children in reverse so they are popped in their original order.
      current.children.reverse_each { |child| stack.push(child) }
    end
  end

  positions
end

def descendant(nodeset, include_self)
Expand Down
29 changes: 29 additions & 0 deletions test/xpath/test_base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -879,6 +879,35 @@ def test_descendant_or_self_ordering
1.upto(4) {|x| assert_equal( x.to_s, cs[x-1].attributes['id'] ) }
end

def test_attribute_axis_document_order_across_elements
  # The attribute axis is applied to several input elements here; the
  # attributes that come back must follow the document order of the
  # elements that own them.
  document = Document.new('<root><a id="1"/><a id="2"/><a id="3"/></root>')
  ids = XPath.match(document, "//a/@id").map { |attribute| attribute.value }
  assert_equal(%w[1 2 3], ids)
end

def test_mixed_text_and_element_children_document_order
  # Each <a> contributes a nodeset that mixes text and element children.
  # Once those per-input nodesets are merged by the sort, document order
  # must still hold across the boundary between the two <a> elements.
  xml = <<-XML
<root>
<a>t0<b id="0"/>u0</a>
<a>t1<b id="1"/>u1</a>
</root>
  XML
  children = XPath.match(Document.new(xml), "//a/node()")
  labels = children.map do |child|
    # Elements are labelled by their id attribute, text nodes by their value.
    child.is_a?(Element) ? "b##{child.attributes['id']}" : child.value
  end
  assert_equal(["t0", "b#0", "u0", "t1", "b#1", "u1"], labels)
end

def test_and
d = Document.new %q{<doc><route run='*' title='HNO'
destination='debian_production1' date='*' edition='*'
Expand Down
Loading