diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb
index a6d76fdc..25b96493 100644
--- a/lib/rexml/parsers/xpathparser.rb
+++ b/lib/rexml/parsers/xpathparser.rb
@@ -655,6 +655,10 @@ def PrimaryExpr path, parsed
n = []
OrExpr( contents, n )
parsed.concat(n)
+ # For xpath like `(/path[predicate1][predicate2])[predicate3][predicate4]`,
+ # add a separator mark to distinguish predicates of the inner parentheses and the outer parentheses.
+ type = n[0]
+ parsed.push(:self, :node) if type == :document || type == :child || type == :union
end
path
end
diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb
index c5d420ce..a8e6f09a 100644
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@@ -199,28 +199,24 @@ def expr( path_stack, nodeset, context=nil )
nodeset = [XPathNode.new(first_raw_node.root_node, position: 1)]
when :self
nodeset = step(path_stack) do
- [nodeset]
+ [:iterate_raw_nodesets, [nodeset.map(&:raw_node)]]
end
when :child
nodeset = step(path_stack) do
- child(nodeset)
+ [:iterate_raw_nodesets, child(nodeset)]
end
when :literal
trace(:literal, path_stack, nodeset) if @debug
return path_stack.shift
when :attribute
nodeset = step(path_stack, any_type: :attribute) do
- nodesets = []
- nodeset.each do |node|
+ raw_nodesets = nodeset.map do |node|
raw_node = node.raw_node
next unless raw_node.node_type == :element
attributes = raw_node.attributes
- next if attributes.empty?
- nodesets << attributes.each_attribute.collect.with_index do |attribute, i|
- XPathNode.new(attribute, position: i + 1)
- end
- end
- nodesets
+ attributes.each_attribute.to_a unless attributes.empty?
+ end.compact
+ [:iterate_raw_nodesets, raw_nodesets]
end
when :namespace
pre_defined_namespaces = {
@@ -245,11 +241,13 @@ def expr( path_stack, nodeset, context=nil )
end
end
end
- nodesets
+ # Not working at all, so just return empty nodesets for now.
+ # Needs Namespace-node class
+ [:iterate_raw_nodesets, []]
end
when :parent
nodeset = step(path_stack) do
- nodesets = []
+ parents = {}.compare_by_identity
nodeset.each do |node|
raw_node = node.raw_node
if raw_node.node_type == :attribute
@@ -257,101 +255,17 @@ def expr( path_stack, nodeset, context=nil )
else
parent = raw_node.parent
end
- nodesets << [XPathNode.new(parent, position: 1)] if parent
- end
- nodesets
- end
- when :ancestor
- nodeset = step(path_stack) do
- nodesets = []
- # new_nodes = {}
- nodeset.each do |node|
- raw_node = node.raw_node
- new_nodeset = []
- while raw_node.parent
- raw_node = raw_node.parent
- # next if new_nodes.key?(node)
- new_nodeset << XPathNode.new(raw_node,
- position: new_nodeset.size + 1)
- # new_nodes[node] = true
- end
- nodesets << new_nodeset unless new_nodeset.empty?
- end
- nodesets
- end
- when :ancestor_or_self
- nodeset = step(path_stack) do
- nodesets = []
- # new_nodes = {}
- nodeset.each do |node|
- raw_node = node.raw_node
- next unless raw_node.node_type == :element
- new_nodeset = [XPathNode.new(raw_node, position: 1)]
- # new_nodes[node] = true
- while raw_node.parent
- raw_node = raw_node.parent
- # next if new_nodes.key?(node)
- new_nodeset << XPathNode.new(raw_node,
- position: new_nodeset.size + 1)
- # new_nodes[node] = true
- end
- nodesets << new_nodeset unless new_nodeset.empty?
+ parents[parent] = true if parent
end
- nodesets
+ [:iterate_raw_nodesets, parents.keys.map {|parent| [parent] }]
end
- when :descendant_or_self
+ when :descendant, :descendant_or_self, :following, :following_sibling
nodeset = step(path_stack) do
- descendant(nodeset, true)
+ [op, nodeset.map(&:raw_node)]
end
- when :descendant
- nodeset = step(path_stack) do
- descendant(nodeset, false)
- end
- when :following_sibling
- nodeset = step(path_stack) do
- nodesets = []
- nodeset.each do |node|
- raw_node = node.raw_node
- next unless raw_node.respond_to?(:parent)
- next if raw_node.parent.nil?
- all_siblings = raw_node.parent.children
- current_index = all_siblings.index(raw_node)
- following_siblings = all_siblings[(current_index + 1)..-1]
- next if following_siblings.empty?
- nodesets << following_siblings.collect.with_index do |sibling, i|
- XPathNode.new(sibling, position: i + 1)
- end
- end
- nodesets
- end
- when :preceding_sibling
- nodeset = step(path_stack, order: :reverse) do
- nodesets = []
- nodeset.each do |node|
- raw_node = node.raw_node
- next unless raw_node.respond_to?(:parent)
- next if raw_node.parent.nil?
- all_siblings = raw_node.parent.children
- current_index = all_siblings.index(raw_node)
- preceding_siblings = all_siblings[0, current_index].reverse
- next if preceding_siblings.empty?
- nodesets << preceding_siblings.collect.with_index do |sibling, i|
- XPathNode.new(sibling, position: i + 1)
- end
- end
- nodesets
- end
- when :preceding
+ when :ancestor, :ancestor_or_self, :preceding, :preceding_sibling
nodeset = step(path_stack, order: :reverse) do
- unnode(nodeset) do |node|
- preceding(node)
- end
- end
- when :following
- nodeset = step(path_stack) do
- unnode(nodeset) do |node|
- following(node)
- end
+ [op, nodeset.map(&:raw_node)]
end
when :variable
var_name = path_stack.shift
@@ -439,7 +353,6 @@ def expr( path_stack, nodeset, context=nil )
end
Functions.context = target_context
return Functions.send(func_name, *args)
-
else
raise "[BUG] Unexpected path: <#{op.inspect}>: <#{path_stack.inspect}>"
end
@@ -449,138 +362,359 @@ def expr( path_stack, nodeset, context=nil )
leave(:expr, path_stack, nodeset) if @debug
end
+ # Determines whether a predicate expression depends on the position of nodes.
+ # Returns nil if the predicate is position-independent,
+ # :simple if the predicate is a simple position query that can be optimized in axis scanning,
+ # :complex if the predicate is a complex query that might depend on the position of nodes.
+ def position_dependency(predicate_expr)
+ # [number], [position()=number], [position() < number], [position() > number]
+ return :simple if position_operation(predicate_expr)
+
+ # expressions that return number.
+ return :complex if %i[div mod mult plus minus neg].include?(predicate_expr[0])
+ return :complex if predicate_expr[0] == :function && %w[number ceiling round floor string-length sum count].include?(predicate_expr[1])
+ # Remaining numeric literals such as Float (Integer literals are already :simple above): [2.0] means [position() = 2.0]
+ return :complex if predicate_expr[0] == :literal && Numeric === predicate_expr[1]
+ # A variable could resolve to a number at runtime: [$n] means [position() = $n].
+ return :complex if predicate_expr[0] == :variable
+
+ # expressions that contain position-dependent functions
+ return :complex if calls_position_dependent_function?(predicate_expr)
+ end
+
+ # Recursively checks if the expression contains position-dependent functions such as position() or last()
+ def calls_position_dependent_function?(expr)
+ return false unless Array === expr
+ return true if expr[0] == :function && (expr[1] == 'position' || expr[1] == 'last')
+ expr.any? {|part| calls_position_dependent_function?(part) }
+ end
+
+ # Detects simple position-based predicates that can be optimized in axis scanning, such as [1], [position()=1], [position() < 2], [position() > 3]
+ # Returns an [operator, value] pair such as [:==, 1], [:<, 2], [:>, 3] (<= and >= are normalized to < and >)
+ # Returns nil if the predicate is not a simple position-based predicate
+ def position_operation(predicate_expr)
+ return [:==, predicate_expr[1]] if predicate_expr[0] == :literal && predicate_expr[1].is_a?(Integer)
+
+ op, left, right = predicate_expr
+ return unless op == :eq || op == :lt || op == :lteq || op == :gt || op == :gteq
+ return unless [left, right].include?([:function, 'position', []])
+
+ literal = [left, right].find {|part| part[0] == :literal && part[1].is_a?(Integer) }
+ return unless literal
+
+ value = literal[1]
+ case op
+ when :eq
+ [:==, value]
+ when :lt
+ literal == right ? [:<, value] : [:>, value]
+ when :lteq
+ literal == right ? [:<, value + 1] : [:>, value - 1]
+ when :gt
+ literal == right ? [:>, value]: [:<, value]
+ when :gteq
+ literal == right ? [:>, value - 1] : [:<, value + 1]
+ end
+ end
+
+ # Pseudo scanner for axis scanning steps whose raw nodesets have already been collected by the caller
+ def iterate_raw_nodesets(raw_nodesets, tester, selector)
+ non_optimized_raw_nodesets_select(raw_nodesets, tester, selector)
+ end
+
+ # Scanner for ancestor-or-self axis
+ def ancestor_or_self(raw_nodes, tester, selector)
+ ancestor(raw_nodes, tester, selector, include_self: true)
+ end
+
+ # Scanner for preceding-sibling axis
+ def preceding_sibling(raw_nodes, tester, selector)
+ preceding_following_sibling(raw_nodes, tester, selector, reverse: true)
+ end
+
+ # Scanner for following-sibling axis
+ def following_sibling(raw_nodes, tester, selector)
+ preceding_following_sibling(raw_nodes, tester, selector, reverse: false)
+ end
+
+ def preceding_following_sibling(raw_nodes, tester, selector, reverse:)
+ raw_nodes = raw_nodes.select {|node| node.respond_to?(:parent) && node.parent }
+ case selector
+ when :uniq
+ raw_nodes.group_by(&:parent).flat_map do |parent, sibling_nodes|
+ sets = {}.compare_by_identity
+ sibling_nodes.each {|sibling| sets[sibling] = true }
+ children = parent.children
+ children = children.reverse if reverse
+ children.drop_while {|child| !sets.key?(child) }.drop(1)
+ end.select(&tester)
+ when :nodesets
+ raw_nodesets = raw_nodes.map do |raw_node|
+ parent = raw_node.parent
+ index = parent.children.index(raw_node)
+ reverse ? parent.children[0...index].reverse : parent.children[index + 1..-1]
+ end
+ non_optimized_raw_nodesets_select(raw_nodesets, tester, selector)
+ else
+ operator, value = selector
+ raw_nodes.group_by(&:parent).flat_map do |parent, sibling_nodes|
+ anchors = {}.compare_by_identity
+ sibling_nodes.each {|sibling| anchors[sibling] = true }
+ children = parent.children
+ children = children.reverse if reverse
+ followings = children.drop_while {|child| !anchors.key?(child) }.drop(1)
+ anchor_indexes = { 0 => true }
+ last_anchor = 0
+ index = 0
+ matched = []
+ followings.each do |node|
+ if tester.call(node)
+ case operator
+ when :==
+ matched << node if anchor_indexes.include?(index - value + 1)
+ when :<
+ matched << node if last_anchor > index - value + 1
+ when :>
+ matched << node if index >= value
+ end
+ index += 1
+ end
+ if anchors.key?(node)
+ anchor_indexes[index] = true
+ last_anchor = index
+ end
+ end
+ matched
+ end
+ end
+ end
+
+ # Scanner for ancestor axis
+ def ancestor(raw_nodes, tester, selector, include_self: false)
+ raw_nodes = raw_nodes.select {|node| node.respond_to?(:parent) && node.parent }
+ case selector
+ when :uniq
+ ancestors = {}.compare_by_identity
+ raw_nodes.each do |raw_node|
+ ancestors[raw_node] = true if include_self
+ parent = raw_node.parent
+ while parent
+ break if ancestors.key?(parent)
+ ancestors[parent] = true
+ parent = parent.parent
+ end
+ end
+ ancestors.keys.select(&tester)
+ else
+ # Slow path
+ raw_nodesets = raw_nodes.map do |raw_node|
+ ancestors = []
+ ancestors << raw_node if include_self
+ parent = raw_node.parent
+ while parent
+ ancestors << parent
+ parent = parent.parent
+ end
+ ancestors
+ end
+ non_optimized_raw_nodesets_select(raw_nodesets, tester, selector)
+ end
+ end
+
+ # Fallback scanner step for axes that are not optimized for position-based predicates.
+ def non_optimized_raw_nodesets_select(raw_nodesets, tester, selector)
+ nodesets = raw_nodesets.map do |nodeset|
+ nodeset.select(&tester).map.with_index(1) do |node, position|
+ XPathNode.new(node, position: position)
+ end
+ end.reject(&:empty?)
+ case selector
+ when :nodesets
+ nodesets
+ when :uniq
+ seen = {}.compare_by_identity
+ nodesets.flatten.each {|node| seen[node.raw_node] = true }
+ seen.keys
+ else
+ operator, value = selector
+ nodes = nodesets.flatten
+ nodes =
+ case operator
+ when :==
+ nodes.select {|node| node.position == value }
+ when :<
+ nodes.select {|node| node.position < value }
+ when :>
+ nodes.select {|node| node.position > value }
+ end
+ seen = {}.compare_by_identity
+ nodes.each {|node| seen[node.raw_node] = true }
+ seen.keys
+ end
+ end
+
+ # Split predicates into several groups based on their dependency on the position of nodes
+ # If there are no position-based predicates,
+ # return [position_independent_predicates, nil, [], nil]
+ # If there is only one simple position-based predicate,
+ # return [position_independent_predicates, position_operator, position_independent_predicates, nil]
+ # If there are multiple position-based predicates or complex position-based predicates,
+ # return [position_independent_predicates, nil, nil, complex_predicates]
+ def split_positional_predicates(predicates)
+ pre_independent = predicates.take_while {|predicate| position_dependency(predicate).nil? }
+ predicates = predicates.drop(pre_independent.size)
+ return [pre_independent, nil, [], nil] if predicates.empty?
+
+ op = position_operation(predicates.first)
+ if op && predicates[1..-1].all? {|predicate| position_dependency(predicate).nil? }
+ [pre_independent, op, predicates[1..-1], nil]
+ else
+ [pre_independent, nil, nil, predicates]
+ end
+ end
+
+ # Performs an axis scanning step.
+ # The caller provides a scanner method and its argument, which determines the axis to scan and the nodes to scan from:
+ # step(path_stack) { [scanner_method, scanner_argument] }
+ # Scanner methods are called with `(scanner_argument, tester_block, selector)`
+ # selector is a flag for the scanner to determine how to return the scan result.
+ # It can be: `:uniq`, `:nodesets` or `[position_comparator, value]`.
+ # `:uniq` means the scanner should return unique nodes. Predicates are position-independent.
+ # `:nodesets` means the scanner should return nodesets. Predicates are complex position queries that can't be optimized in axis scanning.
+ # `[position_comparator, value]` means the scanner should return nodes matching the position comparator and value.
+ # Each scanner method can implement an optimized scanning strategy for each selector.
+
def step(path_stack, any_type: :element, order: :forward)
- nodesets = yield
+ scanner, scanner_argument = yield
begin
- enter(:step, path_stack, nodesets) if @debug
- nodesets = node_test(path_stack, nodesets, any_type: any_type)
- while path_stack[0] == :predicate
- path_stack.shift # :predicate
- predicate_expression = path_stack.shift.dclone
- nodesets = evaluate_predicate(predicate_expression, nodesets)
- end
- if nodesets.size == 1
- ordered_nodeset = nodesets[0]
- else
- seen = {}.compare_by_identity
- raw_nodes = []
- nodesets.each do |nodeset|
- nodeset.each do |node|
- raw_node = node.respond_to?(:raw_node) ? node.raw_node : node
- next if seen.key?(raw_node)
- seen[raw_node] = true
- raw_nodes << raw_node
+ enter(:step, path_stack, scanner, scanner_argument) if @debug
+ tester = node_test(path_stack, any_type: any_type)
+ predicates = []
+ while path_stack.first == :predicate
+ path_stack.shift
+ predicates << path_stack.shift
+ end
+ pre_predicates, position_operator, post_predicates, complex_predicates = split_positional_predicates(predicates)
+
+ if pre_predicates.any?
+ original_tester = tester
+ tester = -> (raw_node) do
+ original_tester.call(raw_node) &&
+ pre_predicates.all? do |predicate_expr|
+ evaluate_predicate(predicate_expr.dclone, [[XPathNode.new(raw_node, position: 1)]]).flatten.size == 1
end
end
- ordered_nodeset = sort(raw_nodes, order)
end
- new_nodeset = []
- ordered_nodeset.each do |node|
- new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
+
+ if complex_predicates
+ nodesets = send(scanner, scanner_argument, tester, :nodesets)
+ elsif position_operator
+ nodeset = send(scanner, scanner_argument, tester, position_operator).map.with_index(1) do |raw_node, position|
+ XPathNode.new(raw_node, position: position)
+ end
+ nodesets = [nodeset]
+ else
+ nodeset = send(scanner, scanner_argument, tester, :uniq).map.with_index(1) do |raw_node, position|
+ XPathNode.new(raw_node, position: position)
+ end
+ nodesets = [nodeset]
+ end
+
+ (complex_predicates || post_predicates).each do |predicate_expr|
+ nodesets = evaluate_predicate(predicate_expr.dclone, nodesets)
+ end
+ seen = {}.compare_by_identity
+ nodesets.each do |nodeset|
+ nodeset.each do |node|
+ raw_node = node.respond_to?(:raw_node) ? node.raw_node : node
+ seen[raw_node] = true
+ end
+ end
+ ordered = sort(seen.keys, order)
+ ordered.map.with_index(1) do |raw_node, position|
+ XPathNode.new(raw_node, position: position)
end
- new_nodeset
ensure
- leave(:step, path_stack, new_nodeset) if @debug
+ leave(:step, path_stack, ordered) if @debug
end
end
- def node_test(path_stack, nodesets, any_type: :element)
- enter(:node_test, path_stack, nodesets) if @debug
+ def node_test(path_stack, any_type: :element)
+ enter(:node_test, path_stack) if @debug
operator = path_stack.shift
case operator
when :qname
prefix = path_stack.shift
name = path_stack.shift
- new_nodesets = nodesets.collect do |nodeset|
- filter_nodeset(nodeset) do |node|
- raw_node = node.raw_node
- case raw_node.node_type
- when :element
- if prefix.nil?
- raw_node.name == name
- elsif prefix.empty?
- if strict?
- raw_node.name == name and raw_node.namespace == ""
- else
- raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix)
- end
- else
- raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix)
- end
- when :attribute
- if prefix.nil?
- raw_node.name == name
- elsif prefix.empty?
+ ->(raw_node) do
+ case raw_node.node_type
+ when :element
+ if prefix.nil?
+ raw_node.name == name
+ elsif prefix.empty?
+ if strict?
raw_node.name == name and raw_node.namespace == ""
else
- raw_node.name == name and raw_node.namespace == get_namespace(raw_node.element, prefix)
+ raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix)
end
else
- false
+ raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix)
+ end
+ when :attribute
+ if prefix.nil?
+ raw_node.name == name
+ elsif prefix.empty?
+ raw_node.name == name and raw_node.namespace == ""
+ else
+ raw_node.name == name and raw_node.namespace == get_namespace(raw_node.element, prefix)
end
+ else
+ false
end
end
when :namespace
prefix = path_stack.shift
- new_nodesets = nodesets.collect do |nodeset|
- filter_nodeset(nodeset) do |node|
- raw_node = node.raw_node
- case raw_node.node_type
- when :element
- namespaces = @namespaces || raw_node.namespaces
- raw_node.namespace == namespaces[prefix]
- when :attribute
- namespaces = @namespaces || raw_node.element.namespaces
- raw_node.namespace == namespaces[prefix]
- else
- false
- end
+ ->(raw_node) do
+ case raw_node.node_type
+ when :element
+ namespaces = @namespaces || raw_node.namespaces
+ raw_node.namespace == namespaces[prefix]
+ when :attribute
+ namespaces = @namespaces || raw_node.element.namespaces
+ raw_node.namespace == namespaces[prefix]
+ else
+ false
end
end
when :any
- new_nodesets = nodesets.collect do |nodeset|
- filter_nodeset(nodeset) do |node|
- raw_node = node.raw_node
- raw_node.node_type == any_type
- end
+ ->(raw_node) do
+ raw_node.node_type == any_type
end
when :comment
- new_nodesets = nodesets.collect do |nodeset|
- filter_nodeset(nodeset) do |node|
- raw_node = node.raw_node
- raw_node.node_type == :comment
- end
+ ->(raw_node) do
+ raw_node.node_type == :comment
end
when :text
- new_nodesets = nodesets.collect do |nodeset|
- filter_nodeset(nodeset) do |node|
- raw_node = node.raw_node
- raw_node.node_type == :text
- end
+ ->(raw_node) do
+ raw_node.node_type == :text
end
when :processing_instruction
target = path_stack.shift
- new_nodesets = nodesets.collect do |nodeset|
- filter_nodeset(nodeset) do |node|
- raw_node = node.raw_node
- (raw_node.node_type == :processing_instruction) and
- (target.empty? or (raw_node.target == target))
- end
+ ->(raw_node) do
+ (raw_node.node_type == :processing_instruction) and
+ (target.empty? or (raw_node.target == target))
end
when :node
- new_nodesets = nodesets.collect do |nodeset|
- filter_nodeset(nodeset) do |node|
- true
- end
+ ->(raw_node) do
+ true
end
else
message = "[BUG] Unexpected node test: " +
"<#{operator.inspect}>: <#{path_stack.inspect}>"
raise message
end
- new_nodesets
ensure
- leave(:node_test, path_stack, new_nodesets) if @debug
+ leave(:node_test, path_stack) if @debug
end
def filter_nodeset(nodeset)
@@ -594,7 +728,6 @@ def filter_nodeset(nodeset)
def evaluate_predicate(expression, nodesets)
enter(:predicate, expression, nodesets) if @debug
- new_nodeset_count = 0
new_nodesets = nodesets.collect do |nodeset|
new_nodeset = []
subcontext = { :size => nodeset.size }
@@ -611,20 +744,17 @@ def evaluate_predicate(expression, nodesets)
result = result[0] if result.kind_of? Array and result.length == 1
if result.kind_of? Numeric
if result == node.position
- new_nodeset_count += 1
- new_nodeset << XPathNode.new(node, position: new_nodeset_count)
+ new_nodeset << XPathNode.new(node, position: new_nodeset.count + 1)
end
elsif result.instance_of? Array
if result.size > 0 and result.inject(false) {|k,s| s or k}
if result.size > 0
- new_nodeset_count += 1
- new_nodeset << XPathNode.new(node, position: new_nodeset_count)
+ new_nodeset << XPathNode.new(node, position: new_nodeset.count + 1)
end
end
else
if result
- new_nodeset_count += 1
- new_nodeset << XPathNode.new(node, position: new_nodeset_count)
+ new_nodeset << XPathNode.new(node, position: new_nodeset.count + 1)
end
end
end
@@ -661,43 +791,65 @@ def leave(tag, *args)
# I wouldn't have to do this. Maybe add a document IDX for each node?
# Problems with mutable documents. Or, rewrite everything.
def sort(array_of_nodes, order)
- new_arry = []
- array_of_nodes.each { |node|
+ return array_of_nodes if array_of_nodes.size <= 1
+
+ keyed = array_of_nodes.map do |node|
node_idx = []
np = node.node_type == :attribute ? node.element : node
while np.parent and np.parent.node_type == :element
- node_idx << np.parent.index( np )
+ node_idx << np.parent.index(np)
np = np.parent
end
- new_arry << [ node_idx.reverse, node ]
- }
- ordered = new_arry.sort_by do |index, node|
- if order == :forward
- index
- else
- index.map(&:-@)
- end
- end
- ordered.collect do |_index, node|
- node
+ [node_idx.reverse, node]
end
+ ordered = keyed.sort_by(&:first)
+ ordered.reverse! if order == :reverse
+ ordered.map { |_index, node| node }
end
- def descendant(nodeset, include_self)
- nodesets = []
- nodeset.each do |node|
- new_nodeset = []
- new_nodes = {}
- descendant_recursive(node.raw_node, new_nodeset, new_nodes, include_self)
- nodesets << new_nodeset unless new_nodeset.empty?
+ # Scanner for descendant-or-self axis
+ def descendant_or_self(raw_nodes, tester, selector)
+ descendant(raw_nodes, tester, selector, include_self: true)
+ end
+
+ # Scanner for descendant axis
+ def descendant(raw_nodes, tester, selector, include_self: false)
+ raw_nodes = raw_nodes.select {|node| node.respond_to?(:children) }
+ case selector
+ when :uniq
+ seen = {}.compare_by_identity
+ recursive = ->(raw_node) do
+ node_type = raw_node.node_type
+ return if seen[raw_node]
+ seen[raw_node] = true
+ return unless node_type == :element || node_type == :document
+ raw_node.children.each do |child|
+ recursive.call(child)
+ end
+ end
+ raw_nodes.each do |raw_node|
+ if include_self
+ recursive.call(raw_node)
+ else
+ raw_node.children.each(&recursive)
+ end
+ end
+ seen.keys.select(&tester)
+ else
+ raw_nodesets = raw_nodes.map do |raw_node|
+ new_nodeset = []
+ new_nodes = {}
+ descendant_recursive(raw_node, new_nodeset, new_nodes, include_self)
+ new_nodeset
+ end
+ non_optimized_raw_nodesets_select(raw_nodesets, tester, selector)
end
- nodesets
end
def descendant_recursive(raw_node, new_nodeset, new_nodes, include_self)
if include_self
return if new_nodes.key?(raw_node)
- new_nodeset << XPathNode.new(raw_node, position: new_nodeset.size + 1)
+ new_nodeset << raw_node
new_nodes[raw_node] = true
end
@@ -709,11 +861,17 @@ def descendant_recursive(raw_node, new_nodeset, new_nodes, include_self)
end
end
+ # Scanner for preceding axis
+ def preceding(raw_nodes, tester, selector)
+ raw_nodesets = raw_nodes.select {|node| node.respond_to?(:parent) }.map {|raw_node| preceding_nodes(raw_node) }
+ non_optimized_raw_nodesets_select(raw_nodesets, tester, selector)
+ end
+
# Builds a nodeset of all of the preceding nodes of the supplied node,
# in reverse document order
# preceding:: includes every element in the document that precedes this node,
# except for ancestors
- def preceding(node)
+ def preceding_nodes(node)
ancestors = []
parent = node.parent
while parent
@@ -727,8 +885,7 @@ def preceding(node)
if ancestors.include?(preceding_node)
ancestors.delete(preceding_node)
else
- precedings << XPathNode.new(preceding_node,
- position: precedings.size + 1)
+ precedings << preceding_node
end
preceding_node = preceding_node_of(preceding_node)
end
@@ -750,12 +907,19 @@ def preceding_node_of( node )
psn
end
- def following(node)
+ # Scanner for following axis
+ def following(raw_nodes, tester, selector)
+ raw_nodesets = raw_nodes.select {|node| node.respond_to?(:parent) }.map do |raw_node|
+ following_nodes(raw_node)
+ end
+ non_optimized_raw_nodesets_select(raw_nodesets, tester, selector)
+ end
+
+ def following_nodes(node)
followings = []
following_node = next_sibling_node(node)
while following_node
- followings << XPathNode.new(following_node,
- position: followings.size + 1)
+ followings << following_node
following_node = following_node_of(following_node)
end
followings
@@ -778,30 +942,19 @@ def next_sibling_node(node)
end
def child(nodeset)
- nodesets = []
- nodeset.each do |node|
+ nodeset.map do |node|
raw_node = node.raw_node
node_type = raw_node.node_type
# trace(:child, node_type, node)
case node_type
when :element
- nodesets << raw_node.children.collect.with_index do |child_node, i|
- XPathNode.new(child_node, position: i + 1)
- end
+ raw_node.children
when :document
- new_nodeset = []
- raw_node.children.each do |child|
- case child
- when XMLDecl, Text
- # Ignore
- else
- new_nodeset << XPathNode.new(child, position: new_nodeset.size + 1)
- end
+ raw_node.children.reject do |child|
+ XMLDecl === child || Text === child
end
- nodesets << new_nodeset unless new_nodeset.empty?
end
- end
- nodesets
+ end.compact
end
def norm b
diff --git a/test/xpath/test_attribute.rb b/test/xpath/test_attribute.rb
index b778ff81..458fda9f 100644
--- a/test/xpath/test_attribute.rb
+++ b/test/xpath/test_attribute.rb
@@ -32,5 +32,18 @@ def test_no_namespace
"nothing" => "")
assert_equal(["child2"], children.collect(&:text))
end
+
+ def test_no_error
+ REXML::XPath.match(@document, '//attribute::*/parent::*')
+ # Some of these are not yet supported in REXML, but at least they shouldn't raise an error
+ REXML::XPath.match(@document, '//attribute::*/preceding::*')
+ REXML::XPath.match(@document, '//attribute::*/following::*')
+ REXML::XPath.match(@document, '//attribute::*/preceding-sibling::*')
+ REXML::XPath.match(@document, '//attribute::*/following-sibling::*')
+ REXML::XPath.match(@document, '//attribute::*/ancestor::*')
+ REXML::XPath.match(@document, '//attribute::*/descendant::*')
+ REXML::XPath.match(@document, '//attribute::*/ancestor-or-self::*')
+ REXML::XPath.match(@document, '//attribute::*/descendant-or-self::*')
+ end
end
end
diff --git a/test/xpath/test_axis_preceding_sibling.rb b/test/xpath/test_axis_preceding_sibling.rb
index 9c44ad63..d71a17d6 100644
--- a/test/xpath/test_axis_preceding_sibling.rb
+++ b/test/xpath/test_axis_preceding_sibling.rb
@@ -34,5 +34,21 @@ def test_preceding_sibling_axis
prev = XPath.first(context, "preceding-sibling::f[3]")
assert_equal "3", prev.attributes["id"]
end
+
+ def test_preceding_sibling_position_less_than
+ context = XPath.first(@@doc, "/a/e/f[last()]")
+ assert_equal([], XPath.match(context, "preceding-sibling::f[position() < 1]"))
+ assert_equal(["5"],
+ XPath.match(context, "preceding-sibling::f[position() < 2]").map {|n| n.attributes["id"] })
+ assert_equal(["5", "4"],
+ XPath.match(context, "preceding-sibling::f[position() < 3]").map {|n| n.attributes["id"] })
+ end
+
+ def test_preceding_sibling_position_less_than_or_equal
+ context = XPath.first(@@doc, "/a/e/f[last()]")
+ assert_equal([], XPath.match(context, "preceding-sibling::f[position() <= 0]"))
+ assert_equal(["5", "4"],
+ XPath.match(context, "preceding-sibling::f[position() <= 2]").map {|n| n.attributes["id"] })
+ end
end
end
diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb
index 1c6eb624..f51ab04f 100644
--- a/test/xpath/test_base.rb
+++ b/test/xpath/test_base.rb
@@ -485,6 +485,14 @@ def test_following_sibling_predicates
assert_equal(["w", "x", "y", "z"], matches.map(&:name))
end
+ def test_following_sibling_position_less_than
+ source = ""
+ doc = REXML::Document.new(source)
+ assert_equal([], REXML::XPath.match(doc, "/r/a/following-sibling::*[position() < 1]"))
+ assert_equal(["b"], REXML::XPath.match(doc, "/r/a/following-sibling::*[position() < 2]").map(&:name))
+ assert_equal(["b", "c"], REXML::XPath.match(doc, "/r/a/following-sibling::*[position() < 3]").map(&:name))
+ end
+
def test_preceding_sibling_across_multiple_nodes
source = <<-XML
@@ -575,14 +583,22 @@ def test_nested_predicates
matches = XPath.match(doc, '(/div/div/test[3])').map(&:text)
assert_equal [], matches
+ matches = XPath.match(doc, '/div/div/test[1][1]').map(&:text)
+ assert_equal ["ab", "ef", "hi"], matches
matches = XPath.match(doc, '(/div/div/test[1])[1]').map(&:text)
assert_equal ["ab"], matches
+ matches = XPath.match(doc, '/div/div/test[1][2]').map(&:text)
+ assert_equal [], matches
matches = XPath.match(doc, '(/div/div/test[1])[2]').map(&:text)
assert_equal ["ef"], matches
matches = XPath.match(doc, '(/div/div/test[1])[3]').map(&:text)
assert_equal ["hi"], matches
+ matches = XPath.match(doc, '/div/div/test[2][1]').map(&:text)
+ assert_equal ["cd", "gh"], matches
matches = XPath.match(doc, '(/div/div/test[2])[1]').map(&:text)
assert_equal ["cd"], matches
+ matches = XPath.match(doc, '/div/div/test[2][2]').map(&:text)
+ assert_equal [], matches
matches = XPath.match(doc, '(/div/div/test[2])[2]').map(&:text)
assert_equal ["gh"], matches
matches = XPath.match(doc, '(/div/div/test[2])[3]').map(&:text)
diff --git a/test/xpath/test_predicate.rb b/test/xpath/test_predicate.rb
index 278e3765..f25bd758 100644
--- a/test/xpath/test_predicate.rb
+++ b/test/xpath/test_predicate.rb
@@ -59,12 +59,50 @@ def test_predicates_multi
assert_equal( "1", m[0].attributes["id"] )
end
+ def test_predicate_multi_position
+ xml = <<~XML
+
+
+
+
+ XML
+ doc = REXML::Document.new(xml)
+
+ result = REXML::XPath.match(doc, "/a/b/c[position()>1]")
+ assert_equal(%w[2 3 4 5 7 8 9 10], result.map { |node| node.attributes["id"] })
+
+ result = REXML::XPath.match(doc, "/a/b/c[position()>1][position()>1]")
+ assert_equal(%w[3 4 5 8 9 10], result.map { |node| node.attributes["id"] })
+
+ result = REXML::XPath.match(doc, "/a/b/c[position()>1][position()>1][@id!='3']")
+ assert_equal(%w[4 5 8 9 10], result.map { |node| node.attributes["id"] })
+
+ result = REXML::XPath.match(doc, "/a/b/c[position()>1][position()>1][@id!='3'][position()!=2]")
+ assert_equal(%w[4 8 10], result.map { |node| node.attributes["id"] })
+ end
+
def do_path( path )
m = REXML::XPath.match( @doc, path )
#puts path, @parser.parse( path ).inspect
return m
end
+ def test_predicate_float_literal
+ doc = REXML::Document.new("")
+ # [N.0] is equivalent to [position() = N.0] = [position() = N]
+ assert_equal(["a"], REXML::XPath.match(doc, "/r/*[1.0]").map(&:name))
+ assert_equal(["b"], REXML::XPath.match(doc, "/r/*[2.0]").map(&:name))
+ # Non-integer numeric literals match no node.
+ assert_equal([], REXML::XPath.match(doc, "/r/*[1.5]"))
+ end
+
+ def test_predicate_variable_as_position
+ doc = REXML::Document.new("")
+ parser = REXML::XPathParser.new
+ parser["x"] = 2
+ assert_equal(["b"], parser.parse("/r/*[$x]", doc).map(&:name))
+ end
+
def test_get_no_siblings_terminal_nodes
source = <<-XML