diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb index a6d76fdc..25b96493 100644 --- a/lib/rexml/parsers/xpathparser.rb +++ b/lib/rexml/parsers/xpathparser.rb @@ -655,6 +655,10 @@ def PrimaryExpr path, parsed n = [] OrExpr( contents, n ) parsed.concat(n) + # For xpath like `(/path[predicate1][predicate2])[predicate3][predicate4]`, + # add a separator mark to distinguish predicates of the inner parentheses and the outer parentheses. + type = n[0] + parsed.push(:self, :node) if type == :document || type == :child || type == :union end path end diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index c5d420ce..a8e6f09a 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -199,28 +199,24 @@ def expr( path_stack, nodeset, context=nil ) nodeset = [XPathNode.new(first_raw_node.root_node, position: 1)] when :self nodeset = step(path_stack) do - [nodeset] + [:iterate_raw_nodesets, [nodeset.map(&:raw_node)]] end when :child nodeset = step(path_stack) do - child(nodeset) + [:iterate_raw_nodesets, child(nodeset)] end when :literal trace(:literal, path_stack, nodeset) if @debug return path_stack.shift when :attribute nodeset = step(path_stack, any_type: :attribute) do - nodesets = [] - nodeset.each do |node| + raw_nodesets = nodeset.map do |node| raw_node = node.raw_node next unless raw_node.node_type == :element attributes = raw_node.attributes - next if attributes.empty? - nodesets << attributes.each_attribute.collect.with_index do |attribute, i| - XPathNode.new(attribute, position: i + 1) - end - end - nodesets + attributes.each_attribute.to_a unless attributes.empty? + end.compact + [:iterate_raw_nodesets, raw_nodesets] end when :namespace pre_defined_namespaces = { @@ -245,11 +241,13 @@ def expr( path_stack, nodeset, context=nil ) end end end - nodesets + # Not working at all, so just return an empty nodesets for now. 
+ # Needs Namespace-node class + [:iterate_raw_nodesets, []] end when :parent nodeset = step(path_stack) do - nodesets = [] + parents = {}.compare_by_identity nodeset.each do |node| raw_node = node.raw_node if raw_node.node_type == :attribute @@ -257,101 +255,17 @@ def expr( path_stack, nodeset, context=nil ) else parent = raw_node.parent end - nodesets << [XPathNode.new(parent, position: 1)] if parent - end - nodesets - end - when :ancestor - nodeset = step(path_stack) do - nodesets = [] - # new_nodes = {} - nodeset.each do |node| - raw_node = node.raw_node - new_nodeset = [] - while raw_node.parent - raw_node = raw_node.parent - # next if new_nodes.key?(node) - new_nodeset << XPathNode.new(raw_node, - position: new_nodeset.size + 1) - # new_nodes[node] = true - end - nodesets << new_nodeset unless new_nodeset.empty? - end - nodesets - end - when :ancestor_or_self - nodeset = step(path_stack) do - nodesets = [] - # new_nodes = {} - nodeset.each do |node| - raw_node = node.raw_node - next unless raw_node.node_type == :element - new_nodeset = [XPathNode.new(raw_node, position: 1)] - # new_nodes[node] = true - while raw_node.parent - raw_node = raw_node.parent - # next if new_nodes.key?(node) - new_nodeset << XPathNode.new(raw_node, - position: new_nodeset.size + 1) - # new_nodes[node] = true - end - nodesets << new_nodeset unless new_nodeset.empty? + parents[parent] = true if parent end - nodesets + [:iterate_raw_nodesets, parents.keys.map {|parent| [parent] }] end - when :descendant_or_self + when :descendant, :descendant_or_self, :following, :following_sibling nodeset = step(path_stack) do - descendant(nodeset, true) + [op, nodeset.map(&:raw_node)] end - when :descendant - nodeset = step(path_stack) do - descendant(nodeset, false) - end - when :following_sibling - nodeset = step(path_stack) do - nodesets = [] - nodeset.each do |node| - raw_node = node.raw_node - next unless raw_node.respond_to?(:parent) - next if raw_node.parent.nil? 
- all_siblings = raw_node.parent.children - current_index = all_siblings.index(raw_node) - following_siblings = all_siblings[(current_index + 1)..-1] - next if following_siblings.empty? - nodesets << following_siblings.collect.with_index do |sibling, i| - XPathNode.new(sibling, position: i + 1) - end - end - nodesets - end - when :preceding_sibling - nodeset = step(path_stack, order: :reverse) do - nodesets = [] - nodeset.each do |node| - raw_node = node.raw_node - next unless raw_node.respond_to?(:parent) - next if raw_node.parent.nil? - all_siblings = raw_node.parent.children - current_index = all_siblings.index(raw_node) - preceding_siblings = all_siblings[0, current_index].reverse - next if preceding_siblings.empty? - nodesets << preceding_siblings.collect.with_index do |sibling, i| - XPathNode.new(sibling, position: i + 1) - end - end - nodesets - end - when :preceding + when :ancestor, :ancestor_or_self, :preceding, :preceding_sibling nodeset = step(path_stack, order: :reverse) do - unnode(nodeset) do |node| - preceding(node) - end - end - when :following - nodeset = step(path_stack) do - unnode(nodeset) do |node| - following(node) - end + [op, nodeset.map(&:raw_node)] end when :variable var_name = path_stack.shift @@ -439,7 +353,6 @@ def expr( path_stack, nodeset, context=nil ) end Functions.context = target_context return Functions.send(func_name, *args) - else raise "[BUG] Unexpected path: <#{op.inspect}>: <#{path_stack.inspect}>" end @@ -449,138 +362,359 @@ def expr( path_stack, nodeset, context=nil ) leave(:expr, path_stack, nodeset) if @debug end + # Determines if a predicate expression is dependent on the position of nodes. 
+ # nil if the predicate is position-independent, + # :simple if the predicate is a simple position query that can be optimized in axis scanning + # :complex if the predicate is a complex query that might be dependent on the position of nodes + def position_dependency(predicate_expr) + # [number], [position()=number], [position() < number], [position() > number] + return :simple if position_operation(predicate_expr) + + # expressions that return number. + return :complex if %i[div mod mult plus minus neg].include?(predicate_expr[0]) + return :complex if predicate_expr[0] == :function && %w[number ceiling round floor string-length sum count].include?(predicate_expr[1]) + # Numeric literal including Integer and Float: [2] means [position() = 2] + return :complex if predicate_expr[0] == :literal && Numeric === predicate_expr[1] + # A variable could resolve to a number at runtime: [$n] means [position() = $n]. + return :complex if predicate_expr[0] == :variable + + # expressions that contain position-dependent functions + return :complex if calls_position_dependent_function?(predicate_expr) + end + + # Recursively checks if the expression contains position-dependent functions such as position() or last() + def calls_position_dependent_function?(expr) + return false unless Array === expr + return true if expr[0] == :function && (expr[1] == 'position' || expr[1] == 'last') + expr.any? 
{|part| calls_position_dependent_function?(part) } + end + + # Detects simple position-based predicates that can be optimized in axis scanning, such as [1], [position()=1], [position() < 2], [position() > 3] + # Returns operators and values such as [:==, 1], [:<, 2], [:>, 3] + # Returns nil if the predicate is not a simple position-based predicate + def position_operation(predicate_expr) + return [:==, predicate_expr[1]] if predicate_expr[0] == :literal && predicate_expr[1].is_a?(Integer) + + op, left, right = predicate_expr + return unless op == :eq || op == :lt || op == :lteq || op == :gt || op == :gteq + return unless [left, right].include?([:function, 'position', []]) + + literal = [left, right].find {|part| part[0] == :literal && part[1].is_a?(Integer) } + return unless literal + + value = literal[1] + case op + when :eq + [:==, value] + when :lt + literal == right ? [:<, value] : [:>, value] + when :lteq + literal == right ? [:<, value + 1] : [:>, value - 1] + when :gt + literal == right ? [:>, value]: [:<, value] + when :gteq + literal == right ? 
[:>, value - 1] : [:<, value + 1] + end + end + + # Pseudo scanner for axis scanning steps whose nodesets have already been collected + def iterate_raw_nodesets(raw_nodesets, tester, selector) + non_optimized_raw_nodesets_select(raw_nodesets, tester, selector) + end + + # Scanner for ancestor-or-self axis + def ancestor_or_self(raw_nodes, tester, selector) + ancestor(raw_nodes, tester, selector, include_self: true) + end + + # Scanner for preceding-sibling axis + def preceding_sibling(raw_nodes, tester, selector) + preceding_following_sibling(raw_nodes, tester, selector, reverse: true) + end + + # Scanner for following-sibling axis + def following_sibling(raw_nodes, tester, selector) + preceding_following_sibling(raw_nodes, tester, selector, reverse: false) + end + + def preceding_following_sibling(raw_nodes, tester, selector, reverse:) + raw_nodes = raw_nodes.select {|node| node.respond_to?(:parent) && node.parent } + case selector + when :uniq + raw_nodes.group_by(&:parent).flat_map do |parent, sibling_nodes| + sets = {}.compare_by_identity + sibling_nodes.each {|sibling| sets[sibling] = true } + children = parent.children + children = children.reverse if reverse + children.drop_while {|child| !sets.key?(child) }.drop(1) + end.select(&tester) + when :nodesets + raw_nodesets = raw_nodes.map do |raw_node| + parent = raw_node.parent + index = parent.children.index(raw_node) + reverse ? 
parent.children[0...index].reverse : parent.children[index + 1..-1] + end + non_optimized_raw_nodesets_select(raw_nodesets, tester, selector) + else + operator, value = selector + raw_nodes.group_by(&:parent).flat_map do |parent, sibling_nodes| + anchors = {}.compare_by_identity + sibling_nodes.each {|sibling| anchors[sibling] = true } + children = parent.children + children = children.reverse if reverse + followings = children.drop_while {|child| !anchors.key?(child) }.drop(1) + anchor_indexes = { 0 => true } + last_anchor = 0 + index = 0 + matched = [] + followings.each do |node| + if tester.call(node) + case operator + when :== + matched << node if anchor_indexes.include?(index - value + 1) + when :< + matched << node if last_anchor > index - value + 1 + when :> + matched << node if index >= value + end + index += 1 + end + if anchors.key?(node) + anchor_indexes[index] = true + last_anchor = index + end + end + matched + end + end + end + + # Scanner for ancestor axis + def ancestor(raw_nodes, tester, selector, include_self: false) + raw_nodes = raw_nodes.select {|node| node.respond_to?(:parent) && node.parent } + case selector + when :uniq + ancestors = {}.compare_by_identity + raw_nodes.each do |raw_node| + ancestors[raw_node] = true if include_self + parent = raw_node.parent + while parent + break if ancestors.key?(parent) + ancestors[parent] = true + parent = parent.parent + end + end + ancestors.keys.select(&tester) + else + # Slow pass + raw_nodesets = raw_nodes.map do |raw_node| + ancestors = [] + ancestors << raw_node if include_self + parent = raw_node.parent + while parent + ancestors << parent + parent = parent.parent + end + ancestors + end + non_optimized_raw_nodesets_select(raw_nodesets, tester, selector) + end + end + + # Scanner fallback step for axis that is not optimized for position-based predicates. 
+ def non_optimized_raw_nodesets_select(raw_nodesets, tester, selector) + nodesets = raw_nodesets.map do |nodeset| + nodeset.select(&tester).map.with_index(1) do |node, position| + XPathNode.new(node, position: position) + end + end.reject(&:empty?) + case selector + when :nodesets + nodesets + when :uniq + seen = {}.compare_by_identity + nodesets.flatten.each {|node| seen[node.raw_node] = true } + seen.keys + else + operator, value = selector + nodes = nodesets.flatten + nodes = + case operator + when :== + nodes.select {|node| node.position == value } + when :< + nodes.select {|node| node.position < value } + when :> + nodes.select {|node| node.position > value } + end + seen = {}.compare_by_identity + nodes.each {|node| seen[node.raw_node] = true } + seen.keys + end + end + + # Split predicates into several groups based on their dependency on the position of nodes. + # If there are no position-based predicates, + # return [position_independent_predicates, nil, [], nil] + # If there is only one simple position-based predicate, + # return [position_independent_predicates, position_operator, position_independent_predicates, nil] + # If there are multiple position-based predicates or complex position-based predicates, + # return [position_independent_predicates, nil, nil, complex_predicates] + def split_positional_predicates(predicates) + pre_independent = predicates.take_while {|predicate| position_dependency(predicate).nil? } + predicates = predicates.drop(pre_independent.size) + return [pre_independent, nil, [], nil] if predicates.empty? + + op = position_operation(predicates.first) + if op && predicates[1..-1].all? {|predicate| position_dependency(predicate).nil? } + [pre_independent, op, predicates[1..-1], nil] + else + [pre_independent, nil, nil, predicates] + end + end + + # Performs an axis scanning step. 
+ # The caller provides a scanner method and its argument, which determines the axis to scan and the nodes to scan from: + # step(path_stack) { [scanner_method, scanner_argument] } + # Scanner methods are called with `(scanner_argument, tester_block, selector)` + # selector is a flag for the scanner to determine how to return the scan result. + # It can be: `:uniq`, `:nodesets` or `[position_comparator, value]`. + # `:uniq` means the scanner should return unique nodes. Predicates are position-independent. + # `:nodesets` means the scanner should return nodesets. Predicates are complex position queries that can't be optimized in axis scanning. + # `[position_comparator, value]` means the scanner should return nodes matching the position comparator and value. + # Each scanner method can implement an optimized scanning strategy for each selector. + def step(path_stack, any_type: :element, order: :forward) - nodesets = yield + scanner, scanner_argument = yield begin - enter(:step, path_stack, nodesets) if @debug - nodesets = node_test(path_stack, nodesets, any_type: any_type) - while path_stack[0] == :predicate - path_stack.shift # :predicate - predicate_expression = path_stack.shift.dclone - nodesets = evaluate_predicate(predicate_expression, nodesets) - end - if nodesets.size == 1 - ordered_nodeset = nodesets[0] - else - seen = {}.compare_by_identity - raw_nodes = [] - nodesets.each do |nodeset| - nodeset.each do |node| - raw_node = node.respond_to?(:raw_node) ? node.raw_node : node - next if seen.key?(raw_node) - seen[raw_node] = true - raw_nodes << raw_node + enter(:step, path_stack, scanner, scanner_argument) if @debug + tester = node_test(path_stack, any_type: any_type) + predicates = [] + while path_stack.first == :predicate + path_stack.shift + predicates << path_stack.shift + end + pre_predicates, position_operator, post_predicates, complex_predicates = split_positional_predicates(predicates) + + if pre_predicates.any? 
+ original_tester = tester + tester = -> (raw_node) do + original_tester.call(raw_node) && + pre_predicates.all? do |predicate_expr| + evaluate_predicate(predicate_expr.dclone, [[XPathNode.new(raw_node, position: 1)]]).flatten.size == 1 end end - ordered_nodeset = sort(raw_nodes, order) end - new_nodeset = [] - ordered_nodeset.each do |node| - new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1) + + if complex_predicates + nodesets = send(scanner, scanner_argument, tester, :nodesets) + elsif position_operator + nodeset = send(scanner, scanner_argument, tester, position_operator).map.with_index(1) do |raw_node, position| + XPathNode.new(raw_node, position: position) + end + nodesets = [nodeset] + else + nodeset = send(scanner, scanner_argument, tester, :uniq).map.with_index(1) do |raw_node, position| + XPathNode.new(raw_node, position: position) + end + nodesets = [nodeset] + end + + (complex_predicates || post_predicates).each do |predicate_expr| + nodesets = evaluate_predicate(predicate_expr.dclone, nodesets) + end + seen = {}.compare_by_identity + nodesets.each do |nodeset| + nodeset.each do |node| + raw_node = node.respond_to?(:raw_node) ? node.raw_node : node + seen[raw_node] = true + end + end + ordered = sort(seen.keys, order) + ordered.map.with_index(1) do |raw_node, position| + XPathNode.new(raw_node, position: position) end - new_nodeset ensure - leave(:step, path_stack, new_nodeset) if @debug + leave(:step, path_stack, ordered) if @debug end end - def node_test(path_stack, nodesets, any_type: :element) - enter(:node_test, path_stack, nodesets) if @debug + def node_test(path_stack, any_type: :element) + enter(:node_test, path_stack) if @debug operator = path_stack.shift case operator when :qname prefix = path_stack.shift name = path_stack.shift - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - case raw_node.node_type - when :element - if prefix.nil? 
- raw_node.name == name - elsif prefix.empty? - if strict? - raw_node.name == name and raw_node.namespace == "" - else - raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix) - end - else - raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix) - end - when :attribute - if prefix.nil? - raw_node.name == name - elsif prefix.empty? + ->(raw_node) do + case raw_node.node_type + when :element + if prefix.nil? + raw_node.name == name + elsif prefix.empty? + if strict? raw_node.name == name and raw_node.namespace == "" else - raw_node.name == name and raw_node.namespace == get_namespace(raw_node.element, prefix) + raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix) end else - false + raw_node.name == name and raw_node.namespace == get_namespace(raw_node, prefix) + end + when :attribute + if prefix.nil? + raw_node.name == name + elsif prefix.empty? + raw_node.name == name and raw_node.namespace == "" + else + raw_node.name == name and raw_node.namespace == get_namespace(raw_node.element, prefix) end + else + false end end when :namespace prefix = path_stack.shift - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - case raw_node.node_type - when :element - namespaces = @namespaces || raw_node.namespaces - raw_node.namespace == namespaces[prefix] - when :attribute - namespaces = @namespaces || raw_node.element.namespaces - raw_node.namespace == namespaces[prefix] - else - false - end + ->(raw_node) do + case raw_node.node_type + when :element + namespaces = @namespaces || raw_node.namespaces + raw_node.namespace == namespaces[prefix] + when :attribute + namespaces = @namespaces || raw_node.element.namespaces + raw_node.namespace == namespaces[prefix] + else + false end end when :any - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - raw_node.node_type == any_type - end + 
->(raw_node) do + raw_node.node_type == any_type end when :comment - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - raw_node.node_type == :comment - end + ->(raw_node) do + raw_node.node_type == :comment end when :text - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - raw_node.node_type == :text - end + ->(raw_node) do + raw_node.node_type == :text end when :processing_instruction target = path_stack.shift - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - raw_node = node.raw_node - (raw_node.node_type == :processing_instruction) and - (target.empty? or (raw_node.target == target)) - end + ->(raw_node) do + (raw_node.node_type == :processing_instruction) and + (target.empty? or (raw_node.target == target)) end when :node - new_nodesets = nodesets.collect do |nodeset| - filter_nodeset(nodeset) do |node| - true - end + ->(raw_node) do + true end else message = "[BUG] Unexpected node test: " + "<#{operator.inspect}>: <#{path_stack.inspect}>" raise message end - new_nodesets ensure - leave(:node_test, path_stack, new_nodesets) if @debug + leave(:node_test, path_stack) if @debug end def filter_nodeset(nodeset) @@ -594,7 +728,6 @@ def filter_nodeset(nodeset) def evaluate_predicate(expression, nodesets) enter(:predicate, expression, nodesets) if @debug - new_nodeset_count = 0 new_nodesets = nodesets.collect do |nodeset| new_nodeset = [] subcontext = { :size => nodeset.size } @@ -611,20 +744,17 @@ def evaluate_predicate(expression, nodesets) result = result[0] if result.kind_of? Array and result.length == 1 if result.kind_of? Numeric if result == node.position - new_nodeset_count += 1 - new_nodeset << XPathNode.new(node, position: new_nodeset_count) + new_nodeset << XPathNode.new(node, position: new_nodeset.count + 1) end elsif result.instance_of? 
Array if result.size > 0 and result.inject(false) {|k,s| s or k} if result.size > 0 - new_nodeset_count += 1 - new_nodeset << XPathNode.new(node, position: new_nodeset_count) + new_nodeset << XPathNode.new(node, position: new_nodeset.count + 1) end end else if result - new_nodeset_count += 1 - new_nodeset << XPathNode.new(node, position: new_nodeset_count) + new_nodeset << XPathNode.new(node, position: new_nodeset.count + 1) end end end @@ -661,43 +791,65 @@ def leave(tag, *args) # I wouldn't have to do this. Maybe add a document IDX for each node? # Problems with mutable documents. Or, rewrite everything. def sort(array_of_nodes, order) - new_arry = [] - array_of_nodes.each { |node| + return array_of_nodes if array_of_nodes.size <= 1 + + keyed = array_of_nodes.map do |node| node_idx = [] np = node.node_type == :attribute ? node.element : node while np.parent and np.parent.node_type == :element - node_idx << np.parent.index( np ) + node_idx << np.parent.index(np) np = np.parent end - new_arry << [ node_idx.reverse, node ] - } - ordered = new_arry.sort_by do |index, node| - if order == :forward - index - else - index.map(&:-@) - end - end - ordered.collect do |_index, node| - node + [node_idx.reverse, node] end + ordered = keyed.sort_by(&:first) + ordered.reverse! if order == :reverse + ordered.map { |_index, node| node } end - def descendant(nodeset, include_self) - nodesets = [] - nodeset.each do |node| - new_nodeset = [] - new_nodes = {} - descendant_recursive(node.raw_node, new_nodeset, new_nodes, include_self) - nodesets << new_nodeset unless new_nodeset.empty? 
+ # Scanner for descendant-or-self axis + def descendant_or_self(raw_nodes, tester, selector) + descendant(raw_nodes, tester, selector, include_self: true) + end + + # Scanner for descendant axis + def descendant(raw_nodes, tester, selector, include_self: false) + raw_nodes = raw_nodes.select {|node| node.respond_to?(:children) } + case selector + when :uniq + seen = {}.compare_by_identity + recursive = ->(raw_node) do + node_type = raw_node.node_type + return if seen[raw_node] + seen[raw_node] = true + return unless node_type == :element || node_type == :document + raw_node.children.each do |child| + recursive.call(child) + end + end + raw_nodes.each do |raw_node| + if include_self + recursive.call(raw_node) + else + raw_node.children.each(&recursive) + end + end + seen.keys.select(&tester) + else + raw_nodesets = raw_nodes.map do |raw_node| + new_nodeset = [] + new_nodes = {} + descendant_recursive(raw_node, new_nodeset, new_nodes, include_self) + new_nodeset + end + non_optimized_raw_nodesets_select(raw_nodesets, tester, selector) end - nodesets end def descendant_recursive(raw_node, new_nodeset, new_nodes, include_self) if include_self return if new_nodes.key?(raw_node) - new_nodeset << XPathNode.new(raw_node, position: new_nodeset.size + 1) + new_nodeset << raw_node new_nodes[raw_node] = true end @@ -709,11 +861,17 @@ def descendant_recursive(raw_node, new_nodeset, new_nodes, include_self) end end + # Scanner for preceding axis + def preceding(raw_nodes, tester, selector) + raw_nodesets = raw_nodes.select {|node| node.respond_to?(:parent) }.map {|raw_node| preceding_nodes(raw_node) } + non_optimized_raw_nodesets_select(raw_nodesets, tester, selector) + end + # Builds a nodeset of all of the preceding nodes of the supplied node, # in reverse document order # preceding:: includes every element in the document that precedes this node, # except for ancestors - def preceding(node) + def preceding_nodes(node) ancestors = [] parent = node.parent while parent @@ 
-727,8 +885,7 @@ def preceding(node) if ancestors.include?(preceding_node) ancestors.delete(preceding_node) else - precedings << XPathNode.new(preceding_node, - position: precedings.size + 1) + precedings << preceding_node end preceding_node = preceding_node_of(preceding_node) end @@ -750,12 +907,19 @@ def preceding_node_of( node ) psn end - def following(node) + # Scanner for following axis + def following(raw_nodes, tester, selector) + raw_nodesets = raw_nodes.select {|node| node.respond_to?(:parent) }.map do |raw_node| + following_nodes(raw_node) + end + non_optimized_raw_nodesets_select(raw_nodesets, tester, selector) + end + + def following_nodes(node) followings = [] following_node = next_sibling_node(node) while following_node - followings << XPathNode.new(following_node, - position: followings.size + 1) + followings << following_node following_node = following_node_of(following_node) end followings @@ -778,30 +942,19 @@ def next_sibling_node(node) end def child(nodeset) - nodesets = [] - nodeset.each do |node| + nodeset.map do |node| raw_node = node.raw_node node_type = raw_node.node_type # trace(:child, node_type, node) case node_type when :element - nodesets << raw_node.children.collect.with_index do |child_node, i| - XPathNode.new(child_node, position: i + 1) - end + raw_node.children when :document - new_nodeset = [] - raw_node.children.each do |child| - case child - when XMLDecl, Text - # Ignore - else - new_nodeset << XPathNode.new(child, position: new_nodeset.size + 1) - end + raw_node.children.reject do |child| + XMLDecl === child || Text === child end - nodesets << new_nodeset unless new_nodeset.empty? 
end - end - nodesets + end.compact end def norm b diff --git a/test/xpath/test_attribute.rb b/test/xpath/test_attribute.rb index b778ff81..458fda9f 100644 --- a/test/xpath/test_attribute.rb +++ b/test/xpath/test_attribute.rb @@ -32,5 +32,18 @@ def test_no_namespace "nothing" => "") assert_equal(["child2"], children.collect(&:text)) end + + def test_no_error + REXML::XPath.match(@document, '//attribute::*/parent::*') + # Some of those are not yet supported in REXML, but at least it shouldn't raise an error + REXML::XPath.match(@document, '//attribute::*/preceding::*') + REXML::XPath.match(@document, '//attribute::*/following::*') + REXML::XPath.match(@document, '//attribute::*/preceding-sibling::*') + REXML::XPath.match(@document, '//attribute::*/following-sibling::*') + REXML::XPath.match(@document, '//attribute::*/ancestor::*') + REXML::XPath.match(@document, '//attribute::*/descendant::*') + REXML::XPath.match(@document, '//attribute::*/ancestor-or-self::*') + REXML::XPath.match(@document, '//attribute::*/descendant-or-self::*') + end end end diff --git a/test/xpath/test_axis_preceding_sibling.rb b/test/xpath/test_axis_preceding_sibling.rb index 9c44ad63..d71a17d6 100644 --- a/test/xpath/test_axis_preceding_sibling.rb +++ b/test/xpath/test_axis_preceding_sibling.rb @@ -34,5 +34,21 @@ def test_preceding_sibling_axis prev = XPath.first(context, "preceding-sibling::f[3]") assert_equal "3", prev.attributes["id"] end + + def test_preceding_sibling_position_less_than + context = XPath.first(@@doc, "/a/e/f[last()]") + assert_equal([], XPath.match(context, "preceding-sibling::f[position() < 1]")) + assert_equal(["5"], + XPath.match(context, "preceding-sibling::f[position() < 2]").map {|n| n.attributes["id"] }) + assert_equal(["5", "4"], + XPath.match(context, "preceding-sibling::f[position() < 3]").map {|n| n.attributes["id"] }) + end + + def test_preceding_sibling_position_less_than_or_equal + context = XPath.first(@@doc, "/a/e/f[last()]") + assert_equal([], 
XPath.match(context, "preceding-sibling::f[position() <= 0]")) + assert_equal(["5", "4"], + XPath.match(context, "preceding-sibling::f[position() <= 2]").map {|n| n.attributes["id"] }) + end end end diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb index 1c6eb624..f51ab04f 100644 --- a/test/xpath/test_base.rb +++ b/test/xpath/test_base.rb @@ -485,6 +485,14 @@ def test_following_sibling_predicates assert_equal(["w", "x", "y", "z"], matches.map(&:name)) end + def test_following_sibling_position_less_than + source = "" + doc = REXML::Document.new(source) + assert_equal([], REXML::XPath.match(doc, "/r/a/following-sibling::*[position() < 1]")) + assert_equal(["b"], REXML::XPath.match(doc, "/r/a/following-sibling::*[position() < 2]").map(&:name)) + assert_equal(["b", "c"], REXML::XPath.match(doc, "/r/a/following-sibling::*[position() < 3]").map(&:name)) + end + def test_preceding_sibling_across_multiple_nodes source = <<-XML @@ -575,14 +583,22 @@ def test_nested_predicates matches = XPath.match(doc, '(/div/div/test[3])').map(&:text) assert_equal [], matches + matches = XPath.match(doc, '/div/div/test[1][1]').map(&:text) + assert_equal ["ab", "ef", "hi"], matches matches = XPath.match(doc, '(/div/div/test[1])[1]').map(&:text) assert_equal ["ab"], matches + matches = XPath.match(doc, '/div/div/test[1][2]').map(&:text) + assert_equal [], matches matches = XPath.match(doc, '(/div/div/test[1])[2]').map(&:text) assert_equal ["ef"], matches matches = XPath.match(doc, '(/div/div/test[1])[3]').map(&:text) assert_equal ["hi"], matches + matches = XPath.match(doc, '/div/div/test[2][1]').map(&:text) + assert_equal ["cd", "gh"], matches matches = XPath.match(doc, '(/div/div/test[2])[1]').map(&:text) assert_equal ["cd"], matches + matches = XPath.match(doc, '/div/div/test[2][2]').map(&:text) + assert_equal [], matches matches = XPath.match(doc, '(/div/div/test[2])[2]').map(&:text) assert_equal ["gh"], matches matches = XPath.match(doc, '(/div/div/test[2])[3]').map(&:text) 
diff --git a/test/xpath/test_predicate.rb b/test/xpath/test_predicate.rb index 278e3765..f25bd758 100644 --- a/test/xpath/test_predicate.rb +++ b/test/xpath/test_predicate.rb @@ -59,12 +59,50 @@ def test_predicates_multi assert_equal( "1", m[0].attributes["id"] ) end + def test_predicate_multi_position + xml = <<~XML + + + + + XML + doc = REXML::Document.new(xml) + + result = REXML::XPath.match(doc, "/a/b/c[position()>1]") + assert_equal(%w[2 3 4 5 7 8 9 10], result.map { |node| node.attributes["id"] }) + + result = REXML::XPath.match(doc, "/a/b/c[position()>1][position()>1]") + assert_equal(%w[3 4 5 8 9 10], result.map { |node| node.attributes["id"] }) + + result = REXML::XPath.match(doc, "/a/b/c[position()>1][position()>1][@id!='3']") + assert_equal(%w[4 5 8 9 10], result.map { |node| node.attributes["id"] }) + + result = REXML::XPath.match(doc, "/a/b/c[position()>1][position()>1][@id!='3'][position()!=2]") + assert_equal(%w[4 8 10], result.map { |node| node.attributes["id"] }) + end + def do_path( path ) m = REXML::XPath.match( @doc, path ) #puts path, @parser.parse( path ).inspect return m end + def test_predicate_float_literal + doc = REXML::Document.new("") + # [N.0] is equivalent to [position() = N.0] = [position() = N] + assert_equal(["a"], REXML::XPath.match(doc, "/r/*[1.0]").map(&:name)) + assert_equal(["b"], REXML::XPath.match(doc, "/r/*[2.0]").map(&:name)) + # Non-integer numeric literals match no node. + assert_equal([], REXML::XPath.match(doc, "/r/*[1.5]")) + end + + def test_predicate_variable_as_position + doc = REXML::Document.new("") + parser = REXML::XPathParser.new + parser["x"] = 2 + assert_equal(["b"], parser.parse("/r/*[$x]", doc).map(&:name)) + end + def test_get_no_siblings_terminal_nodes source = <<-XML