Class: Nokogiri::CSS::XPathVisitor

Inherits:
Object
  • Object
show all
Defined in:
lib/nokogiri/css/xpath_visitor.rb

Overview

When translating CSS selectors to XPath queries with Nokogiri::CSS.xpath_for, the XPathVisitor class allows for changing some of the behaviors related to builtin xpath functions and quirks of HTML5.

Defined Under Namespace

Modules: BuiltinsConfig, DoctypeConfig

Constant Summary collapse

WILDCARD_NAMESPACES =

:nodoc:

Nokogiri.libxml2_patches.include?("0009-allow-wildcard-namespaces.patch")

Instance Method Summary collapse

Constructor Details

#initialize(builtins: BuiltinsConfig::NEVER, doctype: DoctypeConfig::XML) ⇒ XPathVisitor

:call-seq:

new() → XPathVisitor
new(builtins:, doctype:) → XPathVisitor
Parameters
  • builtins: (BuiltinsConfig) Determine when to use Nokogiri’s built-in xpath functions for performance improvements.

  • doctype: (DoctypeConfig) Make document-type-specific accommodations for CSS queries.

Returns

XPathVisitor

[View source]

57
58
59
60
61
62
63
64
65
66
67
# File 'lib/nokogiri/css/xpath_visitor.rb', line 57

# :call-seq:
#   new() → XPathVisitor
#   new(builtins:, doctype:) → XPathVisitor
#
# Validates both configuration keywords and stores them on the visitor.
#
# [Parameters]
# - +builtins:+ must be a member of BuiltinsConfig::VALUES (default: BuiltinsConfig::NEVER)
# - +doctype:+ must be a member of DoctypeConfig::VALUES (default: DoctypeConfig::XML)
#
# [Raises] ArgumentError when either keyword is not one of the permitted values.
#   +builtins:+ is checked first, so it is the one reported when both are invalid.
def initialize(builtins: BuiltinsConfig::NEVER, doctype: DoctypeConfig::XML)
  if !BuiltinsConfig::VALUES.include?(builtins)
    raise ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter"
  elsif !DoctypeConfig::VALUES.include?(doctype)
    raise ArgumentError, "Invalid values #{doctype.inspect} for doctype: keyword parameter"
  end

  @builtins = builtins
  @doctype = doctype
end

Instance Method Details

#accept(node) ⇒ Object

[View source]

267
268
269
# File 'lib/nokogiri/css/xpath_visitor.rb', line 267

# Double-dispatch entry point: passes this visitor to +node.accept+ and
# returns whatever that call returns.
def accept(node)
  visitor = self
  node.accept(visitor)
end

#config ⇒ Object

:call-seq: config() → Hash

Returns

a Hash representing the configuration of the XPathVisitor, suitable for use as part of the CSS cache key.

[View source]

74
75
76
# File 'lib/nokogiri/css/xpath_visitor.rb', line 74

# :call-seq: config() → Hash
#
# [Returns]
#   a Hash holding the visitor's +builtins+ and +doctype+ settings (in that
#   key order), suitable for use as part of the CSS cache key.
def config
  settings = {}
  settings[:builtins] = @builtins
  settings[:doctype] = @doctype
  settings
end

#visit_attrib_name(node) ⇒ Object

[View source]

263
264
265
# File 'lib/nokogiri/css/xpath_visitor.rb', line 263

# Renders an attribute-name node as an XPath attribute reference: the first
# entry of +node.value+ prefixed with "@".
def visit_attrib_name(node)
  attr_name = node.value.first
  "@#{attr_name}"
end

#visit_attribute_condition(node) ⇒ Object

[View source]

154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/nokogiri/css/xpath_visitor.rb', line 154

# Translates an attribute selector node into an XPath predicate expression.
#
# +node.value+ is read as [attribute_node] for a bare presence test, or as
# [attribute_node, operator, raw_value] for a comparison. The attribute node
# is rendered by visiting it with this visitor.
#
# Returns the XPath expression String for the condition.
def visit_attribute_condition(node)
  parts = node.value
  attribute = parts.first.accept(self)
  # A lone attribute node is a presence test; nothing to compare against.
  return attribute if parts.length == 1

  operand = parts.last
  # Values that do not already start with a quote get wrapped in single quotes.
  operand = "'#{operand}'" unless /^['"]/.match?(operand)

  # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
  quote = operand[0]
  if (quote == operand[-1]) && %q{"'}.include?(quote)
    inner = operand[1..-2]
    if inner.include?(quote)
      # The delimiter also appears inside the value, so the literal is rebuilt
      # as an XPath concat() of double-quoted pieces, with each embedded
      # double quote emitted as the single-quoted literal '"'.
      pieces = inner.split('"', -1)
      operand = 'concat("' + pieces.join(%q{",'"',"}) + '","")'
    end
  end

  operator = parts[1]
  case operator
  when :equal then "#{attribute}=#{operand}"
  when :not_equal then "#{attribute}!=#{operand}"
  when :substring_match then "contains(#{attribute},#{operand})"
  when :prefix_match then "starts-with(#{attribute},#{operand})"
  when :dash_match
    "#{attribute}=#{operand} or starts-with(#{attribute},concat(#{operand},'-'))"
  when :includes
    # css_class receives the value with its surrounding quotes stripped
    css_class(attribute, operand[1..-2])
  when :suffix_match
    "substring(#{attribute},string-length(#{attribute})-string-length(#{operand})+1,string-length(#{operand}))=#{operand}"
  else
    # Unrecognised operators are emitted verbatim between attribute and value.
    "#{attribute} #{operator} #{operand}"
  end
end

#visit_class_condition(node) ⇒ Object

[View source]

215
216
217
# File 'lib/nokogiri/css/xpath_visitor.rb', line 215

# Translates a class selector node by delegating to css_class, testing the
# "@class" attribute against the first entry of +node.value+.
def visit_class_condition(node)
  class_name = node.value.first
  css_class("@class", class_name)
end

#visit_combinator(node) ⇒ Object

[View source]

219
220
221
222
223
224
225
# File 'lib/nokogiri/css/xpath_visitor.rb', line 219

# Joins the two operands of a combinator node into one predicate body.
#
# When the right-hand operand satisfies is_of_type_pseudo_class?, the two
# parts are separated by "][" so that each ends up in its own predicate;
# otherwise they are joined with " and ". A nil left-hand operand renders
# as an empty string.
def visit_combinator(node)
  joiner = is_of_type_pseudo_class?(node.value.last) ? "][" : " and "
  lhs = node.value.first&.accept(self)
  rhs = node.value.last.accept(self)
  "#{lhs}#{joiner}#{rhs}"
end

#visit_conditional_selector(node) ⇒ Object

[View source]

240
241
242
243
# File 'lib/nokogiri/css/xpath_visitor.rb', line 240

# Renders a conditional selector: the first entry of +node.value+ is visited
# to produce the selector, the last entry is visited to produce the
# condition, and the condition is wrapped in square brackets after it.
def visit_conditional_selector(node)
  selector = node.value.first.accept(self)
  condition = node.value.last.accept(self)
  "#{selector}[#{condition}]"
end

#visit_element_name(node) ⇒ Object

[View source]

245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
# File 'lib/nokogiri/css/xpath_visitor.rb', line 245

# Renders an element-name node.
#
# Outside HTML5 mode, or when html5_element_name_needs_namespace_handling
# says the node needs no special handling, the first entry of +node.value+
# is returned unchanged. Otherwise a namespace-agnostic name test is
# produced, in one of three forms chosen by the builtins configuration and
# the WILDCARD_NAMESPACES constant.
def visit_element_name(node)
  namespace_agnostic =
    @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node)
  return node.value.first unless namespace_agnostic

  # HTML5 has namespaces that should be ignored in CSS queries
  # https://github.com/sparklemotion/nokogiri/issues/2376
  local_name = node.value.first
  builtins_enabled =
    @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
  # Without builtins, fall back to the standard XPath local-name() comparison.
  return "*[local-name()='#{local_name}']" unless builtins_enabled

  if WILDCARD_NAMESPACES
    "*:#{local_name}"
  else
    "*[nokogiri-builtin:local-name-is('#{local_name}')]"
  end
end

#visit_function(node) ⇒ Object

:stopdoc:

[View source]

79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/nokogiri/css/xpath_visitor.rb', line 79

# Translates a function-style node into an XPath expression.
#
# +node.value.first+ is the function name including its opening parenthesis
# (e.g. "nth-child("); the remaining entries are its arguments.
#
# If this visitor publicly responds to "visit_function_<name>" (the name with
# every "(" removed), that method is called with +node+ and its result is
# returned. Otherwise the name is matched against the known functions below,
# and anything unrecognised is emitted as an XPath function call with "."
# prepended to its arguments.
def visit_function(node)
  handler = :"visit_function_#{node.value.first.gsub(/[(]/, "")}"
  return send(handler, node) if respond_to?(handler)

  fn = node.value.first
  arg = node.value[1]
  # Evaluated lazily: true when the first argument is a CSS node of type :NTH.
  nth_arg = lambda { arg.is_a?(Nokogiri::CSS::Node) && (arg.type == :NTH) }

  case fn
  when /^text\(/ then "child::text()"
  when /^self\(/ then "self::#{arg}"
  when /^eq\(/ then "position()=#{arg}"
  when /^(nth|nth-of-type)\(/
    nth_arg.call ? nth(arg) : "position()=#{arg}"
  when /^nth-child\(/
    nth_arg.call ? nth(arg, child: true) : "count(preceding-sibling::*)=#{arg.to_i - 1}"
  when /^nth-last-of-type\(/
    if nth_arg.call
      nth(arg, last: true)
    else
      offset = arg.to_i - 1
      offset == 0 ? "position()=last()" : "position()=last()-#{offset}"
    end
  when /^nth-last-child\(/
    nth_arg.call ? nth(arg, last: true, child: true) : "count(following-sibling::*)=#{arg.to_i - 1}"
  when /^(first|first-of-type)\(/ then "position()=1"
  when /^(last|last-of-type)\(/ then "position()=last()"
  when /^contains\(/ then "contains(.,#{arg})"
  when /^gt\(/ then "position()>#{arg}"
  when /^only-child\(/ then "last()=1"
  when /^comment\(/ then "comment()"
  when /^has\(/
    # A nil leading value means a direct relation, e.g. "has(> a)", "has(~ a)", "has(+ a)";
    # otherwise the argument is searched for with the "//" prefix.
    prefix = arg.value[0].nil? ? "" : "//"
    ".#{prefix}#{arg.accept(self)}"
  else
    # xpath function call, let's marshal those arguments
    rendered = node.value[1..-1].map do |piece|
      piece.is_a?(Nokogiri::CSS::Node) ? piece.accept(self) : piece
    end
    "#{fn}#{["."].concat(rendered).join(",")})"
  end
end

#visit_id(node) ⇒ Object

[View source]

149
150
151
152
# File 'lib/nokogiri/css/xpath_visitor.rb', line 149

# Translates an ID selector node into an XPath test on @id.
#
# The first entry of +node.value+ is expected to look like "#name"; the text
# after the leading "#" becomes the compared value. If the pattern does not
# match, the comparison is made against an empty string.
def visit_id(node)
  matched = /^#(.*)$/.match(node.value.first)
  id_value = matched && matched[1]
  "@id='#{id_value}'"
end

#visit_not(node) ⇒ Object

[View source]

140
141
142
143
144
145
146
147
# File 'lib/nokogiri/css/xpath_visitor.rb', line 140

# Wraps the translation of the first entry of +node.value+ in an XPath
# not(). A child of type :ELEMENT_NAME is additionally prefixed with the
# "self::" axis.
def visit_not(node)
  child = node.value.first
  axis = child.type == :ELEMENT_NAME ? "self::" : ""
  "not(#{axis}#{child.accept(self)})"
end

#visit_pseudo_class(node) ⇒ Object

[View source]

190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# File 'lib/nokogiri/css/xpath_visitor.rb', line 190

# Translates a pseudo-class node into an XPath expression.
#
# - If the first entry of +node.value+ is a CSS node of type :FUNCTION, it is
#   visited and its result returned.
# - Otherwise, if this visitor publicly responds to
#   "visit_pseudo_class_<name>" (the name with every "(" removed), that method
#   is called with +node+.
# - Otherwise the name is looked up among the known pseudo-classes below; an
#   unknown name is emitted as a call of that name on the context node,
#   i.e. "<name>(.)".
def visit_pseudo_class(node)
  head = node.value.first
  return head.accept(self) if head.is_a?(Nokogiri::CSS::Node) && (head.type == :FUNCTION)

  handler = :"visit_pseudo_class_#{head.gsub(/[(]/, "")}"
  return send(handler, node) if respond_to?(handler)

  case head
  when "first", "first-of-type" then "position()=1"
  when "last", "last-of-type" then "position()=last()"
  when "first-child" then "count(preceding-sibling::*)=0"
  when "last-child" then "count(following-sibling::*)=0"
  when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
  when "only-of-type" then "last()=1"
  when "empty" then "not(node())"
  when "parent" then "node()"
  when "root" then "not(parent::*)"
  else "#{head}(.)"
  end
end