Class: Nokogiri::CSS::XPathVisitor

Inherits:
Object
  • Object
show all
Defined in:
lib/nokogiri/css/xpath_visitor.rb

Overview

When translating CSS selectors to XPath queries with Nokogiri::CSS.xpath_for, the XPathVisitor class allows for changing some of the behaviors related to builtin xpath functions and quirks of HTML5.

Defined Under Namespace

Modules: BuiltinsConfig, DoctypeConfig

Constant Summary collapse

WILDCARD_NAMESPACES =

:nodoc:

Nokogiri.libxml2_patches.include?("0009-allow-wildcard-namespaces.patch")

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(builtins: BuiltinsConfig::NEVER, doctype: DoctypeConfig::XML, prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX, namespaces: nil) ⇒ XPathVisitor

:call-seq:

new() → XPathVisitor
new(builtins:, doctype:) → XPathVisitor
Parameters
  • builtins: (BuiltinsConfig) Determine when to use Nokogiri’s built-in xpath functions for performance improvements.

  • doctype: (DoctypeConfig) Make document-type-specific accommodations for CSS queries.

Returns

XPathVisitor



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/nokogiri/css/xpath_visitor.rb', line 69

# Builds a visitor from the four keyword settings and stores each one in the
# instance variable of the same name.
#
# builtins:: must be a member of BuiltinsConfig::VALUES
# doctype:: must be a member of DoctypeConfig::VALUES
# prefix:: stored as-is (no validation is performed here)
# namespaces:: stored as-is (no validation is performed here)
#
# Raises ArgumentError when +builtins+ or +doctype+ is not one of the
# permitted values; +builtins+ is checked first.
def initialize(
  builtins: BuiltinsConfig::NEVER,
  doctype: DoctypeConfig::XML,
  prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX,
  namespaces: nil
)
  builtins_known = BuiltinsConfig::VALUES.include?(builtins)
  raise ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter" unless builtins_known

  doctype_known = DoctypeConfig::VALUES.include?(doctype)
  raise ArgumentError, "Invalid values #{doctype.inspect} for doctype: keyword parameter" unless doctype_known

  @builtins = builtins
  @doctype = doctype
  @prefix = prefix
  @namespaces = namespaces
end

Instance Attribute Details

#builtins ⇒ Object (readonly)

The visitor configuration set via the builtins: keyword argument to XPathVisitor.new.



48
49
50
# File 'lib/nokogiri/css/xpath_visitor.rb', line 48

# Reader for the builtins setting: returns the current value of @builtins.
def builtins
  @builtins
end

#doctype ⇒ Object (readonly)

The visitor configuration set via the doctype: keyword argument to XPathVisitor.new.



51
52
53
# File 'lib/nokogiri/css/xpath_visitor.rb', line 51

# Reader for the doctype setting: returns the current value of @doctype.
def doctype
  @doctype
end

#namespaces ⇒ Object (readonly)

The visitor configuration set via the namespaces: keyword argument to XPathVisitor.new.



57
58
59
# File 'lib/nokogiri/css/xpath_visitor.rb', line 57

# Reader for the namespaces setting: returns the current value of @namespaces.
def namespaces
  @namespaces
end

#prefix ⇒ Object (readonly)

The visitor configuration set via the prefix: keyword argument to XPathVisitor.new.



54
55
56
# File 'lib/nokogiri/css/xpath_visitor.rb', line 54

# Reader for the prefix setting: returns the current value of @prefix.
def prefix
  @prefix
end

Instance Method Details

#accept(node) ⇒ Object



298
299
300
# File 'lib/nokogiri/css/xpath_visitor.rb', line 298

# Visitor entry point: hands this visitor to +node+ by calling
# node.accept(self) and returns whatever that call returns.
def accept(node)
  node.accept(self)
end

#config ⇒ Object

:call-seq: config() → Hash

Returns

a Hash representing the configuration of the XPathVisitor, suitable for use as part of the CSS cache key.



93
94
95
# File 'lib/nokogiri/css/xpath_visitor.rb', line 93

# Snapshot of the visitor's settings as a Hash with the keys :builtins,
# :doctype, :prefix and :namespaces (in that order), each mapped to the
# corresponding instance variable.
def config
  settings = {
    builtins: @builtins,
    doctype: @doctype,
    prefix: @prefix,
    namespaces: @namespaces,
  }
  settings
end

#visit_attrib_name(node) ⇒ Object



294
295
296
# File 'lib/nokogiri/css/xpath_visitor.rb', line 294

# Renders an attribute-name node as an XPath attribute step: "@" followed by
# the first element of node.value.
def visit_attrib_name(node)
  attribute_name = node.value.first
  "@#{attribute_name}"
end

#visit_attribute_condition(node) ⇒ Object



175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/nokogiri/css/xpath_visitor.rb', line 175

# Translates an attribute condition node into an XPath predicate fragment.
#
# node.value is read as either [attribute_node] (bare attribute test) or
# [attribute_node, operator, value]. The attribute node is rendered by
# calling its #accept with this visitor.
#
# Returns the rendered attribute alone when there is only one element;
# otherwise returns the comparison selected by the operator symbol
# (:equal, :not_equal, :substring_match, :prefix_match, :dash_match,
# :includes, :suffix_match). Any other operator is emitted verbatim between
# the attribute and the value.
def visit_attribute_condition(node)
  attribute = node.value.first.accept(self)
  return attribute if node.value.length == 1

  value = node.value.last
  # Wrap bare values in single quotes. The check is anchored with \A (start
  # of string): ^ would also match after an embedded newline and leave such
  # a value unquoted.
  value = "'#{value}'" unless /\A['"]/.match?(value)

  # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
  if (value[0] == value[-1]) && %q{"'}.include?(value[0])
    str_value = value[1..-2]
    if str_value.include?(value[0])
      # The delimiter also occurs inside the value, so the literal cannot be
      # written with that quote alone: emit the text double-quoted, splicing
      # every embedded double quote in as a separate single-quoted '"'
      # argument to concat().
      value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
    end
  end

  case node.value[1]
  when :equal
    "#{attribute}=#{value}"
  when :not_equal
    "#{attribute}!=#{value}"
  when :substring_match
    "contains(#{attribute},#{value})"
  when :prefix_match
    "starts-with(#{attribute},#{value})"
  when :dash_match
    "#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
  when :includes
    value = value[1..-2] # strip quotes
    css_class(attribute, value)
  when :suffix_match
    "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
  else
    "#{attribute} #{node.value[1]} #{value}"
  end
end

#visit_class_condition(node) ⇒ Object



237
238
239
# File 'lib/nokogiri/css/xpath_visitor.rb', line 237

# Renders a class condition by passing the "@class" attribute expression and
# the first element of node.value to css_class, returning its result.
def visit_class_condition(node)
  class_name = node.value.first
  css_class("@class", class_name)
end

#visit_combinator(node) ⇒ Object



241
242
243
244
245
246
247
# File 'lib/nokogiri/css/xpath_visitor.rb', line 241

# Joins the rendered left-hand and right-hand operands of a combinator node.
#
# When is_of_type_pseudo_class? is true for the right-hand operand the two
# parts are separated by "][", otherwise they are joined with " and ".
# A nil left-hand operand renders as an empty string.
def visit_combinator(node)
  joiner = is_of_type_pseudo_class?(node.value.last) ? "][" : " and "
  left = node.value.first&.accept(self)
  right = node.value.last.accept(self)
  "#{left}#{joiner}#{right}"
end

#visit_conditional_selector(node) ⇒ Object



262
263
264
265
# File 'lib/nokogiri/css/xpath_visitor.rb', line 262

# Renders a conditional selector as "<selector>[<condition>]", where the
# selector is the first element of node.value and the condition is the last,
# each rendered through its own #accept.
def visit_conditional_selector(node)
  selector = node.value.first.accept(self)
  condition = node.value.last.accept(self)
  "#{selector}[#{condition}]"
end

#visit_element_name(node) ⇒ Object



267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# File 'lib/nokogiri/css/xpath_visitor.rb', line 267

# Renders an element-name node as an XPath node test.
#
# * HTML5 doctype, when html5_element_name_needs_namespace_handling(node) is
#   true: a namespace-agnostic test on the local name, whose exact form
#   depends on the builtins setting and WILDCARD_NAMESPACES.
# * Two-element node.value (a namespace prefix is present): "prefix:name", or
#   just the name when the prefix is nil.
# * Non-wildcard name while @namespaces has an "xmlns" key: "xmlns:name".
# * Anything else: the name unchanged.
def visit_element_name(node)
  parts = node.value
  name = parts.first

  if @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node)
    # HTML5 has namespaces that should be ignored in CSS queries
    # https://github.com/sparklemotion/nokogiri/issues/2376
    use_builtins = @builtins == BuiltinsConfig::ALWAYS ||
      (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
    return "*[local-name()='#{name}']" unless use_builtins
    return "*:#{name}" if WILDCARD_NAMESPACES

    return "*[nokogiri-builtin:local-name-is('#{name}')]"
  end

  if parts.length == 2 # has a namespace prefix
    # an empty (nil) prefix means the bare local name is used
    return name.nil? ? parts.last : parts.join(":")
  end

  # apply the default namespace (if one is present) to a non-wildcard selector
  return "xmlns:#{name}" if name != "*" && @namespaces&.key?("xmlns")

  name
end

#visit_function(node) ⇒ Object

:stopdoc:



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/nokogiri/css/xpath_visitor.rb', line 98

# Translates a function-style selector node into an XPath fragment.
#
# node.value.first is the function name including its opening parenthesis
# (the patterns below all match on "name("); the remaining elements are the
# arguments.
#
# Dispatch order:
# 1. If this object responds to visit_function_<name> (name with "("
#    removed), that method is called with the node and its result returned.
# 2. Otherwise a fixed set of known function names is translated inline.
# 3. Any other name is passed to validate_xpath_function_name and emitted as
#    a "nokogiri:"-prefixed call with "." as the first argument.
def visit_function(node)
  name = node.value.first
  handler = :"visit_function_#{name.gsub(/[(]/, "")}"
  return send(handler, node) if respond_to?(handler)

  # Evaluated lazily, only in the nth-style branches: is the first argument a
  # CSS node of type :NTH (to be expanded by #nth) rather than a plain value?
  nth_argument = lambda do
    node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
  end

  case name
  when /^text\(/
    "child::text()"
  when /^self\(/
    "self::#{node.value[1]}"
  when /^eq\(/
    "position()=#{node.value[1]}"
  when /^(nth|nth-of-type)\(/
    nth_argument.call ? nth(node.value[1]) : "position()=#{node.value[1]}"
  when /^nth-child\(/
    if nth_argument.call
      nth(node.value[1], child: true)
    else
      preceding = node.value[1].to_i - 1
      "count(preceding-sibling::*)=#{preceding}"
    end
  when /^nth-last-of-type\(/
    if nth_argument.call
      nth(node.value[1], last: true)
    else
      offset = node.value[1].to_i - 1
      offset.zero? ? "position()=last()" : "position()=last()-#{offset}"
    end
  when /^nth-last-child\(/
    if nth_argument.call
      nth(node.value[1], last: true, child: true)
    else
      following = node.value[1].to_i - 1
      "count(following-sibling::*)=#{following}"
    end
  when /^(first|first-of-type)\(/
    "position()=1"
  when /^(last|last-of-type)\(/
    "position()=last()"
  when /^contains\(/
    "contains(.,#{node.value[1]})"
  when /^gt\(/
    "position()>#{node.value[1]}"
  when /^only-child\(/
    "last()=1"
  when /^comment\(/
    "comment()"
  when /^has\(/
    # e.g. "has(> a)", "has(~ a)", "has(+ a)": the argument's first value is nil
    direct = node.value[1].value[0].nil?
    axis = direct ? "" : "//"
    ".#{axis}#{node.value[1].accept(self)}"
  else
    validate_xpath_function_name(name)

    # xpath function call, let's marshal those arguments
    rendered = node.value[1..-1].map do |arg|
      arg.is_a?(Nokogiri::CSS::Node) ? arg.accept(self) : arg
    end
    "nokogiri:#{name}#{[".", *rendered].join(",")})"
  end
end

#visit_id(node) ⇒ Object



170
171
172
173
# File 'lib/nokogiri/css/xpath_visitor.rb', line 170

# Renders an id selector as an XPath comparison on @id.
#
# The text after a leading "#" in node.value.first becomes the compared
# value; when the pattern does not match, the comparison is against an empty
# string.
def visit_id(node)
  matched = /^#(.*)$/.match(node.value.first)
  id_value = matched ? matched[1] : nil
  "@id='#{id_value}'"
end

#visit_not(node) ⇒ Object



161
162
163
164
165
166
167
168
# File 'lib/nokogiri/css/xpath_visitor.rb', line 161

# Renders a negation as "not(...)" around the rendered first child of the
# node. When that child's type is :ELEMENT_NAME the rendered name is
# prefixed with "self::".
def visit_not(node)
  operand = node.value.first
  axis = operand.type == :ELEMENT_NAME ? "self::" : ""
  "not(#{axis}#{operand.accept(self)})"
end

#visit_pseudo_class(node) ⇒ Object



211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# File 'lib/nokogiri/css/xpath_visitor.rb', line 211

# Translates a pseudo-class node into an XPath predicate fragment.
#
# * If node.value.first is a CSS node of type :FUNCTION, that node is
#   rendered through its own #accept.
# * Otherwise, if this object responds to visit_pseudo_class_<name>, that
#   method is called with the node.
# * Otherwise a fixed table of known names is consulted; unknown names are
#   passed to validate_xpath_function_name and emitted as "nokogiri:<name>(.)".
def visit_pseudo_class(node)
  pseudo = node.value.first
  return pseudo.accept(self) if pseudo.is_a?(Nokogiri::CSS::Node) && (pseudo.type == :FUNCTION)

  handler = :"visit_pseudo_class_#{pseudo.gsub(/[(]/, "")}"
  return send(handler, node) if respond_to?(handler)

  known = {
    "first" => "position()=1",
    "first-child" => "count(preceding-sibling::*)=0",
    "last" => "position()=last()",
    "last-child" => "count(following-sibling::*)=0",
    "first-of-type" => "position()=1",
    "last-of-type" => "position()=last()",
    "only-child" => "count(preceding-sibling::*)=0 and count(following-sibling::*)=0",
    "only-of-type" => "last()=1",
    "empty" => "not(node())",
    "parent" => "node()",
    "root" => "not(parent::*)",
  }

  known.fetch(pseudo) do
    # not a name handled here: treat it as a custom function applied to "."
    validate_xpath_function_name(pseudo)
    "nokogiri:#{pseudo}(.)"
  end
end