Class: Hash

Inherits:
Object
  • Object
show all
Defined in:
lib/curly/hash.rb

Overview

Hash helpers

Instance Method Summary collapse

Instance Method Details

#array_match(array, key, comp) ⇒ Boolean

Test if values in an array match an operator

Parameters:

  • array (Array)

    The array

  • key (String)

    The string tested against each array element

  • comp (String)

    The comparison, e.g. *= or $=

Returns:

  • (Boolean)

    true if array contains match



180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/curly/hash.rb', line 180

# Check whether +key+ matches at least one element of +array+.
#
# Every element is interpolated into a case-insensitive regular expression
# that is tested against +key+; the first character of +comp+ decides how
# that expression is anchored.
#
# @param array [Array] elements used as patterns
# @param key [String] the string tested against each pattern
# @param comp [String] comparison operator: a leading "^" anchors at the
#   start, "$" at the end, "*" matches anywhere; anything else requires the
#   whole string to match
# @return [Boolean] true if any element matches, false otherwise
def array_match(array, key, comp)
  array.any? do |el|
    pattern = case comp
              when /^\^/
                /^#{el}/i
              when /^\$/
                /#{el}$/i
              when /^\*/
                /#{el}/i
              else
                /^#{el}$/i
              end
    (key =~ pattern) ? true : false
  end
end

#clean_output ⇒ Array

Clean up empty arrays and return an array with one or more elements

Returns:

  • (Array)

    output array



396
397
398
399
# File 'lib/curly/hash.rb', line 396

# Wrap the hash in an array via #ensure_array and hand it to that array's
# own #clean_output.
#
# @return [Array] whatever the array-level #clean_output returns
def clean_output
  ensure_array.clean_output
end

#dot_query(path, root = nil, full_tag: true) ⇒ Object

Extract data using a dot-syntax path

Parameters:

Returns:

  • (Object)

    Result of path query



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/curly/hash.rb', line 71

# Extract data from the hash using a dot-syntax path.
#
# A path without brackets is handed straight to #get_value. Otherwise the
# path is split into dot-separated segments, and each segment may carry a
# bracket suffix holding a numeric index or range ("items[0]",
# "items[1..3]") and/or attribute comparisons ("tags[name=foo]") that are
# checked with #evaluate_comp.
#
# @param path [String] the query; it is never modified, so frozen strings
#   are accepted
# @param root [String, nil] optional top-level key to start the query from
# @param full_tag [Boolean] when a comparison matches, collect the object
#   the segment was looked up in (true) or the matching element (false)
# @return [Object] result of the path query; nil when there is nothing left
#   to query, false when a bracketed segment's key is missing
def dot_query(path, root = nil, full_tag: true)
  res = stringify_keys
  res = res[root] unless root.nil?

  # Same guard the segment loop applies: nothing to query.
  return nil if res.nil?

  return res.get_value(path) unless path =~ /\[/

  # Hide dots inside brackets from the segment split below. A copy is built
  # (gsub, not gsub!) so the caller's string is left untouched.
  path = path.gsub(/\[(.*?)\]/) do
    inter = Regexp.last_match(1).gsub(/\./, '%')
    "[#{inter}]"
  end

  out = []
  # Split on dots that do not follow a digit or another dot.
  q = path.split(/(?<![\d.])\./)

  while q.count.positive?
    pth = q.shift
    pth.gsub!(/%/, '.') # restore the dots hidden above

    return nil if res.nil?

    return res.get_value(pth) unless pth =~ /\[/

    # Numeric index/range for this segment only. Recomputed on every pass so
    # an index from an earlier segment cannot leak into this one.
    el = pth[/\[([0-9,.]+)?\]/, 1]
    pth.sub!(/\[([0-9,.]+)?\]/, '')

    # Collect attribute comparisons as [key, operator, value] triples.
    # NOTE(review): a comparison introduced by "," is pushed onto `ats` on its
    # own (not wrapped in a group) and discards the group collected so far;
    # left unchanged here, confirm the intended grouping semantics.
    ats = []
    at = []
    while pth =~ /\[[+&,]?[\w.]+( *[\^*$=<>]=? *\w+)?/
      m = pth.match(/\[(?<com>[,+&])? *(?<key>[\w.]+)( *(?<op>[\^*$=<>]{1,2}) *(?<val>[^,&\]]+))? */)

      comp = [m['key'], m['op'], m['val']]
      case m['com']
      when ','
        ats.push(comp)
        at = []
      else
        at.push(comp)
      end

      pth.sub!(/\[(?<com>[,&+])? *(?<key>[\w.]+)( *(?<op>[\^*$=<>]{1,2}) *(?<val>[^,&\]]+))?/, '[')
    end
    ats.push(at) unless at.empty?
    pth.sub!(/\[\]/, '')

    res = res[0] if res.is_a?(Array) && res.count == 1
    if ats.empty? && el.nil? && res.is_a?(Array) && res[0]&.key?(pth)
      res.map! { |r| r[pth] }
      next
    end

    res.map!(&:stringify_keys) if res.is_a?(Array) && res[0].is_a?(Hash)

    tag = res
    res = res[pth] unless pth.nil? || pth.empty?

    pth = ''

    return false if res.nil?

    if ats.count.positive?
      while ats.count.positive?
        atr = ats.shift
        res = [res] if res.is_a?(Hash)
        res.each do |r|
          out.push(full_tag ? tag : r) if evaluate_comp(r, atr)
        end
      end
    else
      out = res
    end

    out = out.get_value(pth) unless pth.nil?

    if el.nil? && out.is_a?(Array) && out[0].is_a?(Hash)
      out.map!(&:stringify_keys)
    elsif out.is_a?(Array) && el =~ /^[\d.,]+$/
      # NOTE(review): eval only ever sees digits, dots and commas (checked by
      # the regex above), e.g. "0" or "1..3". An index list such as "1,2" is
      # not valid Ruby on its own and would raise a SyntaxError.
      out = out[eval(el)]
    end
    res = out
  end

  # Unwrap single-element arrays only; a Hash with one key (or any other
  # object) is returned as is instead of being indexed with 0.
  out = out[0] if out.is_a?(Array) && out.count == 1
  out
end

#ensure_array ⇒ Array

Ensure that an object is an array

Returns:

  • (Array)

    object as Array



406
407
408
# File 'lib/curly/hash.rb', line 406

# Wrap the hash in a one-element array.
#
# @return [Array] an array whose only element is this hash
def ensure_array
  [self]
end

#evaluate_comp(r, atr) ⇒ Boolean

Evaluate a comparison

Parameters:

  • r (Hash)

    hash of source elements and comparison operators

  • atr (Array)

    Array of arrays containing [attribute, comparator, value]

Returns:

  • (Boolean)

    whether the comparison passes or fails



207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
# File 'lib/curly/hash.rb', line 207

# Evaluate a group of comparisons against one element.
#
# Every comparison in +atr+ must pass for the element to be kept. A
# comparison without a value is an existence test and decides the result
# immediately.
#
# @param r [Hash] the element to test; its keys are symbolized on a copy
# @param atr [Array<Array>] comparisons as [attribute, operator, value];
#   the attribute may be a dotted path, which is resolved with #get_value
# @return [Boolean] whether the element passes
def evaluate_comp(r, atr)
  keep = true
  source = r.symbolize_keys

  # Case-insensitive string test; the operator's first character selects the
  # anchoring ("^" start, "$" end, "*" anywhere, otherwise whole string).
  text_match = lambda do |subject, op, pattern|
    regex = case op
            when /^\^/
              /^#{pattern}/i
            when /^\$/
              /#{pattern}$/i
            when /^\*/
              /#{pattern}/i
            else
              /^#{pattern}$/i
            end
    (subject =~ regex) ? true : false
  end

  atr.each do |a|
    key = a[0].to_sym
    op = a[1]
    raw = a[2]
    val = if raw =~ /^\d+$/
            raw.to_i
          elsif raw =~ /^\d+\.\d+$/
            raw.to_f
          else
            raw
          end

    # A dotted key is resolved for this comparison only. #get_value rewrites
    # its receiver's keys in place, so it runs on a copy and `source` stays
    # intact (symbol keys) for the remaining comparisons.
    target = key.to_s.include?('.') ? source.dup.get_value(key.to_s) : source

    # No value given: existence test, which ends the evaluation right away.
    if val.nil?
      case target
      when Hash
        return target.key?(key) && !target[key].nil? && !target[key].empty?
      when String
        return true
      when Array
        return !target.empty?
      end
    end

    if target.nil?
      keep = false
    elsif target.is_a?(Array)
      keep = target.any? do |item|
        item.is_a?(Array) ? array_match(item, raw, op) : text_match.call(item, op, raw)
      end
    elsif val.is_a?(Numeric) && op =~ /^[<>=]{1,2}$/
      # Compare the attribute's value, not the enclosing hash itself.
      current = target.is_a?(Hash) ? target[key] : target
      comp = op =~ /^=$/ ? '==' : op
      # NOTE(review): eval only ever sees an Integer, an operator made of the
      # characters < > = (checked above) and a Numeric parsed from digits.
      keep = eval("#{current.to_i}#{comp}#{val}")
    else
      current = target.is_a?(Hash) ? target[key] : target
      keep = if current.is_a?(Array)
               array_match(current, raw, op)
             else
               text_match.call(current, op, raw)
             end
    end

    return false unless keep
  end

  keep
end

#get_value(query) ⇒ Object

Get a value from the hash using a dot-syntax query

Parameters:

  • query (String)

    The query (dot notation)

Returns:

  • (Object)

    result of querying the hash



52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/curly/hash.rb', line 52

# Look up a value with a dot-notation query.
#
# The hash's keys are converted to strings in place (#stringify_keys!)
# before the lookup. A query segment that is not a key of the current hash
# is skipped, leaving the current value unchanged. As soon as the current
# value is an Array, the current segment is looked up on every element and
# that list is returned straight away.
#
# @param query [String] the query (dot notation)
# @return [Object, nil] the value reached by the query; nil when the hash
#   is empty
def get_value(query)
  return nil if empty?

  stringify_keys!

  current = self
  query.split('.').each do |segment|
    return current.map { |item| item.get_value(segment) } if current.is_a?(Array)

    current = current.fetch(segment) if current.key?(segment)
  end
  current
end

#stringify_keys ⇒ Hash

Turn all keys into strings

If the hash has both a string and a symbol for key, keep the string value, discarding the symbol value

Returns:

  • (Hash)

    a copy of the hash where all its keys are strings



373
374
375
376
377
378
379
# File 'lib/curly/hash.rb', line 373

# Build a copy of the hash whose keys are all strings.
#
# A symbol key is dropped when the hash also holds the same key as a string
# (the string entry wins). Hash values are converted recursively; other
# values, arrays included, are copied over untouched.
#
# @return [Hash] a new hash with string keys
def stringify_keys
  result = {}
  each do |key, value|
    next if key.is_a?(Symbol) && key?(key.to_s)

    result[key.to_s] = value.is_a?(Hash) ? value.stringify_keys : value
  end
  result
end

#stringify_keys! ⇒ Object

Destructive version of #stringify_keys

See Also:



386
387
388
# File 'lib/curly/hash.rb', line 386

# In-place variant of #stringify_keys: the hash's contents are replaced with
# the string-keyed copy.
#
# @return [Hash] the receiver itself (the result of Hash#replace)
def stringify_keys!
  replace(stringify_keys)
end

#symbolize_keys ⇒ Hash

Turn all keys into symbols

If the hash has both a string and a symbol for key, keep the symbol value, discarding the string value

Returns:

  • (Hash)

    a copy of the hash where all its keys are symbols



357
358
359
360
361
362
363
# File 'lib/curly/hash.rb', line 357

# Build a copy of the hash whose keys are all symbols.
#
# A string key is dropped when the hash also holds the same key as a symbol
# (the symbol entry wins). Hash values are converted recursively; other
# values, arrays included, are copied over untouched.
#
# @return [Hash] a new hash with symbol keys
def symbolize_keys
  result = {}
  each do |key, value|
    next if key.is_a?(String) && key?(key.to_sym)

    result[key.to_sym] = value.is_a?(Hash) ? value.symbolize_keys : value
  end
  result
end

#tag_match(tag_name, classes, id, attribute, operator, value, descendant: false) ⇒ Object

Test if a tag contains an attribute matching filter queries

Parameters:

  • tag_name (String)

    The tag name

  • classes (Array<String>)

    The classes to match

  • id (String)

    The id attribute to match

  • attribute (String)

    The attribute

  • operator (String)

    The operator, <>= *= $= ^=

  • value (String)

    The value to match

  • descendant (Boolean) (defaults to: false)

    Check descendant tags



295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
# File 'lib/curly/hash.rb', line 295

# Test whether a tag hash (or one of its child tags) satisfies the given
# filters. Filters passed as nil are not applied.
#
# The tag name is compared as a whole, case-insensitively. The id, class and
# attribute filters are only applied when the tag has an 'attrs' list of
# { 'key' => ..., 'value' => ... } hashes.
#
# NOTE(review): when +descendant+ is false and the tag has a 'tags' list, the
# result depends only on the child tags; the tag's own match is ignored.
# Preserved as found, confirm that this is intended.
#
# @param tag_name [String, nil] tag name to match
# @param classes [Array<String>, nil] classes that must all be included in
#   the tag's class attribute value
# @param id [String, nil] pattern the id attribute must contain
# @param attribute [String, nil] name of the attribute to test
# @param operator [String, nil] "*=" (contains), "^=" (starts with),
#   "$=" (ends with); anything else requires the whole value to match
# @param value [String, nil] value to match against the attribute
# @param descendant [Boolean] when true, only this tag itself is tested and
#   child tags are not searched
# @return [Boolean] whether the tag (or a child tag) matches
def tag_match(tag_name, classes, id, attribute, operator, value, descendant: false)
  tag = self

  # Was `!tag['tag'] =~ ...`, which negated the name before matching.
  keep = !tag_name || !(tag['tag'] =~ /^#{tag_name}$/i).nil?

  attrs = tag['attrs']
  if attrs
    if keep && id
      id_attr = attrs.find { |a| a['key'] == 'id' }
      tag_id = id_attr && id_attr['value'] # nil when the tag has no id
      keep = !tag_id.nil? && !(tag_id =~ /#{id}/i).nil?
    end

    if keep && classes
      cls = attrs.find { |a| a['key'] == 'class' }
      # Every requested class must be present, not just the last one.
      keep = cls ? classes.all? { |c| cls['value'].include?(c) } : false
    end

    if keep && attribute
      keep = attrs.any? do |a|
        next false unless a['key'] =~ /^#{attribute}$/i

        found = case operator
                when /^\*/
                  a['value'] =~ /#{value}/i
                when /^\^/
                  a['value'] =~ /^#{value}/i
                when /^\$/
                  a['value'] =~ /#{value}$/i
                else
                  a['value'] =~ /^#{value}$/i
                end
        !found.nil?
      end
    end
  end

  return false if descendant && !keep

  children = tag['tags']
  if !descendant && children
    children.any? { |t| t.tag_match(tag_name, classes, id, attribute, operator, value) }
  else
    keep
  end
end

#to_data(url: nil, clean: false) ⇒ Hash

Convert a Curly object to data hash

Parameters:

  • url (String) (defaults to: nil)

    A url to fall back to

  • clean (Boolean) (defaults to: false)

    Clean extra spaces and newlines in sources

Returns:

  • (Hash)

    return a hash with keys renamed and cleaned up



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/curly/hash.rb', line 13

# Convert a page hash into a data hash with renamed keys.
#
# Only hashes that have a :body_links key are converted; any other hash is
# returned unchanged.
#
# @param url [String, nil] fallback used when the hash has no :url value
# @param clean [Boolean] when true, :head, :body and :source are passed
#   through String#strip and then #clean (nil values are left as nil)
# @return [Hash] the converted hash, or the receiver itself
def to_data(url: nil, clean: false)
  return self unless key?(:body_links)

  tidy = ->(text) { clean ? text&.strip&.clean : text }

  {
    url: self[:url] || url,
    code: self[:code],
    headers: self[:headers],
    meta: self[:meta],
    meta_links: self[:links],
    head: tidy.call(self[:head]),
    body: tidy.call(self[:body]),
    source: tidy.call(self[:source]),
    title: self[:title],
    description: self[:description],
    links: self[:body_links],
    images: self[:body_images]
  }
end

#to_html ⇒ String

Return the raw HTML of the object

Returns:

  • (String)

    Html representation of the object.



39
40
41
42
43
# File 'lib/curly/hash.rb', line 39

# Return the raw HTML stored under the :source key.
#
# @return [String, nil] the :source value, or nil when the key is absent
def to_html
  fetch(:source, nil)
end