Class: ScrubDb::Webs

Inherits:
Object
  • Object
show all
Defined in:
lib/scrub_db/webs.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(criteria = {}) ⇒ Webs

Returns a new instance of Webs.



7
8
9
10
# File 'lib/scrub_db/webs.rb', line 7

# Builds a Webs scrubber for the given criteria.
#
# @param criteria [Hash, nil] scrub criteria handed to ScrubDb::Filter.
#   nil is treated the same as an empty hash.
def initialize(criteria={})
  # `criteria&.empty?` evaluated to nil for a nil argument; nil is falsy, so
  # nil was still forwarded to ScrubDb::Filter.new. Treat nil as "no criteria".
  @empty_criteria = criteria.nil? || criteria.empty?
  @filter = ScrubDb::Filter.new(criteria) unless @empty_criteria
end

Instance Attribute Details

#empty_criteria ⇒ Object

Returns the value of attribute empty_criteria.



5
6
7
# File 'lib/scrub_db/webs.rb', line 5

# Reader for @empty_criteria, which the constructor assigns from its
# emptiness check on the supplied criteria.
def empty_criteria
  @empty_criteria
end

#filter ⇒ Object

Returns the value of attribute filter.



5
6
7
# File 'lib/scrub_db/webs.rb', line 5

# Reader for @filter: the ScrubDb::Filter the constructor builds from the
# criteria, or nil when the constructor skipped building one.
def filter
  @filter
end

Instance Method Details

#extract_exts(url_hash) ⇒ Object



48
49
50
51
# File 'lib/scrub_db/webs.rb', line 48

# Extracts the host labels that follow the first two dot-separated labels
# of a formatted URL.
#
# @param url_hash [Hash] hash whose :url_f entry is a URL string
# @return [Array<String>] e.g. ["com"] for "http://www.example.com"; an
#   empty array when the URL has no host or the host has two labels or fewer
def extract_exts(url_hash)
  host = URI(url_hash[:url_f]).host
  # A URL without a scheme parses with a nil host; to_s keeps that case from
  # raising, and drop(2) yields [] (never nil) for short hosts.
  host.to_s.split('.').drop(2)
end

#merge_criteria_hash(url_hash) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/scrub_db/webs.rb', line 34

# Resets every scrub-result slot on +url_hash+ to a fresh empty array.
# The hash is modified in place (existing values under these keys are
# replaced) and returned.
def merge_criteria_hash(url_hash)
  slot_names = %i[url_exts neg_exts pos_exts neg_paths pos_paths neg_urls pos_urls]
  blank_slots = slot_names.each_with_object({}) { |slot, acc| acc[slot] = [] }
  url_hash.merge!(blank_slots)
end

#merge_criteria_hashes(hashes) ⇒ Object



28
29
30
31
32
# File 'lib/scrub_db/webs.rb', line 28

# Runs merge_criteria_hash over every entry, replacing each element of
# +hashes+ in place with that call's result, and returns +hashes+.
def merge_criteria_hashes(hashes)
  hashes.map! { |entry| merge_criteria_hash(entry) }
end

#pre_scrub(hashes) ⇒ Object



18
19
20
21
22
23
24
25
26
# File 'lib/scrub_db/webs.rb', line 18

# Returns a new array with one entry per element of +hashes+. An entry whose
# :url_f is present gets :url_exts assigned from extract_exts and is then
# replaced by the result of scrub_url_hash; any other entry passes through
# unchanged.
def pre_scrub(hashes)
  hashes.map do |entry|
    next entry unless entry[:url_f].present?

    entry[:url_exts] = extract_exts(entry)
    scrub_url_hash(entry)
  end
end

#scrub_url_hash(url_hash) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/scrub_db/webs.rb', line 53

# Passes a URL hash through @filter.scrub_oa once per criteria list, feeding
# each call's result into the next.
#
# @param url_hash [Hash] reads :url_f, :path (or :url_path) and :url_exts
# @return [Object] the result of the last scrub_oa call, or +url_hash+
#   itself when no filter is set
def scrub_url_hash(url_hash)
  # The constructor leaves @filter unset for empty criteria; with nothing to
  # apply, hand the hash back instead of raising NoMethodError on nil.
  return url_hash if @filter.nil?

  url = url_hash[:url_f]
  # The original read :url_path and immediately overwrote it with :path.
  # :path stays the primary key; :url_path is used only when :path is absent.
  # NOTE(review): confirm which key the upstream formatter actually emits.
  path = url_hash[:path] || url_hash[:url_path]
  url_exts = url_hash[:url_exts]

  # Order matters: each step receives the hash returned by the previous one.
  steps = [
    [url_exts, 'neg_exts', 'equal'],
    [url_exts, 'pos_exts', 'equal'],
    [url, 'neg_urls', 'include'],
    [url, 'pos_urls', 'include'],
    [path, 'neg_paths', 'include'],
    [path, 'pos_paths', 'include']
  ]
  steps.reduce(url_hash) do |hash, (target, list_name, mode)|
    @filter.scrub_oa(hash, target, list_name, mode)
  end
end

#scrub_urls(urls = []) ⇒ Object



12
13
14
15
16
# File 'lib/scrub_db/webs.rb', line 12

# Formats +urls+ with CrmFormatter.format_urls, passes the result through
# merge_criteria_hashes and then pre_scrub, and returns pre_scrub's result.
def scrub_urls(urls=[])
  formatted = CrmFormatter.format_urls(urls)
  seeded = merge_criteria_hashes(formatted)
  pre_scrub(seeded)
end