Class: ScrubDb::Webs
- Inherits: Object
  - Object
    - ScrubDb::Webs
- Defined in:
- lib/scrub_db/webs.rb
Instance Attribute Summary
-
#empty_criteria ⇒ Object
Returns the value of attribute empty_criteria.
-
#filter ⇒ Object
Returns the value of attribute filter.
Instance Method Summary
- #extract_exts(url_hash) ⇒ Object
-
#initialize(criteria = {}) ⇒ Webs
constructor
A new instance of Webs.
- #merge_criteria_hash(url_hash) ⇒ Object
- #merge_criteria_hashes(hashes) ⇒ Object
- #pre_scrub(hashes) ⇒ Object
- #scrub_url_hash(url_hash) ⇒ Object
- #scrub_urls(urls = []) ⇒ Object
Constructor Details
Instance Attribute Details
#empty_criteria ⇒ Object
Returns the value of attribute empty_criteria.
5 6 7 |
# File 'lib/scrub_db/webs.rb', line 5
# Reader for the empty_criteria attribute.
#
# @return [Object] whatever is currently stored in @empty_criteria
def empty_criteria
  @empty_criteria
end
#filter ⇒ Object
Returns the value of attribute filter.
5 6 7 |
# File 'lib/scrub_db/webs.rb', line 5
# Reader for the filter attribute.
#
# @return [Object] whatever is currently stored in @filter
def filter
  @filter
end
Instance Method Details
#extract_exts(url_hash) ⇒ Object
48 49 50 51 |
# File 'lib/scrub_db/webs.rb', line 48
# Returns the dot-separated labels of the URL's host that follow the first
# two labels (e.g. "www.example.com.au" => ["com", "au"]).
#
# Always returns an Array. When the URL has no host component (for example a
# scheme-less string such as "example.com/path") or the host has fewer than
# three labels, the result is an empty Array rather than nil, and no
# NoMethodError is raised on a nil host.
#
# @param url_hash [Hash] hash whose :url_f entry holds the URL string to parse
# @return [Array<String>] host labels from the third one onward; may be empty
# @raise [URI::InvalidURIError] if :url_f cannot be parsed as a URI
def extract_exts(url_hash)
  host = URI(url_hash[:url_f]).host
  # host is nil when the URI has no authority part; to_s turns that into ""
  # so the split/drop chain yields [] instead of blowing up.
  host.to_s.split('.').drop(2)
end
#merge_criteria_hash(url_hash) ⇒ Object
34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/scrub_db/webs.rb', line 34
# Resets every criteria slot on the given hash to a fresh empty Array.
#
# The hash is modified in place; any value already stored under one of the
# criteria keys is replaced.
#
# @param url_hash [Hash] hash to receive the criteria keys
# @return [Hash] the same hash object that was passed in
def merge_criteria_hash(url_hash)
  %i[url_exts neg_exts pos_exts neg_paths pos_paths neg_urls pos_urls].each do |slot|
    url_hash[slot] = []
  end
  url_hash
end
#merge_criteria_hashes(hashes) ⇒ Object
28 29 30 31 32 |
# File 'lib/scrub_db/webs.rb', line 28
# Applies merge_criteria_hash to every element of the given array, replacing
# each element in place with that call's return value.
#
# @param hashes [Array<Hash>] array to update in place
# @return [Array<Hash>] the same array object that was passed in
def merge_criteria_hashes(hashes)
  hashes.map! { |entry| merge_criteria_hash(entry) }
end
#pre_scrub(hashes) ⇒ Object
18 19 20 21 22 23 24 25 26 |
# File 'lib/scrub_db/webs.rb', line 18
# Builds a new array in which every hash that has a present :url_f has had
# its :url_exts filled in (via extract_exts) and has then been passed through
# scrub_url_hash. Hashes without a present :url_f are returned unchanged.
#
# NOTE(review): `present?` is presumably ActiveSupport's Object#present? —
# confirm it is loaded wherever this class is used.
#
# @param hashes [Array<Hash>] URL hashes to process
# @return [Array] one result per input element, in the same order
def pre_scrub(hashes)
  hashes.map do |entry|
    next entry unless entry[:url_f].present?

    entry[:url_exts] = extract_exts(entry)
    scrub_url_hash(entry)
  end
end
#scrub_url_hash(url_hash) ⇒ Object
53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/scrub_db/webs.rb', line 53
# Passes a URL hash through @filter.scrub_oa once per criteria list, in a
# fixed order (extensions, then URLs, then paths; negative before positive),
# threading each call's return value into the next call.
#
# The targets handed to the filter are read from the hash once, up front:
#   * :url_exts for 'neg_exts' / 'pos_exts' (mode 'equal')
#   * :url_f    for 'neg_urls' / 'pos_urls' (mode 'include')
#   * the path  for 'neg_paths' / 'pos_paths' (mode 'include')
#
# @param url_hash [Hash] hash carrying :url_f, :url_exts and a path entry
# @return [Object] whatever the final @filter.scrub_oa call returns
def scrub_url_hash(url_hash)
  url = url_hash[:url_f]
  # Previously :url_path was read and then immediately overwritten by :path,
  # so :url_path was never used. Prefer :path (unchanged behaviour when it is
  # set) and fall back to :url_path otherwise.
  # NOTE(review): confirm which of the two keys the upstream formatter emits.
  path = url_hash[:path] || url_hash[:url_path]
  url_exts = url_hash[:url_exts]

  checks = [
    [url_exts, 'neg_exts',  'equal'],
    [url_exts, 'pos_exts',  'equal'],
    [url,      'neg_urls',  'include'],
    [url,      'pos_urls',  'include'],
    [path,     'neg_paths', 'include'],
    [path,     'pos_paths', 'include']
  ]

  checks.reduce(url_hash) do |current, (target, criteria_name, mode)|
    @filter.scrub_oa(current, target, criteria_name, mode)
  end
end
#scrub_urls(urls = []) ⇒ Object
12 13 14 15 16 |
# File 'lib/scrub_db/webs.rb', line 12
# Entry point: hands the URLs to CrmFormatter.format_urls, attaches the empty
# criteria slots to each resulting hash via merge_criteria_hashes, and then
# runs the lot through pre_scrub.
#
# @param urls [Array] URLs to process; defaults to an empty array
# @return [Array] the value returned by pre_scrub
def scrub_urls(urls = [])
  pre_scrub(
    merge_criteria_hashes(
      CrmFormatter.format_urls(urls)
    )
  )
end