Class: CrmFormatter::Web
- Inherits:
-
Object
- Object
- CrmFormatter::Web
- Defined in:
- lib/crm_formatter/web.rb
Instance Method Summary collapse
-
#check_web_status(hsh) ⇒ Object
Compares the original and formatted URL and records the result in :web_status.
- #consolidate_negs(hsh) ⇒ Object
- #errors?(url_hash) ⇒ Boolean
-
#extract_path(url_hash) ⇒ Object
Supporting Methods Below #######.
- #format_url(url) ⇒ Object
- #normalize_url(url) ⇒ Object
- #prep_for_uri(url) ⇒ Object
-
#remove_slashes(url) ⇒ Object
For rare cases where a URL mistakenly contains a double slash twice.
-
#remove_ww3(url) ⇒ Object
CALL: Wrap.new.remove_ww3(url).
- #validate_extension(url_hash, url) ⇒ Object
Instance Method Details
#check_web_status(hsh) ⇒ Object
Compares the original and formatted URL and records the result in :web_status
31 32 33 34 35 36 37 38 39 40 |
# File 'lib/crm_formatter/web.rb', line 31
#
# Compares the original URL with the formatted URL and records the outcome
# in hsh[:web_status].
#
# @param hsh [Hash] url hash; reads :web_neg, :url and :url_f
# @return [Hash] the same hash. :web_status is set to 'invalid', 'formatted'
#   or 'unchanged'; it is left untouched when none of those applies.
def check_web_status(hsh)
  # :web_neg may be a joined String, an Array of messages, or nil.
  # Array() lets all three shapes be scanned the same way.
  negs = Array(hsh[:web_neg])
  status = 'invalid' if negs.any? { |neg| neg.to_s.include?('error') }

  if status.nil? && hsh[:url] && hsh[:url_f]
    status = hsh[:url] == hsh[:url_f] ? 'unchanged' : 'formatted'
  end

  # status is either nil or one of the non-blank literals assigned above.
  hsh[:web_status] = status if status
  hsh
end
#consolidate_negs(hsh) ⇒ Object
42 43 44 45 46 |
# File 'lib/crm_formatter/web.rb', line 42
#
# Collapses the list of negative messages in hsh[:web_neg] into a single
# comma-separated String, or nil when there is nothing to report.
#
# @param hsh [Hash] url hash; reads and overwrites :web_neg
# @return [Hash] the same hash
def consolidate_negs(hsh)
  # Array() tolerates nil and an already-joined String, so the method can
  # safely be called more than once on the same hash.
  neg = Array(hsh[:web_neg]).join(', ')
  hsh[:web_neg] = neg.present? ? neg : nil
  hsh
end
#errors?(url_hash) ⇒ Boolean
48 49 50 51 |
# File 'lib/crm_formatter/web.rb', line 48
#
# Reports whether any message in url_hash[:web_neg] mentions 'error'.
#
# @param url_hash [Hash] url hash; reads :web_neg (Array, String or nil)
# @return [Boolean] true when at least one message contains 'error'
def errors?(url_hash)
  # any? with a block stops at the first hit and needs no intermediate array.
  Array(url_hash[:web_neg]).any? { |web_neg| web_neg.include?('error') }
end
#extract_path(url_hash) ⇒ Object
Supporting Methods Below #######
143 144 145 146 147 148 149 150 151 |
# File 'lib/crm_formatter/web.rb', line 143
#
# Splits a path off url_hash[:url_f]. When the text after the host forms a
# path longer than two characters, it is stored in :url_path and cut out of
# :url_f; otherwise the hash is returned unchanged.
#
# @param url_hash [Hash] url hash; reads :url_f, may write :url_f and :url_path
# @return [Hash] the same hash
def extract_path(url_hash)
  url_f = url_hash[:url_f].to_s
  host_and_path = url_f.split('//').last.to_s
  # The first segment is the host; everything after it is the path.
  path = "/#{host_and_path.split('/').drop(1).join('/')}"

  if path.length > 2
    url_hash[:url_path] = path
    # Cut only the trailing occurrence. Removing every occurrence would also
    # eat part of the host when the path repeats it, e.g. ".../www" vs "//www.".
    cut = url_f.rindex(path)
    url_hash[:url_f] = url_f[0...cut] + url_f[(cut + path.length)..-1] if cut
  end
  url_hash
end
#format_url(url) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/crm_formatter/web.rb', line 9
#
# Runs the full URL formatting pipeline: prep, normalize, extension check,
# message consolidation, path extraction and status assignment.
#
# @param url [String, nil] raw URL as entered
# @return [Hash] url hash with :web_status, :url, :url_f, :url_path and :web_neg
def format_url(url)
  prep_result = prep_for_uri(url)
  url_hash = prep_result[:url_hash]
  url = errors?(url_hash) ? nil : prep_result[:url]

  if url.present?
    begin
      url = normalize_url(url)
      ext_result = validate_extension(url_hash, url)
      url_hash = ext_result[:url_hash]
      url = ext_result[:url]
    rescue URI::InvalidURIError => error
      # URI() rejects some strings that survive prep_for_uri. Record that as
      # a negative, the same way prep_for_uri records its own failures.
      url_hash[:web_neg] << "error: #{error}"
      url = nil
    end
    url = nil if url.present? && errors?(url_hash)
  end

  url_hash = consolidate_negs(url_hash)
  url_hash[:url_f] = url
  url_hash = extract_path(url_hash) if url.present?
  check_web_status(url_hash)
end
#normalize_url(url) ⇒ Object
86 87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/crm_formatter/web.rb', line 86
#
# Reduces a URL to "scheme://host", defaulting the scheme to http and
# adding a "www." prefix when the URL does not contain one.
#
# @param url [String, nil] prepared URL
# @return [String, nil] normalized URL, or nil when url is blank
# @raise [URI::InvalidURIError] when URI() cannot parse url
def normalize_url(url)
  return unless url.present?

  uri = URI(url)
  scheme = uri.scheme
  host = uri.host
  url = "#{scheme}://#{host}" if host.present? && scheme.present?

  # Test for a real scheme prefix rather than the first four letters, so a
  # bare host such as "httpbin.org" still gets its scheme added.
  url = "http://#{url}" unless url.start_with?('http://', 'https://')

  # Non-bang gsub: never modify the string object the caller passed in.
  url = url.gsub('//', '//www.') unless url.include?('www.')
  url
end
#prep_for_uri(url) ⇒ Object
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/crm_formatter/web.rb', line 53
#
# Cleans a raw URL ahead of URI parsing and starts the result hash.
# Keeps the text before the first '|' and the first backslash, strips
# non-ASCII characters, downcases and trims, runs remove_ww3 twice and
# remove_slashes once, rejects URLs containing a space or a banned symbol,
# and drops one trailing slash.
#
# @param url [String, nil] raw URL as entered
# @return [Hash] { url_hash: Hash, url: String or nil }. url_hash[:web_neg]
#   gains 'error: syntax' when the cleaned URL is blank, or "error: <message>"
#   when a StandardError was raised while cleaning.
def prep_for_uri(url)
  url_hash = { web_status: nil, url: url, url_f: nil, url_path: nil, web_neg: [] }
  banned_symbols = ['!', '$', '%', "'", '(', ')', '*', '+', ',', '<', '>', '@', '[', ']', '^', '{', '}', '~']

  begin
    url = url&.split('|')&.first
    url = url&.split('\\')&.first
    url&.gsub!(/\P{ASCII}/, '')
    url = url&.downcase&.strip

    if url.present?
      # remove_ww3 edits the string in place; its return value is not needed.
      remove_ww3(url)
      remove_ww3(url)
    end
    url = remove_slashes(url) if url.present?
    url&.strip!

    if url.present?
      if url.include?(' ')
        url = nil
      elsif url[-1] == '/'
        url = url[0..-2]
      end
    end

    url = nil if url.present? && banned_symbols.any? { |symb| url.include?(symb) }

    unless url.present?
      url_hash[:web_neg] << 'error: syntax'
      url_hash[:url_f] = url
    end
  rescue StandardError => error
    url_hash[:web_neg] << "error: #{error}"
    url = nil
  end

  { url_hash: url_hash, url: url }
end
#remove_slashes(url) ⇒ Object
For rare cases where a URL mistakenly contains a double slash twice.
162 163 164 165 166 167 |
# File 'lib/crm_formatter/web.rb', line 162
#
# Handles URLs that contain a double slash more than once, keeping only the
# text up to the second double slash.
#
# @param url [String, nil] URL to clean
# @return [String, nil] the trimmed URL, or url itself when it is blank or
#   splits into at most two parts on '//'
def remove_slashes(url)
  return url unless url.present? && url.include?('//')

  parts = url.split('//')
  # Re-insert the separator that split removed; a bare join would fuse
  # "http:" and the host into "http:host".
  return parts[0..1].join('//') if parts.length > 2

  url
end
#remove_ww3(url) ⇒ Object
CALL: Wrap.new.remove_ww3(url)
154 155 156 157 158 159 |
# File 'lib/crm_formatter/web.rb', line 154
#
# Rewrites numbered "www" aliases (ww2, www3, ...) to plain "www".
# The string is modified IN PLACE; prep_for_uri relies on that and ignores
# the return value.
#
# @param url [String, nil] URL to clean (mutated)
# @return [String, nil] the same string object, or nil when url is blank
def remove_ww3(url)
  return unless url.present?

  # Only a whole host label counts: it must start the string or follow
  # "//" or ".", and be followed by ".". This keeps the scheme intact and
  # leaves domains that merely contain "ww2" (e.g. "ww2museum.org") alone.
  url.gsub!(/(\A|\/\/|\.)w{2,3}\d+(?=\.)/, '\1www')
  url.gsub!('www.www', 'www')
  url
end
#validate_extension(url_hash, url) ⇒ Object
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
# File 'lib/crm_formatter/web.rb', line 101
#
# Checks the host's extension(s) against CrmFormatter::Extensions.list.
# Every host label after the first two is treated as an extension. Exactly
# one of them must be on the list; labels after it are trimmed from the URL.
#
# @param url_hash [Hash] url hash; :web_neg (Array) receives any error
#   message and :url_f is reset to nil on error
# @param url [String, nil] normalized URL
# @return [Hash, nil] { url_hash: Hash, url: String or nil }; nil when url
#   is blank
# @raise [URI::InvalidURIError] when URI() cannot parse url
def validate_extension(url_hash, url)
  return unless url.present?

  # A missing host, or one with fewer than three labels, has no extension.
  host = URI(url).host.to_s
  uri_parts = host.split('.')
  url_exts = uri_parts.drop(2)
  matched_exts = []

  ### Finds Errors
  if url_exts.empty? ## Missing ext.
    err_msg = 'error: ext.none'
  else ## Has ext(s), but need to verify validity and count.
    matched_exts = CrmFormatter::Extensions.list & url_exts

    if matched_exts.empty? ## Has ext, but not valid.
      err_msg = "error: ext.invalid [#{url_exts.join(', ')}]"
    elsif matched_exts.count > 1 ## Has too many valid exts, Limit 1.
      err_msg = "error: ext.valid > 1 [#{matched_exts.join(', ')}]"
    end
  end

  if err_msg
    url_hash[:web_neg] << err_msg
    url_hash[:url_f] = nil
    return { url_hash: url_hash, url: nil }
  end

  ### Only Non-Errors Get Here ###
  ## Has one valid ext; drop any extra labels after it.
  if url_exts.count > matched_exts.count
    # Trim by label position, not by substring, so a domain that contains
    # the extension text (e.g. "commerce" and "com") is not cut short.
    last_kept = 2 + url_exts.index(matched_exts.first)
    prefix = url[0, url.index(host).to_i]
    url = "#{prefix}#{uri_parts[0..last_kept].join('.')}"
  end

  { url_hash: url_hash, url: url }
end