Class: CrmFormatter::Web

Inherits:
Object
  • Object
show all
Defined in:
lib/crm_formatter/web.rb

Instance Method Summary collapse

Instance Method Details

#check_web_status(hsh) ⇒ Object

Compares the original URL with the formatted URL and records the resulting status.



31
32
33
34
35
36
37
38
39
40
# File 'lib/crm_formatter/web.rb', line 31

# Derives :web_status from the error notes and from comparing :url with :url_f.
#
# 'invalid'   - :web_neg includes 'error'
# 'formatted' - both URLs are set and differ
# 'unchanged' - both URLs are set and are equal
#
# :web_status is left untouched when none of the above applies.
#
# @param hsh [Hash] url hash; reads :web_neg, :url and :url_f
# @return [Hash] the same hash
def check_web_status(hsh)
  status =
    if hsh[:web_neg]&.include?('error')
      'invalid'
    elsif hsh[:url] && hsh[:url_f]
      hsh[:url] == hsh[:url_f] ? 'unchanged' : 'formatted'
    end

  hsh[:web_status] = status if status.present?
  hsh
end

#consolidate_negs(hsh) ⇒ Object



42
43
44
45
46
# File 'lib/crm_formatter/web.rb', line 42

# Collapses the :web_neg notes into one comma-separated string, or nil when
# there is nothing to report.
#
# Array() coerces the value first, so a nil :web_neg or an already-joined
# String does not raise; calling this twice gives the same result.
#
# @param hsh [Hash] url hash; :web_neg may be an Array, a String or nil
# @return [Hash] the same hash with :web_neg replaced by a String or nil
def consolidate_negs(hsh)
  joined = Array(hsh[:web_neg]).join(', ')
  hsh[:web_neg] = joined.present? ? joined : nil
  hsh
end

#errors?(url_hash) ⇒ Boolean

Returns:

  • (Boolean)


48
49
50
51
# File 'lib/crm_formatter/web.rb', line 48

# Reports whether any :web_neg note contains the text 'error'.
#
# :web_neg is coerced with Array(), so the joined String form written by
# consolidate_negs, or nil, is accepted as well as the Array form.
#
# @param url_hash [Hash] url hash; reads :web_neg
# @return [Boolean]
def errors?(url_hash)
  Array(url_hash[:web_neg]).any? { |note| note.include?('error') }
end

#extract_path(url_hash) ⇒ Object

Supporting Methods Below #######



143
144
145
146
147
148
149
150
151
# File 'lib/crm_formatter/web.rb', line 143

# Moves the path portion of :url_f into :url_path.
#
# The path is everything after the first '/' that follows the last '//'.
# Paths of two characters or fewer (such as '/' or '/a') are left in place
# and :url_path is not set.
#
# Only the last occurrence of the path text is removed from :url_f. Removing
# every occurrence would corrupt URLs whose path repeats the host, e.g.
# 'http://example.com/example.com'.
#
# @param url_hash [Hash] url hash; reads :url_f, may write :url_f and :url_path
# @return [Hash] the same hash
def extract_path(url_hash)
  url = url_hash[:url_f]
  return url_hash if url.nil? || url.empty?

  # drop(1) discards the host segment and, unlike [1..-1], never yields nil
  segments = url.split('//').last.to_s.split('/').drop(1)
  path = "/#{segments.join('/')}"
  return url_hash unless path.length > 2

  url_hash[:url_path] = path
  cut = url.rindex(path)
  url_hash[:url_f] = url[0...cut] + url[(cut + path.length)..-1] if cut
  url_hash
end

#format_url(url) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/crm_formatter/web.rb', line 9

# Runs a raw URL through the formatting pipeline and returns the url hash.
#
# Steps, in order: prep_for_uri, normalize_url, validate_extension,
# consolidate_negs, extract_path (only when a URL survived), check_web_status.
# The URL is discarded (set to nil) as soon as any step records an error.
#
# @param url [String, nil] raw URL
# @return [Hash] the hash returned by check_web_status
def format_url(url)
  prep_result = prep_for_uri(url)
  url_hash = prep_result[:url_hash]
  url = errors?(url_hash) ? nil : prep_result[:url]

  if url.present?
    begin
      ext_result = validate_extension(url_hash, normalize_url(url))
      url_hash = ext_result[:url_hash]
      url = ext_result[:url]
      url = nil if url.present? && errors?(url_hash)
    rescue URI::InvalidURIError => error
      # URI() rejects some characters that the banned-symbol screen lets
      # through (a double quote, for example). Record the failure in the same
      # "error: ..." form prep_for_uri uses instead of letting it escape.
      url_hash[:web_neg] << "error: #{error}"
      url_hash[:url_f] = nil
      url = nil
    end
  end

  url_hash = consolidate_negs(url_hash)
  url_hash[:url_f] = url
  url_hash = extract_path(url_hash) if url.present?
  check_web_status(url_hash)
end

#normalize_url(url) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/crm_formatter/web.rb', line 86

# Reduces a URL to "scheme://www.host" form.
#
# * When URI() finds both a scheme and a host, everything else (path, query)
#   is dropped.
# * Otherwise "http://" is prepended unless the text already starts with
#   "http://" or "https://". Checking only for the letters "http" would wrongly
#   skip hosts such as "httpbin.org".
# * "www." is inserted after "//" unless the URL already contains "www.".
#
# The argument is never mutated.
#
# @param url [String, nil]
# @return [String, nil] normalized URL, or nil when url is blank
# @raise [URI::InvalidURIError] when URI() cannot parse url
def normalize_url(url)
  return unless url.present?

  uri = URI(url)
  url = "#{uri.scheme}://#{uri.host}" if uri.host.present? && uri.scheme.present?
  url = "http://#{url}" unless url.start_with?('http://', 'https://')
  url = url.gsub('//', '//www.') unless url.include?('www.')
  url
end

#prep_for_uri(url) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/crm_formatter/web.rb', line 53

# Cleans a raw URL string ahead of URI parsing and starts the result hash.
#
# Cleaning steps, in order: keep the text before the first '|' and then before
# the first backslash, strip non-ASCII characters, downcase and trim, call
# remove_ww3 twice, call remove_slashes, reject the URL if it contains a space,
# drop one trailing '/', reject the URL if it contains a banned symbol.
#
# A rejected or blank URL adds 'error: syntax' to :web_neg. Any StandardError
# raised along the way is recorded as "error: <message>" and the URL is nil.
#
# @param url [String, nil] raw URL
# @return [Hash] { url_hash: Hash, url: String or nil }
def prep_for_uri(url)
  url_hash = { web_status: nil, url: url, url_f: nil, url_path: nil, web_neg: [] }

  begin
    ['|', '\\'].each { |divider| url = url&.split(divider)&.first }
    url&.gsub!(/\P{ASCII}/, '')
    url = url&.downcase&.strip

    if url.present?
      remove_ww3(url)
      remove_ww3(url)
    end
    url = remove_slashes(url) if url.present?
    url&.strip!

    if url.present?
      if url.include?(' ')
        url = nil
      elsif url[-1] == '/'
        url = url[0..-2]
      end
    end

    forbidden = "!$%'()*+,<>@[]^{}~".chars
    url = nil if url.present? && url.each_char.any? { |char| forbidden.include?(char) }

    unless url.present?
      url_hash[:web_neg] << 'error: syntax'
      url_hash[:url_f] = url
    end
  rescue StandardError => error
    url_hash[:web_neg] << "error: #{error}"
    url = nil
  end

  { url_hash: url_hash, url: url }
end

#remove_slashes(url) ⇒ Object

For rare cases where a URL mistakenly contains a double slash ('//') more than once.



162
163
164
165
166
167
# File 'lib/crm_formatter/web.rb', line 162

# Truncates a URL at its second '//' so that only "first//second" remains.
#
# The two kept pieces are re-joined with '//'. Joining with no separator would
# turn 'http://a.com//x' into 'http:a.com'. The -1 limit keeps trailing empty
# pieces so that a trailing '//' is counted too.
#
# @param url [String, nil]
# @return [String, nil] url unchanged when blank or with fewer than two '//'
def remove_slashes(url)
  return url unless url.present? && url.include?('//')

  parts = url.split('//', -1)
  parts.length > 2 ? parts[0..1].join('//') : url
end

#remove_ww3(url) ⇒ Object

CALL: CrmFormatter::Web.new.remove_ww3(url)



154
155
156
157
158
159
# File 'lib/crm_formatter/web.rb', line 154

# Rewrites numbered "www" host labels (ww2, ww3, www2, ...) to plain "www",
# then collapses a resulting "www.www" into "www".
#
# The string is modified IN PLACE; prep_for_uri ignores the return value and
# relies on that.
#
# Only a whole label is rewritten: it must start at the beginning of the
# string, after '//' or after '.', and be followed by '.'. Replacing any
# dot-delimited chunk that merely contains "ww<digit>" would also wipe out
# the scheme ('https://ww2' => 'www') and path segments ('com/ww2history').
#
# @param url [String, nil] mutable string
# @return [String, nil] the same (mutated) string, or nil when url is blank
def remove_ww3(url)
  return unless url.present?

  url.gsub!(/(\A|\/\/|\.)w{2,3}[0-9]+(?=\.)/, '\1www')
  url.gsub!('www.www', 'www')
  url
end

#validate_extension(url_hash, url) ⇒ Object



101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/crm_formatter/web.rb', line 101

# Checks the host's extension(s) against CrmFormatter::Extensions.list.
#
# Every host label after the second one ("www.name.<ext>...") is treated as an
# extension. Errors appended to url_hash[:web_neg]:
#   'error: ext.none'              - no extension labels (or no host at all)
#   'error: ext.invalid [...]'     - none of the labels is in the list
#   'error: ext.valid > 1 [...]'   - more than one label is in the list
# On error, :url_f is cleared and the returned :url is nil.
#
# With exactly one valid extension plus extra labels, the URL is cut back to
# "scheme://host-up-to-that-extension". It is rebuilt from the host labels
# because splitting the URL text on the extension cuts at the first substring
# match ('www.network.net.junk' would become 'www.net').
#
# @param url_hash [Hash] url hash; :web_neg must be an Array
# @param url [String, nil] normalized URL
# @return [Hash] { url_hash: Hash, url: String or nil }; for a blank url the
#   inputs are returned as given
# @raise [URI::InvalidURIError] when URI() cannot parse url
def validate_extension(url_hash, url)
  return { url_hash: url_hash, url: url } unless url.present?

  uri = URI(url)
  # to_s/drop keep a missing or one-label host from raising; both yield no exts
  host_parts = uri.host.to_s.split('.')
  url_exts = host_parts.drop(2)
  matched_exts = []

  err_msg =
    if url_exts.empty?
      'error: ext.none'
    else
      matched_exts = CrmFormatter::Extensions.list & url_exts
      if matched_exts.empty?
        "error: ext.invalid [#{url_exts.join(', ')}]"
      elsif matched_exts.count > 1
        "error: ext.valid > 1 [#{matched_exts.join(', ')}]"
      end
    end

  if err_msg
    url_hash[:web_neg] << err_msg
    url_hash[:url_f] = nil
    return { url_hash: url_hash, url: nil }
  end

  ### Only Non-Errors Get Here ###
  if url_exts.count > matched_exts.count
    last_kept = 2 + url_exts.index(matched_exts.first)
    prefix = uri.scheme ? "#{uri.scheme}://" : '//'
    url = "#{prefix}#{host_parts[0..last_kept].join('.')}"
  end

  { url_hash: url_hash, url: url }
end