Module: DomainExtractor::Parser

Defined in:
lib/domain_extractor/parser.rb

Overview

Parser orchestrates the pipeline for URL normalization, validation, and domain extraction.

Constant Summary collapse

# Matches a scheme prefix anchored at the very start of the string: one
# letter, then any run of letters, digits, "+", "." or "-", followed by "://".
# Case-insensitive; the scheme name (without "://") is capture group 1.
SCHEME_PATTERN =
%r{\A([a-z][a-z0-9+.-]*)://}i
# Frozen list of message fragments.
# NOTE(review): presumably compared against URI::InvalidURIError messages to
# decide whether a parse may be retried — confirm at the use site.
RETRYABLE_URI_MESSAGES =
['bad URI', 'is not URI'].freeze

Class Method Summary collapse

Class Method Details

.call(raw_url) ⇒ Object



28
29
30
31
32
33
34
35
# File 'lib/domain_extractor/parser.rb', line 28

# Runs the extraction pipeline for a single raw URL.
#
# The components returned by +extract_components+ are destructured into a
# URI and its host attributes. When both are present they are handed to
# +build_result+; otherwise an empty +ParsedURL+ (wrapping +nil+) is returned.
#
# @param raw_url [Object] the value forwarded unchanged to +extract_components+
# @return [Object] whatever +build_result+ returns on success (presumably a
#   ParsedURL — confirm against build_result), or +ParsedURL.new(nil)+
#   when the components are missing or a rescued error occurs
def call(raw_url)
  parsed_uri, attributes = extract_components(raw_url)

  if parsed_uri && attributes
    build_result(host_attributes: attributes, uri: parsed_uri)
  else
    ParsedURL.new(nil)
  end
rescue ::URI::InvalidURIError, ::PublicSuffix::Error
  # Invalid-URI and PublicSuffix errors are reported as an empty result
  # instead of being propagated to the caller.
  ParsedURL.new(nil)
end

.host_attributes(host) ⇒ Object



43
44
45
46
47
48
49
50
51
52
# File 'lib/domain_extractor/parser.rb', line 43

# Builds the attribute set for a host name.
#
# The host is rejected up front when +invalid_host?+ says so. Otherwise it is
# lower-cased and passed to +parse_domain+; a truthy parse result is turned
# into attributes via +domain_attributes+. If parsing yields nothing, the
# lower-cased host is used with +hostname_attributes+, but only when
# +Validators.valid_hostname?+ accepts it.
#
# @param host [String, nil] host to inspect (nil handling is delegated to
#   +invalid_host?+ — presumably it rejects nil; confirm)
# @return [Object, nil] the attributes from +domain_attributes+ or
#   +hostname_attributes+, or +nil+ when the host is invalid or unrecognised
def host_attributes(host)
  return if invalid_host?(host)

  lowered = host.downcase
  parsed = parse_domain(lowered)

  if parsed
    domain_attributes(parsed, lowered)
  elsif Validators.valid_hostname?(lowered)
    hostname_attributes(lowered)
  end
end

.valid?(raw_url) ⇒ Boolean

Returns:

  • (Boolean)


37
38
39
40
41
# File 'lib/domain_extractor/parser.rb', line 37

# Reports whether a raw URL can be processed by +extract_components+.
#
# The result of +extract_components+ is coerced to a strict boolean; an
# invalid-URI or PublicSuffix error counts as "not valid" rather than being
# raised to the caller.
#
# @param raw_url [Object] the value forwarded unchanged to +extract_components+
# @return [Boolean] +true+ when +extract_components+ returns a truthy value,
#   +false+ when it returns a falsy value or raises a rescued error
def valid?(raw_url)
  extract_components(raw_url) ? true : false
rescue ::URI::InvalidURIError, ::PublicSuffix::Error
  false
end