Class: MetalArchives::Parsers::Artist

Inherits:
Parser
  • Object
show all
Defined in:
lib/metal_archives/parsers/artist.rb

Overview

Artist parser

Class Method Summary collapse

Methods inherited from Parser

rewrite, sanitize

Class Method Details

.map_params(query) ⇒ Object

Map attributes to MA attributes

Returns Hash

query

Hash; its :name entry is used as the search query



22
23
24
25
26
# File 'lib/metal_archives/parsers/artist.rb', line 22

# Map attributes to MA attributes
#
# Returns a Hash whose :query entry is query[:name], or "" when that
# entry is nil or false.
def map_params(query)
  name = query[:name]
  { query: name || "" }
end

.parse_html(response) ⇒ Object

Parse main HTML page

Returns Hash

Raises
  • MetalArchives::Errors::ParserError when parsing failed. Please report this error.



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/metal_archives/parsers/artist.rb', line 36

# Parse an artist's main HTML page.
#
# Returns a Hash with the keys :name, :aliases, :date_of_birth,
# :date_of_death, :cause_of_death, :gender, :country, :location, :photo
# and :bands.
#
# Any StandardError raised while parsing has its backtrace written to the
# configured logger and is re-raised as Errors::ParserError.
def parse_html(response)
  doc = Nokogiri::HTML(response)

  # Defaults for every property; overwritten below when the page has a value
  props = {
    name: nil,
    aliases: [],
    date_of_birth: nil,
    date_of_death: nil,
    cause_of_death: nil,
    gender: nil,
    country: nil,
    location: nil,
    photo: nil,
    bands: [],
  }

  # Photo
  if doc.css(".member_img").any?
    src = doc.css(".member_img img").first.attr("src")
    props[:photo] = rewrite(URI(src))
  end

  # Each <dt> is a label; the element following it holds the value
  doc.css("#member_info dl").each do |list|
    list.css("dt").each do |term|
      detail = term.next_element
      value = sanitize(detail.content)

      # Skip entries whose value is the literal "N/A"
      next if value == "N/A"

      case sanitize(term.content)
      when "Real/full name:"
        props[:name] = value
      when "Age:"
        # Replace "<digits> (born X)" with X before parsing the date
        born = value.strip.gsub(/[0-9]* *\(born ([^)]*)\)/, '\1')
        props[:date_of_birth] = Parsers::Date.parse(born)
      when "R.I.P.:"
        props[:date_of_death] = Parsers::Date.parse(value)
      when "Died of:"
        props[:cause_of_death] = value
      when "Place of origin:"
        # The link text is parsed as the country
        props[:country] = Country.parse(sanitize(detail.css("a").first.content))
        # The direct text nodes, minus parentheses, are the location
        place = detail.xpath("text()").map(&:content).join.strip.gsub(/[()]/, "")
        props[:location] = place unless place.empty?
      when "Gender:"
        props[:gender] =
          case value
          when "Male" then :male
          when "Female" then :female
          else raise Errors::ParserError, "Unknown gender: #{value}"
          end
      else
        raise Errors::ParserError, "Unknown token: #{term.content}"
      end
    end
  end

  # Aliases: the first .band_member_name text, unless it equals the name
  shown_name = sanitize doc.css(".band_member_name").first.content
  props[:aliases] << shown_name unless props[:name] == shown_name

  # Builds the Hash for a single .member_in_band row; nil entries are dropped
  band_from_row = lambda do |row|
    anchor = row.css("h3 a")
    band_id = nil
    band_name = nil

    if anchor.any?
      # Linked band: the id is the href segment between the last "/" and "#"
      band_id = Integer(anchor.attr("href").text.gsub(%r(^.*/([^/#]*)#.*$), '\1'))
    else
      # Unlinked band: only the heading text is available
      band_name = sanitize row.css("h3").text
    end

    role_node = row.css(".member_in_band_role")

    # The text inside the last pair of parentheses is parsed as the year range
    role_text = role_node.xpath("text()").map(&:content).join.strip
    years = Parsers::Year.parse(role_text.gsub(/[\n\r\t]/, "").gsub(/.*\((.*)\)/, '\1'))
    role = sanitize role_node.css("strong")&.first&.content

    { id: band_id, name: band_name, years_active: years, role: role }.compact
  end

  # Active bands first, then past bands
  [
    ["#artist_tab_active .member_in_band", true],
    ["#artist_tab_past .member_in_band", false],
  ].each do |selector, active|
    doc.css(selector).each do |row|
      props[:bands] << band_from_row.call(row).merge(active: active)
    end
  end

  props
rescue StandardError => e
  e.backtrace.each { |line| MetalArchives.config.logger.error line }
  raise Errors::ParserError, e
end

.parse_links_html(response) ⇒ Object

Parse links HTML page

Returns Array of Hash (one Hash per link, with :url, :type and :title)

Raises
  • MetalArchives::Errors::ParserError when parsing failed. Please report this error.



150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/metal_archives/parsers/artist.rb', line 150

# Parse the links HTML page.
#
# Returns an Array of Hashes, one per link row, each with :url, :type and
# :title. Rows whose id starts a line with "header_" switch the type used
# for the rows after them; the type is :official until the first header.
#
# Any StandardError raised while parsing has its backtrace written to the
# configured logger and is re-raised as Errors::ParserError.
def parse_links_html(response)
  # Default to official links
  category = :official

  Nokogiri::HTML(response).css("#linksTablemain tr").each_with_object([]) do |row, links|
    row_id = row["id"]

    if /^header_/.match?(row_id)
      # Header row: the remainder of the id names the new link type
      category = row_id.gsub(/^header_/, "").downcase.to_sym
      next
    end

    # Rows without an anchor contribute nothing (no links have been added yet)
    anchor = row.css("td a").first
    next unless anchor

    links << { url: anchor["href"], type: category, title: anchor.content }
  end
rescue StandardError => e
  e.backtrace.each { |line| MetalArchives.config.logger.error line }
  raise Errors::ParserError, e
end