Class: MusicStory::Repository::ArtistXMLFile

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/music_story/repository/artist_xml_file.rb

Overview

Parses an XML file of MusicStory artiste objects. The top-level structure should be <items>…<artistes><artiste>…</artiste>…<artiste>…</artiste></artistes></items>.

A formal XSD doesn’t appear to exist, so this is based entirely on data seen so far, together with some small pieces of info (such as the ARTIST_GENRE_RELATIONS and ASSOCIATION_TYPES) gleaned from a brief PDF doc in franglais (descriptionxml_en.pdf).

Some elements mentioned in the PDF (such as collaboration, album, evenement etc) haven’t been seen so far in artist XML files so aren’t handled.

Constant Summary collapse

# Codes used in their XML file format: the numeric value of a <genre>
# element's `relation` attribute, mapped to the symbol the genre is grouped
# under. A code that is not listed here looks up as nil.
# Frozen so the shared lookup table cannot be mutated at runtime.
ARTIST_GENRE_RELATIONS = {
  1 => :main,
  2 => :secondary,
  3 => :influenced_by
}.freeze
# Single-letter codes found as the text of an <associe> element, mapped to
# the symbol the associated artist is grouped under. A code that is not
# listed here looks up as nil.
# Frozen so the shared lookup table cannot be mutated at runtime.
ASSOCIATION_TYPES = {
  'A' => :similar,
  'I' => :influenced_by,
  'S' => :successor
}.freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(io) ⇒ ArtistXMLFile

Returns a new instance of ArtistXMLFile.



13
14
15
# File 'lib/music_story/repository/artist_xml_file.rb', line 13

# Builds the repository around an already-open IO.
#
# The IO is handed to Nokogiri's streaming reader and the resulting reader is
# kept in @reader; nothing is read from +io+ by this method itself.
#
# @param io [IO] source of the artist XML
def initialize(io)
  reader_class = Nokogiri::XML::Reader
  @reader = reader_class.from_io(io)
end

Class Method Details

.new_with_open_file(filename, &block) ⇒ Object



17
18
19
20
21
# File 'lib/music_story/repository/artist_xml_file.rb', line 17

# Opens +filename+ read-only, wraps the open file in a new instance and
# yields that instance to the caller's block. The block form of File.open is
# used, so the file is closed again once the block returns.
#
# @param filename [String] path of the XML file to read
# @yield [repository] the new instance reading from the opened file
# @return [Object] whatever the block returns
def self.new_with_open_file(filename, &block)
  File.open(filename, 'r') { |handle| yield new(handle) }
end

Instance Method Details

#each ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/music_story/repository/artist_xml_file.rb', line 39

# Streams the underlying XML and yields one Model::Artist per <artiste> element.
#
# Every <artiste> start element found by the reader is re-parsed on its own
# (from its outer XML), so the XPath lookups below only ever see one artist.
#
# When called without a block, returns an Enumerator instead of failing with a
# LocalJumpError at the first artist.
# NOTE(review): @reader is built once in the constructor, so a second pass over
# the same instance presumably yields nothing -- confirm against Nokogiri.
#
# @yield [Model::Artist] the artist built from each <artiste> element
# @return [Enumerator] if no block is given; otherwise the result of @reader.each
def each
  return enum_for(:each) unless block_given?

  @reader.each do |node|
    # skip everything except the opening tag of an <artiste> element
    next unless node.name == 'artiste' && node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT
    doc = Nokogiri::XML(node.outer_xml)

    # extract genres, grouped by relation symbol (nil for an unrecognised code)
    genres = Hash.new {|h,k| h[k]=[]}
    genres_and_relation = doc.xpath('//artiste/genres/genre').map do |genre_node|
      genre = Model::Genre.new(
        :id   => to_i_or_nil(genre_node.attr('id')),
        :name => genre_node.inner_text.strip
      )
      [genre, ARTIST_GENRE_RELATIONS[to_i_or_nil(genre_node.attr('relation'))]]
    end

    # identical [genre, relation] pairs are only recorded once
    genres_and_relation.uniq.each do |genre, relation|
      genres[relation] << genre
    end

    # extract associations, grouped by association type symbol
    associations = Hash.new {|h,k| h[k]=[]}
    associated_artists_and_type = doc.xpath('//artiste/associes/associe').map do |associe_node|
      artist = Model::Artist.new({
        :id => to_i_or_nil(associe_node.attr('id_associe')),
        :name => associe_node.attr('nom_associe')
      })
      [artist, ASSOCIATION_TYPES[associe_node.inner_text]]
    end

    associated_artists_and_type.uniq.each do |artist, type|
      # FIXME track non-failing errors, rather than keeping quiet about it
      associations[type] << artist unless invalid_artist?(artist)
    end

    yield Model::Artist.new({
      :id        => to_i_or_nil(doc.xpath('//artiste').attr('id').value),
      :name      => doc.xpath('//artiste/nom').inner_text,
      :forename  => unless_empty(doc.xpath('//artiste/prenom').inner_text),
      :real_name => unless_empty(doc.xpath('//artiste/nom_reel').inner_text),
      :role      => unless_empty(doc.xpath('//artiste/role').inner_text),
      :type      => unless_empty(doc.xpath('//artiste/type').inner_text),
      :country   => unless_empty(doc.xpath('//artiste/pays').inner_text),
      # not sure what the appropriate translation for resume vs texte_bio is here,
      # but in data seen so far they are both the same and both HTML not plain text:
      :summary_html          => unless_empty(doc.xpath('//artiste/resume').inner_text),
      :image_filename        => unless_empty(doc.xpath('//artiste/image').inner_text),

      :bio_html              => unless_empty(doc.xpath('//artiste/texte_bio').inner_text),
      :main_genres           => genres[:main],
      :secondary_genres      => genres[:secondary],
      :influenced_by_genres  => genres[:influenced_by],
      :similar_artists       => associations[:similar],
      :influenced_by_artists => associations[:influenced_by],
      :successor_artists     => associations[:successor]
    })
  end
end

#get_all ⇒ Object



37
# File 'lib/music_story/repository/artist_xml_file.rb', line 37

# Returns the receiver itself; the artists are obtained by iterating it.
#
# @return [ArtistXMLFile] self
def get_all
  self
end