Class: EPUB::Parser::Publication

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/epub/parser/publication.rb

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Utils

extract_attribute

Constructor Details

#initialize(opf, rootfile) ⇒ Publication

Returns a new instance of Publication.


21
22
23
24
25
# File 'lib/epub/parser/publication.rb', line 21

# Prepares a parser for one package document.
#
# @param opf source of the package document; passed straight to Nokogiri.XML
# @param rootfile location of the package document; parsed into an
#   Addressable::URI and kept in @rootfile
def initialize(opf, rootfile)
  # The three pieces of state are independent of one another.
  @doc = Nokogiri.XML(opf)
  @rootfile = Addressable::URI.parse(rootfile)
  @package = EPUB::Publication::Package.new
end

Class Method Details

.parse(container, file) ⇒ Object


14
15
16
17
18
# File 'lib/epub/parser/publication.rb', line 14

# Reads the package document out of +container+ and parses it.
#
# @param container [#read] object that returns the content stored at a path
# @param file path of the package document; it is URI-unencoded before being
#   handed to +container.read+, but passed to the parser as given
# @return whatever the instance-level #parse returns
def parse(container, file)
  decoded_path = Addressable::URI.unencode(file)
  source = container.read(decoded_path)
  parser = new(source, file)
  parser.parse
end

Instance Method Details

#extract_model(elem, id_map, xpath, klass = :DCMES, attributes = %w[id lang dir]) ⇒ Object


167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/epub/parser/publication.rb', line 167

# Builds one metadata model for every element matched by +xpath+ under +elem+.
#
# For each matched element, an instance of the class named +klass+ inside
# EPUB::Publication::Package::Metadata is created. Every name in +attributes+
# is read with extract_attribute and assigned through the matching writer
# (hyphens in the attribute name become underscores). The element's text is
# stored as the model's content, except when +klass+ is :Link. If a block is
# given it is called with (model, element) after those assignments.
#
# Models that respond to #id and have a non-nil id are registered in +id_map+
# under the :metadata key. The entry is merged into any existing hash for that
# id instead of replacing it, so :refiners gathered earlier for the same id
# (extract_refinee's block runs while the models are being built) are kept.
#
# @param elem [#xpath] node to search under
# @param id_map [Hash] id => {metadata:, refiners:} registry, mutated in place
# @param xpath [String] XPath expression, evaluated with EPUB::NAMESPACES
# @param klass [Symbol] constant name under EPUB::Publication::Package::Metadata
# @param attributes [Array<String>] attribute names to copy onto each model
# @yield [model, element] optional per-model customisation hook
# @return [Array] the models, in the order the XPath query returned elements
def extract_model(elem, id_map, xpath, klass=:DCMES, attributes=%w[id lang dir])
  models = elem.xpath(xpath, EPUB::NAMESPACES).map do |e|
    model = EPUB::Publication::Package::Metadata.const_get(klass).new
    attributes.each do |attr|
      model.__send__ "#{attr.tr('-', '_')}=", extract_attribute(e, attr)
    end
    model.content = e.content unless klass == :Link

    yield model, e if block_given?

    model
  end

  models.each do |model|
    next unless model.respond_to?(:id) && model.id

    # Merge, don't overwrite: the entry may already hold :refiners.
    (id_map[model.id] ||= {})[:metadata] = model
  end

  models
end

#extract_refinee(elem, id_map, xpath, klass, attributes) ⇒ Object


187
188
189
190
191
192
193
194
195
196
197
198
# File 'lib/epub/parser/publication.rb', line 187

# Same as extract_model, but additionally records which id each model refines.
#
# After the caller's optional block has run for a model, the element's
# `refines` attribute is read. When it is a fragment reference (starts with
# '#'), the model is appended to id_map[<id without '#'>][:refiners], creating
# the entry and the list on demand. Other `refines` values are ignored.
#
# @yield [model, element] optional hook, invoked before `refines` is inspected
# @return whatever extract_model returns
def extract_refinee(elem, id_map, xpath, klass, attributes)
  extract_model(elem, id_map, xpath, klass, attributes) do |model, e|
    yield model, e if block_given?

    target = extract_attribute(e, 'refines')
    next unless target && target.start_with?('#')

    entry = (id_map[target[1..-1]] ||= {})
    (entry[:refiners] ||= []) << model
  end
end

#parse ⇒ Object


27
28
29
30
31
32
33
# File 'lib/epub/parser/publication.rb', line 27

# Runs every section parser in turn and returns the populated package.
#
# parse_package goes first, followed by one parse_<name> call for each entry
# of EPUB::Publication::Package::CONTENT_MODELS, in that constant's order.
#
# @return the @package object
def parse
  sections = [:package] + EPUB::Publication::Package::CONTENT_MODELS
  sections.each { |section| __send__ "parse_#{section}" }

  @package
end

#parse_bindings ⇒ Object


137
138
139
140
141
142
143
144
145
146
147
# File 'lib/epub/parser/publication.rb', line 137

# Builds @package.bindings from the /opf:package/opf:bindings/opf:mediaType
# elements.
#
# Each element becomes a Bindings::MediaType whose handler is looked up in
# @package.manifest by the element's `handler` attribute.
#
# @return the new Bindings object (also assigned to @package.bindings)
def parse_bindings
  bindings = EPUB::Publication::Package::Bindings.new
  @package.bindings = bindings

  nodes = @doc.xpath('/opf:package/opf:bindings/opf:mediaType', EPUB::NAMESPACES)
  nodes.each do |node|
    entry = EPUB::Publication::Package::Bindings::MediaType.new
    entry.media_type = extract_attribute(node, 'media-type')
    handler_id = extract_attribute(node, 'handler')
    entry.handler = @package.manifest[handler_id]
    bindings << entry
  end

  bindings
end

#parse_guide ⇒ Object


123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/epub/parser/publication.rb', line 123

# Builds @package.guide from the /opf:package/opf:guide/opf:reference elements.
#
# For each element, `type` and `title` are copied as-is and `href` is parsed
# into an Addressable::URI.
#
# @return the new Guide object (also assigned to @package.guide)
def parse_guide
  guide = EPUB::Publication::Package::Guide.new
  @package.guide = guide

  nodes = @doc.xpath('/opf:package/opf:guide/opf:reference', EPUB::NAMESPACES)
  nodes.each do |node|
    entry = EPUB::Publication::Package::Guide::Reference.new
    %w[type title].each do |name|
      entry.__send__ "#{name}=", extract_attribute(node, name)
    end
    href = extract_attribute(node, 'href')
    entry.href = Addressable::URI.parse(href)
    guide << entry
  end

  guide
end

#parse_manifest ⇒ Object


77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/epub/parser/publication.rb', line 77

# Builds @package.manifest from the /opf:package/opf:manifest element.
#
# Each opf:item child becomes a Manifest::Item: `id`, `media-type` and
# `media-overlay` are copied through the matching writers (hyphens become
# underscores), `href` is parsed into an Addressable::URI, and `properties`
# is split on spaces when present.
#
# Fallbacks are resolved in a second pass, once every item has been added,
# because manifest[id] is only usable after the target item is registered.
# The pending list is kept per item, so several items may name the same
# fallback target and each of them still gets its fallback assigned.
#
# @return the new Manifest object (also assigned to @package.manifest)
def parse_manifest
  manifest = @package.manifest = EPUB::Publication::Package::Manifest.new
  elem = @doc.xpath('/opf:package/opf:manifest', EPUB::NAMESPACES).first
  manifest.id = extract_attribute(elem, 'id')

  # [item, fallback id] pairs awaiting resolution.
  pending_fallbacks = []
  elem.xpath('./opf:item', EPUB::NAMESPACES).each do |e|
    item = EPUB::Publication::Package::Manifest::Item.new
    %w[id media-type media-overlay].each do |attr|
      item.__send__ "#{attr.tr('-', '_')}=", extract_attribute(e, attr)
    end
    item.href = Addressable::URI.parse(extract_attribute(e, 'href'))
    fallback = extract_attribute(e, 'fallback')
    pending_fallbacks << [item, fallback] if fallback
    properties = extract_attribute(e, 'properties')
    item.properties = properties.split(' ') if properties
    manifest << item
  end
  pending_fallbacks.each do |item, fallback_id|
    item.fallback = manifest[fallback_id]
  end

  manifest
end

#parse_metadata ⇒ Object


47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/epub/parser/publication.rb', line 47

# Builds @package.metadata from the /opf:package/opf:metadata element.
#
# Extracts, in this order: dc:identifier (with its opf:scheme attribute),
# dc:title, dc:language, the remaining Dublin Core elements listed below,
# dc:rights, opf:meta and opf:link. The identifier whose id equals
# @unique_identifier_id (set by parse_package) becomes
# metadata.unique_identifier.
#
# While extracting, id_map collects for every id the model that carries it
# (:metadata) and the models that refine it (:refiners). Once everything has
# been read, each refiner's #refines is pointed at the refined model; ids
# that have refiners but no known model (or the reverse) are skipped.
#
# A link element without a `rel` attribute gets an empty Set as its rel.
#
# @return the new Metadata object (also assigned to @package.metadata)
def parse_metadata
  metadata = @package.metadata = EPUB::Publication::Package::Metadata.new
  elem = @doc.xpath('/opf:package/opf:metadata', EPUB::NAMESPACES).first
  id_map = {}

  metadata.identifiers = extract_model(elem, id_map, './dc:identifier', :Identifier, ['id']) {|identifier, e|
    identifier.scheme = extract_attribute(e, 'scheme', 'opf')
    metadata.unique_identifier = identifier if identifier.id == @unique_identifier_id
  }
  metadata.titles = extract_model(elem, id_map, './dc:title', :Title)
  metadata.languages = extract_model(elem, id_map, './dc:language', :DCMES, %w[id])
  %w[contributor coverage creator date description format publisher relation source subject type].each do |dcmes|
    metadata.__send__ "#{dcmes}s=", extract_model(elem, id_map, "./dc:#{dcmes}")
  end
  metadata.rights = extract_model(elem, id_map, './dc:rights')
  metadata.metas = extract_refinee(elem, id_map, './opf:meta', :Meta, %w[property id scheme])
  metadata.links = extract_refinee(elem, id_map, './opf:link', :Link, %w[id media-type]) {|link, e|
    link.href = Addressable::URI.parse(extract_attribute(e, 'href'))
    # to_s turns a missing rel (nil) into "", which splits to no tokens.
    link.rel = Set.new(extract_attribute(e, 'rel').to_s.split)
  }

  id_map.each_value do |hsh|
    next unless hsh[:refiners]
    next unless hsh[:metadata]
    hsh[:refiners].each {|meta| meta.refines = hsh[:metadata]}
  end

  metadata
end

#parse_package ⇒ Object


35
36
37
38
39
40
41
42
43
44
45
# File 'lib/epub/parser/publication.rb', line 35

# Copies the root element's attributes onto @package.
#
# `version`, `xml:lang`, `dir` and `id` go through the matching writers (the
# colon in `xml:lang` becomes an underscore). `unique-identifier` is kept in
# @unique_identifier_id for later use, and `prefix` is parsed with
# parse_prefix. When the parsed prefixes contain
# EPUB::Publication::FixedLayout::PREFIX_KEY, the FixedLayout module is
# included into EPUB::Publication.
#
# @return the @package object
def parse_package
  root = @doc.root
  %w[version xml:lang dir id].each do |name|
    writer = "#{name.gsub(/\:/, '_')}="
    @package.__send__ writer, extract_attribute(root, name)
  end
  @unique_identifier_id = root['unique-identifier']
  @package.prefix = parse_prefix(extract_attribute(root, 'prefix'))
  if @package.prefix.key?(EPUB::Publication::FixedLayout::PREFIX_KEY)
    EPUB::Publication.__send__ :include, EPUB::Publication::FixedLayout
  end

  @package
end

#parse_prefix(str) ⇒ Object


149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/epub/parser/publication.rb', line 149

# Turns the value of a `prefix` attribute into a { prefix => IRI } hash.
#
# The string is read as repeated "name: iri" pairs separated by whitespace.
# A name runs up to the first colon or whitespace, the separator is any run
# of colons and whitespace, and the IRI runs up to the next whitespace. A name
# with nothing after it is reported with a warning and left out.
#
# @param str [String, nil] raw attribute value
# @return [Hash{String => String}] empty when +str+ is nil or empty
def parse_prefix(str)
  mapping = {}
  return mapping if str.nil? || str.empty?

  tokens = StringScanner.new(str)
  tokens.skip(/\s*/)
  until (name = tokens.scan(/[^\:\s]+/)).nil?
    tokens.skip(/[\:\s]+/)
    iri = tokens.scan(/[^\s]+/)
    if iri.nil? || iri.empty?
      warn "no IRI detected for prefix `#{name}`"
    else
      mapping[name] = iri
    end
    tokens.skip(/\s*/)
  end

  mapping
end

#parse_spine ⇒ Object


102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/epub/parser/publication.rb', line 102

# Builds @package.spine from the /opf:package/opf:spine element.
#
# The spine's own `id`, `toc` and `page-progression-direction` attributes are
# copied through the matching writers (hyphens become underscores). Every
# opf:itemref child becomes a Spine::Itemref carrying `idref` and `id`; it is
# linear unless its `linear` attribute is exactly 'no', and `properties` is
# split on spaces when present.
#
# @return the new Spine object (also assigned to @package.spine)
def parse_spine
  spine = EPUB::Publication::Package::Spine.new
  @package.spine = spine

  spine_node = @doc.xpath('/opf:package/opf:spine', EPUB::NAMESPACES).first
  %w[id toc page-progression-direction].each do |name|
    writer = "#{name.gsub(/-/, '_')}="
    spine.__send__ writer, extract_attribute(spine_node, name)
  end

  spine_node.xpath('./opf:itemref', EPUB::NAMESPACES).each do |node|
    itemref = EPUB::Publication::Package::Spine::Itemref.new
    %w[idref id].each do |name|
      itemref.__send__ "#{name}=", extract_attribute(node, name)
    end
    linear_value = extract_attribute(node, 'linear')
    itemref.linear = (linear_value != 'no')
    property_list = extract_attribute(node, 'properties')
    itemref.properties = property_list.split(' ') if property_list
    spine << itemref
  end

  spine
end