Class: EPUB::Parser::ContentDocument

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/epub/parser/content_document.rb

Instance Method Summary collapse

Methods included from Utils

extract_attribute

Constructor Details

#initialize(item) ⇒ ContentDocument

Returns a new instance of ContentDocument.



12
13
14
# File 'lib/epub/parser/content_document.rb', line 12

# Remembers the item whose content this parser will turn into a content
# document object.
#
# @param item the object stored in @item; #parse calls +media_type+, +nav?+
#   and +read+ on it, and #parse_navigation_item reads +manifest.items+ from
#   it (presumably a manifest item of the publication -- confirm with callers)
def initialize(item)
  @item = item
end

Instance Method Details

#parse ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/epub/parser/content_document.rb', line 16

# Builds the content document object matching the media type of @item.
#
# * 'application/xhtml+xml' yields a Navigation when the item answers true to
#   +nav?+, and a plain XHTML document otherwise.
# * 'image/svg+xml' yields an SVG document.
# * Any other media type yields nil.
#
# Navigations are parsed only when the document built above really is a
# Navigation. Re-checking +@item.nav?+ on its own would also fire for an SVG
# item flagged as nav and call +navigations=+ on an SVG document.
#
# @return [EPUB::ContentDocument::Navigation, EPUB::ContentDocument::XHTML,
#   EPUB::ContentDocument::SVG, nil] the populated content document, or nil
#   for an unsupported media type
def parse
  content_document =
    case @item.media_type
    when 'application/xhtml+xml'
      @item.nav? ? EPUB::ContentDocument::Navigation.new : EPUB::ContentDocument::XHTML.new
    when 'image/svg+xml'
      EPUB::ContentDocument::SVG.new
    end
  return if content_document.nil?

  content_document.item = @item
  document = Nokogiri.XML(@item.read)
  # Parsing of the document body is not implemented yet; the original call was:
  # parse_content_document(document)
  if content_document.is_a?(EPUB::ContentDocument::Navigation)
    content_document.navigations = parse_navigations(document)
  end
  content_document
end

#parse_navigation(element) ⇒ EPUB::ContentDocument::Navigation::Nav

Returns the Nav object built from the given nav element.

Parameters:

  • element (Nokogiri::XML::Element)

    nav element

Returns:

  • (EPUB::ContentDocument::Navigation::Nav)

    the Nav object built from the nav element



47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/epub/parser/content_document.rb', line 47

# Builds a navigation object from a single nav element.
#
# The heading (via find_heading) becomes the text, the presence of a +hidden+
# attribute becomes the hidden flag, the +epub+-prefixed +type+ attribute
# becomes the type, and every li directly under an ol child is converted with
# #parse_navigation_item and appended to the items.
#
# @param element [Nokogiri::XML::Element] nav element
# @return [EPUB::ContentDocument::Navigation::Navigation] the populated
#   navigation object
def parse_navigation(element)
  navigation = EPUB::ContentDocument::Navigation::Navigation.new
  navigation.text = find_heading(element)
  # Only the presence of the attribute matters: true when present, nil when absent.
  navigation.hidden = (true unless extract_attribute(element, 'hidden').nil?)
  navigation.type = extract_attribute(element, 'type', 'epub')

  list_items = element.xpath('./xhtml:ol/xhtml:li', EPUB::NAMESPACES)
  list_items.each { |li| navigation.items << parse_navigation_item(li) }

  navigation
end

#parse_navigation_item(element) ⇒ Object

Parameters:

  • element (Nokogiri::XML::Element)

    li element



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/epub/parser/content_document.rb', line 61

# Converts one li element of a navigation list into a Navigation::Item.
#
# The label is taken from the li's first a or span child. For an a element
# with empty text, the label falls back to an attribute of its first embedded
# content child (alt for img, name for math/object, name or srcdoc for
# audio/canvas/embed/iframe) and finally to the a element's title attribute.
# For a elements the href is stored and matched, by request_uri, against the
# manifest items of @item. Nested li elements under the first ol child are
# converted recursively.
#
# @param element [Nokogiri::XML::Element] li element
# @return [EPUB::ContentDocument::Navigation::Item, nil] the item, or nil when
#   the li has neither an a nor a span child
def parse_navigation_item(element)
  item = EPUB::ContentDocument::Navigation::Item.new
  label_node = element.xpath('./xhtml:a[1]|xhtml:span[1]', EPUB::NAMESPACES).first
  return unless label_node

  item.text = label_node.text
  if label_node.name == 'a'
    if item.text.empty?
      # A link without text: borrow a label from its first embedded-content child.
      media = label_node.xpath('./xhtml:audio[1]|xhtml:canvas[1]|xhtml:embed[1]|xhtml:iframe[1]|xhtml:img[1]|xhtml:math[1]|xhtml:object[1]|xhtml:svg[1]|xhtml:video[1]', EPUB::NAMESPACES).first
      if media
        # svg and video provide no label attribute, so they are not listed here.
        case media.name
        when 'img'
          item.text = extract_attribute(media, 'alt')
        when 'math', 'object'
          item.text = extract_attribute(media, 'name')
        when 'audio', 'canvas', 'embed', 'iframe'
          item.text = extract_attribute(media, 'name') || extract_attribute(media, 'srcdoc')
        end
      end

      # Last resort: the link's own title attribute (empty string when absent).
      label = item.text
      item.text = extract_attribute(label_node, 'title').to_s if label.nil? || label.empty?
    end

    item.href = extract_attribute(label_node, 'href')
    item.item = @item.manifest.items.find do |candidate|
      candidate.href.request_uri == item.href.request_uri
    end
  end

  nested = element.xpath('./xhtml:ol[1]/xhtml:li', EPUB::NAMESPACES)
  item.items = nested.map { |li| parse_navigation_item(li) }

  item
end

#parse_navigations(document) ⇒ Array<EPUB::ContentDocument::Navigation::Nav>

Returns an array of Nav objects, one for each nav element found in the document.

Parameters:

  • document (Nokogiri::XML::Document)

    HTML document or element including nav

Returns:

  • (Array<EPUB::ContentDocument::Navigation::Nav>)

    an array of Nav objects, one per nav element



41
42
43
# File 'lib/epub/parser/content_document.rb', line 41

# Finds every nav element below the XHTML body and converts each one with
# #parse_navigation.
#
# @param document [#search] parsed document (or node) that is searched with
#   an absolute XPath for html/body//nav in the xhtml namespace
# @return [Array] the results of #parse_navigation, in document search order
def parse_navigations(document)
  nav_elements = document.search('/xhtml:html/xhtml:body//xhtml:nav', EPUB::NAMESPACES)
  nav_elements.map { |nav_element| parse_navigation(nav_element) }
end