Class: Bookit::Parser::Html

Inherits:
Object
  • Object
show all
Defined in:
lib/bookit/parser/html.rb

Instance Method Summary

Instance Method Details

#parse(content) ⇒ Object



6
7
8
9
10
11
12
13
# File 'lib/bookit/parser/html.rb', line 6

# Parses HTML markup into a list of content objects.
#
# The markup is handed to Nokogiri::HTML, the resulting document is walked
# from its root with #walk, and any nil entries in the collected list are
# removed before it is returned.
#
# @param content [Object] HTML input, passed unchanged to Nokogiri::HTML
# @return [Array] the objects collected by #walk, without nil entries
def parse(content)
  doc = Nokogiri::HTML(content)

  # doc.root may be nil; #walk returns the accumulator untouched in that case.
  walk(doc.root, []).compact
end

#walk(element, tree) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/bookit/parser/html.rb', line 15

# Reports whether +element+ is an <img> node or has one among its descendants.
#
# Helper for #walk; it only relies on the node's +name+ and +children+.
#
# @param element [#name, #children] node to inspect
# @return [Boolean]
def contains_image?(element)
  element.name == "img" || element.children.any? { |child| contains_image?(child) }
end

# Converts one parsed HTML node, together with its descendants, into
# Bookit::Content objects and appends them to +tree+.
#
# Mapping by node name:
# * "p"           -> Paragraph built from the converted children
# * "text"        -> Text holding the stripped text
# * "h1".."h4"    -> Header holding the stripped text
# * "a"           -> Link (href + converted children); an anchor without an
#                    href is treated like any other container
# * "img"         -> Image (src + integer :width/:height, nil when absent);
#                    an image without a src is skipped
# * "ul", "ol"    -> List built from the converted children
# * anything else -> its children are converted straight into +tree+
#
# Nodes whose text is blank are skipped unless they are, or contain, an image.
# Nothing but content objects is appended, so +tree+ never receives nil.
#
# @param element [Object, nil] node to convert; nil is ignored
# @param tree [Array] accumulator, mutated in place
# @return [Array] the same +tree+ object
def walk(element, tree)
  return tree if element.nil?
  # Text-less nodes carry nothing to render unless an image lives inside them.
  return tree if element.content.strip.empty? && !contains_image?(element)

  node = case element.name
  when "p"
    Bookit::Content::Paragraph.new(walk_children(element, []))
  when "text"
    Bookit::Content::Text.new(element.content.strip)
  when "h1", "h2", "h3", "h4"
    Bookit::Content::Header.new(element.content.strip)
  when "a"
    href = element["href"]
    if href
      Bookit::Content::Link.new(href, walk_children(element, []))
    else
      # No target to link to (e.g. <a name="...">): keep the contents only.
      walk_children(element, tree)
      nil
    end
  when "img"
    src = element["src"]
    if src
      attrs = {}
      ["width", "height"].each do |key|
        raw = element[key]
        attrs[key.to_sym] = raw ? raw.to_i : nil
      end

      Bookit::Content::Image.new(src, attrs)
    end
  when "ul", "ol"
    Bookit::Content::List.new(walk_children(element, []))
  else
    # Unknown wrapper: its children land directly in the current tree.
    walk_children(element, tree)
    nil
  end

  tree << node unless node.nil?
  tree
end

#walk_children(element, tree) ⇒ Object



42
43
44
# File 'lib/bookit/parser/html.rb', line 42

# Feeds every child of +element+ through #walk, in document order.
#
# The accumulator is threaded through the calls: whatever #walk returns for
# one child becomes the accumulator handed to the next child.
#
# @param element [#children] node whose children are visited
# @param tree [Object] initial accumulator
# @return [Object] the value returned by the last #walk call, or +tree+
#   itself when there are no children
def walk_children(element, tree)
  collected = tree
  element.children.each do |node|
    collected = walk(node, collected)
  end
  collected
end