4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
# File 'lib/manbook/parser.rb', line 4
# Parses an HTML-rendered manual page and extracts its metadata.
#
# The title is looked up, in order, from:
# 1. the first <p> following the parent of a <b> whose text is 'NAME',
# 2. the first <p> following an <h2> whose text is 'NAME',
# 3. the document's <title> element.
#
# The author is looked up the same way under an 'AUTHORS' heading
# (without a <title> fallback). Line breaks inside either value are
# replaced by single spaces.
#
# @param html_file [String] path of the HTML file to read
# @return [Page] a page with file_name, title and author assigned
def parse(html_file)
  doc = Nokogiri::HTML(File.read(html_file))

  title = doc.xpath("//b[text() = 'NAME']/../following-sibling::p[1]/descendant-or-self::text()").to_s
  title = doc.xpath("//h2[text() = 'NAME']/following-sibling::p[1]/descendant-or-self::text()").to_s if title.blank?
  title = doc.xpath("//html/head/title/text()").to_s if title.blank?

  author = doc.xpath("//b[text() = 'AUTHORS']/../following-sibling::p[1]/descendant-or-self::text()").to_s
  # Use blank? (as for the title) rather than empty?, so that a
  # whitespace-only match also falls through to the <h2> variant.
  author = doc.xpath("//h2[text() = 'AUTHORS']/following-sibling::p[1]/descendant-or-self::text()").to_s if author.blank?

  Page.new.tap do |page|
    page.file_name = File.basename(html_file)
    page.title = title.split("\n").join(' ')
    page.author = author.split("\n").join(' ')
  end
end
|