12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
# File 'lib/analysis/titles.rb', line 12
# Registers the :titles spatial (which depends on :regions) on +pdf+.
#
# Every :regions object is collected; once parsing finishes the candidates
# are narrowed down to a single title region:
#
# 1. drop regions whose stripped text is shorter than two characters;
# 2. drop regions whose :y is below half of :page_height
#    (presumably this keeps the top half of the page -- depends on the
#    coordinate origin used by the caller, TODO confirm);
# 3. keep only regions whose :line_height is within
#    pdf.settings[:title_slop] (a fraction) of the tallest line;
# 4. keep only regions on the lowest-numbered page that still has a
#    candidate;
# 5. pick the region with the greatest :y.
#
# The `after` block evaluates to a Hash with :content, :line_height and
# :font for the chosen region, or to an empty Array when no region
# qualifies.
#
# @param pdf [Object] must respond to +spatials+ and +settings+
def self.include_in pdf
  pdf.spatials :titles, :depends_on => [:regions] do |parser|
    titles = []

    parser.objects :regions do |region|
      titles << region
    end

    parser.after do
      titles.reject! { |r| Spatial.get_text_content(r).strip.length < 2 }
      titles.reject! { |r| r[:y] < (r[:page_height] / 2.0) }

      # Nothing left to rank: bail out before dereferencing titles.first,
      # which would be nil here. `next` (not `return`) because we are
      # inside a block.
      next [] if titles.empty?

      titles.sort_by! { |r| -r[:line_height] }
      tallest_line = titles.first[:line_height]
      title_slop = tallest_line - (tallest_line * pdf.settings[:title_slop])
      titles.reject! { |r| r[:line_height] < title_slop }

      # A slop setting outside 0..1 can make the threshold exceed the
      # tallest line and reject every candidate.
      next [] if titles.empty?

      titles.sort_by! { |r| r[:page] }
      first_page = titles.first[:page]
      titles.reject! { |r| r[:page] != first_page }

      titles.sort_by! { |r| -r[:y] }
      title = titles.first

      {
        :content => Spatial.get_text_content(title),
        :line_height => title[:line_height],
        :font => title[:font]
      }
    end
  end
end
|