Class: Govspeak::StructuredHeaderExtractor
- Inherits: Object
- Inheritance chain:
  - Object
  - Govspeak::StructuredHeaderExtractor
- Defined in:
- lib/govspeak/structured_header_extractor.rb
Instance Method Summary
- #add_child(header) ⇒ Object
- #add_sibling(header) ⇒ Object
- #add_top_level(header) ⇒ Object
- #add_uncle_or_aunt(header) ⇒ Object
- #call ⇒ Object
- #header_at_higher_level_than_prev?(header) ⇒ Boolean
- #header_at_same_level_as_prev?(header) ⇒ Boolean
- #header_higher_than_top_level?(header) ⇒ Boolean
- #header_one_level_lower_than_prev?(header) ⇒ Boolean
- #headers_list ⇒ Object
- #initialize(document) ⇒ StructuredHeaderExtractor (constructor): returns a new instance of StructuredHeaderExtractor.
- #pop_stack_to_level(header) ⇒ Object
- #reset_stack ⇒ Object
Constructor Details
#initialize(document) ⇒ StructuredHeaderExtractor
Returns a new instance of StructuredHeaderExtractor.
19 20 21 22 23 |
# File 'lib/govspeak/structured_header_extractor.rb', line 19
#
# Sets up a new extractor: remembers the document, starts with an empty
# list of structured headers and an empty stack.
#
# @param document [Object] the document whose headers will be extracted;
#   stored as-is in @doc
def initialize(document)
  @doc, @structured_headers = document, []
  reset_stack
end
Instance Method Details
#add_child(header) ⇒ Object
66 67 68 |
# File 'lib/govspeak/structured_header_extractor.rb', line 66
#
# Nests +header+ under the header currently on top of the stack.
# Assumes the stack is non-empty (callers check this first).
#
# @param header [StructuredHeader] the header to attach
# @return [Object] the result of appending to the parent's +headers+
def add_child(header)
  parent = stack.last
  parent.headers << header
end
#add_sibling(header) ⇒ Object
61 62 63 64 |
# File 'lib/govspeak/structured_header_extractor.rb', line 61
#
# Attaches +header+ next to the previous header: the previous header is
# removed from the stack and +header+ is appended to the +headers+ of
# whatever is then on top (the shared parent).
#
# @param header [StructuredHeader] the header to attach
# @return [Object] the result of appending to the parent's +headers+
def add_sibling(header)
  # Discard the previous header so the top of the stack is its parent.
  stack.pop

  parent = stack.last
  parent.headers << header
end
#add_top_level(header) ⇒ Object
56 57 58 59 |
# File 'lib/govspeak/structured_header_extractor.rb', line 56
#
# Records +header+ as a new root entry in the structured headers list and
# clears the stack so that a fresh branch starts from it.
#
# @param header [StructuredHeader] the top-level header to record
# @return [Object] whatever +reset_stack+ returns
def add_top_level(header)
  structured_headers.push(header)

  # A new top-level header begins a new branch; nothing above it remains
  # relevant.
  reset_stack
end
#add_uncle_or_aunt(header) ⇒ Object
70 71 72 73 |
# File 'lib/govspeak/structured_header_extractor.rb', line 70
#
# Attaches +header+ further up the current branch: the stack is first
# unwound via +pop_stack_to_level+, then +header+ is appended to the
# +headers+ of the entry left on top.
#
# @param header [StructuredHeader] the header to attach
# @return [Object] the result of appending to the new parent's +headers+
def add_uncle_or_aunt(header)
  pop_stack_to_level(header)

  new_parent = stack.last
  new_parent.headers << header
end
#call ⇒ Object
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/govspeak/structured_header_extractor.rb', line 25
#
# Walks the flat list of headers and arranges them into a nested structure.
#
# For each header, in order:
# * headers above the top level are skipped;
# * a top-level header becomes a new root entry;
# * otherwise the header is attached as a sibling, child, or
#   "uncle/aunt" relative to the previously placed header;
# * headers that fit none of these cases (e.g. ones that skip a level) are
#   ignored.
#
# Every placed header is pushed onto the stack so the next header can be
# positioned relative to it.
#
# The extraction runs at most once per instance. The header objects from
# +headers_list+ are memoized and mutated in place, so re-running the loop
# would append every header a second time; later calls therefore just
# return the already-built result.
#
# @return [Object] the +structured_headers+ collection of top-level headers
def call
  return structured_headers if @extracted

  headers_list.each do |header|
    next if header_higher_than_top_level?(header)

    if header.top_level?
      add_top_level(header)
    elsif header_at_same_level_as_prev?(header)
      add_sibling(header)
    elsif header_one_level_lower_than_prev?(header)
      add_child(header)
    elsif header_at_higher_level_than_prev?(header)
      add_uncle_or_aunt(header)
    else
      next # ignore semantically invalid headers
    end

    stack.push(header)
  end

  @extracted = true
  structured_headers
end
#header_at_higher_level_than_prev?(header) ⇒ Boolean
88 89 90 91 |
# File 'lib/govspeak/structured_header_extractor.rb', line 88
#
# Tells whether +header+ sits above the previously placed header in the
# outline. A higher level corresponds to a smaller level integer.
#
# @param header [StructuredHeader] the header being positioned
# @return [Boolean, nil] the comparison result, or the falsy top-of-stack
#   value when there is no previous header
def header_at_higher_level_than_prev?(header)
  previous = stack.last
  previous && (previous.level > header.level)
end
#header_at_same_level_as_prev?(header) ⇒ Boolean
79 80 81 |
# File 'lib/govspeak/structured_header_extractor.rb', line 79
#
# Tells whether +header+ has the same level as the previously placed
# header (the one on top of the stack).
#
# @param header [StructuredHeader] the header being positioned
# @return [Boolean, nil] the comparison result, or the falsy top-of-stack
#   value when there is no previous header
def header_at_same_level_as_prev?(header)
  previous = stack.last
  previous && previous.level == header.level
end
#header_higher_than_top_level?(header) ⇒ Boolean
75 76 77 |
# File 'lib/govspeak/structured_header_extractor.rb', line 75
#
# Tells whether +header+ ranks above the top level, i.e. its level integer
# is smaller than the value reported by +header.top_level+.
#
# @param header [#level, #top_level] the header to check
# @return [Boolean] true when the header's level is below +top_level+
#   numerically
def header_higher_than_top_level?(header)
  level = header.level
  level < header.top_level
end
#header_one_level_lower_than_prev?(header) ⇒ Boolean
83 84 85 86 |
# File 'lib/govspeak/structured_header_extractor.rb', line 83
#
# Tells whether +header+ is exactly one step below the previously placed
# header. A lower level corresponds to a larger level integer.
#
# @param header [StructuredHeader] the header being positioned
# @return [Boolean, nil] the comparison result, or the falsy top-of-stack
#   value when there is no previous header
def header_one_level_lower_than_prev?(header)
  previous = stack.last
  previous && (previous.level + 1 == header.level)
end
#headers_list ⇒ Object
50 51 52 53 54 |
# File 'lib/govspeak/structured_header_extractor.rb', line 50
#
# Flat list of the document's headers, each wrapped in a StructuredHeader
# carrying its text, level, id and an initially empty list of children.
# The list is built on first use and cached in @headers_list.
#
# @return [Array<StructuredHeader>] the wrapped headers, in document order
def headers_list
  return @headers_list if @headers_list

  @headers_list = doc.headers.map do |raw_header|
    StructuredHeader.new(raw_header.text, raw_header.level, raw_header.id, [])
  end
end
#pop_stack_to_level(header) ⇒ Object
93 94 95 96 |
# File 'lib/govspeak/structured_header_extractor.rb', line 93
#
# Unwinds the stack so that what remains on top can act as the parent of
# +header+. The number of entries removed is the level gap between the top
# of the stack and +header+, plus one.
#
# @param header [StructuredHeader] the header about to be attached
# @return [Integer] the number of pops requested
def pop_stack_to_level(header)
  level_gap = stack.last.level - header.level

  # One extra pop removes the entry at +header+'s own level as well.
  (level_gap + 1).times { stack.pop }
end
#reset_stack ⇒ Object
98 99 100 |
# File 'lib/govspeak/structured_header_extractor.rb', line 98
#
# Replaces the stack with a fresh, empty array.
#
# @return [Array] the new empty stack
def reset_stack
  @stack = []
end