Class: Govspeak::StructuredHeaderExtractor
- Inherits:
- Object
- Object
- Govspeak::StructuredHeaderExtractor
- Defined in:
- lib/govspeak/structured_header_extractor.rb
Instance Method Summary
- #add_child(header) ⇒ Object
- #add_sibling(header) ⇒ Object
- #add_top_level(header) ⇒ Object
- #add_uncle_or_aunt(header) ⇒ Object
- #call ⇒ Object
- #header_at_higher_level_than_prev?(header) ⇒ Boolean
- #header_at_same_level_as_prev?(header) ⇒ Boolean
- #header_higher_than_top_level?(header) ⇒ Boolean
- #header_one_level_lower_than_prev?(header) ⇒ Boolean
- #headers_list ⇒ Object
- #initialize(document) ⇒ StructuredHeaderExtractor (constructor): returns a new instance of StructuredHeaderExtractor.
- #pop_stack_to_level(header) ⇒ Object
- #reset_stack ⇒ Object
Constructor Details
#initialize(document) ⇒ StructuredHeaderExtractor
Returns a new instance of StructuredHeaderExtractor.
19 20 21 22 23 |
# File 'lib/govspeak/structured_header_extractor.rb', line 19

# Prepares an extractor with an empty result list and an empty stack.
#
# @param document [Object] stored in @doc; #headers_list later reads
#   +doc.headers+ (presumably +doc+ is a reader for @doc — not visible here)
def initialize(document)
  @structured_headers = []
  @doc = document
  reset_stack
end
Instance Method Details
#add_child(header) ⇒ Object
63 64 65 |
# File 'lib/govspeak/structured_header_extractor.rb', line 63

# Nests +header+ under the header currently on top of the stack.
#
# @param header [Object] the header to append to the parent's +headers+
# @return [Object] the parent's +headers+ collection after the append
def add_child(header)
  parent = stack.last
  parent.headers << header
end
#add_sibling(header) ⇒ Object
58 59 60 61 |
# File 'lib/govspeak/structured_header_extractor.rb', line 58

# Attaches +header+ next to the previous header: the previous header is
# removed from the stack so that its parent becomes the top entry, and
# +header+ is appended to that parent's +headers+.
#
# @param header [Object] the header to append
# @return [Object] the parent's +headers+ collection after the append
def add_sibling(header)
  stack.pop # drop the previous header; its parent is now on top
  parent = stack.last
  parent.headers << header
end
#add_top_level(header) ⇒ Object
53 54 55 56 |
# File 'lib/govspeak/structured_header_extractor.rb', line 53

# Records +header+ as a root entry of the result and starts a fresh stack.
#
# @param header [Object] the header to append to +structured_headers+
# @return [Object] whatever #reset_stack returns
def add_top_level(header)
  structured_headers << header
  reset_stack
end
#add_uncle_or_aunt(header) ⇒ Object
67 68 69 70 |
# File 'lib/govspeak/structured_header_extractor.rb', line 67

# Attaches +header+ to an ancestor of the previous header: the stack is
# first unwound via #pop_stack_to_level, then +header+ is appended to the
# +headers+ of whichever entry is left on top.
#
# @param header [Object] the header to append
# @return [Object] the ancestor's +headers+ collection after the append
def add_uncle_or_aunt(header)
  pop_stack_to_level(header)
  ancestor = stack.last
  ancestor.headers << header
end
#call ⇒ Object
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/govspeak/structured_header_extractor.rb', line 25

# Walks #headers_list in order and nests each header into a tree.
#
# Headers above the top level are skipped. Every other header is placed by
# the first matching rule (top level, sibling, child, uncle/aunt) and then
# pushed onto the stack so it becomes the "previous" header for the next
# iteration. Headers matching no rule are skipped without touching the stack.
#
# @return [Object] +structured_headers+, the collection of top-level headers
def call
  headers_list.each do |header|
    next if header_higher_than_top_level?(header)

    case
    when header.top_level?
      add_top_level(header)
    when header_at_same_level_as_prev?(header)
      add_sibling(header)
    when header_one_level_lower_than_prev?(header)
      add_child(header)
    when header_at_higher_level_than_prev?(header)
      add_uncle_or_aunt(header)
    else
      next # ignore semantically invalid headers
    end

    stack.push(header)
  end

  structured_headers
end
#header_at_higher_level_than_prev?(header) ⇒ Boolean
85 86 87 88 |
# File 'lib/govspeak/structured_header_extractor.rb', line 85

# Tells whether +header+ sits at a higher level than the header on top of
# the stack. "Higher level" means the level integer is lower.
#
# Always returns a strict boolean; an empty stack yields +false+ rather
# than +nil+.
#
# @param header [#level] the header being placed
# @return [Boolean] true when the stack's top entry has a greater level
#   integer than +header+
def header_at_higher_level_than_prev?(header)
  previous = stack.last
  return false unless previous

  previous.level > header.level
end
#header_at_same_level_as_prev?(header) ⇒ Boolean
76 77 78 |
# File 'lib/govspeak/structured_header_extractor.rb', line 76

# Tells whether +header+ has the same level as the header on top of the
# stack.
#
# Always returns a strict boolean; an empty stack yields +false+ rather
# than +nil+.
#
# @param header [#level] the header being placed
# @return [Boolean] true when the stack's top entry has an equal level
def header_at_same_level_as_prev?(header)
  previous = stack.last
  return false unless previous

  previous.level == header.level
end
#header_higher_than_top_level?(header) ⇒ Boolean
72 73 74 |
# File 'lib/govspeak/structured_header_extractor.rb', line 72

# Tells whether +header+ ranks above the top level, i.e. its level integer
# is smaller than the value the header reports as +top_level+.
#
# @param header [#level, #top_level] the header being inspected
# @return [Boolean] true when +header.level+ is below +header.top_level+
def header_higher_than_top_level?(header)
  current_level = header.level
  current_level < header.top_level
end
#header_one_level_lower_than_prev?(header) ⇒ Boolean
80 81 82 83 |
# File 'lib/govspeak/structured_header_extractor.rb', line 80

# Tells whether +header+ is exactly one level below the header on top of
# the stack. "Lower level" means the level integer is higher.
#
# Always returns a strict boolean; an empty stack yields +false+ rather
# than +nil+.
#
# @param header [#level] the header being placed
# @return [Boolean] true when +header.level+ exceeds the top entry's level
#   by exactly one
def header_one_level_lower_than_prev?(header)
  previous = stack.last
  return false unless previous

  previous.level - header.level == -1
end
#headers_list ⇒ Object
47 48 49 50 51 |
# File 'lib/govspeak/structured_header_extractor.rb', line 47

# Flat, memoized list of headers wrapped as StructuredHeader objects.
#
# Each entry is built from the +text+, +level+ and +id+ of one element of
# +doc.headers+ and starts with an empty child list.
#
# @return [Object] the result of mapping +doc.headers+, cached in
#   @headers_list after the first call
def headers_list
  @headers_list ||= doc.headers.map { |raw| StructuredHeader.new(raw.text, raw.level, raw.id, []) }
end
#pop_stack_to_level(header) ⇒ Object
90 91 92 93 |
# File 'lib/govspeak/structured_header_extractor.rb', line 90

# Unwinds the stack far enough that +header+ can be attached to an
# ancestor of the previous header.
#
# The pop count is the level gap between the stack's top entry and
# +header+, plus one; it is computed once, before any entry is removed.
#
# @param header [#level] the header about to be attached
# @return [Integer] the number of pops performed
def pop_stack_to_level(header)
  (stack.last.level - header.level + 1).times { stack.pop }
end
#reset_stack ⇒ Object
95 96 97 |
# File 'lib/govspeak/structured_header_extractor.rb', line 95

# Replaces the stack with a fresh, empty array.
#
# @return [Array] the new empty stack
def reset_stack
  @stack = []
end