Class: Arboretum::DocTree::Elements::Tree

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/arboretum/doctree.rb

Overview

Tree is a representation of a tree data structure consisting of elements. A Tree holds only a reference to the root Element of the tree. A Tree is useful for contextual operations on elements that have the root as an ancestor.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(root = nil) ⇒ Tree

Returns a new instance of Tree.



75
76
77
78
79
80
81
82
# File 'lib/arboretum/doctree.rb', line 75

# Builds a tree around an optional root element.
#
# When a root is supplied it is told which tree it now lives in via
# +update_tree_residence+. Without a root the tree starts out empty; the
# original code called +update_tree_residence+ on +nil+ in that case and
# raised NoMethodError, even though +nil+ is the documented default.
#
# @param root [Element, nil] the root element, or nil for an empty tree
def initialize(root=nil)
  @root = root      # Element (nil while the tree is empty)
  @listeners = []   # Array of GroupListeners

  @id_cache = {}    # Hash: String => Element

  # Only a real root can be informed about its new tree
  root.update_tree_residence(self) unless root.nil?
end

Instance Attribute Details

#id_cache ⇒ Object

Returns the value of attribute id_cache.



73
74
75
# File 'lib/arboretum/doctree.rb', line 73

# Reader for the id lookup table stored in +@id_cache+.
#
# @return [Hash] the cache object itself (not a copy)
def id_cache
  @id_cache
end

#root ⇒ Object

Returns the value of attribute root.



73
74
75
# File 'lib/arboretum/doctree.rb', line 73

# Reader for the root element stored in +@root+.
#
# @return [Object, nil] the stored root, or nil when none has been set
def root
  @root
end

Instance Method Details

#apply_counters ⇒ Object



152
153
154
155
156
157
# File 'lib/arboretum/doctree.rb', line 152

# Walks every element of the tree and applies its counter directives.
#
# For each element, every resetter's counter is reset first. Then every
# incrementer records the current value of its counter and advances that
# counter by one step.
#
# @return [Object] whatever +each+ returns
def apply_counters
  each do |node|
    node.resetters.each do |_name, resetter|
      resetter.counter.reset
    end
    node.incrementers.each do |_name, incrementer|
      # Capture the value before advancing the counter
      incrementer.value = incrementer.counter.current_value
      incrementer.counter.increment
    end
  end
end

#apply_listeners ⇒ Object



137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/arboretum/doctree.rb', line 137

# Runs every registered listener against every element of the tree.
#
# An element that satisfies a listener's rule is appended to that listener
# and, when the listener carries an execution block, handed to that block.
# After the walk, a warning is printed for each listener that collected
# nothing, and the list of registered listeners is cleared.
#
# @return [Array] the fresh, empty listener list
def apply_listeners
  each do |node|
    @listeners.each do |listener|
      next unless listener.rule.valid_on?(node)

      listener << node
      listener.exe_block.call(node) unless listener.exe_block.nil?
    end
  end

  @listeners.each do |listener|
    next unless listener.empty?

    puts "--Warning: Rule #{listener.rule} did not match any elements!--"
  end

  # Listeners are one-shot: drop them once they have been applied
  @listeners = []
end

#dump_markup(style = :pretty, type = :xml) ⇒ Object



168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
# File 'lib/arboretum/doctree.rb', line 168

# Serializes the whole tree into one markup string.
#
# The tree is walked with +each_with_level+. Paired TaggedElements are pushed
# onto +open_elements+ when their opening tag is written and popped (writing
# +dump_markup_close+) as soon as the walk reaches an element whose level is
# not deeper than theirs, or once the walk is finished.
#
# Styles:
# * +:pretty+  - two spaces of indentation per level; newlines are only added
#   where the source already had whitespace around the tag or where the
#   element's +break_within+ is set, so that significant whitespace is
#   neither added nor lost. Elements with only text children stay on one line.
# * +:compact+ - no indentation; a single space is inserted after a tag only
#   when the element's +break_within+ is set, and whitespace-only text nodes
#   are dropped when whitespace was already written.
# * anything else prints a warning and falls back to +:pretty+.
#
# @param style [Symbol] +:pretty+ or +:compact+
# @param type [Symbol] passed through unchanged to every element's +dump_markup+
# @return [String] the markup, with one leading whitespace character removed
#   if the result starts with one
def dump_markup(style=:pretty, type=:xml)
  # The string containing all the markup
  tree_string = ""
  # Array of possible whitespace chars
  whitespace_chars = [" ", "\t", "\n"]
  # Whether there has been whitespace since the last text node
  whitespace_trailing = true
  # Whether the whitespace since the last text node has been honored
  whitespace_honored = true
  # To track which elements must be closed explicitly
  open_elements = Array.new
  if style.eql? :pretty
    self.each_with_level do |element, level|
      # Close elements that should close before the current element
      until open_elements.empty? or level > open_elements.last[1]
        # The element to be closed and its respective indentation level and whether it was a text-only element
        closed_element, closed_level, text_only = open_elements.pop
        closed_indent = "  " * closed_level

        # Whether the tail text of an element begins with whitespace e.g. `<div></div> I am tail text for the div` => true
        tail_leading_space = (closed_element.sibling_next.is_a?(TextElement) and
                              whitespace_chars.include?(closed_element.sibling_next.text[0]))
        # Whether element can break before the closing tag (to preserve whitespace):
        #  Element can break and still maintain whitespace if there has been whitespace since the
        #  last text element and the tail text of the current element
        #  Additionally, element can break if its instance variable :break_within is true
        can_break_after = (whitespace_trailing or closed_element.break_within or tail_leading_space)

        # Add a newline if it preserve whitespace and is not redundant and the element was not fit into a single line
        tree_string << "\n" if can_break_after and !tree_string[-1].eql? "\n" and !text_only
        # Add the indentation for this level if a newline occurred
        tree_string << closed_indent if tree_string[-1].eql? "\n"
        # If we added whitespace, then we have honored any whitespace in the document
        whitespace_honored = true if whitespace_chars.include?(tree_string[-1])
        # If we added whitespace, then the next element should be safe to break as well
        whitespace_trailing = true if can_break_after
        # Dump the closing tag of the element
        tree_string << closed_element.dump_markup_close
      end
      # Determine the indentation level for the current element
      indent = "  " * level

      # Handle element depending on its type
      if element.is_a? TaggedElement
        # Whether the element has any non-text children (text-only elements will be fit into a single line)
        text_only = element.children.select{|c| !c.is_a?(TextElement)}.length == 0
        # Whether element can break before the opening tag (to preserve whitespace):
        #   Element can break and still maintain whitespace if there has been whitespace since the
        #   last text element and the body text of the current element
        #   Additionally, element can break if its instance variable :break_within is true
        can_break_before = (whitespace_trailing or element.break_within)

        # Lookahead to determine if whitespace is in between element and next TextElement
        # e.g. `<div> I am body text for the div <a>I am not</a></div>` => true
        # Only takes place if the less expensive options didn't pan out
        unless can_break_before
          current = element
          until current.nil? or current.is_a?(TextElement) or can_break_before
            current = current.children.first
            can_break_before = true if current.is_a?(TaggedElement) and current.break_within
          end
          can_break_before = true if current.is_a?(TextElement) and whitespace_chars.include?(current.text[0])
        end

        # Add a newline if it preserves whitespace and is not redundant
        tree_string << "\n" if can_break_before and !tree_string[-1].eql?("\n")
        # Add the indentation for this level if a newline occurs before the opening tag
        tree_string << indent if tree_string[-1].eql? "\n"
        # If we added whitespace, then we have honored any whitespace in the document
        whitespace_honored = true if whitespace_chars.include?(tree_string[-1])
        # If we added whitespace, then the next element should be safe to break as well
        whitespace_trailing = true if can_break_before
        # Dump the opening tag of the element
        tree_string << element.dump_markup(type)
        # Add another newline if it preserves whitespace and this elements has a non-text element
        tree_string << "\n" if can_break_before and !text_only
        # Mark the element for closing if it is paired
        open_elements << [element, level, text_only] if element.paired?
      elsif element.is_a? TextElement
        # Whether this element has any non-text siblings (and will not be fit into a single line)
        non_text_siblings = (element.preceding_siblings + element.following_siblings).select{|s| !s.is_a?(TextElement)}.length > 0
        text_prev = element.sibling_prev.is_a? TextElement
        text_next = element.sibling_next.is_a? TextElement

        # The text of the element, to be modified before adding to the markup string
        element_text = element.dump_markup(type)

        element_trailing_space = whitespace_chars.include?(element_text[-1])
        element_preceding_space = whitespace_chars.include?(element_text[0])
        # Determine if the preceding space in the element is redundant or not needed
        can_remove_preceding = (element_preceding_space and !text_prev and whitespace_trailing)

        tree_string << "\n" if can_remove_preceding and !tree_string[-1].eql?("\n") and non_text_siblings
        tree_string << indent if tree_string[-1].eql?("\n") and !element_text.strip.empty?
        # If we added whitespace, then we have honored any whitespace in the document
        whitespace_honored = true if whitespace_chars.include?(tree_string[-1])

        # Tack on some whitespace or mark leading whitespace as not redundant if whitespace hasn't been honored yet
        if whitespace_trailing and !whitespace_honored
          if can_remove_preceding
            can_remove_preceding = false
          else
            element_text = " " << element_text
          end
        end

        # Strip redundant or unwanted whitespace
        element_text[0] = "" if (can_remove_preceding and whitespace_chars.include?(tree_string[-1])) or
                                (can_remove_preceding and !non_text_siblings)
        element_text[-1] = non_text_siblings ? "\n" : "" if element_trailing_space and !text_next and !element_text.strip.empty?

        # Determine whether whitespace is trailing and if that trailing whitespace is honored by the end of the text node
        whitespace_trailing = element_trailing_space
        whitespace_honored = !(element_trailing_space and !whitespace_chars.include?(element_text[-1]))

        tree_string << element_text unless tree_string[-1].eql?("\n") and element_text.strip.empty?
      elsif element.is_a? DocRootElement
        # Do nothing
      else
        # Just treat most elements like TaggedElements except for dumping a closing tag
        #####
        # Whether the element has any non-text children (text-only elements will be fit into a single line)
        text_only = element.children.select{|c| !c.is_a?(TextElement)}.length == 0
        # Whether element can break before the opening tag (to preserve whitespace):
        #   Element can break and still maintain whitespace if there has been whitespace since the
        #   last text element and the body text of the current element
        #   Additionally, element can break if its instance variable :break_within is true
        can_break_before = (whitespace_trailing or element.break_within)

        # Lookahead to determine if whitespace is in between element and next TextElement
        # e.g. `<div> I am body text for the div <a>I am not</a></div>` => true
        # Only takes place if the less expensive options didn't pan out
        unless can_break_before
          current = element
          until current.nil? or current.is_a?(TextElement) or can_break_before
            current = current.children.first
            can_break_before = true if current.is_a?(TaggedElement) and current.break_within
          end
          can_break_before = true if current.is_a?(TextElement) and whitespace_chars.include?(current.text[0])
        end

        # Add a newline if it preserves whitespace and is not redundant
        tree_string << "\n" if can_break_before and !tree_string[-1].eql?("\n")
        # Add the indentation for this level if a newline occurs before the opening tag
        tree_string << indent if tree_string[-1].eql? "\n"
        # If we added whitespace, then we have honored any whitespace in the document
        whitespace_honored = true if whitespace_chars.include?(tree_string[-1])
        # If we added whitespace, then the next element should be safe to break as well
        whitespace_trailing = true if can_break_before
        # Dump the opening tag of the element
        tree_string << element.dump_markup(type)
        # Add another newline if it preserves whitespace and this elements has a non-text element
        tree_string << "\n" if can_break_before and !text_only
      end
    end
    # Close remaining
    until open_elements.empty?
      # The element to be closed and its respective indentation level and whether it was a text-only element
      closed_element, closed_level, text_only = open_elements.pop
      closed_indent = "  " * closed_level

      # Whether the tail text of an element begins with whitespace e.g. `<div></div> I am tail text for the div` => true
      tail_leading_space = (closed_element.sibling_next.is_a?(TextElement) and
                            whitespace_chars.include?(closed_element.sibling_next.text[0]))
      # Whether element can break before the closing tag (to preserve whitespace):
      #  Element can break and still maintain whitespace if there has been whitespace since the
      #  last text element and the tail text of the current element
      #  Additionally, element can break if its instance variable :break_within is true
      can_break_after = (whitespace_trailing or closed_element.break_within or tail_leading_space)

      # Add a newline if it preserve whitespace and is not redundant and the element was not fit into a single line
      tree_string << "\n" if can_break_after and !tree_string[-1].eql? "\n" and !text_only
      # Add the indentation for this level if a newline occurred
      tree_string << closed_indent if tree_string[-1].eql? "\n"
      # If we added whitespace, then we have honored any whitespace in the document
      whitespace_honored = true if whitespace_chars.include?(tree_string[-1])
      # If we added whitespace, then the next element should be safe to break as well
      whitespace_trailing = true if can_break_after
      # Dump the closing tag of the element
      tree_string << closed_element.dump_markup_close
    end
  elsif style.eql? :compact
    # Compact output: no indentation or newlines are generated here
    self.each_with_level do |element, level|
      # Close every open element that is not an ancestor of the current element
      until open_elements.empty? or level > open_elements.last[1]
        closed_element, closed_level = open_elements.pop
        # A single space before the closing tag, only where the element allows a break
        tree_string << " " if closed_element.break_within
        whitespace_trailing = true if whitespace_chars.include?(tree_string[-1])
        tree_string << closed_element.dump_markup_close
      end
      if element.is_a? TaggedElement
        tree_string << element.dump_markup(type)
        # A single space after the opening tag, only where the element allows a break
        tree_string << " " if element.break_within
        whitespace_trailing = true if whitespace_chars.include?(tree_string[-1])
        # Mark the element for closing if it is paired
        open_elements << [element, level] if element.paired?
      elsif element.is_a? TextElement
        # Skip whitespace-only text when the output already ends in whitespace
        tree_string << element.dump_markup(type) unless whitespace_chars.include?(tree_string[-1]) and element.text.strip.empty?
        whitespace_trailing = whitespace_chars.include?(tree_string[-1])
      elsif element.is_a? DocRootElement
        # Do nothing
      else
        # Any other element: dumped like a TaggedElement but never queued for closing
        tree_string << element.dump_markup(type)
        tree_string << " " if element.break_within
        whitespace_trailing = true if whitespace_chars.include?(tree_string[-1])
      end
    end
    # NOTE(review): unlike the :pretty branch, elements still in open_elements
    # after the walk are not closed here - confirm whether that is intended
  # Unknown print style
  else
    puts "Warning: Unknown print style. Using `:pretty`..."
    tree_string = dump_markup(:pretty, type)
  end
  # Drop a single leading whitespace character, if any
  (whitespace_chars.include?(tree_string[0])) ? tree_string[1..-1] : tree_string
end

#each(element = self.root) {|element| ... } ⇒ Object

Redefine the `each` method to iterate through all elements in the tree in depth-first order.

Yields:

  • (element)


85
86
87
88
# File 'lib/arboretum/doctree.rb', line 85

# Visits +element+ and all of its descendants in depth-first order.
#
# Fixes over the previous version:
# * a nil element (e.g. an empty tree) now yields nothing; previously the nil
#   check ran only after +yield+, so the block received a single +nil+
# * calling without a block returns an Enumerator instead of raising
#   LocalJumpError
# * the caller's block is forwarded to recursive calls directly rather than
#   being re-wrapped in a new block on every level
#
# @param element [Element, nil] subtree root to start from (defaults to the tree root)
# @yieldparam element [Element] each visited element, parents before children
# @return [Enumerator] when no block is given
# @return [Array, nil] otherwise the children of +element+, or nil when +element+ is nil
def each(element=self.root, &block)
  return enum_for(:each, element) unless block_given?
  return if element.nil?

  yield element
  element.children.each { |child| each(child, &block) }
end

#each_with_level(element = self.root, level = 0) {|[element, level]| ... } ⇒ Object

Yields:

  • ([element, level])


90
91
92
93
94
# File 'lib/arboretum/doctree.rb', line 90

# Visits +element+ and all of its descendants in depth-first order, yielding
# each element together with its nesting level.
#
# Children are one level deeper than their parent, except that children of a
# DocRootElement stay on the same level as the DocRootElement itself.
#
# Fixes over the previous version:
# * a nil element (e.g. an empty tree) now yields nothing; previously the nil
#   check ran only after +yield+, so the block received +[nil, level]+
# * calling without a block returns an Enumerator instead of raising
#   LocalJumpError
# * the caller's block is forwarded to recursive calls directly rather than
#   being re-wrapped in a new block on every level
#
# @param element [Element, nil] subtree root to start from (defaults to the tree root)
# @param level [Integer] level reported for +element+
# @yieldparam pair [Array(Element, Integer)] the element and its level
# @return [Enumerator] when no block is given
# @return [Array, nil] otherwise the children of +element+, or nil when +element+ is nil
def each_with_level(element=self.root, level=0, &block)
  return enum_for(:each_with_level, element, level) unless block_given?
  return if element.nil?

  yield [element, level]
  # The document root does not add a level of nesting
  child_level = element.is_a?(DocRootElement) ? level : level + 1
  element.children.each { |child| each_with_level(child, child_level, &block) }
end

#element_from_id_hash(hash_id) ⇒ Object

Raises:

  • (IndexError)


114
115
116
117
# File 'lib/arboretum/doctree.rb', line 114

# Looks up an element by a "#id" style reference.
#
# The leading "#" is required and is stripped before the lookup is delegated
# to +id_cache_get+. When a block is given it receives the lookup result,
# which is whatever +id_cache_get+ returned (possibly nil).
#
# @param hash_id [String] an id prefixed with "#"
# @yieldparam element [Object, nil] the lookup result
# @return [Object, nil] the lookup result
# @raise [IndexError] if +hash_id+ does not start with "#"
def element_from_id_hash(hash_id)
  unless hash_id[0].eql?("#")
    raise IndexError, "Tried to get element from hash id with no hash: #{hash_id}"
  end

  found = self.id_cache_get(hash_id[1..-1])
  yield found if block_given?
  found
end

#get_DF_elements ⇒ Object

Returns an array with all elements in the subtree of the root in depth-first order



97
98
99
# File 'lib/arboretum/doctree.rb', line 97

# Collects everything yielded by +each+ into a new array, in yield order.
#
# @return [Array] a fresh array of the yielded elements
def get_DF_elements
  collected = []
  self.each { |node| collected << node }
  collected
end

#id_cache_add(id, element) ⇒ Object



101
102
103
# File 'lib/arboretum/doctree.rb', line 101

# Registers +element+ under +id+ in the id cache, replacing any previous
# entry for that id.
#
# @param id [Object] lookup key
# @param element [Object] value stored under the key
# @return [Object] the stored +element+
def id_cache_add(id, element)
  @id_cache.store(id, element)
end

#id_cache_get(id) ⇒ Object Also known as: element_from_id



109
110
111
# File 'lib/arboretum/doctree.rb', line 109

# Fetches the entry stored under +id+ in the id cache.
#
# When a block is given it is called with the result, even if that result
# is nil because nothing is stored under +id+.
#
# @param id [Object] lookup key
# @yieldparam element [Object, nil] the cached entry
# @return [Object, nil] the cached entry, or nil if there is none
def id_cache_get(id)
  cached = @id_cache[id]
  yield cached if block_given?
  cached
end

#id_cache_remove(id) ⇒ Object



105
106
107
# File 'lib/arboretum/doctree.rb', line 105

# Removes the entry stored under +id+ from the id cache.
#
# @param id [Object] lookup key to drop
# @return [Object, nil] the result of calling +delete+ on the cache
def id_cache_remove(id)
  @id_cache.delete(id)
end

#listen(rule_string, &block) ⇒ Object



131
132
133
134
135
# File 'lib/arboretum/doctree.rb', line 131

# Registers a new GroupListener for +rule_string+ on this tree.
#
# The optional block is handed to the listener as its second constructor
# argument (nil when no block is given).
#
# @param rule_string [String] rule the listener is built from
# @return [GroupListener] the newly registered listener
def listen(rule_string, &block)
  GroupListener.new(rule_string, block).tap do |registered|
    @listeners << registered
  end
end

#scan(rule_string) ⇒ Object

Find any and all elements in the tree that match a given ScandentRule string



120
121
122
123
124
125
126
127
128
129
# File 'lib/arboretum/doctree.rb', line 120

# Finds every element of the tree that matches a ScandentRule string.
#
# Each matching element is collected and, when a block is given, also yielded
# as soon as it is found. The rule is evaluated exactly once per element; the
# previous version called +valid_on?+ twice for every element. A warning is
# printed when nothing matched.
#
# @param rule_string [String] the rule, parsed as a :PATH_LISTENER rule
# @yieldparam element [Object] each matching element, in traversal order
# @return [ElementGroup] group built from the matching elements
def scan(rule_string)
  selected = []
  rule = Arboretum::Scandent::Parser.parse_rule_string(rule_string, :PATH_LISTENER)
  each do |element|
    next unless rule.valid_on?(element)

    selected << element
    yield element if block_given?
  end
  puts "--Warning: Rule #{rule_string} did not match any elements!--" if selected.empty?
  ElementGroup.new(selected)
end

#to_s(root = true, pretty = true) ⇒ Object



159
160
161
162
163
164
165
166
# File 'lib/arboretum/doctree.rb', line 159

# Builds a string representation of the tree.
#
# With +root+ true the result is wrapped in "<<Tree: \n" ... ">>" and starts
# with the root element's own +to_s+ (followed by a newline when +pretty+).
# The root's children are always rendered through +r_to_s+ at depth 1.
# A tree without a root renders only the wrapper (or an empty string).
#
# @param root [Boolean] whether to include the wrapper and the root element
# @param pretty [Boolean] forwarded to +r_to_s+; also controls the newline after the root
# @return [String]
def to_s(root=true, pretty=true)
  buffer = root ? "<<Tree: \n" : ""
  # `root` is shadowed by the parameter, so the reader needs an explicit receiver
  top = self.root
  unless top.nil?
    if root
      buffer << top.to_s
      buffer << "\n" if pretty
    end
    top.children.each do |child|
      buffer << self.r_to_s(child, 1, pretty)
    end
  end
  buffer << ">>" if root
  buffer
end