Class: Jekyll::Tree

Inherits:
Object
  • Object
show all
Defined in:
lib/jekyll-semtree/tree.rb

Constant Summary collapse

# Opening and closing delimiters of a [[wikilink]].
OPEN_BRACKETS = '[['
CLOSE_BRACKETS = ']]'

# Two-character markdown list markers: the bullet character plus one space.
MARKDOWN_BULLET_ASTERISK = '* '
MARKDOWN_BULLET_DASH = '- '
MARKDOWN_BULLET_PLUS = '+ '

# Named patterns; the table itself is frozen.
REGEX = {
  # leading run of spaces and/or tabs at the start of a line (may be empty)
  LEVEL: /^[ \t]*/,  # TODO: link
  # "<text>-<digits>-<digits>"; captures the text and both numbers
  TEXT_WITH_LOC: /([^\\:^|\[\]]+)-(\d+)-(\d+)/i,
  # "<text>-(<5 alphanumerics>)"; captures the text and the 5-character token
  TEXT_WITH_ID: /([^\\:^|\[\]]+)-\(([A-Za-z0-9]{5})\)/i,
  # "<bullet> [[<text>-(<5 alphanumerics>)]]"; captures bullet, text and token
  WIKITEXT_WITH_ID: /([+*-]) \[\[([^\\:\\^|\[\]]+)-\(([A-Za-z0-9]{5})\)\]\]/i,
  # a line that is empty or contains only whitespace
  WHITESPACE: /^\s*$/
}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(content, root_doc, virtual_trunk = false) ⇒ Tree

Returns a new instance of Tree.



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/jekyll-semtree/tree.rb', line 40

# Sets up empty tree state and builds the tree from +content+.
#
# content       - either a String (one index file) or a Hash of
#                 file name => file content String (several index files).
# root_doc      - object responding to #basename; its basename without the
#                 extension is used as the root key when +content+ is a Hash.
# virtual_trunk - stored in @virtual_trunk and consulted by build_tree.
#
# Raises RuntimeError when +content+ is neither a String nor a Hash, or when
# a Hash +content+ has no entry for the root file name.
def initialize(content, root_doc, virtual_trunk = false)
  # tree properties
  @chunk_size    = -1
  @level_max     = -1
  @duplicates    = []
  @mkdn_list     = true
  @virtual_trunk = virtual_trunk
  # tree nodes
  @nodes         = []
  @petiole_map   = {}
  @root          = ''
  @trunk         = []

  root_fname = File.basename(root_doc.basename, File.extname(root_doc.basename))
  # tree_data.each do |data|
  #   if doc != root_doc
  #     # jekyll pages don't have the slug attribute: https://github.com/jekyll/jekyll/blob/master/lib/jekyll/page.rb#L8
  #     if doc.type == :pages
  #       page_basename = File.basename(doc.name, File.extname(doc.name))
  #       doc.data['slug'] = Jekyll::Utils.slugify(page_basename)
  #     end
  #   end
  # end

  # anything other than a String or a Hash is rejected
  unless content.is_a?(String) || content.is_a?(Hash)
    raise "content is not a string or hash: #{content}"
  end

  # single file: the whole String is the tree, stored under the key 'root'
  if content.is_a?(String)
    single_file_lines = content.split("\n")
    set_units(single_file_lines)
    return build_tree('root', { 'root' => single_file_lines })
  end

  # multiple files: the root file decides the indentation units
  unless root_fname
    puts 'Cannot parse multiple files without a "root" defined'
    return
  end
  unless content.keys.include?(root_fname)
    raise "content hash does not contain: '#{root_fname}'; keys are: #{content.keys.join(', ')}"
  end
  set_units(content[root_fname].split("\n"))
  lines_by_file = content.each_with_object({}) do |(filename, file_content), acc|
    acc[filename] = file_content.split("\n")
  end
  clear
  # build_tree deletes keys from the hash it is given, so hand it a copy
  return build_tree(root_fname, deepcopy(lines_by_file))
  # print_tree(root)
end

Instance Attribute Details

#chunk_size ⇒ Object

size of indentation for each tree level (set by the first indentation found)



31
32
33
# File 'lib/jekyll-semtree/tree.rb', line 31

# Reader for @chunk_size: the size of indentation for each tree level
# (set by the first indentation found).
def chunk_size
  @chunk_size
end

#duplicates ⇒ Object

duplicate node names in the tree



32
33
34
# File 'lib/jekyll-semtree/tree.rb', line 32

# Reader for @duplicates: the duplicate node names found in the tree.
def duplicates
  @duplicates
end

#level_max ⇒ Object

Returns the value of attribute level_max.



33
34
35
# File 'lib/jekyll-semtree/tree.rb', line 33

# Reader for @level_max.
def level_max
  @level_max
end

#nodes ⇒ Object

the tree nodes



34
35
36
# File 'lib/jekyll-semtree/tree.rb', line 34

# Reader for @nodes: the tree nodes.
def nodes
  @nodes
end

#petiole_map ⇒ Object

a hash where each key is each node in the tree and the value is the index file that contains that node/doc



35
36
37
# File 'lib/jekyll-semtree/tree.rb', line 35

# Reader for @petiole_map: a hash whose keys are the nodes in the tree and
# whose values are the index file that contains that node/doc.
def petiole_map
  @petiole_map
end

#root ⇒ Object

name of the root node/document



36
37
38
# File 'lib/jekyll-semtree/tree.rb', line 36

# Reader for @root: the name of the root node/document.
def root
  @root
end

#trunk ⇒ Object

list of index doc fnames



37
38
39
# File 'lib/jekyll-semtree/tree.rb', line 37

# Reader for @trunk: the list of index doc file names.
def trunk
  @trunk
end

#virtual_trunk ⇒ Object

whether or not the trunk/index documents should be included in the tree data



38
39
40
# File 'lib/jekyll-semtree/tree.rb', line 38

# Reader for @virtual_trunk: whether or not the trunk/index documents should
# be included in the tree data.
def virtual_trunk
  @virtual_trunk
end

Instance Method Details

#add_branch(text, ancestry_titles, trnk_fname = nil) ⇒ Object



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'lib/jekyll-semtree/tree.rb', line 187

# Registers a new node named +text+ beneath the chain +ancestry_titles+.
#
# Walks the ancestry chain and makes sure each ancestor that exists in
# @nodes lists its successor in the chain as a child; the last ancestor
# gets +text+ itself. Ancestors that are not in @nodes are skipped.
#
# text            - name of the node being added.
# ancestry_titles - Array of ancestor names, outermost first.
# trnk_fname      - value recorded for +text+ in @petiole_map
#                   (falls back to +text+ when nil/false).
#
# Returns the value stored in @petiole_map for +text+.
def add_branch(text, ancestry_titles, trnk_fname = nil)
  trnk_fname ||= text
  last_index = ancestry_titles.length - 1
  ancestry_titles.each_with_index do |title, index|
    # each ancestor's child is the next title in the chain, or the new node
    child_name = index < last_index ? ancestry_titles[index + 1] : text
    parent = @nodes.find { |n| n.text == title }
    next unless parent
    next if parent.children.include?(child_name)
    parent.children << child_name
  end
  @nodes << TreeNode.new(text, ancestry_titles)
  @petiole_map[text] = trnk_fname
end

#add_root(text) ⇒ Object

helper methods



181
182
183
184
185
# File 'lib/jekyll-semtree/tree.rb', line 181

# Records +text+ as the root: stores it in @root, appends a TreeNode for it
# to @nodes, and maps it to itself in @petiole_map.
#
# Returns +text+.
def add_root(text)
  @root = text
  root_node = TreeNode.new(text)
  @nodes.push(root_node)
  @petiole_map[text] = text
end

#build_tree(cur_key, content, ancestors = [], total_level = 0) ⇒ Object



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/jekyll-semtree/tree.rb', line 96

# Parses the lines stored under +cur_key+ in +content+ and registers a node
# for each non-empty line, recursing into other keys of +content+ when a
# line names one of them.
#
# cur_key     - key into +content+ whose lines are processed by this call.
# content     - Hash of key => Array of line Strings. The entry for
#               +cur_key+ is deleted once its lines have been processed.
# ancestors   - Array of TreeNode objects for the current ancestry chain;
#               appended to and trimmed while walking the lines.
# total_level - level offset added to every level computed in this call.
#
# Returns a copy of @nodes when +content+ ends up empty and no duplicates
# were recorded; an error message String when duplicates were recorded (the
# tree is cleared first) or when keys remain and total_level is 0 at the
# end; nil when keys remain and total_level is not 0.
def build_tree(cur_key, content, ancestors = [], total_level = 0)
  @trunk = content.keys
  # if the trunk isn't virtual, handle index/trunk file
  unless @virtual_trunk
    node = TreeNode.new(
      cur_key,
      ancestors.map { |n| raw_text(n.text) },
      total_level,
    )
    if total_level == 0
      add_root(cur_key)
    else
      add_branch(cur_key, node.ancestors)
    end
    ancestors << node
    # the index file itself occupies one level, so its lines sit one deeper
    total_level += 1
  end
  # handle file...
  lines = content[cur_key]
  lines.each_with_index do |line, i|
    # drop the leading indentation; what remains is the node text
    text = line.gsub(REGEX[:LEVEL], '')
    next if text.nil? || text.empty?
    # a name that is already registered is recorded as a duplicate and skipped
    if @nodes.map(&:text).include?(raw_text(text))
      @duplicates << raw_text(text)
      next
    end
    # calculate numbers
    line_num = i + 1
    level_match = line.match(REGEX[:LEVEL])
    # number of spaces
    next if level_match.nil?
    size = get_whitespace_size(level_match[0])
    level = get_level(size) + total_level
    # NOTE(review): this fallback is applied after get_level has already
    # used @chunk_size for the current line -- confirm that is intended
    @chunk_size = 2 if @chunk_size < 0
    # root
    # (total_level is still 0 here only when the trunk is virtual, because
    # the non-virtual branch above increments it)
    if total_level == 0 && i == 0
      node = TreeNode.new(
        text,
        [],
        level,
        line_num,
      )
      add_root(raw_text(node.text))
      ancestors << node
    # node
    else
      # connect subtree via 'virtual' semantic-tree node
      # TODO: if cur_key == raw_text(text), print a warning: don't do that.
      if cur_key != raw_text(text) && content.keys.include?(raw_text(text))
        # virtual_levels += @chunk_size  # This line is commented out as in the original TypeScript
        ancestors = calc_ancestry(level, ancestors)
        # the nested file gets its own copy of the ancestry chain and is
        # offset by the level of the line that referenced it
        build_tree(raw_text(text), content, deepcopy(ancestors), get_level(size))
        next
      end
      node = TreeNode.new(
        text,
        [],
        level,
        line_num,
      )
      node.text = raw_text(node.text)
      # trim the chain back to this line's parent before recording ancestry
      ancestors = calc_ancestry(level, ancestors)
      node.ancestors = ancestors.map { |p| raw_text(p.text) }
      ancestors << node
      add_branch(node.text, node.ancestors, cur_key)
    end
  end
  # mark this file as processed
  content.delete(cur_key)
  if content.any? && total_level == 0
    return "Some files were not processed: #{content.keys.join(', ')}"
  end
  # every file has been consumed: report duplicates or hand back the nodes
  if content.empty?
    if @duplicates.any?
      duplicates = @duplicates.uniq
      error_msg = "Tree did not build, duplicate nodes found:\n\n"
      error_msg += duplicates.join(', ') + "\n\n"
      clear
      return error_msg
    end
    return @nodes.dup
  end
end

#build_tree_str(cur_node_name = @root, prefix = '') ⇒ Object



376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
# File 'lib/jekyll-semtree/tree.rb', line 376

# Renders the subtree rooted at +cur_node_name+ as an indented text diagram.
#
# cur_node_name - name of the node to render (defaults to @root).
# prefix        - String prepended to every child line; carries the
#                 vertical guides of the enclosing levels during recursion.
#
# Returns a String with one line per node, each terminated by "\n". When no
# node with that name exists in @nodes, an error is printed and only the
# name line is returned.
def build_tree_str(cur_node_name = @root, prefix = '')
  output = "#{cur_node_name}\n"
  node = @nodes.find { |n| n.text == cur_node_name }
  if node.nil?
    # Plain string on purpose: a backtick literal here would hand the
    # message (including the node name) to the shell as a command.
    puts "SemTree.build_tree_str: error: nil node for name '#{cur_node_name}'"
    return output
  end
  last_index = node.children.length - 1
  node.children.each_with_index do |child, index|
    is_last_child = index == last_index
    child_prefix = prefix + (is_last_child ? '└── ' : '├── ')
    # descendants of a non-last child keep a vertical guide in this column
    grandchild_prefix = prefix + (is_last_child ? '    ' : '|   ')
    subtree = build_tree_str(child, grandchild_prefix)
    output += "#{child_prefix}#{subtree}"
  end
  output
end

#calc_ancestry(level, ancestors) ⇒ Object



206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/jekyll-semtree/tree.rb', line 206

# Trims +ancestors+ (in place) so that its last element is the parent of a
# node at +level+, judging by the level of the current last element:
#
# - one level shallower (child of the last ancestor): nothing is removed
#     - [[parent]]
#       - [[child]]
# - same level (sibling): the last ancestor is removed
#     - [[sibling]]
#     - [[sibling]]
# - deeper than +level+ (we stepped back out): one ancestor is removed per
#   level stepped back, plus one for the node at the same level
#         - [[descendent]]
#     - [[great-grandparent]]
# - more than one level shallower: nothing is removed
#
# Returns the same +ancestors+ array.
def calc_ancestry(level, ancestors)
  depth_gap = ancestors.last.level - level
  (depth_gap + 1).times { ancestors.pop } if depth_gap >= 0
  ancestors
end

#clear ⇒ Object



277
278
279
280
281
282
# File 'lib/jekyll-semtree/tree.rb', line 277

# Resets the built tree: replaces @nodes, @petiole_map and @duplicates with
# fresh empty containers and @root with an empty string.
#
# Returns the new (empty) @duplicates array.
def clear
  @petiole_map = {}
  @nodes = []
  @root = ''
  @duplicates = []
end

#deepcopy(obj) ⇒ Object



284
285
286
# File 'lib/jekyll-semtree/tree.rb', line 284

# Returns a deep copy of +obj+ made by a Marshal dump/load round trip, so
# nested containers in the copy are independent of the original.
def deepcopy(obj)
  serialized = Marshal.dump(obj)
  Marshal.load(serialized)
end

#define_level_size(whitespace) ⇒ Object



249
250
251
252
253
254
255
256
257
258
259
# File 'lib/jekyll-semtree/tree.rb', line 249

# Derives the indentation unit from a line's leading +whitespace+, looking
# only at its first character.
#
# Returns the length of +whitespace+ when it starts with a space, 4 when it
# starts with a tab, and -1 otherwise (including for an empty string).
def define_level_size(whitespace)
  case whitespace[0]
  when ' '
    whitespace.length
  when "\t"
    4
  else
    # puts "defineLevelSize: unknown whitespace: #{whitespace}"
    -1
  end
end

#find_doc_ancestors_and_children_metadata(target_doc) ⇒ Object

find the parent and children of the ‘target_doc’.



353
354
355
356
357
# File 'lib/jekyll-semtree/tree.rb', line 353

# Finds the ancestors and children of +target_doc+.
#
# target_doc - object responding to #basename; its basename without the
#              extension is matched against node text.
#
# Returns a two-element Array: the matching node's ancestors and its
# children. Raises NoMethodError when no node matches (use in_tree? first).
def find_doc_ancestors_and_children_metadata(target_doc)
  fname = File.basename(target_doc.basename, File.extname(target_doc.basename))
  node = @nodes.detect { |n| n.text == fname }
  return node.ancestors, node.children
end

#get_all_lineage_ids(target_node_id, node = @nodes.detect { |n| n.text == @root }, ancestors = [], descendents = [], found = false) ⇒ Object

metadata methods



307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
# File 'lib/jekyll-semtree/tree.rb', line 307

# Walks the tree from +node+ looking for the target and collects identifiers
# of the nodes above it (ancestors) and below it (descendents). A node
# contributes its id, or its text when node.missing is truthy.
#
# target_node_id - value compared against both node.id and node.text.
# node           - node to inspect in this call (defaults to the root node).
# ancestors      - identifiers collected on the way down to the target.
# descendents    - identifiers collected beneath the target; the same array
#                  is shared by every call made below the target.
# found          - true once the target has been passed, so that everything
#                  below it is treated as a descendent.
#
# Returns `ancestors, descendents` (a two-element Array) from the branch
# that has reached the target. Calls above the target return the
# concatenation of what their children returned, with nils removed.
# NOTE(review): a child name with no matching entry in @nodes yields a nil
# child_node and would raise NoMethodError -- confirm callers guarantee
# every child is registered.
def get_all_lineage_ids(target_node_id, node=@nodes.detect { |n| n.text == @root }, ancestors=[], descendents=[], found=false)
  # found target node, stop adding ancestors and build descendents
  if target_node_id == node.id || target_node_id == node.text || found
    node.children.each do |child|
      child_node = @nodes.detect { |n| n.text == child }
      # if the child document is an empty string, it is a missing node
      if child_node.missing
        descendents << child_node.text
      else
        descendents << child_node.id
      end
      # `found=true` assigns the local and passes true positionally, so the
      # whole subtree below the target lands in the shared descendents array
      self.get_all_lineage_ids(target_node_id, child_node, ancestors.clone, descendents, found=true)
    end
    return ancestors, descendents
  # target node not yet found, build ancestors
  else
    # if the node document is an empty string, it is a missing node
    if node.missing
      ancestors << node.text
    else
      ancestors << node.id
    end
    results = []
    node.children.each do |child|
      child_node = @nodes.detect { |n| n.text == child }
      # each child gets its own copy of the chain; descendents is left to
      # its default here, so a fresh array is used per child
      results.concat(self.get_all_lineage_ids(target_node_id, child_node, ancestors.clone))
    end
    return results.select { |r| !r.nil? }
  end
end

#get_level(size) ⇒ Object



273
274
275
# File 'lib/jekyll-semtree/tree.rb', line 273

# Converts an indentation +size+ into a 1-based tree level by dividing it by
# @chunk_size and adding one.
def get_level(size)
  whole_chunks = size / @chunk_size
  whole_chunks + 1
end

#get_sibling_ids(target_node_id, node = @nodes.detect { |n| n.text == @root }, parent = nil) ⇒ Object



338
339
340
341
342
343
344
345
346
347
348
349
350
# File 'lib/jekyll-semtree/tree.rb', line 338

# Collects the ids of the target node's siblings, i.e. the other children
# of its parent.
#
# target_node_id - value compared against both node.id and node.text.
# node           - node the search starts from (defaults to the root node).
# parent         - parent of +node+, or nil when +node+ has none.
#
# Returns an Array of ids. Siblings that are not registered in @nodes or
# whose id is nil are left out. Returns [] when the target is the starting
# node without a parent (e.g. the root) or is not found at all.
def get_sibling_ids(target_node_id, node=@nodes.detect { |n| n.text == @root }, parent=nil)
  search_sibling_ids(target_node_id, node, parent) || []
end

# Depth-first search behind get_sibling_ids; returns nil (rather than [])
# when the target is not in this subtree so the caller can keep searching.
def search_sibling_ids(target_node_id, node, parent)
  return nil if node.nil?
  if target_node_id == node.id || target_node_id == node.text
    return [] if parent.nil?
    sibling_names = parent.children.reject { |name| name == node.text }
    # children are stored as names, so resolve each one to its node first
    sibling_nodes = sibling_names.map { |name| @nodes.detect { |n| n.text == name } }
    return sibling_nodes.compact.map { |sibling| sibling.id }.compact
  end
  node.children.each do |child|
    child_node = @nodes.detect { |n| n.text == child }
    result = search_sibling_ids(target_node_id, child_node, node)
    return result unless result.nil?
  end
  nil
end

#get_whitespace_size(whitespace) ⇒ Object



261
262
263
264
265
266
267
268
269
270
271
# File 'lib/jekyll-semtree/tree.rb', line 261

# Measures the width of a run of leading +whitespace+.
#
# Returns the character count when the run contains at least one space (or
# contains neither spaces nor tabs), and the character count times 4 when
# it contains tabs but no spaces.
def get_whitespace_size(whitespace)
  char_count = whitespace.length
  tabs_only = !whitespace.include?(' ') && whitespace.include?("\t")
  tab_size = 4
  tabs_only ? char_count * tab_size : char_count
end

#in_tree?(fname) ⇒ Boolean

Returns:

  • (Boolean)


359
360
361
# File 'lib/jekyll-semtree/tree.rb', line 359

# Tells whether any node in @nodes has +fname+ as its text.
#
# Returns true or false.
def in_tree?(fname)
  @nodes.any? { |node| node.text == fname }
end

#is_markdown_bullet(text) ⇒ Object



23
24
25
26
27
28
29
# File 'lib/jekyll-semtree/tree.rb', line 23

# Tells whether +text+ is exactly one of the markdown bullet markers
# (MARKDOWN_BULLET_ASTERISK, MARKDOWN_BULLET_DASH, MARKDOWN_BULLET_PLUS).
#
# Returns true or false.
def is_markdown_bullet(text)
  bullets = [MARKDOWN_BULLET_ASTERISK, MARKDOWN_BULLET_DASH, MARKDOWN_BULLET_PLUS]
  bullets.any? { |bullet| bullet == text }
end


369
370
371
372
373
374
# File 'lib/jekyll-semtree/tree.rb', line 369

# Prints a header line followed by one "# <node>" line per entry of @nodes,
# using each node's to_s.
#
# Returns @nodes.
def print_nodes
  puts "# Tree Nodes: "
  @nodes.each { |entry| puts "# #{entry.to_s}" }
end

#raw_text(full_text) ⇒ Object

util methods



236
237
238
239
240
241
242
243
244
245
246
247
# File 'lib/jekyll-semtree/tree.rb', line 236

# Reduces a line's text to the bare node name: drops a leading markdown
# bullet (only when @mkdn_list is set), removes every wikilink bracket pair
# and strips line-break characters.
#
# full_text - String to clean. It is never modified, so frozen strings
#             (for example Hash keys) are accepted.
#
# Returns a new String.
def raw_text(full_text)
  # strip markdown list marker if it exists
  stripped = if @mkdn_list && is_markdown_bullet(full_text[0..1])
    full_text[2..-1]
  else
    full_text
  end
  # strip wikistring special chars and line breaks; the non-bang gsub
  # returns copies instead of editing the caller's string in place
  stripped
    .gsub(OPEN_BRACKETS, '')
    .gsub(CLOSE_BRACKETS, '')
    .gsub(/\r?\n|\r/, '')
end

#set_units(lines) ⇒ Object



288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
# File 'lib/jekyll-semtree/tree.rb', line 288

# Scans +lines+ to establish the indentation unit and the deepest level.
#
# @chunk_size is taken from the first line whose leading whitespace yields a
# non-negative size from define_level_size; @level_max is raised to the
# largest level seen.
#
# Indentation is measured with get_whitespace_size, the same measure
# build_tree uses, so a tab counts as a full chunk here as well. Measuring
# by raw character count made every tab-indented line look like level 1.
#
# lines - Array of line Strings.
#
# Returns +lines+.
def set_units(lines)
  lines.each do |line|
    level_match = line.match(REGEX[:LEVEL])
    next unless level_match
    indent = level_match[0]
    @chunk_size = define_level_size(indent) if @chunk_size < 0
    level = get_level(get_whitespace_size(indent))
    @level_max = level if level > @level_max
  end
end

#to_s ⇒ Object

…for debugging



365
366
367
# File 'lib/jekyll-semtree/tree.rb', line 365

# ...for debugging: prints the rendered tree and also returns it.
#
# The rendered String is returned (instead of the nil that puts yields) so
# that string interpolation and other to_s callers receive the diagram.
def to_s
  rendered = build_tree_str
  puts rendered
  rendered
end