Class: SameSame::DendrogramPrinter

Inherits:
Object
  • Object
show all
Defined in:
lib/same_same/dendrogram_printer.rb

Instance Method Summary collapse

Instance Method Details

#formatted_datapoint_name(content, common_words) ⇒ Object



55
56
57
58
59
60
61
# File 'lib/same_same/dendrogram_printer.rb', line 55

# Builds the display string for a single datapoint name.
#
# When a line of +content+ starts with a "<digits>:" prefix, that prefix is
# sent through String#cyan (an extension provided outside this block) and the
# remainder of the line goes through highlight_common; the two parts are joined
# by one space. Content without such a prefix goes through highlight_common
# unchanged.
#
# @param content [String] the datapoint name, optionally prefixed with "<digits>:"
# @param common_words [#include?] words handed on to highlight_common
# @return [String] the formatted name
def formatted_datapoint_name( content, common_words )
  prefixed = /^(\d+:)(.*)/.match(content)
  return highlight_common( content, common_words ) unless prefixed

  number, remainder = prefixed.captures
  "#{number.cyan} #{highlight_common( remainder, common_words )}"
end

#highlight_common(content, common_words) ⇒ Object



50
51
52
53
# File 'lib/same_same/dendrogram_printer.rb', line 50

# Re-joins the whitespace-separated words of +content+ with single spaces,
# decorating every word whose downcased form is NOT in +common_words+ with
# String#bold and String#red (extensions provided outside this block). Words
# that are in +common_words+ are emitted untouched, so the words that differ
# are the ones that stand out.
#
# @param content [String] text to tokenise; surrounding whitespace is ignored
# @param common_words [#include?] downcased words to leave undecorated
# @return [String] the decorated words joined by a single space
def highlight_common( content, common_words )
  tokens = content.strip.split(/\s+/)
  decorated = tokens.map do |token|
    if common_words.include?(token.downcase)
      token
    else
      token.bold.red
    end
  end
  decorated.join(" ")
end


#print(dnd) ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/same_same/dendrogram_printer.rb', line 21

# Prints a report for every level of a dendrogram.
#
# For each level a banner is written (level label and name, the number of
# clusters that are not single-point, and the number of datapoints sitting in
# single-point clusters), followed by the datapoints of every cluster with more
# than one member. After the last level the datapoints still in single-point
# clusters are listed under "FINAL UNGROUPED".
#
# @param dnd [#levels, #level_label] levels respond to #name and #clusters;
#   clusters respond to #size and #datapoints
# @return [Object] whatever dnd.levels.each_with_index returns
def print(dnd)
  divider = "-" * 80

  dnd.levels.each_with_index do |level, index|
    singletons = level.clusters.select { |cluster| cluster.size == 1 }
    ungrouped = singletons.map(&:datapoints).flatten
    grouped_count = level.clusters.size - singletons.size

    # Banner: blank line, rule, title, blank line, counts, rule, blank line.
    puts "", divider
    puts "#{dnd.level_label}: #{level.name}", ""
    puts "Clusters: #{grouped_count}"
    puts "Ungrouped: #{ungrouped.size}"
    puts divider, ""

    level.clusters.each do |cluster|
      next unless cluster.size > 1

      print_points( cluster.datapoints )
    end
    puts

    # Only the final level lists the leftover single-point datapoints.
    next unless index == dnd.levels.size - 1

    puts "FINAL UNGROUPED"
    print_points(ungrouped)
  end
end


#print_clusters(clusters) ⇒ Object



12
13
14
15
16
17
18
19
# File 'lib/same_same/dendrogram_printer.rb', line 12

# Prints every cluster that has more than one member: an optional
# "## <name>" heading (only when the cluster's name is truthy) followed by the
# cluster's datapoints via print_points. Smaller clusters are skipped.
#
# @param clusters [#each] clusters responding to #size, #name and #datapoints
# @return [Object] whatever clusters.each returns
def print_clusters(clusters)
  clusters.each do |cluster|
    next unless cluster.size > 1

    heading = cluster.name
    puts "## #{heading}" if heading
    print_points( cluster.datapoints )
  end
end


#print_last(dnd) ⇒ Object



7
8
9
10
# File 'lib/same_same/dendrogram_printer.rb', line 7

# Prints only the clusters of the last level of the dendrogram by handing
# them to print_clusters.
#
# @param dnd [#levels] object whose #levels responds to #last
# @return [Object] the result of print_clusters
def print_last(dnd)
  print_clusters( dnd.levels.last.clusters )
end


#print_points(datapoints) ⇒ Object



63
64
65
66
67
68
69
70
71
# File 'lib/same_same/dendrogram_printer.rb', line 63

# Prints a group of datapoints, one per line, ordered by name, with a blank
# line before and after the group.
#
# The words shared by every datapoint in the group are collected and passed to
# formatted_datapoint_name together with each id. A leading "<digits>:" prefix
# is removed before the words are collected, because
# formatted_datapoint_name also drops that prefix before it tokenises the
# name. Without this, an id such as "1:foo bar" contributes the token "1:foo"
# and "foo" can never be recognised as shared.
#
# @param datapoints [Enumerable] objects whose #id returns a String
# @return [nil]
def print_points(datapoints)
  puts
  # Pair each datapoint with its prefix-free name; the name serves both as the
  # sort key and as the text the shared words are taken from.
  named = datapoints.map { |dp| [dp.id.gsub(/^\d+:/, ''), dp] }
  # Tokenise the same way highlight_common does (strip, then split on whitespace).
  term_lists = named.map { |name, _dp| name.downcase.strip.split(/\s+/) }
  # Intersect every term list; seeding with all terms keeps an empty group at [].
  common_words = term_lists.inject(term_lists.flatten.uniq) { |shared, terms| shared & terms }
  named.sort_by { |name, _dp| name }.each do |_name, dp|
    puts formatted_datapoint_name( dp.id, common_words )
  end
  puts
end