Class: Flay

Inherits:
Object
  • Object
show all
Defined in:
lib/flay.rb,
lib/flay_erb.rb

Defined Under Namespace

Classes: Erubis, Item, Location

Constant Summary collapse

VERSION =

:nodoc:

"2.7.0"
DEFAULT_IGNORE =

so I can move this to flog wholesale

".flayignore"
MAX_NODE_SIZE =

:stopdoc:

10
MAX_AVG_MASS =

prevents exponential blowout

12

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(option = nil) ⇒ Flay

Create a new instance of Flay with options.



209
210
211
212
213
214
215
216
217
# File 'lib/flay.rb', line 209

# Create a new instance of Flay with options.
#
# +option+ is the options hash. When it is nil (or false) the hash returned
# by Flay.default_options is used instead. The hash is stored in @option and
# its :mass entry becomes the initial mass_threshold.
def initialize(option = nil)
  @option = option ? option : Flay.default_options

  # Bucket table: reading an unseen key stores and returns a new empty Array.
  @hashes = Hash.new { |table, key| table[key] = [] }

  self.mass_threshold = @option[:mass]
  self.identical      = {}
  self.masses         = {}
  self.total          = 0
end

Instance Attribute Details

#hashesObject (readonly)

Returns the value of attribute hashes.



203
204
205
# File 'lib/flay.rb', line 203

# Reader for @hashes, the bucket table created in #initialize. #process_sexp
# appends nodes to it under their structural hash.
def hashes; @hashes; end

#identicalObject

:stopdoc:



202
203
204
# File 'lib/flay.rb', line 202

# Reader for @identical. #analyze fills it with, per structural hash, whether
# every node in that bucket compared equal to the first one.
def identical; @identical; end

#mass_thresholdObject

:stopdoc:



202
203
204
# File 'lib/flay.rb', line 202

# Reader for @mass_threshold. #initialize seeds it from option[:mass];
# #process_sexp skips nodes whose mass is below it.
def mass_threshold; @mass_threshold; end

#massesObject

:stopdoc:



202
203
204
# File 'lib/flay.rb', line 202

# Reader for @masses, the structural hash => mass table rebuilt by
# #update_masses.
def masses; @masses; end

#optionObject (readonly)

Returns the value of attribute option.



203
204
205
# File 'lib/flay.rb', line 203

# Reader for @option, the options hash given to (or defaulted by)
# #initialize.
def option; @option; end

#totalObject

:stopdoc:



202
203
204
# File 'lib/flay.rb', line 202

# Reader for @total, the running sum of all bucket masses maintained by
# #update_masses.
def total; @total; end

Class Method Details

.default_optionsObject

Returns the default options.



31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/flay.rb', line 31

# Returns the default options.
#
# A new Hash is built on every call, so a caller (parse_options, for one)
# can modify the result without affecting later calls.
def self.default_options
  {
    diff:    false,
    mass:    16,
    summary: false,
    verbose: false,
    number:  true,
    timeout: 10,
    liberal: false,
    fuzzy:   false,
    only:    nil,
  }
end

.expand_dirs_to_files(*dirs) ⇒ Object

Expands *dirs to all files within that match ruby and rake extensions. – REFACTOR: from flog



128
129
130
131
132
133
134
135
136
137
138
# File 'lib/flay.rb', line 128

# Expands +dirs+ (any nesting of arrays of paths) into a flat list of paths.
#
# A path that is a directory is replaced by every file beneath it whose
# extension is "rb" or one of the names returned by Flay.load_plugins; any
# other path is passed through untouched. A leading "./" is removed from
# every resulting path.
#--
# REFACTOR: from flog
def self.expand_dirs_to_files(*dirs)
  known   = ["rb"] + Flay.load_plugins
  pattern = "*.{#{known.join(",")}}"

  expanded = dirs.flatten.flat_map do |path|
    File.directory?(path) ? Dir[File.join(path, "**", pattern)] : path
  end

  expanded.map { |path| path.sub(/^\.\//, "") } # strip "./" from paths
end

.filter_files(files, ignore = DEFAULT_IGNORE) ⇒ Object

A file filter mechanism similar to, but not as extensive as, .gitignore files:

- If a pattern does not contain a slash, it is treated as a shell glob.
- If a pattern ends in a slash, it matches on directories (and contents).
- Otherwise, it matches on relative paths.

File.fnmatch is used throughout, so glob patterns work for all 3 types.



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/flay.rb', line 153

# A file filter mechanism similar to, but not as extensive as,
# .gitignore files.
#
# +files+ is the list of paths to filter. +ignore+ is either an object that
# responds to +read+ (its contents are used) or the path of an ignore file
# (read only if it exists). When neither yields any patterns, +files+ is
# returned unchanged.
#
# Patterns are split on newlines and classified as follows:
#
# + If a pattern does not contain a slash, it is treated as a shell glob.
# + If a pattern ends in a slash, it is compared (minus the slash) against
#   the directory part of each file.
# + Otherwise, it matches on relative paths.
#
# File.fnmatch is used throughout, so glob patterns work for all 3 types.
def self.filter_files files, ignore = DEFAULT_IGNORE
  # File.exist? rather than File.exists?: the latter was deprecated and no
  # longer exists as of Ruby 3.2.
  ignore_paths = if ignore.respond_to? :read then
                   ignore.read
                 elsif File.exist? ignore then
                   File.read ignore
                 end

  return files unless ignore_paths

  nonglobs, globs = ignore_paths.split("\n").partition { |p| p.include? "/" }
  dirs, ifiles    = nonglobs.partition { |p| p.end_with? "/" }
  dirs            = dirs.map { |s| s.chomp "/" }

  only_paths = File::FNM_PATHNAME # "*" must not match across "/"
  files.reject { |f|
    dirs.any?     { |i| File.fnmatch?(i, File.dirname(f), only_paths) } ||
      globs.any?  { |i| File.fnmatch?(i, f) } ||
      ifiles.any? { |i| File.fnmatch?(i, f, only_paths) }
  }
end

.load_pluginsObject

Loads all flay plugins. Files must be named “flay_*.rb”.



179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# File 'lib/flay.rb', line 179

# Loads all flay plugins. Files must be named "flay_*.rb".
#
# Returns the list of plugin names, i.e. the part of each file name after
# the "flay_" prefix and without ".rb". Discovery runs only while @@plugins
# is undefined; afterwards the cached list is returned. Files matching
# /flay_task/ are skipped. A plugin that raises LoadError is reported with a
# warning and left out.
#
# Any other StandardError is deliberately swallowed (plugin loading is best
# effort), but an Array is still returned -- whatever was collected so far,
# or [] -- because callers concatenate the result onto ["rb"] and a nil
# would raise TypeError there.
def self.load_plugins
  unless defined? @@plugins then
    @@plugins = []

    plugins = Gem.find_files("flay_*.rb").reject { |p| p =~ /flay_task/ }

    plugins.each do |plugin|
      plugin_name = File.basename(plugin, ".rb").sub(/^flay_/, "")
      next if @@plugins.include? plugin_name
      begin
        load plugin
        @@plugins << plugin_name
      rescue LoadError => e
        warn "error loading #{plugin.inspect}: #{e.message}. skipping..."
      end
    end
  end
  @@plugins
rescue
  # ignore the failure itself, but never hand nil back to callers
  defined?(@@plugins) ? @@plugins : []
end

.parse_options(args = ARGV) ⇒ Object

Process options in args, defaulting to ARGV.



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/flay.rb', line 48

# Process options in args, defaulting to ARGV.
#
# Starts from default_options, registers the command line switches, lets
# each known extension add its own switches through an optional
# "options_<ext>" class method, and then parses +args+ destructively
# (recognised switches are removed from it). Returns the options hash.
#
# A parse error aborts the process with the error followed by the usage
# text; -h/--help prints the usage text and exits.
def self.parse_options(args = ARGV)
  options = default_options

  OptionParser.new do |parser|
    parser.banner  = "flay [options] files_or_dirs"
    parser.version = Flay::VERSION

    ["", "Specific options:", ""].each { |text| parser.separator text }

    parser.on("-h", "--help", "Display this help.") do
      puts parser
      exit
    end

    # The argument is optional; a bare -f means a difference of 1.
    parser.on("-f", "--fuzzy [DIFF]", Integer,
              "Detect fuzzy (copy & paste) duplication (default 1).") do |diff|
      options[:fuzzy] = diff || 1
    end

    parser.on("-l", "--liberal", "Use a more liberal detection method.") do
      options[:liberal] = true
    end

    parser.on("-m", "--mass MASS", Integer,
              "Sets mass threshold (default = #{options[:mass]})") do |mass|
      options[:mass] = mass.to_i
    end

    parser.on("-#", "Don't number output (helps with diffs)") do
      options[:number] = false
    end

    parser.on("-v", "--verbose", "Verbose. Show progress processing files.") do
      options[:verbose] = true
    end

    parser.on("-o", "--only NODE", String, "Only show matches on NODE type.") do |node|
      options[:only] = node.to_sym
    end

    parser.on("-d", "--diff", "Diff Mode. Display N-Way diff for ruby.") do
      options[:diff] = true
    end

    parser.on("-s", "--summary", "Summarize. Show flay score per file only.") do
      options[:summary] = true
    end

    parser.on("-t", "--timeout TIME", Integer,
              "Set the timeout. (default = #{options[:timeout]})") do |seconds|
      options[:timeout] = seconds.to_i
    end

    known = ["rb"] + Flay.load_plugins

    parser.separator ""
    parser.separator "Known extensions: #{known.join(", ")}"

    # Give every extension that defines options_<ext> a chance to add switches.
    known.each do |ext|
      hook = "options_#{ext}"
      send(hook, parser, options) if respond_to?(hook)
    end

    begin
      parser.parse! args
    rescue => e
      abort "#{e}\n\n#{parser}"
    end
  end

  options
end

Instance Method Details

#analyze(filter = nil) ⇒ Object

Prune, find identical nodes, and update masses.



256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
# File 'lib/flay.rb', line 256

# Prune, find identical nodes, and update masses.
#
# Returns an Array of Item, ordered by descending mass and then by the file,
# line and node type of each bucket's first node. Every Item carries the
# structural hash, the node type, a "*N" bonus string when all N nodes of
# the bucket are equal (nil otherwise), the mass, and one Location per node
# sorted by file and line. Locations of modified nodes are tagged :fuzzy.
#
# When +filter+ is given, only buckets whose first node has that type are
# reported.
def analyze(filter = nil)
  prune

  hashes.each do |hash, nodes|
    lead = nodes.first
    identical[hash] = nodes[1..-1].all? { |other| other == lead }
  end

  update_masses

  ranked = masses.sort_by do |hash, mass|
    lead = hashes[hash].first
    [-mass, lead.file, lead.line, lead.first.to_s]
  end

  ranked.each_with_object([]) do |(hash, mass), items|
    nodes = hashes[hash]
    lead  = nodes.first

    next if filter && lead.first != filter

    bonus = identical[hash] ? "*#{nodes.size}" : nil

    locs = nodes.sort_by { |node| [node.file, node.line] }.map do |node|
      Location[node.file, node.line, node.modified? ? :fuzzy : nil]
    end

    items << Item[hash, lead.first, bonus, mass, locs]
  end
end

#collapse_and_label(ary) ⇒ Object

:nodoc:



474
475
476
477
478
479
480
481
482
# File 'lib/flay.rb', line 474

# Walks the arrays in +ary+ in parallel, row by row. A row whose entries are
# all equal collapses to that line prefixed with three spaces; otherwise the
# row becomes an array of its non-empty lines, each prefixed with the line's
# +group+ label and ": ".
def collapse_and_label(ary) # :nodoc:
  first, *rest = ary

  first.zip(*rest).map do |row|
    next "   #{row.first}" if row.uniq.length == 1

    row.reject(&:empty?).map { |line| "#{line.group}: #{line}" }
  end
end

#n_way_diff(*data) ⇒ Object

Output an n-way diff from data. This is only used if --diff is given.



438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
# File 'lib/flay.rb', line 438

# Output an n-way diff from data. This is only used if --diff is given.
#
# Each element of +data+ is one source text. Every text is split into lines
# (labelled per source by split_and_group) and divided into its leading
# comment lines (those starting with "#") and the remaining code lines.
# Comments and code are then padded to equal length, collapsed and labelled
# separately, and joined into a single newline-separated String, comments
# first.
def n_way_diff *data
  comments = []
  codes    = []

  split_and_group(data).each do |subdata|
    # Index of the first non-comment line. find_index returns nil when every
    # line is a comment; in that case the whole chunk is leading comments and
    # there is no code (previously this raised NoMethodError on nil).
    n = subdata.find_index { |s| s !~ /^#/ } || subdata.size

    comments << subdata[0, n]
    codes    << subdata[n..-1]
  end

  comments = collapse_and_label pad_with_empty_strings comments
  codes    = collapse_and_label pad_with_empty_strings codes

  (comments + codes).flatten.join("\n")
end

#pad_with_empty_strings(ary) ⇒ Object

:nodoc:



468
469
470
471
472
# File 'lib/flay.rb', line 468

# Returns copies of the arrays in +ary+, each extended with "" entries until
# all of them are as long as the longest one.
def pad_with_empty_strings(ary) # :nodoc:
  width = ary.map(&:size).max

  ary.map { |row| row + Array.new(width - row.size, "") }
end

#process(*files) ⇒ Object

Process any number of files.



222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# File 'lib/flay.rb', line 222

# Process any number of files.
#
# Each file is handed to the "process_<ext>" method matching its extension
# (a file without extension counts as "rb"; an extension with no matching
# method falls back to process_rb after a warning). A non-nil result is
# passed on to process_sexp.
#
# A StandardError raised by the parser method, or a nil result, skips the
# file; a SyntaxError from either step skips it as well. Skips caused by an
# error are reported through warn.
def process(*files) # TODO: rename from process - should act as SexpProcessor
  files.each do |file|
    warn "Processing #{file}" if option[:verbose]

    ext = File.extname(file).sub(/^\./, "")
    ext = "rb" if ext.empty?

    handler = "process_#{ext}"
    unless respond_to? handler
      warn "  Unknown file type: #{ext}, defaulting to ruby"
      handler = "process_rb"
    end

    begin
      sexp =
        begin
          send handler, file
        rescue => e
          warn "  #{e.message.strip}"
          warn "  skipping #{file}"
          nil
        end

      process_sexp sexp if sexp
    rescue SyntaxError => e
      # SyntaxError is not a StandardError, so the inner rescue lets it through.
      warn "  skipping #{file}: #{e.message}"
    end
  end
end

#process_erb(file) ⇒ Object

Process erb and parse the result. Returns the sexp of the parsed ruby.



13
14
15
16
17
18
19
20
21
22
23
# File 'lib/flay_erb.rb', line 13

# Process erb and parse the result. Returns the sexp of the parsed ruby.
#
# The file is read, converted to ruby source via Erubis#src, and that source
# is given to RubyParser. If parsing raises, the generated ruby source is
# printed with warn when option[:verbose] is set, and the error is re-raised.
def process_erb(file)
  erb    = File.read file
  source = Erubis.new(erb).src

  begin
    RubyParser.new.process source, file
  rescue
    warn source if option[:verbose]
    raise
  end
end

#process_fuzzy(node, difference) ⇒ Object

Process “fuzzy” matches for node. A fuzzy match is a subset of node up to difference elements less than the original.



341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
# File 'lib/flay.rb', line 341

# Process "fuzzy" matches for node. A fuzzy match is a subset of node up to
# difference elements less than the original.
#
# Nothing is done for nodes without code, for nodes with more than
# MAX_NODE_SIZE elements, or for nodes whose mass per element exceeds
# MAX_AVG_MASS. Otherwise the node is split into a template and its code
# elements, the template is flagged as modified, and every combination of
# the code elements that drops between 1 and +difference+ of them is
# appended to the template. A candidate is filed in hashes under its
# structural hash unless it contains no sub-Sexp, is lighter than
# mass_threshold, or the bucket already holds a node from the same file and
# line.
def process_fuzzy(node, difference)
  return unless node.has_code?

  avg_mass = node.mass / node.size
  return if node.size > MAX_NODE_SIZE || avg_mass > MAX_AVG_MASS

  tmpl, code = node.split_code
  tmpl.modified = true

  (code.size - 1).downto(code.size - difference) do |keep|
    code.combination(keep) do |subcode|
      candidate = tmpl + subcode

      next unless candidate.any? { |sub| Sexp === sub }
      next if candidate.mass < mass_threshold

      bucket = hashes[candidate.structural_hash]

      # they're already structurally similar, don't bother adding another
      already_there = bucket.any? do |seen|
        seen.file == candidate.file && seen.line == candidate.line
      end

      bucket << candidate unless already_there
    end
  end
end

#process_rb(file) ⇒ Object

Parse a ruby file and return the sexp.

– TODO: change the system and rename this to parse_rb.



310
311
312
313
314
315
316
# File 'lib/flay.rb', line 310

# Parse a ruby file and return the sexp.
#
# The file is read in binary mode and parsed by RubyParser with
# option[:timeout] as the time limit. On Timeout::Error a warning is printed
# and nil is returned.
#--
# TODO: change the system and rename this to parse_rb.
def process_rb(file)
  RubyParser.new.process(File.binread(file), file, option[:timeout])
rescue Timeout::Error
  warn "TIMEOUT parsing #{file}. Skipping."
end

#process_sexp(pt) ⇒ Object

Process a sexp pt.



321
322
323
324
325
326
327
328
329
330
# File 'lib/flay.rb', line 321

# Process a sexp pt.
#
# Visits every node yielded by pt.deep_each. A node that contains at least
# one sub-Sexp and whose mass reaches mass_threshold is appended to the
# hashes bucket for its structural hash. When option[:fuzzy] is set, the
# node is also passed to process_fuzzy with that value.
def process_sexp(pt)
  pt.deep_each do |node|
    has_children = node.any? { |child| Sexp === child }
    next if !has_children || node.mass < mass_threshold

    hashes[node.structural_hash] << node

    fuzzy = option[:fuzzy]
    process_fuzzy node, fuzzy if fuzzy
  end
end

#pruneObject

Prunes nodes that aren’t relevant to analysis or are already covered by another node.



371
372
373
374
375
376
377
378
379
# File 'lib/flay.rb', line 371

# Prunes nodes that aren't relevant to analysis or are already covered by
# another node.
#
# Buckets holding a single node, and buckets in which every node is
# modified, are dropped first. The rest is delegated to prune_liberally when
# option[:liberal] is set and to prune_conservatively otherwise; that
# method's result is returned.
def prune
  # prune trees that aren't duped at all, or are too small
  hashes.delete_if do |_, nodes|
    nodes.size == 1 || nodes.all?(&:modified?)
  end

  option[:liberal] ? prune_liberally : prune_conservatively
end

#prune_conservativelyObject

Conservative prune. Remove any bucket that is known to contain a subnode element of a node in another bucket.



385
386
387
388
389
390
391
392
393
394
395
396
397
# File 'lib/flay.rb', line 385

# Conservative prune. Remove any bucket that is known to contain a subnode
# element of a node in another bucket.
#
# Only the first node of each bucket is consulted for its structural
# subhashes. Returns hashes.
def prune_conservatively
  # extract all subtree hashes from all nodes
  covered = hashes.values.each_with_object({}) do |nodes, seen|
    nodes.first.all_structural_subhashes.each { |sub| seen[sub] = true }
  end

  # nuke subtrees so we show the biggest matching tree possible
  hashes.delete_if { |hash, _| covered.key?(hash) }
end

#prune_liberallyObject

Liberal prune. Remove any element from a bucket that is known to be a subnode of another node. Removed by identity.



403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
# File 'lib/flay.rb', line 403

# Liberal prune. Remove any element from a bucket that is known to be a
# subnode of another node. Removed by identity.
#
# Masses are refreshed first. A subnode is left alone when its bucket's mass
# is known and greater than the mass of the enclosing node's bucket. Buckets
# left with one node or none are dropped. Returns hashes.
def prune_liberally
  update_masses

  doomed = Hash.new { |table, key| table[key] = [] }

  # record each subtree by subhash, but skip if subtree mass > parent mass
  hashes.values.each do |bucket|
    bucket.each do |parent|
      parent_mass = masses[parent.structural_hash]

      parent.deep_each do |child|
        child_hash = child.structural_hash
        child_mass = masses[child_hash]

        doomed[child_hash] << child unless child_mass && child_mass > parent_mass
      end
    end
  end

  # nuke only individual items by object identity
  hashes.each { |hash, bucket| bucket.delete_eql doomed[hash] }

  # nuke buckets we happened to fully empty
  hashes.delete_if { |_, bucket| bucket.size <= 1 }
end

#report(io = $stdout) ⇒ Object

Output the report. Duh.



504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
# File 'lib/flay.rb', line 504

# Output the report to +io+.
#
# The result of analyze (restricted to option[:only]) is printed after a
# total score line. With option[:summary] only the per-file scores from
# summary are listed, highest first, and nothing else is printed.
#
# Otherwise each item gets a header line (numbered when option[:number] is
# set) followed by one line per location. With option[:diff] the locations
# are lettered from "A" and an n-way diff of the bucket's nodes follows;
# each node is converted with a "sexp_to_<ext>" method when one exists for
# its file extension, and with sexp_to_rb otherwise.
def report(io = $stdout)
  data = analyze option[:only]

  io.puts "Total score (lower is better) = #{total}"

  if option[:summary]
    io.puts

    summary.sort_by { |_, score| -score }.each do |file, score|
      io.puts format("%8.2f: %s", score, file)
    end

    return
  end

  data.each_with_index do |item, index|
    number = option[:number] ? "%d) " % (index + 1) : nil
    kind   = item.identical? ? "IDENTICAL" : "Similar"

    io.puts
    io.puts format("%s%s code found in %p (mass%s = %d)",
                   number, kind, item.name, item.bonus, item.mass)

    item.locations.each_with_index do |loc, i|
      label = option[:diff] ? "%s: " % ("A".ord + i).chr : nil
      fuzzy = loc.fuzzy? ? " (FUZZY)" : nil
      io.puts format("  %s%s:%d%s", label, loc.file, loc.line, fuzzy)
    end

    next unless option[:diff]

    io.puts

    sources = hashes[item.structural_hash].map do |node|
      converter = "sexp_to_#{File.extname(node.file).sub(/./, "")}"
      respond_to?(converter) ? send(converter, node) : sexp_to_rb(node)
    end

    io.puts n_way_diff(*sources)
  end
end

#sexp_to_rb(sexp) ⇒ Object



551
552
553
554
555
556
557
558
559
# File 'lib/flay.rb', line 551

# Converts +sexp+ back to ruby source using Ruby2Ruby.
#
# ruby2ruby is required lazily on each call; if it cannot be loaded, the
# message "ruby2ruby is required for diff" is returned in place of source.
# The converter is created once and kept in @r2r. It is always given a deep
# clone of +sexp+ rather than +sexp+ itself.
def sexp_to_rb(sexp)
  begin
    require "ruby2ruby"
  rescue LoadError
    return "ruby2ruby is required for diff"
  end

  (@r2r ||= Ruby2Ruby.new).process(sexp.deep_clone)
end

#split_and_group(ary) ⇒ Object

:nodoc:



458
459
460
461
462
463
464
465
466
# File 'lib/flay.rb', line 458

# Splits every string in +ary+ into its lines and sets each line's +group+
# to a letter identifying the source string: "A" for the first, "B" for the
# second, and so on. Returns one array of lines per source string.
def split_and_group(ary) # :nodoc:
  ary.map.with_index do |source, index|
    label = ("A".ord + index).chr

    source.scan(/^.*/).each { |line| line.group = label }
  end
end

#summaryObject

Calculate summary scores on a per-file basis. For --summary.



487
488
489
490
491
492
493
494
495
496
497
498
499
# File 'lib/flay.rb', line 487

# Calculate summary scores on a per-file basis. For --summary.
#
# The mass of each bucket is divided evenly among its nodes and every share
# is credited to the file of its node. Returns a Hash of file => score whose
# default value is 0.
def summary
  masses.each_with_object(Hash.new(0)) do |(hash, mass), score|
    sexps = hashes[hash]
    share = mass.to_f / sexps.size

    sexps.each { |sexp| score[sexp.file] += share }
  end
end

#update_massesObject

Reset total and recalculate the masses for all nodes in hashes.



294
295
296
297
298
299
300
301
302
# File 'lib/flay.rb', line 294

# Reset total and recalculate the masses for all nodes in hashes.
#
# A bucket's mass is the mass of its first node times the number of nodes
# in the bucket, multiplied by that number once more when the bucket is
# flagged in identical. total becomes the sum of all bucket masses.
def update_masses
  self.total = 0
  masses.clear

  hashes.each do |hash, nodes|
    count = nodes.size
    score = nodes.first.mass * count
    score *= count if identical[hash]

    masses[hash] = score
    self.total  += score
  end
end