Class: Plagiarism::Strategies::Engine

Inherits:
Object
  • Object
show all
Defined in:
lib/plagiarism/strategries/engine.rb

Direct Known Subclasses

Bing, Duck, Google, Yahoo

Constant Summary collapse

THRESHOLD =
0.8

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(c, p) ⇒ Engine

Returns a new instance of Engine.



34
35
36
# File 'lib/plagiarism/strategries/engine.rb', line 34

# Stores the two constructor arguments on the new instance.
#
# @param c the content; kept in +@content+
# @param p the parameters; kept in +@params+
def initialize(c, p)
  @content = c
  @params = p
end

Instance Attribute Details

#content ⇒ Object

Returns the value of attribute content.



6
7
8
# File 'lib/plagiarism/strategries/engine.rb', line 6

# Reader for the +@content+ instance variable.
#
# @return [Object] the current value of +@content+
def content
  @content
end

#params ⇒ Object

Returns the value of attribute params.



6
7
8
# File 'lib/plagiarism/strategries/engine.rb', line 6

# Reader for the +@params+ instance variable.
#
# @return [Object] the current value of +@params+
def params
  @params
end

Class Method Details

.exists?(response) ⇒ Boolean

Returns:

  • (Boolean)


17
18
19
# File 'lib/plagiarism/strategries/engine.rb', line 17

# Tests the URIs that +iterate+ yields for +response+ against the whitelist.
#
# The whitelist regex is built once up front rather than once per yielded
# URI, and +match?+ is used because only a yes/no answer is needed.
#
# @param response the search response; handed to +iterate+ unchanged
# @return [Boolean] the result of +iterate+ in its default mode, with a block
#   that is truthy when the URI's host matches +whitelists_regex+
#   (presumably true only when every yielded host is whitelisted — confirm
#   against the subclass +iterate+ implementations)
def exists?(response)
  whitelist = whitelists_regex
  iterate(response) { |uri| whitelist.match?(uri.host) }
end

.fetch(content, params) ⇒ Object



9
10
11
# File 'lib/plagiarism/strategries/engine.rb', line 9

# Placeholder for the search request; this base implementation always raises.
#
# Callers in this class invoke +success?+ and +response_body+ on the value an
# implementation returns. Presumably each search-engine subclass overrides
# this method — confirm against the subclasses.
#
# @param content the query text (unused here)
# @param params the request parameters (unused here)
# @raise [RuntimeError] always; the message names the missing override
#   instead of the default "unhandled exception"
def fetch(content, params)
  raise "#{self} must implement fetch(content, params)"
end

.iterate(r, a = :all?) ⇒ Object



13
14
15
# File 'lib/plagiarism/strategries/engine.rb', line 13

# Placeholder for walking the result URIs of a response; this base
# implementation always raises.
#
# Callers in this class pass a block that receives an object responding to
# +host+ and +to_s+. Presumably each search-engine subclass overrides this
# method — confirm against the subclasses.
#
# @param r the search response (unused here)
# @param a the iteration mode, +:all?+ by default; +retrieve_link+ passes
#   +:find+ (unused here)
# @raise [RuntimeError] always; the message names the missing override
#   instead of the default "unhandled exception"
def iterate(r, a = :all?)
  raise "#{self} must implement iterate(r, a)"
end

.valid_segments(ps, params) ⇒ Object



21
22
23
24
25
26
# File 'lib/plagiarism/strategries/engine.rb', line 21

# Counts the segments of +ps+ whose quoted search both succeeds and passes
# +exists?+.
#
# @param ps an object whose +segment+ method returns the sentences to check
# @param params handed to +fetch+ unchanged for every sentence
# @return [Integer] how many sentences produced a truthy check
def valid_segments(ps, params)
  sentences = ps.segment
  sentences.count do |sentence|
    # Wrap the sentence in double quotes before handing it to fetch.
    result = fetch(%("#{sentence}"), params)
    next false unless result.success?

    exists?(result.response_body)
  end
end

.whitelists_regex ⇒ Object



28
29
30
31
# File 'lib/plagiarism/strategries/engine.rb', line 28

# Combines the configured whitelist entries into one regex.
#
# @return [Regexp] the union of every entry of +Config.whitelists+, each
#   compiled with +Regexp.new+ first
def whitelists_regex
  Regexp.union(Config.whitelists.map { |pattern| Regexp.new(pattern) })
end

Instance Method Details

#match ⇒ Object



45
46
47
48
# File 'lib/plagiarism/strategries/engine.rb', line 45

# Searches for the whole content as one quoted phrase and, when the request
# succeeds, hands the response body to +retrieve_link+.
#
# @return the value of +retrieve_link+ when the fetch reports success;
#   otherwise the falsy value returned by +success?+
def match
  quoted_query = %("#{content}")
  result = self.class.fetch(quoted_query, params)
  result.success? && retrieve_link(result.response_body)
end


50
51
52
# File 'lib/plagiarism/strategries/engine.rb', line 50

# Finds the first URI yielded for +response+ whose host is not whitelisted.
#
# The whitelist regex is built once up front rather than once per yielded
# URI.
#
# @param response the search response; handed to +iterate+ with mode +:find+
# @return [String] the first non-whitelisted URI, as a string
# @return whatever +iterate+ returns when every yielded host is whitelisted
#   (presumably +nil+ — confirm against the subclass implementations)
def retrieve_link(response)
  whitelist = self.class.whitelists_regex
  self.class.iterate(response, :find) do |uri|
    # Return from the method itself so the caller gets the URI string rather
    # than whatever element +iterate+ would hand back for a truthy block.
    return uri.to_s unless whitelist.match?(uri.host)
  end
end

#unique? ⇒ Boolean

Returns:

  • (Boolean)


38
39
40
41
42
43
# File 'lib/plagiarism/strategries/engine.rb', line 38

# Compares the share of segments accepted by +valid_segments+ with the
# configured threshold.
#
# @return [Boolean] true when accepted segments divided by total segments is
#   at least +Config.threshold+, or +THRESHOLD+ when that setting is unset
def unique?
  minimum_ratio = Config.threshold || THRESHOLD
  segmenter = PragmaticSegmenter::Segmenter.new(text: content)
  accepted = self.class.valid_segments(segmenter, params)
  total = segmenter.segment.size
  # Float division: a zero total gives NaN, which compares as false below.
  (accepted.to_f / total) >= minimum_ratio
end