Class: Plagiarism::Strategies::Engine
- Inherits:
-
Object
- Object
- Plagiarism::Strategies::Engine
show all
- Defined in:
- lib/plagiarism/strategries/engine.rb
Constant Summary
collapse
- THRESHOLD =
0.8
Instance Attribute Summary collapse
Class Method Summary
collapse
Instance Method Summary
collapse
Constructor Details
#initialize(c, p) ⇒ Engine
Returns a new instance of Engine.
34
35
36
|
# File 'lib/plagiarism/strategries/engine.rb', line 34
# Stores the two constructor arguments on the instance.
#
# @param c [Object] kept in @content; other methods interpolate it into the
#   search query and hand it to the segmenter as text
# @param p [Object] kept in @params; forwarded untouched to the class-level fetch
def initialize(c, p)
  @content = c
  @params = p
end
|
Instance Attribute Details
#content ⇒ Object
Returns the value of attribute content.
6
7
8
|
# File 'lib/plagiarism/strategries/engine.rb', line 6
# Reader for the @content instance variable.
#
# @return [Object] the current value of @content (nil when it was never set)
def content
  instance_variable_get(:@content)
end
|
#params ⇒ Object
Returns the value of attribute params.
6
7
8
|
# File 'lib/plagiarism/strategries/engine.rb', line 6
# Reader for the @params instance variable.
#
# @return [Object] the current value of @params (nil when it was never set)
def params
  instance_variable_get(:@params)
end
|
Class Method Details
.exists?(response) ⇒ Boolean
17
18
19
|
# File 'lib/plagiarism/strategries/engine.rb', line 17
# Checks the URIs found in a response against the whitelist.
#
# Traversal is delegated to +iterate+ using its default mode (+:all?+); the
# block given to it is truthy when the yielded URI's host matches
# +whitelists_regex+.
#
# @param response [Object] response body, passed unchanged to +iterate+
# @return [Object] whatever +iterate+ returns (documented as Boolean)
def exists?(response)
  # The whitelist pattern does not depend on the yielded URI, so build it once
  # per call instead of once per URI.
  whitelist = whitelists_regex
  iterate(response) { |uri| uri.host =~ whitelist }
end
|
.fetch(content, params) ⇒ Object
9
10
11
|
# File 'lib/plagiarism/strategries/engine.rb', line 9
# Placeholder for the search request; this implementation always raises.
#
# Callers in this class invoke +success?+ and +response_body+ on the value a
# working implementation returns.
#
# @param content [String] the (already quoted) text to search for
# @param params [Object] request options supplied by the caller
# @raise [RuntimeError] always
def fetch(content, params)
  # A bare `raise` re-raises $! when invoked while another exception is being
  # handled, and otherwise yields a message-less "unhandled exception".
  # Raising with a message always produces a fresh RuntimeError. The class is
  # kept as RuntimeError (not NotImplementedError, which is not a
  # StandardError) so existing `rescue` clauses keep working.
  raise "#{self}.fetch is not implemented"
end
|
.iterate(r, a = :all?) ⇒ Object
13
14
15
|
# File 'lib/plagiarism/strategries/engine.rb', line 13
# Placeholder for walking the URIs contained in a response; this
# implementation always raises.
#
# Callers in this class pass a block that receives one object per result and
# call +host+ and +to_s+ on it.
#
# @param r [Object] response body to walk
# @param a [Symbol] traversal mode; defaults to +:all?+, and +:find+ is also
#   used by callers in this class
# @raise [RuntimeError] always
def iterate(r, a = :all?)
  # A bare `raise` re-raises $! when invoked while another exception is being
  # handled, and otherwise yields a message-less "unhandled exception".
  # Raising with a message always produces a fresh RuntimeError. The class is
  # kept as RuntimeError (not NotImplementedError, which is not a
  # StandardError) so existing `rescue` clauses keep working.
  raise "#{self}.iterate is not implemented"
end
|
.valid_segments(ps, params) ⇒ Object
21
22
23
24
25
26
|
# File 'lib/plagiarism/strategries/engine.rb', line 21
# Counts the segments that pass the whitelist check.
#
# Each element of +ps.segment+ is wrapped in double quotes and sent through
# +fetch+. A segment is counted when the request reports success and
# +exists?+ is truthy for its response body.
#
# @param ps [#segment] object whose +segment+ method returns the sentences
# @param params [Object] forwarded unchanged to +fetch+
# @return [Integer] number of segments that satisfied both conditions
def valid_segments(ps, params)
  ps.segment.count do |sentence|
    reply = fetch("\"#{sentence}\"", params)
    # A failed request never counts, and its body is never inspected.
    next false unless reply.success?

    exists?(reply.response_body)
  end
end
|
.whitelists_regex ⇒ Object
28
29
30
31
|
# File 'lib/plagiarism/strategries/engine.rb', line 28
# Builds one pattern that matches any configured whitelist entry.
#
# Every entry of +Config.whitelists+ is compiled with +Regexp.new+ (so entries
# are treated as regex source, not literal text) and the results are combined
# with +Regexp.union+.
#
# @return [Regexp] alternation of all whitelist patterns
def whitelists_regex
  Regexp.union(Config.whitelists.map { |source| Regexp.new(source) })
end
|
Instance Method Details
#match ⇒ Object
45
46
47
48
|
# File 'lib/plagiarism/strategries/engine.rb', line 45
# Searches for the whole content as a quoted phrase and looks for a link.
#
# @return [Object] the falsy value of +success?+ when the request did not
#   succeed, otherwise the result of +retrieve_link+ for the response body
def match
  reply = self.class.fetch("\"#{content}\"", params)
  succeeded = reply.success?
  # Hand back the falsy success? value itself so the return value is unchanged.
  succeeded ? retrieve_link(reply.response_body) : succeeded
end
|
#retrieve_link(response) ⇒ Object
50
51
52
|
# File 'lib/plagiarism/strategries/engine.rb', line 50
# Returns the first URI in the response whose host is not whitelisted.
#
# Traversal is delegated to the class-level +iterate+ in +:find+ mode. The
# method returns from inside the block as soon as a host (including a nil
# host) fails to match the whitelist.
#
# @param response [Object] response body, passed unchanged to +iterate+
# @return [String, Object] +uri.to_s+ of the first non-whitelisted URI;
#   otherwise whatever +iterate+ returns when the block was never truthy
def retrieve_link(response)
  # The whitelist pattern does not depend on the yielded URI, so build it once
  # per call instead of once per URI.
  whitelist = self.class.whitelists_regex
  self.class.iterate(response, :find) do |uri|
    return uri.to_s unless uri.host =~ whitelist
  end
end
|
#unique? ⇒ Boolean
38
39
40
41
42
43
|
# File 'lib/plagiarism/strategries/engine.rb', line 38
# Decides whether enough of the content passes the per-segment check.
#
# The content is split with PragmaticSegmenter, the class-level
# +valid_segments+ counts the passing segments, and the passing fraction is
# compared with +Config.threshold+ (falling back to +THRESHOLD+ when that is
# nil or false).
#
# NOTE: with zero segments and a zero count the fraction is 0.0 / 0 (NaN), and
# the comparison is false.
#
# @return [Boolean] true when the passing fraction is at least the threshold
def unique?
  minimum_ratio = Config.threshold || THRESHOLD
  segmenter = PragmaticSegmenter::Segmenter.new(text: content)
  passing = self.class.valid_segments(segmenter, params)
  total = segmenter.segment.size
  passing.to_f / total >= minimum_ratio
end
|