Class: TaggerJob::WordsCollection

Inherits:
Object
  • Object
show all
Defined in:
app/jobs/tagger_job.rb

Instance Method Summary collapse

Constructor Details

#initialize(text) ⇒ WordsCollection

Returns a new instance of WordsCollection.


5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'app/jobs/tagger_job.rb', line 5

# Builds the per-language stem indexes that are later searched for tags.
#
# The text is lower-cased once and then processed twice: with Cyrillic
# letters stripped (English pass) and with Latin letters stripped (Russian
# pass). For each pass the following instance variables are set:
#
#   @stemmer_xx      - the Lingua::Stemmer for that language
#   @text_xx         - lower-cased text with the other alphabet removed
#   @words_xx        - unique stems, used for single-word lookups
#   @stemmed_text_xx - every stem, in original order and with repeats,
#                      joined by single spaces, used for phrase lookups
#
# @param text [String] raw text to index
def initialize(text)
  lowered = text.mb_chars.downcase

  # Splits on runs of non-alphanumerics and stems every non-empty token.
  # Leading separators make split emit an empty first element; those are
  # dropped so "" never ends up among the indexed words.
  stem_tokens = lambda do |source, stemmer|
    source.split(/[^[:alnum:]]+/)
          .map(&:to_s)
          .reject(&:empty?)
          .map { |token| stemmer.stem(token) }
          .compact # kept from the previous implementation as a nil guard
  end

  @stemmer_en = Lingua::Stemmer.new(language: :en)
  # "ё" lies outside the а-я range, so it has to be listed explicitly.
  @text_en = lowered.remove(/[а-яё]+/)
  stems_en = stem_tokens.call(@text_en, @stemmer_en)
  @words_en = stems_en.uniq
  # Order and repeats are preserved here: de-duplicating before joining
  # would make words adjacent that never were, breaking phrase matching.
  @stemmed_text_en = stems_en.join(' ')

  @stemmer_ru = Lingua::Stemmer.new(language: :ru)
  @text_ru = lowered.remove(/[a-z]+/)
  stems_ru = stem_tokens.call(@text_ru, @stemmer_ru)
  @words_ru = stems_ru.uniq
  @stemmed_text_ru = stems_ru.join(' ')
end

Instance Method Details

#match_tag?(tag) ⇒ Boolean

Returns:

  • (Boolean)

21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'app/jobs/tagger_job.rb', line 21

# Reports whether the indexed text mentions +tag+ under its own name or any
# of its aliases.
#
# The Russian or English index is chosen via +tag.russian?+. Every name from
# +tag.self_and_aliases+ is tokenised on non-alphanumeric runs, lower-cased
# and stemmed, then looked up:
#
# * one token      - must be one of the indexed stems;
# * several tokens - must occur in the stemmed text as a run of whole,
#                    consecutive words;
# * no tokens      - (empty or punctuation-only name) never matches.
#
# @param tag [#russian?, #self_and_aliases] tag to look for
# @return [Boolean] true when at least one name matches, false otherwise
def match_tag?(tag)
  if tag.russian?
    stemmer = @stemmer_ru
    stemmed_text = @stemmed_text_ru
    words = @words_ru
  else
    stemmer = @stemmer_en
    stemmed_text = @stemmed_text_en
    words = @words_en
  end

  # Padding lets a phrase be matched on word boundaries with a plain
  # substring test, so "art work" is not found inside "start work".
  padded_text = " #{stemmed_text} "

  tag.self_and_aliases.any? do |name|
    # Lower-case BEFORE stemming: the indexed text was lower-cased before it
    # was stemmed, and the stemmer is not expected to fold case itself.
    stems = name.split(/[^[:alnum:]]+/).reject(&:empty?).map do |token|
      stemmer.stem(token.mb_chars.downcase.to_s)
    end

    case stems.size
    when 0 then false
    when 1 then words.include?(stems.first)
    else padded_text.include?(" #{stems.join(' ')} ")
    end
  end
end