Class: WordWatcher

Inherits:
Object
  • Object
show all
Defined in:
app/services/word_watcher.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(raw) ⇒ WordWatcher

Returns a new instance of WordWatcher.



7
8
9
# File 'app/services/word_watcher.rb', line 7

# Builds a watcher around a piece of raw text.
#
# @param raw the text that the instance-level matching methods test
#   against; stored as-is in @raw (presumably a String — confirm with callers)
def initialize(raw)
  @raw = raw
end

Class Method Details

.apply_to_text(text) ⇒ Object



152
153
154
155
156
157
# File 'app/services/word_watcher.rb', line 152

# Runs the three text transformations over +text+ in a fixed order:
# censor first, then word replacement, then link replacement.
#
# @param text the text to transform
# @return the result of the last transformation (replace_link)
def self.apply_to_text(text)
  # Innermost call runs first, so the order is censor -> replace -> link.
  replace_link(replace_text(censor_text(text)))
end

.cache_enabled? ⇒ Boolean

Returns:

  • (Boolean)


17
18
19
# File 'app/services/word_watcher.rb', line 17

# Reports the class-level cache flag.
#
# @return the current value of the class instance variable @cache_enabled
#   (its initial value is assigned outside this listing)
def self.cache_enabled?
  @cache_enabled
end

.censor(html) ⇒ Object



119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'app/services/word_watcher.rb', line 119

# Censors watched words inside the text nodes of an HTML string.
#
# The input is parsed as an HTML5 fragment; only text nodes are rewritten,
# so tag names and attributes are left untouched. The per-regexp rewriting
# is delegated to censor_text_with_regexp (defined elsewhere in the class).
#
# @param html the HTML to censor
# @return the original +html+ when there are no :censor regexps, otherwise
#   the serialized fragment after censoring
def self.censor(html)
  patterns = word_matcher_regexp_list(:censor)
  return html if patterns.blank?

  fragment = Nokogiri::HTML5.fragment(html)
  fragment.traverse do |node|
    # Element, comment, etc. nodes are skipped entirely.
    next unless node.text?

    patterns.each do |pattern|
      node.content = censor_text_with_regexp(node.content, pattern)
    end
  end

  fragment.to_s
end

.censor_text(text) ⇒ Object



133
134
135
136
137
138
139
140
# File 'app/services/word_watcher.rb', line 133

# Censors watched words in a plain-text string.
#
# Each :censor regexp is applied in turn through censor_text_with_regexp
# (defined elsewhere in the class), feeding the output of one pass into
# the next.
#
# @param text the text to censor
# @return +text+ unchanged when it is blank or there are no :censor
#   regexps, otherwise the censored text
def self.censor_text(text)
  return text if text.blank?

  patterns = word_matcher_regexp_list(:censor)
  return text if patterns.blank?

  censored = text
  patterns.each { |pattern| censored = censor_text_with_regexp(censored, pattern) }
  censored
end

.clear_cache! ⇒ Object



159
160
161
# File 'app/services/word_watcher.rb', line 159

# Deletes the cached word list of every watched-word action.
#
# One cache entry is removed per action, using the key built by
# word_matcher_regexp_key.
#
# @return whatever WatchedWord.actions.each returns
def self.clear_cache!
  WatchedWord.actions.each do |action, _value|
    Discourse.cache.delete(word_matcher_regexp_key(action))
  end
end

.disable_cache ⇒ Object



13
14
15
# File 'app/services/word_watcher.rb', line 13

# Turns the class-level cache flag off, so that cache_enabled? returns false
# and get_cached_words reads the words directly via words_for_action.
#
# @return [false]
def self.disable_cache
  @cache_enabled = false
end

.get_cached_words(action) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
# File 'app/services/word_watcher.rb', line 36

# Returns the watched words for +action+, going through Discourse.cache
# when caching is enabled.
#
# @param action the watched-word action to look up
# @return the result of words_for_action(action), or nil when that result
#   is blank (because of +presence+); cached entries expire after one day
def self.get_cached_words(action)
  # Cache disabled: always compute the words fresh.
  return words_for_action(action).presence unless cache_enabled?

  cache_key = word_matcher_regexp_key(action)
  Discourse.cache.fetch(cache_key, expires_in: 1.day) { words_for_action(action).presence }
end


147
148
149
150
# File 'app/services/word_watcher.rb', line 147

# Applies the :link watched-word replacements to +text+.
#
# The actual substitution is done by +replace+ (defined elsewhere in the
# class).
#
# @param text the text to process
# @return +text+ unchanged when it is blank, otherwise replace(text, :link)
def self.replace_link(text)
  text.blank? ? text : replace(text, :link)
end

.replace_text(text) ⇒ Object



142
143
144
145
# File 'app/services/word_watcher.rb', line 142

# Applies the :replace watched-word replacements to +text+.
#
# The actual substitution is done by +replace+ (defined elsewhere in the
# class).
#
# @param text the text to process
# @return +text+ unchanged when it is blank, otherwise replace(text, :replace)
def self.replace_text(text)
  text.blank? ? text : replace(text, :replace)
end

.serializable_word_matcher_regexp(action, engine: :ruby) ⇒ Object



48
49
50
51
52
# File 'app/services/word_watcher.rb', line 48

# Converts the compiled regexps of +action+ into plain hashes.
#
# @param action the watched-word action
# @param engine forwarded to word_matcher_regexp_list
# @return [Array<Hash>] one single-entry hash per regexp, shaped as
#   { source_string => { case_sensitive: true/false } }
def self.serializable_word_matcher_regexp(action, engine: :ruby)
  compiled = word_matcher_regexp_list(action, engine: engine)

  # A regexp is case sensitive exactly when IGNORECASE (casefold) is not set.
  compiled.map { |regexp| { regexp.source => { case_sensitive: !regexp.casefold? } } }
end

.word_matcher_regexp_key(action) ⇒ Object



115
116
117
# File 'app/services/word_watcher.rb', line 115

# Builds the cache key under which the word list of +action+ is stored.
#
# The key embeds CACHE_VERSION (defined elsewhere in the class), so a
# different version value yields a different key.
#
# @param action the watched-word action; interpolated into the key
# @return [String]
def self.word_matcher_regexp_key(action)
  "watched-words-list:v#{CACHE_VERSION}:#{action}"
end

.word_matcher_regexp_list(action, engine: :ruby, raise_errors: false) ⇒ Object

This regexp is run in miniracer and in the client JS app. Make sure it is compatible with major browsers when changing it. Hint: non-Chrome browsers do not support ‘lookbehind’.



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'app/services/word_watcher.rb', line 57

# Compiles the watched words of +action+ into at most two regexps: one for
# the case-sensitive words and one (IGNORECASE) for the rest. Words within
# a group are joined with "|".
#
# When the watched_words_regular_expressions site setting is off, each
# joined alternation is additionally wrapped with wrap_regexp for the
# requested engine.
#
# @param action the watched-word action
# @param engine regexp flavour passed to wrap_regexp
# @param raise_errors when true, a RegexpError is re-raised instead of
#   being swallowed
# @return [Array<Regexp>] case-sensitive regexp first (when present), then
#   the case-insensitive one; [] when there are no words or when a regexp
#   fails to compile and +raise_errors+ is false
def self.word_matcher_regexp_list(action, engine: :ruby, raise_errors: false)
  words = get_cached_words(action)
  return [] if words.blank?

  regexp_mode = SiteSetting.watched_words_regular_expressions?

  sensitive, insensitive = words.partition { |_, attrs| attrs[:case_sensitive] }

  { case_sensitive: sensitive, case_insensitive: insensitive }
    .reject { |_, entries| entries.empty? }
    .map do |sensitivity, entries|
      source = entries.map { |word, _| word_to_regexp(word, whole: regexp_mode) }.join("|")
      # In plain-word mode the whole alternation gets the boundary wrapper.
      source = wrap_regexp(source, engine: engine) unless regexp_mode

      Regexp.new(source, sensitivity == :case_sensitive ? nil : Regexp::IGNORECASE)
    end
rescue RegexpError
  raise if raise_errors
  [] # Admin will be alerted via admin_dashboard_data.rb
end

.word_matcher_regexps(action, engine: :ruby) ⇒ Object



81
82
83
# File 'app/services/word_watcher.rb', line 81

# Maps every watched word of +action+ to its regexp source string.
#
# @param action the watched-word action
# @param engine forwarded to word_to_regexp
# @return [Hash, nil] { regexp_source => attrs } for each cached word, or
#   nil when get_cached_words returns nil
def self.word_matcher_regexps(action, engine: :ruby)
  words = get_cached_words(action)
  return if words.nil?

  words.each_with_object({}) do |(word, attrs), by_regexp|
    by_regexp[word_to_regexp(word, engine: engine)] = attrs
  end
end

.word_to_regexp(word, engine: :ruby, whole: true) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'app/services/word_watcher.rb', line 85

# Turns a watched word into a regexp source string.
#
# Two modes, chosen by the watched_words_regular_expressions site setting:
# * plain-word mode (setting off): regexp metacharacters are escaped, "*"
#   acts as a wildcard for non-whitespace (\S*), and with +whole+ the result
#   is wrapped by wrap_regexp for the given engine;
# * regexp mode (setting on): the word is used as a regexp as-is, minus a
#   leading "(?-mix:" / trailing character if present, and with +whole+ it
#   is wrapped in a capture group.
#
# @param word [String] the watched word
# @param engine regexp flavour passed to wrap_regexp (plain-word mode only)
# @param whole whether to wrap the result as described above
# @return [String] regexp source
def self.word_to_regexp(word, engine: :ruby, whole: true)
  unless SiteSetting.watched_words_regular_expressions?
    # Escape regular expression. Regexp.escape is avoided because it escapes
    # more characters than it should (for example, whitespaces). The second
    # gsub turns the now-escaped "*" into the wildcard.
    escaped = word.gsub(/([.*+?^${}()|\[\]\\])/, '\\\\\1').gsub("\\*", '\S*')

    return whole ? wrap_regexp(escaped, engine: engine) : escaped
  end

  # Strip Ruby regexp format if present
  source = word.start_with?("(?-mix:") ? word[7..-2] : word

  whole ? "(#{source})" : source
end

.words_for_action(action) ⇒ Object



21
22
23
24
25
26
27
28
29
30
# File 'app/services/word_watcher.rb', line 21

# Loads the watched words of +action+ from the database.
#
# At most WatchedWord::MAX_WORDS_PER_ACTION rows are read, ordered by id.
#
# @param action the watched-word action (converted with to_sym for lookup)
# @return [Hash] { word => attrs } where attrs holds :word (the unwrapped
#   regexp source from word_to_regexp), :replacement and :case_sensitive;
#   nil-valued entries are dropped by +compact+
def self.words_for_action(action)
  rows =
    WatchedWord
      .where(action: WatchedWord.actions[action.to_sym])
      .limit(WatchedWord::MAX_WORDS_PER_ACTION)
      .order(:id)
      .pluck(:word, :replacement, :case_sensitive)

  rows.to_h do |word, replacement, case_sensitive|
    attrs = {
      word: word_to_regexp(word, whole: false),
      replacement: replacement,
      case_sensitive: case_sensitive,
    }

    [word, attrs.compact]
  end
end

.words_for_action_exists?(action) ⇒ Boolean

Returns:

  • (Boolean)


32
33
34
# File 'app/services/word_watcher.rb', line 32

# Tells whether at least one watched word is stored for +action+.
#
# @param action the watched-word action (converted with to_sym for lookup)
# @return [Boolean]
def self.words_for_action_exists?(action)
  action_id = WatchedWord.actions[action.to_sym]
  WatchedWord.where(action: action_id).exists?
end

.wrap_regexp(regexp, engine: :ruby) ⇒ Object



105
106
107
108
109
110
111
112
113
# File 'app/services/word_watcher.rb', line 105

# Surrounds a regexp source with boundary checks so that it only matches
# whole words: the match must be preceded by a boundary character or the
# start of a line, and followed (lookahead) by a boundary character or the
# end of a line. The inner pattern becomes a capture group.
#
# The boundary character class depends on the engine:
# * :js    - \P{L}
# * :ruby  - [^[:word:]]
# * other  - \W
#
# @param regexp [String] regexp source to wrap
# @param engine [Symbol] target regexp flavour
# @return [String] the wrapped regexp source
def self.wrap_regexp(regexp, engine: :ruby)
  case engine
  when :js
    "(?:\\P{L}|^)(#{regexp})(?=\\P{L}|$)"
  when :ruby
    "(?:[^[:word:]]|^)(#{regexp})(?=[^[:word:]]|$)"
  else
    "(?:\\W|^)(#{regexp})(?=\\W|$)"
  end
end

Instance Method Details

#requires_approval? ⇒ Boolean

Returns:

  • (Boolean)


163
164
165
# File 'app/services/word_watcher.rb', line 163

# Checks the raw text against the :require_approval watched words.
#
# @return the result of word_matches_for_action?(:require_approval); with
#   the default all_matches: false that is the first match or nil, so treat
#   it as truthy/falsy rather than a strict boolean
def requires_approval?
  word_matches_for_action?(:require_approval)
end

#should_block? ⇒ Boolean

Returns:

  • (Boolean)


171
172
173
# File 'app/services/word_watcher.rb', line 171

# Checks the raw text against the :block watched words, collecting every
# match rather than stopping at the first one.
#
# @return the result of word_matches_for_action?(:block, all_matches: true):
#   a list of matched strings, or nil when nothing matched; treat it as
#   truthy/falsy rather than a strict boolean
def should_block?
  word_matches_for_action?(:block, all_matches: true)
end

#should_flag? ⇒ Boolean

Returns:

  • (Boolean)


167
168
169
# File 'app/services/word_watcher.rb', line 167

# Checks the raw text against the :flag watched words.
#
# @return the result of word_matches_for_action?(:flag); with the default
#   all_matches: false that is the first match or nil, so treat it as
#   truthy/falsy rather than a strict boolean
def should_flag?
  word_matches_for_action?(:flag)
end

#should_silence? ⇒ Boolean

Returns:

  • (Boolean)


175
176
177
# File 'app/services/word_watcher.rb', line 175

# Checks the raw text against the :silence watched words.
#
# @return the result of word_matches_for_action?(:silence); with the default
#   all_matches: false that is the first match or nil, so treat it as
#   truthy/falsy rather than a strict boolean
def should_silence?
  word_matches_for_action?(:silence)
end

#word_matches?(word, case_sensitive: false) ⇒ Boolean

Returns:

  • (Boolean)


223
224
225
226
# File 'app/services/word_watcher.rb', line 223

# Tests whether a single watched word occurs in the raw text.
#
# The word is converted with WordWatcher.word_to_regexp using its default
# options and compiled into a Regexp on every call.
#
# @param word the watched word to look for
# @param case_sensitive when false (the default) matching ignores case
# @return [Boolean]
def word_matches?(word, case_sensitive: false)
  # flags stays nil (no options) for a case-sensitive match.
  flags = Regexp::IGNORECASE unless case_sensitive
  pattern = Regexp.new(WordWatcher.word_to_regexp(word), flags)

  pattern.match?(@raw)
end

#word_matches_for_action?(action, all_matches: false) ⇒ Boolean

Returns:

  • (Boolean)


179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'app/services/word_watcher.rb', line 179

# Matches the raw text against the compiled regexps of +action+.
#
# @param action the watched-word action
# @param all_matches when false, stop at the first regexp that matches;
#   when true, collect every matched string from every regexp
# @return [MatchData, Array<String>, nil]
#   * nil when the action has no regexps or nothing matched
#   * with all_matches: false, the MatchData of the first matching regexp
#   * with all_matches: true, the matched strings with nils removed,
#     de-duplicated and sorted
def word_matches_for_action?(action, all_matches: false)
  regexps = self.class.word_matcher_regexp_list(action)
  return if regexps.blank?

  # First-match mode: return as soon as any regexp hits.
  unless all_matches
    regexps.each do |regexp|
      first_hit = regexp.match(@raw)
      return first_hit if first_hit
    end

    return
  end

  found =
    regexps.flat_map do |regexp|
      next [] unless regexp.match(@raw)

      hits = @raw.scan(regexp)

      if SiteSetting.watched_words_regular_expressions?
        # In regexp mode a scan hit may be an array of capture groups; only
        # the first non-blank group of each hit is kept, without duplicates.
        unique_hits =
          hits.each_with_object(Set.new) do |hit, seen|
            case hit
            when Array
              seen.add(hit.find(&:present?))
            when String
              seen.add(hit)
            end
          end

        unique_hits.to_a
      else
        hits.flatten
      end
    end

  return if found.blank?

  found.compact!
  found.uniq!
  found.sort!
  found
end