Class: WordWatcher

Inherits:
Object
  • Object
show all
Defined in:
app/services/word_watcher.rb

Constant Summary collapse

# The "■" character, passed through CGI.unescape_html.
# NOTE(review): presumably the character substituted for censored letters;
# its use is not visible in this listing — confirm.
REPLACEMENT_LETTER =
CGI.unescape_html("■")
# Version number embedded in every key built by cache_key, so changing it
# makes the class use a different set of cache keys.
CACHE_VERSION =
3

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(raw) ⇒ WordWatcher

Returns a new instance of WordWatcher.



7
8
9
# File 'app/services/word_watcher.rb', line 7

# Stores the text that the instance-level matching methods test against.
#
# @param raw the text to inspect; kept unchanged in @raw
def initialize(raw)
  @raw = raw
end

Class Method Details

.apply_to_text(text) ⇒ Object



159
160
161
162
163
164
# File 'app/services/word_watcher.rb', line 159

# Applies every text transformation to +text+, in this order:
# censor_text, then replace_text, then replace_link.
#
# @param text the text to transform
# @return the result of the last transformation
def self.apply_to_text(text)
  replace_link(replace_text(censor_text(text)))
end

.cache_enabled? ⇒ Boolean

Returns:

  • (Boolean)


17
18
19
# File 'app/services/word_watcher.rb', line 17

# Reports the current value of the class-level @cache_enabled flag.
# The raw value is returned, so it is nil until something assigns the flag.
#
# @return the value of @cache_enabled
def self.cache_enabled?
  @cache_enabled
end

.cache_key(action) ⇒ Object



21
22
23
# File 'app/services/word_watcher.rb', line 21

# Builds the cache key under which the watched words of +action+ are stored.
# The key embeds CACHE_VERSION, so a version change yields a different key.
#
# @param action the watched-word action; interpolated with to_s
# @return [String] the cache key
def self.cache_key(action)
  "watched-words-list:v#{CACHE_VERSION}:#{action}"
end

.cached_words_for_action(action) ⇒ Object



46
47
48
49
50
51
52
53
54
# File 'app/services/word_watcher.rb', line 46

# Returns the watched words for +action+, going through Discourse.cache
# (one-day expiry) when caching is enabled and straight to words_for_action
# otherwise. An empty result is turned into nil via +presence+.
#
# @param action the watched-word action to look up
# @return the words hash from words_for_action, or nil when it is blank
def self.cached_words_for_action(action)
  return words_for_action(action).presence unless cache_enabled?

  Discourse.cache.fetch(cache_key(action), expires_in: 1.day) do
    words_for_action(action).presence
  end
end

.censor(html) ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'app/services/word_watcher.rb', line 126

# Censors watched words inside the text nodes of an HTML fragment.
#
# The fragment is parsed with Nokogiri::HTML5, every text node is run through
# censor_text_with_regexp once per compiled :censor regexp, and the document
# is serialized back to a string. When there are no :censor regexps the input
# is returned untouched (not re-serialized).
#
# @param html the HTML fragment to censor
# @return the censored HTML string, or +html+ itself when nothing applies
def self.censor(html)
  regexps = compiled_regexps_for_action(:censor)
  return html if regexps.blank?

  fragment = Nokogiri::HTML5.fragment(html)
  fragment.traverse do |node|
    # Only text nodes are rewritten; other nodes keep their content as is.
    next unless node.text?

    regexps.each { |regexp| node.content = censor_text_with_regexp(node.content, regexp) }
  end

  fragment.to_s
end

.censor_text(text) ⇒ Object



140
141
142
143
144
145
146
147
# File 'app/services/word_watcher.rb', line 140

# Censors watched words in plain text by applying censor_text_with_regexp
# once per compiled :censor regexp, each pass working on the previous output.
#
# @param text the text to censor
# @return +text+ itself when it is blank or no :censor regexps exist,
#   otherwise the censored text
def self.censor_text(text)
  return text if text.blank?

  regexps = compiled_regexps_for_action(:censor)
  return text if regexps.blank?

  censored = text
  regexps.each { |regexp| censored = censor_text_with_regexp(censored, regexp) }
  censored
end

.clear_cache! ⇒ Object



25
26
27
# File 'app/services/word_watcher.rb', line 25

# Deletes the cached word list of every action listed in WatchedWord.actions
# from Discourse.cache.
#
# @return whatever WatchedWord.actions.each returns
def self.clear_cache!
  WatchedWord.actions.each do |action, _value|
    Discourse.cache.delete(cache_key(action))
  end
end

.compiled_regexps_for_action(action, engine: :ruby, raise_errors: false) ⇒ Object

This regexp is run in miniracer and in the client JS app. Make sure it stays compatible with major browsers when changing it. Hint: non-Chrome browsers do not support ‘lookbehind’.



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'app/services/word_watcher.rb', line 65

# Compiles the watched words of +action+ into regular expressions, one per
# case-sensitivity group (case-sensitive words and case-insensitive words
# are combined separately).
#
# Words that do not compile on their own are dropped, or re-raised when
# +raise_errors+ is true. A group in which no word survives is skipped:
# compiling the empty pattern would produce a regexp that matches every
# input.
#
# @param action the watched-word action to compile
# @param engine forwarded to match_word_regexp when word boundaries are added
# @param raise_errors [Boolean] re-raise RegexpError for an invalid word
#   instead of silently dropping it
# @return [Array<Regexp>] possibly empty list of compiled regexps
# @raise [RegexpError] only when +raise_errors+ is true and a word is invalid
def self.compiled_regexps_for_action(action, engine: :ruby, raise_errors: false)
  words_by_key = cached_words_for_action(action)
  return [] if words_by_key.blank?

  regexp_mode = SiteSetting.watched_words_regular_expressions?

  words_by_key
    .values
    .group_by { |attrs| attrs[:case_sensitive] ? :case_sensitive : :case_insensitive }
    .map do |group_key, attrs_list|
      # Compile all watched words of the group into a single alternation
      regexp =
        attrs_list
          .map do |attrs|
            r = word_to_regexp(attrs[:word], match_word: regexp_mode)
            begin
              # Regexp.new is only used as a validity check here
              r if Regexp.new(r)
            rescue RegexpError
              raise if raise_errors
            end
          end
          .select { |r| r.present? }
          .join("|")

      # Nothing usable in this group: an empty pattern would match anything
      next if regexp.blank?

      # Add word boundaries to the regexp for regular watched words
      regexp = match_word_regexp(regexp, engine: engine) unless regexp_mode

      # Add case insensitive flag if needed
      Regexp.new(regexp, group_key == :case_sensitive ? nil : Regexp::IGNORECASE)
    end
    .compact
end

.disable_cache ⇒ Object



13
14
15
# File 'app/services/word_watcher.rb', line 13

# Turns the class-level cache flag off by setting @cache_enabled to false.
#
# @return [false] the assigned value
def self.disable_cache
  @cache_enabled = false
end

.regexps_for_action(action, engine: :ruby) ⇒ Object



56
57
58
59
60
# File 'app/services/word_watcher.rb', line 56

# Maps each watched word of +action+ to its attributes, keyed by the regexp
# source that word_to_regexp builds for the word.
#
# @param action the watched-word action to look up
# @param engine forwarded to word_to_regexp
# @return [Hash, nil] regexp source => attributes, or nil when
#   cached_words_for_action returns nil
def self.regexps_for_action(action, engine: :ruby)
  words = cached_words_for_action(action)
  return if words.nil?

  words.to_h { |_, attrs| [word_to_regexp(attrs[:word], engine: engine), attrs] }
end


154
155
156
157
# File 'app/services/word_watcher.rb', line 154

# Applies the :link watched-word replacements to +text+ via +replace+.
#
# @param text the text to transform
# @return +text+ itself when it is blank, otherwise the result of +replace+
def self.replace_link(text)
  text.blank? ? text : replace(text, :link)
end

.replace_text(text) ⇒ Object



149
150
151
152
# File 'app/services/word_watcher.rb', line 149

# Applies the :replace watched-word replacements to +text+ via +replace+.
#
# @param text the text to transform
# @return +text+ itself when it is blank, otherwise the result of +replace+
def self.replace_text(text)
  text.blank? ? text : replace(text, :replace)
end

.serialized_regexps_for_action(action, engine: :ruby) ⇒ Object



101
102
103
104
105
# File 'app/services/word_watcher.rb', line 101

# Serializes the compiled regexps of +action+ into plain hashes, one per
# regexp: the regexp source mapped to its case-sensitivity flag.
#
# @param action the watched-word action to serialize
# @param engine forwarded to compiled_regexps_for_action
# @return [Array<Hash>] entries shaped { source => { case_sensitive: Boolean } }
def self.serialized_regexps_for_action(action, engine: :ruby)
  regexps = compiled_regexps_for_action(action, engine: engine)
  regexps.map { |regexp| { regexp.source => { case_sensitive: !regexp.casefold? } } }
end

.word_to_regexp(word, engine: :ruby, match_word: true) ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'app/services/word_watcher.rb', line 107

# Converts a watched word into a regular-expression source string.
#
# When the site stores watched words as regular expressions, the word is used
# verbatim, wrapped in a capture group if +match_word+ is set. Otherwise the
# word is escaped, "*" wildcards become "\S*", and match_word_regexp is
# applied if +match_word+ is set.
#
# @param word [String] the watched word
# @param engine forwarded to match_word_regexp (unused in regexp mode)
# @param match_word [Boolean] whether to wrap the result for word matching
# @return [String] the regexp source
def self.word_to_regexp(word, engine: :ruby, match_word: true)
  if SiteSetting.watched_words_regular_expressions?
    return match_word ? "(#{word})" : word
  end

  # Escape regexp metacharacters by hand: Regexp.escape would also escape
  # characters that must stay literal here (whitespace, dashes, etc).
  escaped = word.gsub(/([.*+?^${}()|\[\]\\])/, '\\\\\1')

  # An escaped "*" is the wildcard: let it match any run of non-whitespace
  pattern = escaped.gsub("\\*", '\S*')

  match_word ? match_word_regexp(pattern, engine: engine) : pattern
end

.words_for_action(action) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
# File 'app/services/word_watcher.rb', line 29

# Loads the watched words of +action+ from the database, ordered by id and
# capped at WatchedWord::MAX_WORDS_PER_ACTION rows.
#
# @param action the watched-word action; converted with to_sym
# @return [Hash] regexp source (built without word matching) => attributes.
#   Attributes hold :word plus :replacement and :case_sensitive when those
#   columns are not nil, and html: true when the html column is truthy.
def self.words_for_action(action)
  rows =
    WatchedWord
      .where(action: WatchedWord.actions[action.to_sym])
      .limit(WatchedWord::MAX_WORDS_PER_ACTION)
      .order(:id)
      .pluck(:word, :replacement, :case_sensitive, :html)

  rows.to_h do |word, replacement, case_sensitive, html|
    # compact drops the nil columns so they do not appear in the attributes
    attrs = { word: word, replacement: replacement, case_sensitive: case_sensitive }.compact
    attrs[:html] = true if html
    [word_to_regexp(word, match_word: false), attrs]
  end
end

.words_for_action_exist?(action) ⇒ Boolean

Returns:

  • (Boolean)


42
43
44
# File 'app/services/word_watcher.rb', line 42

# Tells whether at least one watched word is stored for +action+.
#
# @param action the watched-word action; converted with to_sym
# @return [Boolean] result of the exists? query
def self.words_for_action_exist?(action)
  action_id = WatchedWord.actions[action.to_sym]
  WatchedWord.where(action: action_id).exists?
end

Instance Method Details

#requires_approval? ⇒ Boolean

Returns:

  • (Boolean)


166
167
168
# File 'app/services/word_watcher.rb', line 166

# Checks the wrapped text against the :require_approval watched words.
#
# @return the first match (truthy) or nil, as given by word_matches_for_action?
def requires_approval?
  word_matches_for_action?(:require_approval)
end

#should_block? ⇒ Boolean

Returns:

  • (Boolean)


174
175
176
# File 'app/services/word_watcher.rb', line 174

# Checks the wrapped text against the :block watched words, collecting every
# match rather than stopping at the first one.
#
# @return the list of matches (truthy) or nil, as given by
#   word_matches_for_action? with all_matches: true
def should_block?
  word_matches_for_action?(:block, all_matches: true)
end

#should_flag? ⇒ Boolean

Returns:

  • (Boolean)


170
171
172
# File 'app/services/word_watcher.rb', line 170

# Checks the wrapped text against the :flag watched words.
#
# @return the first match (truthy) or nil, as given by word_matches_for_action?
def should_flag?
  word_matches_for_action?(:flag)
end

#should_silence? ⇒ Boolean

Returns:

  • (Boolean)


178
179
180
# File 'app/services/word_watcher.rb', line 178

# Checks the wrapped text against the :silence watched words.
#
# @return the first match (truthy) or nil, as given by word_matches_for_action?
def should_silence?
  word_matches_for_action?(:silence)
end

#word_matches?(word, case_sensitive: false) ⇒ Boolean

Returns:

  • (Boolean)


226
227
228
229
# File 'app/services/word_watcher.rb', line 226

# Tests whether a single watched word occurs in the wrapped text.
#
# @param word the watched word, converted with WordWatcher.word_to_regexp
# @param case_sensitive [Boolean] match case-sensitively when true;
#   the default is a case-insensitive match
# @return [Boolean] whether the word's regexp matches @raw
def word_matches?(word, case_sensitive: false)
  flags = case_sensitive ? nil : Regexp::IGNORECASE
  pattern = WordWatcher.word_to_regexp(word)
  Regexp.new(pattern, flags).match?(@raw)
end

#word_matches_for_action?(action, all_matches: false) ⇒ Boolean

Returns:

  • (Boolean)


182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
# File 'app/services/word_watcher.rb', line 182

# Matches the wrapped text against the compiled regexps of +action+.
#
# With +all_matches+ false, the MatchData of the first regexp that matches is
# returned. With +all_matches+ true, every matched string is collected across
# all regexps and returned as a sorted list without duplicates or nils.
#
# In both modes nil is returned when nothing matched. In particular, matches
# that only produced nil entries (for instance a regular expression that
# matched the empty string) count as "no match" rather than yielding an
# empty, truthy array.
#
# @param action the watched-word action to check
# @param all_matches [Boolean] collect every match instead of the first one
# @return [MatchData, Array<String>, nil]
def word_matches_for_action?(action, all_matches: false)
  regexps = self.class.compiled_regexps_for_action(action)
  return if regexps.blank?

  unless all_matches
    regexps.each do |regexp|
      match = regexp.match(@raw)
      return match if match
    end
    return
  end

  match_list =
    regexps.flat_map do |regexp|
      # match? also guards against a nil @raw before calling scan on it
      next [] unless regexp.match?(@raw)

      scanned = @raw.scan(regexp)
      if SiteSetting.watched_words_regular_expressions?
        # With capture groups, scan yields one array per match: keep the
        # first non-blank capture. Without groups it yields plain strings.
        scanned.map { |m| m.is_a?(Array) ? m.find(&:present?) : m }
      else
        scanned.flatten
      end
    end

  # Drop nils before deciding whether anything matched at all
  match_list.compact!
  return if match_list.empty?

  match_list.uniq!
  match_list.sort!
  match_list
end