Module: Sanitizer

Defined in:
lib/sanitizer/version.rb,
lib/sanitizer/sanitizer.rb

Constant Summary collapse

VERSION =
"0.2.1"
@@htmle =

HTMLEntities

HTMLEntities.new

Class Method Summary collapse

Class Method Details

.clean_spaces(text) ⇒ Object



28
29
30
# File 'lib/sanitizer/sanitizer.rb', line 28

# Non-destructive counterpart of clean_spaces!.
#
# Duplicates +text+ and hands the copy to clean_spaces!, so the caller's
# string is left untouched.
#
# @param text [String] the string to normalise
# @return [Object] whatever clean_spaces! returns for the copy
def clean_spaces(text)
  copy = text.dup
  clean_spaces!(copy)
end

.clean_spaces!(text) ⇒ Object

Clean redundant spaces



22
23
24
25
26
# File 'lib/sanitizer/sanitizer.rb', line 22

# Collapses every run of whitespace in +text+ into a single space and trims
# leading/trailing whitespace, modifying +text+ in place.
#
# @param text [String] mutable string to normalise
# @return [String] the same +text+ object, after normalisation
def clean_spaces!(text)
  normalised = text.gsub(/\s+/, " ").strip
  # String#replace overwrites the receiver's content and returns the receiver.
  text.replace(normalised)
end

.entities_to_chars(text) ⇒ Object



89
90
91
# File 'lib/sanitizer/sanitizer.rb', line 89

# Non-destructive counterpart of entities_to_chars!.
#
# Works on a duplicate of +text+, leaving the caller's string unchanged.
#
# @param text [String] the string to convert
# @return [Object] whatever entities_to_chars! returns for the copy
def entities_to_chars(text)
  copy = text.dup
  entities_to_chars!(copy)
end

.entities_to_chars!(text) ⇒ Object

Alguns feeds retornam tags “escapadas” dentro do conteúdo (ex: <br/>) Este método deve ser utilizado após o stripping e sanitização, para não deixar que essas tags sejam exibidas como conteúdo



83
84
85
86
87
# File 'lib/sanitizer/sanitizer.rb', line 83

# Replaces the entities &lt; and &gt; (matched case-insensitively) with the
# literal characters "<" and ">", modifying +text+ in place.
#
# Some feeds return "escaped" tags inside their content (e.g. &lt;br/&gt;).
# This method is meant to be used after stripping and sanitizing, so that
# those tags are not displayed as content.
#
# @param text [String] mutable string to convert
# @return [String] the same +text+ object
def entities_to_chars!(text)
  { /\&lt;/uim => "<", /\&gt;/uim => ">" }.each do |entity, char|
    text.gsub!(entity, char)
  end
  text
end

.html_decode(text) ⇒ Object

Convert HTML entities back to chars



100
101
102
103
# File 'lib/sanitizer/sanitizer.rb', line 100

# Decodes HTML entities in +text+.
#
# The argument is coerced with #to_s and passed to the decode method of the
# module-level HTMLEntities instance held in @@htmle.
#
# @param text [#to_s] value to decode
# @return [Object] the result of @@htmle.decode
def html_decode(text)
  @@htmle.decode(text.to_s)
end

.html_encode(text) ⇒ Object

Convert invalid chars to HTML entities



94
95
96
97
# File 'lib/sanitizer/sanitizer.rb', line 94

# Encodes +text+ into HTML entities.
#
# The argument is coerced with #to_s and passed, together with the :named
# instruction, to the encode method of the module-level HTMLEntities instance
# held in @@htmle.
#
# @param text [#to_s] value to encode
# @return [Object] the result of @@htmle.encode
def html_encode(text)
  @@htmle.encode(text.to_s, :named)
end

.sanitize(text) ⇒ Object



17
18
19
# File 'lib/sanitizer/sanitizer.rb', line 17

# Non-destructive counterpart of sanitize!.
#
# Runs sanitize! on a duplicate of +text+, leaving the caller's string
# unchanged.
#
# @param text [String] the string to sanitize
# @return [Object] whatever sanitize! returns for the copy
def sanitize(text)
  copy = text.dup
  sanitize!(copy)
end

.sanitize!(text) ⇒ Object

Sanitize to clean text



10
11
12
13
14
15
# File 'lib/sanitizer/sanitizer.rb', line 10

# Sanitizes +text+ in place, in three steps: strips tags (strip_tags! with no
# tag list), normalises whitespace (clean_spaces!), then overwrites the
# content with its html_encode'd form.
#
# @param text [String] mutable string to sanitize
# @return [String] the same +text+ object
def sanitize!(text)
  strip_tags!(text)
  clean_spaces!(text)
  encoded = html_encode(text)
  # String#replace returns its receiver, so this is also the return value.
  text.replace(encoded)
end

.strip_comments(text) ⇒ Object



39
40
41
# File 'lib/sanitizer/sanitizer.rb', line 39

# Non-destructive counterpart of strip_comments!.
#
# Runs strip_comments! on a duplicate of +text+, leaving the caller's string
# unchanged.
#
# @param text [String] the string to process
# @return [Object] whatever strip_comments! returns for the copy
def strip_comments(text)
  copy = text.dup
  strip_comments!(copy)
end

.strip_comments!(text) ⇒ Object

remove comments



33
34
35
36
37
# File 'lib/sanitizer/sanitizer.rb', line 33

# Removes HTML comments from +text+ in place, both literal (<!-- ... -->) and
# entity-escaped (&lt;!-- ... --&gt;, optionally with one whitespace character
# after &lt;).
#
# Both patterns span newlines (/m) and are lazy, so each comment ends at its
# own nearest terminator: multi-line comments are removed, and content sitting
# between two separate comments is preserved.
#
# @param text [String] mutable string, modified in place
# @return [String] the same +text+ object
def strip_comments!(text)
  text.gsub!(/<!--.*?-->/um, "")
  # /i keeps matching upper-case spellings of the entities (&LT; / &GT;).
  text.gsub!(/&lt;\s?!--.*?--&gt;/uim, "")
  text
end

.strip_disallowed_tags(text) ⇒ Object



58
59
60
# File 'lib/sanitizer/sanitizer.rb', line 58

# Non-destructive counterpart of strip_disallowed_tags!.
#
# Runs strip_disallowed_tags! on a duplicate of +text+, leaving the caller's
# string unchanged.
#
# @param text [String] the string to process
# @return [Object] whatever strip_disallowed_tags! returns for the copy
def strip_disallowed_tags(text)
  copy = text.dup
  strip_disallowed_tags!(copy)
end

.strip_disallowed_tags!(text) ⇒ Object

Remove all <script>, <link> and <style> tags



44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/sanitizer/sanitizer.rb', line 44

# Removes <script>, <link> and <style> markup from +text+ in place, both in
# literal form and in entity-escaped form (&lt;script&gt; ...).
#
# Paired <script>/<style> elements are removed together with their content;
# any remaining opening <script> tag and every <link> tag is removed on its
# own. All patterns are case-insensitive and span newlines. Each match stops
# at the nearest closing delimiter, so markup that merely follows a matched
# tag is kept, and a word boundary after the tag name prevents longer names
# that only start with "script"/"link"/"style" from matching.
#
# @param text [String] mutable string, modified in place
# @return [String] the same +text+ object
def strip_disallowed_tags!(text)
  text.gsub!(/<script\b[^>]*>.*?<\/script\s*>/uim, "")
  text.gsub!(/<script\b[^>]*>/uim, "")
  text.gsub!(/<link\b[^>]*>/uim, "")
  text.gsub!(/<style\b[^>]*>.*?<\/style\s*>/uim, "")

  # Same four passes for markup whose angle brackets arrive entity-escaped.
  text.gsub!(/&lt;script\b.*?&gt;.*?&lt;\/script\s*&gt;/uim, "")
  text.gsub!(/&lt;script\b.*?&gt;/uim, "")
  text.gsub!(/&lt;link\b.*?&gt;/uim, "")
  text.gsub!(/&lt;style\b.*?&gt;.*?&lt;\/style\s*&gt;/uim, "")
  text
end

.strip_tags(text, *tags) ⇒ Object



77
78
79
# File 'lib/sanitizer/sanitizer.rb', line 77

# Non-destructive counterpart of strip_tags!.
#
# Runs strip_tags! on a duplicate of +text+ with the same tag list, leaving
# the caller's string unchanged.
#
# @param text [String] the string to process
# @param tags [Array] optional tag names forwarded to strip_tags!
# @return [Object] whatever strip_tags! returns for the copy
def strip_tags(text, *tags)
  copy = text.dup
  strip_tags!(copy, *tags)
end

.strip_tags!(text, *tags) ⇒ Object

Remove all tags from text



63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/sanitizer/sanitizer.rb', line 63

# Removes tags from +text+ in place, in both literal (<b>) and entity-escaped
# (&lt;b&gt;) form. Tag content is kept; only the tags themselves are removed.
#
# With no +tags+ given, every tag is removed. Otherwise only opening/closing
# tags whose name is one of +tags+ are removed. Each tag is interpolated into
# the pattern as-is (not regex-escaped), and the name must be followed by a
# character that is neither a word character nor "-", so asking for "b" does
# not also remove <br> or <body>.
#
# Escaped tags are matched lazily up to the nearest &gt;, so names and
# attributes may contain any characters (including "g", "t", "&" and ";").
#
# @param text [String] mutable string, modified in place
# @param tags [Array<String, Symbol>] tag names to remove; empty means all
# @return [String] the same +text+ object
def strip_tags!(text, *tags)
  if tags.empty? # clear all tags by default
    text.gsub!(/<\/?[^>]*>/uim, "")
    text.gsub!(/&lt;\/?.*?&gt;/uim, "")
  else # clean only selected tags
    names = tags.map { |tag| "(?:#{tag})" }.join("|")
    text.gsub!(/<\/?(?:#{names})(?![\w-])[^>]*>/uim, "")
    text.gsub!(/&lt;\/?(?:#{names})(?![\w-]).*?&gt;/uim, "")
  end
  text
end