Class: Snatch::Clean::HTML
- Inherits: Object
  - Object
    - Snatch::Clean::HTML
- Defined in:
- lib/snatch/clean/html.rb
Defined Under Namespace
Modules: HrefFixMethods, SrcFixMethods
Instance Attribute Summary collapse
-
#doc ⇒ Object
Returns the value of attribute doc.
-
#working_directory ⇒ Object
Returns the value of attribute working_directory.
Class Method Summary collapse
Instance Method Summary collapse
-
#initialize(doc, working_directory) ⇒ HTML
constructor
A new instance of HTML.
- #update ⇒ Object
Constructor Details
#initialize(doc, working_directory) ⇒ HTML
Returns a new instance of HTML.
49 50 51 52 |
# File 'lib/snatch/clean/html.rb', line 49
# Stores the document to clean and the working directory on the instance.
#
# @param doc the document object that #update operates on
# @param working_directory stored as-is; not used by the code shown here
def initialize(doc, working_directory)
  @doc, @working_directory = doc, working_directory
end
Instance Attribute Details
#doc ⇒ Object
Returns the value of attribute doc.
47 48 49 |
# File 'lib/snatch/clean/html.rb', line 47
# Reader for the document handed to the constructor.
#
# @return the value of @doc
def doc
  @doc
end
#working_directory ⇒ Object
Returns the value of attribute working_directory.
47 48 49 |
# File 'lib/snatch/clean/html.rb', line 47
# Reader for the working directory handed to the constructor.
#
# @return the value of @working_directory
def working_directory
  @working_directory
end
Class Method Details
.html_encode(string) ⇒ Object
58 59 60 |
# File 'lib/snatch/clean/html.rb', line 58
# Rewrites a string as hexadecimal numeric character references.
#
# Every character matched by /./ is replaced with "&#x<codepoint>;" using
# lowercase hex digits. Because the pattern has no multiline flag, newline
# characters are not matched and pass through unchanged.
#
# @param string [String] text to encode
# @return [String] a new string made of character references
def self.html_encode(string)
  string.gsub(/./) do |character|
    codepoint = character.unpack('U').first
    format('&#x%x;', codepoint)
  end
end
.update(doc, working_directory) ⇒ Object
54 55 56 |
# File 'lib/snatch/clean/html.rb', line 54
# Convenience entry point: builds an instance for the given document and
# working directory and immediately runs #update on it.
#
# @param doc passed through to the constructor
# @param working_directory passed through to the constructor
# @return whatever the instance-level #update returns
def self.update(doc, working_directory)
  cleaner = new(doc, working_directory)
  cleaner.update
end
.url_encode(string) ⇒ Object
62 63 64 |
# File 'lib/snatch/clean/html.rb', line 62
# Percent-encodes every character of a string, byte by byte.
#
# Each character matched by /./ is replaced by one "%XX" group (uppercase
# hex) per byte of its encoded form, so a multibyte character such as "é"
# in UTF-8 becomes "%C3%A9". Because the pattern has no multiline flag,
# newline characters are not matched and pass through unchanged.
#
# @param string [String] text to encode
# @return [String] a new, fully percent-encoded string
def self.url_encode(string)
  string.gsub(/./) do |char|
    # Iterate over bytes, not characters: String#size counts characters, so
    # sizing the unpack template with it dropped all but the first byte of
    # any multibyte character.
    char.bytes.map { |byte| format('%%%02X', byte) }.join
  end
end
Instance Method Details
#update ⇒ Object
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/snatch/clean/html.rb', line 66
# Cleans @doc in place.
#
# Steps, in order:
# 1. Removes every `base` element and every `meta[name=generator]` element.
# 2. Removes all comment nodes.
# 3. Calls each instance method of HrefFixMethods on every `a[href]` node.
# 4. Calls each instance method of SrcFixMethods on every `[src]` node.
#
# The node list is queried again for each fix method rather than cached,
# so a fix sees whatever the previous fixes left in the document.
#
# @return the result of the final `each` call; callers presumably rely on
#   the side effects on @doc rather than this value
def update
  @doc.css('base, meta[name=generator]').each(&:remove)
  @doc.search('//comment()').remove

  # Anonymous object that only exists to host the HrefFixMethods mixin.
  href_fixer = Class.new { include HrefFixMethods }.new
  HrefFixMethods.instance_methods.each do |fix|
    @doc.css('a[href]').each { |anchor| href_fixer.send(fix, anchor) }
  end

  # Same pattern for the SrcFixMethods mixin.
  src_fixer = Class.new { include SrcFixMethods }.new
  SrcFixMethods.instance_methods.each do |fix|
    @doc.css('[src]').each { |node| src_fixer.send(fix, node) }
  end
end