Class: Snatch::Clean::HTML
- Inherits:
- Object
- Inheritance chain: Object → Snatch::Clean::HTML
- Defined in:
- lib/snatch/clean/html.rb
Defined Under Namespace
Modules: HrefFixMethods, SrcFixMethods
Instance Attribute Summary collapse
-
#doc ⇒ Object
Returns the value of attribute doc.
-
#working_directory ⇒ Object
Returns the value of attribute working_directory.
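Class Method Summary collapse
-
.html_encode(string) ⇒ Object
-
.update(doc, working_directory) ⇒ Object
-
.url_encode(string) ⇒ Object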
Instance Method Summary collapse
-
#initialize(doc, working_directory) ⇒ HTML
constructor
A new instance of HTML.
- #update ⇒ Object
Constructor Details
#initialize(doc, working_directory) ⇒ HTML
Returns a new instance of HTML.
48 49 50 51 |
# File 'lib/snatch/clean/html.rb', line 48

# Creates a cleaner bound to one document and one working directory.
#
# @param doc [Object] the document to operate on; stored unchanged in @doc
# @param working_directory [Object] stored unchanged in @working_directory
def initialize(doc, working_directory)
  @doc, @working_directory = doc, working_directory
end
Instance Attribute Details
#doc ⇒ Object
Returns the value of attribute doc.
46 47 48 |
# File 'lib/snatch/clean/html.rb', line 46 def doc @doc end |
#working_directory ⇒ Object
Returns the value of attribute working_directory.
46 47 48 |
# File 'lib/snatch/clean/html.rb', line 46 def working_directory @working_directory end |
Class Method Details
.html_encode(string) ⇒ Object
57 58 59 |
# File 'lib/snatch/clean/html.rb', line 57

# Rewrites every character of +string+ as a hexadecimal HTML numeric
# character reference, e.g. "a" becomes "&#x61;".
#
# The pattern carries the /m flag so that "." also matches newlines; without
# it, line breaks were copied through unencoded while every other character
# was converted.
#
# @param string [String] text to encode; each character is unpacked as a
#   UTF-8 code point
# @return [String] the concatenated "&#x...;" references ("" for empty input)
def self.html_encode(string)
  string.gsub(/./m) { |char| "&#x#{char.unpack('U')[0].to_s(16)};" }
end
.update(doc, working_directory) ⇒ Object
53 54 55 |
# File 'lib/snatch/clean/html.rb', line 53

# Convenience entry point: builds an instance for the given arguments and
# immediately runs its #update.
#
# @param doc [Object] forwarded to the constructor
# @param working_directory [Object] forwarded to the constructor
# @return [Object] whatever the instance-level #update returns
def self.update(doc, working_directory)
  cleaner = new(doc, working_directory)
  cleaner.update
end
.url_encode(string) ⇒ Object
61 62 63 |
# File 'lib/snatch/clean/html.rb', line 61

# Percent-encodes every byte of +string+ as an uppercase "%XX" escape,
# e.g. "a b" becomes "%61%20%62".
#
# Encoding is done per byte rather than per character: the previous
# per-character version sized its unpack template with String#size, which
# counts characters, so a multibyte character produced only its first byte
# ("é" became "%C3" instead of "%C3%A9"). Its /./ pattern also skipped
# newlines; iterating over bytes covers them as well.
#
# @param string [String] text to encode
# @return [String] one "%XX" escape per input byte ("" for empty input)
def self.url_encode(string)
  string.each_byte.map { |byte| format('%%%02X', byte) }.join
end
Instance Method Details
#update ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/snatch/clean/html.rb', line 65

# Cleans @doc in place.
#
# Steps, in order:
# 1. removes every node matching 'base, meta[name=generator]';
# 2. removes every node returned by the '//comment()' search;
# 3. calls each instance method of HrefFixMethods on every 'a[href]' node;
# 4. calls each instance method of SrcFixMethods on every '[src]' node.
#
# @return [Array] the result of the final iteration, i.e. the list returned
#   by SrcFixMethods.instance_methods
def update
  @doc.css('base, meta[name=generator]').each(&:remove)
  @doc.search('//comment()').remove

  # Applies every instance method of fix_module to the nodes matching
  # selector, using a throwaway object that mixes the module in.
  run_fixes = lambda do |fix_module, selector|
    fixer = Class.new { include fix_module }.new
    fix_module.instance_methods.each do |fix_name|
      # The selector is evaluated again for every fix method, so each fix
      # sees the document as left by the previous one.
      @doc.css(selector).each { |node| fixer.send(fix_name, node) }
    end
  end

  run_fixes.call(HrefFixMethods, 'a[href]')
  run_fixes.call(SrcFixMethods, '[src]')
end