Module: Newscrapi::Encoding
- Defined in:
- lib/newscrapi/encoding.rb
Class Method Summary collapse
Class Method Details
.get_html_doc_with_changed_encoding(obj, encode_to) ⇒ Object
17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/newscrapi/encoding.rb', line 17
#
# Returns an HTML document for +obj+, re-encoded to +encode_to+ when the
# encoding guessed for the page differs from it.
#
# obj       - passed straight to parse_parameters_doc_page, which yields a
#             (doc, page) pair; presumably a parsed document or raw HTML
#             (verify against that helper).
# encode_to - target encoding name; nil/false disables any conversion.
#
# Returns the document obtained from parse_parameters_doc_page when no
# conversion is requested or the guessed encoding already equals
# +encode_to+; otherwise a new document built by Nokogiri::HTML from the
# re-serialized markup.
def self.get_html_doc_with_changed_encoding(obj, encode_to)
  doc, page = parse_parameters_doc_page(obj)

  # Nothing to convert: no target given, or the page is already in it.
  return doc unless encode_to
  return doc if guess_html_encoding(page) == encode_to

  # Serialize in the requested encoding, then parse the result again.
  reencoded = doc.serialize(:encoding => encode_to)
  Nokogiri::HTML(reencoded.to_s)
end
.guess_html_encoding(obj) ⇒ Object
9 10 11 12 13 14 15 |
# File 'lib/newscrapi/encoding.rb', line 9
#
# Guesses the character encoding of an HTML page.
#
# obj - passed straight to parse_parameters_doc_page, which yields a
#       (doc, page) pair; presumably a parsed document or raw HTML
#       (verify against that helper).
#
# Returns the encoding reported by the document when it has one, otherwise
# the 'encoding' entry of CharDet.detect's result for the page.
def self.guess_html_encoding(obj)
  doc, page = parse_parameters_doc_page(obj)

  # NOTE(review): the identifier used on the next two lines was lost when
  # this listing was extracted (it read "= doc." / "return unless .nil?").
  # `meta_encoding` is assumed here -- confirm against the original file.
  meta_encoding = doc.meta_encoding
  return meta_encoding unless meta_encoding.nil?

  # The document declares no encoding, so fall back to statistical detection.
  CharDet.detect(page)['encoding']
end