Module: Newscrapi::Encoding

Defined in:
lib/newscrapi/encoding.rb

Class Method Summary collapse

Class Method Details

.get_html_doc_with_changed_encoding(obj, encode_to) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/newscrapi/encoding.rb', line 17

# Returns an HTML document in the requested encoding, re-encoding only when
# the guessed encoding of the page differs from +encode_to+.
#
# @param obj [Object] input handed to +parse_parameters_doc_page+, which
#   yields the parsed document and the page (presumably the raw HTML
#   string -- confirm against that helper)
# @param encode_to [String, nil] name of the target encoding; when nil or
#   false the parsed document is returned untouched
# @return [Object] the original parsed document when no conversion is needed,
#   otherwise the result of re-parsing the re-serialized page with
#   +Nokogiri::HTML+
def self.get_html_doc_with_changed_encoding(obj, encode_to)
  doc, page = parse_parameters_doc_page(obj)
  return doc unless encode_to

  guessed_encoding = guess_html_encoding(page)
  # Encoding labels are case-insensitive ("utf-8" and "UTF-8" name the same
  # encoding), so compare them without regard to case to avoid a pointless
  # serialize/re-parse round trip. A nil guess never matches and therefore
  # still triggers the conversion.
  if guessed_encoding && guessed_encoding.to_s.casecmp(encode_to.to_s) == 0
    return doc
  end

  Nokogiri::HTML(doc.serialize(:encoding => encode_to))
end

.guess_html_encoding(obj) ⇒ Object



9
10
11
12
13
14
15
# File 'lib/newscrapi/encoding.rb', line 9

# Guesses the character encoding of an HTML page.
#
# The encoding reported by the parsed document's +meta_encoding+ takes
# precedence; only when that is nil is the page content handed to
# +CharDet.detect+ and the 'encoding' entry of its result returned.
#
# @param obj [Object] input handed to +parse_parameters_doc_page+, which
#   yields the parsed document and the page
# @return [Object, nil] the document's +meta_encoding+ when it is not nil,
#   otherwise the value stored under 'encoding' in CharDet's result
def self.guess_html_encoding(obj)
  html_doc, raw_page = parse_parameters_doc_page(obj)
  declared = html_doc.meta_encoding

  if declared.nil?
    CharDet.detect(raw_page)['encoding']
  else
    declared
  end
end