Class: Mechanize::Util

Inherits:
Object
Defined in:
lib/mechanize/util.rb

Constant Summary

# Lookup table from short encoding identifiers (as symbols) to the
# corresponding charset name strings.
CODE_DIC = {
  :JIS   => "ISO-2022-JP",
  :EUC   => "EUC-JP",
  :SJIS  => "SHIFT_JIS",
  :UTF8  => "UTF-8",
  :UTF16 => "UTF-16",
  :UTF32 => "UTF-32"
}

Class Method Summary

Class Method Details

.build_query_string(parameters, enc = nil) ⇒ Object



12
13
14
15
16
17
# File 'lib/mechanize/util.rb', line 12

# Builds a URL query string ("k1=v1&k2=v2") from +parameters+.
#
# parameters:: an enumerable yielding key/value pairs (e.g. a Hash or an
#              Array of two-element Arrays). Pairs whose key is nil or
#              false are skipped.
# enc::        accepted for interface compatibility; not used by this method.
#
# Keys and values are converted with +to_s+ and escaped with CGI.escape.
# Returns the joined String ("" when no pair is emitted).
def build_query_string(parameters, enc = nil)
  encoded_pairs = parameters.map do |key, value|
    next unless key

    # CGI.escape is used on purpose: WEBrick::HTTP.escape* has some
    # problems about m17n on ruby-1.9.*.
    escaped_key = CGI.escape(key.to_s)
    escaped_value = CGI.escape(value.to_s)
    [escaped_key, escaped_value].join("=")
  end

  encoded_pairs.compact.join('&')
end

.detect_charset(src) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/mechanize/util.rb', line 54

# Guesses the charset name of +src+ using NKF.guess.
#
# src:: the text to inspect; when nil/false, a minimal "<html></html>"
#       document is inspected instead.
#
# On Ruby >= 1.9.0 the guess is converted with +to_s+ and upcased.
# On older Rubies the NKF constant whose value equals the guess is looked
# up by name in CODE_DIC.
#
# Returns the charset name, or "ISO-8859-1" when the lookup yields nil.
# NOTE(review): on the >= 1.9.0 path the result of to_s.upcase is always a
# String, so the fallback looks reachable only on the legacy path - confirm.
def detect_charset(src)
  guess = NKF.guess(src || "<html></html>")

  charset =
    if RUBY_VERSION >= "1.9.0"
      guess.to_s.upcase
    else
      constant_name = NKF.constants.find { |name| NKF.const_get(name) == guess }
      CODE_DIC[constant_name.intern]
    end

  charset || "ISO-8859-1"
end

.from_native_charset(s, code) ⇒ Object



29
30
31
32
33
34
35
36
37
38
# File 'lib/mechanize/util.rb', line 29

# Converts +s+ from UTF-8 into the charset named by +code+ using Iconv.
#
# s::    the String to convert.
# code:: the target charset; converted with +to_s+ before use.
#
# +s+ is returned unchanged when either argument is nil/false, when the
# configured Mechanize.html_parser is not Nokogiri::HTML, or when Iconv
# raises Iconv::InvalidEncoding. Other Iconv errors are not rescued here.
def from_native_charset(s, code)
  return s if !s || !code
  return s unless Mechanize.html_parser == Nokogiri::HTML

  chunks = Iconv.iconv(code.to_s, "UTF-8", s)
  chunks.join("")
rescue Iconv::InvalidEncoding
  s
end

.html_unescape(s) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/mechanize/util.rb', line 40

# Replaces HTML character references in +s+ with the characters they denote.
#
# Three forms are recognised:
# * named references (&amp;), resolved through the NamedCharacters table of
#   the configured Mechanize.html_parser;
# * decimal numeric references (&#65;);
# * hexadecimal numeric references (&#x41; or &#X41;).
#
# A reference that cannot be resolved (unknown name, or a code point that
# cannot be packed) is left in the output exactly as written.
#
# s:: the String to unescape; nil/false is returned unchanged.
#
# Returns a new String with the resolvable references replaced.
def html_unescape(s)
  return s unless s
  s.gsub(/&(\w+|#[0-9]+|#[xX][0-9a-fA-F]+);/) { |match|
    number = case match
             when /&(\w+);/
               Mechanize.html_parser::NamedCharacters[$1]
             when /&#([0-9]+);/
               $1.to_i
             when /&#[xX]([0-9a-fA-F]+);/
               $1.hex
             end

    next match unless number

    begin
      [number].pack('U')
    rescue StandardError
      # Packing can fail (e.g. a value out of range); keep the original
      # text rather than aborting the whole substitution.
      match
    end
  }
end

.to_native_charset(s, code = nil) ⇒ Object



19
20
21
22
23
24
25
26
27
# File 'lib/mechanize/util.rb', line 19

# Converts +s+ from the charset +code+ into UTF-8 using Iconv.
#
# s::    the String to convert.
# code:: the source charset; when nil/false it is obtained from
#        detect_charset(s).
#
# When the configured Mechanize.html_parser is not Nokogiri::HTML, +s+ is
# returned untouched. With Nokogiri::HTML, a nil/false +s+ yields nil.
# Iconv errors are not rescued here.
def to_native_charset(s, code = nil)
  return s unless Mechanize.html_parser == Nokogiri::HTML
  return unless s

  source_charset = code || detect_charset(s)
  Iconv.iconv("UTF-8", source_charset, s).join("")
end