Class: WWW::Mechanize::Util

Inherits:
Object
  • Object
show all
Defined in:
lib/www/mechanize/util.rb

Constant Summary collapse

# Lookup table from encoding-name symbols to charset name strings.
# detect_charset indexes it with the interned name of an NKF constant.
CODE_DIC = {
  :JIS   => "ISO-2022-JP",
  :EUC   => "EUC-JP",
  :SJIS  => "SHIFT_JIS",
  :UTF8  => "UTF-8",
  :UTF16 => "UTF-16",
  :UTF32 => "UTF-32"
}

Class Method Summary collapse

Class Method Details

.build_query_string(parameters, enc = nil) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/www/mechanize/util.rb', line 13

# Serializes key/value pairs into a URL query string ("k1=v1&k2=v2").
#
# parameters - an enumerable yielding [key, value] pairs (Hash or Array of
#              pairs). Pairs whose key is nil or false are skipped.
# enc        - accepted but not used by this method.
#
# Keys and values are stringified with to_s and escaped with CGI.escape.
# Returns the joined String (empty when nothing is emitted).
def build_query_string(parameters, enc=nil)
  fragments = []
  parameters.each do |key, value|
    next unless key
    # CGI.escape is used rather than WEBrick::HTTPUtils.escape_form, which
    # has some m17n problems on ruby-1.9.*.
    escaped_key = CGI.escape(key.to_s)
    escaped_value = CGI.escape(value.to_s)
    fragments << [escaped_key, escaped_value].join("=")
  end
  fragments.join('&')
end

.detect_charset(src) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/www/mechanize/util.rb', line 60

# Guesses the character set of +src+ using NKF.guess.
#
# src - the text to inspect; when nil/false a minimal "<html></html>"
#       document is inspected instead.
#
# On Ruby >= 1.9.0 the guess is stringified and upcased. On older Rubies
# the NKF constant whose value equals the guess is looked up by name and
# translated through CODE_DIC.
#
# Returns the charset name, or "ISO-8859-1" when no name was obtained.
def detect_charset(src)
  guess = NKF.guess(src || "<html></html>")
  charset =
    if RUBY_VERSION >= "1.9.0"
      guess.to_s.upcase
    else
      constant_name = NKF.constants.find { |candidate| NKF.const_get(candidate) == guess }
      CODE_DIC[constant_name.intern]
    end
  charset || "ISO-8859-1"
end

.from_native_charset(s, code) ⇒ Object



37
38
39
40
41
42
43
44
# File 'lib/www/mechanize/util.rb', line 37

# Converts +s+ from UTF-8 into the charset named by +code+.
#
# The conversion is only performed when Mechanize.html_parser is
# Nokogiri::HTML; with any other parser +s+ is returned untouched.
#
# s    - the String to convert (nil/false yields nil under Nokogiri::HTML).
# code - the target charset name handed to Iconv.
#
# Returns the converted String, nil, or +s+ itself as described above.
def from_native_charset(s, code)
  return s unless Mechanize.html_parser == Nokogiri::HTML
  return nil unless s

  converted_chunks = Iconv.iconv(code, "UTF-8", s)
  converted_chunks.join("")
end

.html_unescape(s) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/www/mechanize/util.rb', line 46

# Replaces HTML character references in +s+ with the characters they denote.
#
# Three forms are recognised:
#   &name;    - looked up in Mechanize.html_parser::NamedCharacters
#   &#NNN;    - decimal code point
#   &#xHHH;   - hexadecimal code point (either case of "x" and digits)
#
# s - the String to unescape; nil/false is returned unchanged.
#
# A reference that has no known code point, or whose code point cannot be
# packed as UTF-8, is left in the output exactly as it appeared.
#
# Returns a new String with the references replaced.
def html_unescape(s)
  return s unless s
  s.gsub(/&(\w+|#[0-9]+|#[xX][0-9a-fA-F]+);/) do |match|
    # Capture the reference body once; the case below runs further regexp
    # matches that overwrite the match globals.
    reference = Regexp.last_match(1)
    codepoint =
      case reference
      when /\A#[xX]([0-9a-fA-F]+)\z/ then Regexp.last_match(1).hex
      when /\A#([0-9]+)\z/ then Regexp.last_match(1).to_i
      else Mechanize.html_parser::NamedCharacters[reference]
      end

    next match unless codepoint

    begin
      [codepoint].pack('U')
    rescue StandardError
      # Best effort: out-of-range or non-numeric values keep the raw text.
      match
    end
  end
end

.to_native_charset(s, code = nil) ⇒ Object



27
28
29
30
31
32
33
34
35
# File 'lib/www/mechanize/util.rb', line 27

# Converts +s+ from the charset named by +code+ into UTF-8.
#
# The conversion is only performed when Mechanize.html_parser is
# Nokogiri::HTML; with any other parser +s+ is returned untouched.
#
# s    - the String to convert (nil/false yields nil under Nokogiri::HTML).
# code - the source charset name; when omitted it is obtained from
#        detect_charset(s).
#
# Returns the converted String, nil, or +s+ itself as described above.
def to_native_charset(s, code=nil)
  return s unless Mechanize.html_parser == Nokogiri::HTML
  return nil unless s

  source_charset = code || detect_charset(s)
  Iconv.iconv("UTF-8", source_charset, s).join("")
end