Module: WWMD

Defined in:
lib/wwmd/page/form_array.rb,
lib/wwmd.rb,
lib/wwmd/urlparse.rb,
lib/wwmd/page/auth.rb,
lib/wwmd/page/form.rb,
lib/wwmd/page/page.rb,
lib/wwmd/viewstate.rb,
lib/wwmd/wwmd_puts.rb,
lib/wwmd/page/inputs.rb,
lib/wwmd/page/scrape.rb,
lib/wwmd/page/spider.rb,
lib/wwmd/wwmd_config.rb,
lib/wwmd/page/headers.rb,
lib/wwmd/page/helpers.rb,
lib/wwmd/page/constants.rb,
lib/wwmd/page/irb_helpers.rb,
lib/wwmd/viewstate/viewstate.rb,
lib/wwmd/page/html2text_hpricot.rb,
lib/wwmd/page/reporting_helpers.rb,
lib/wwmd/page/html2text_nokogiri.rb,
lib/wwmd/page/parsing_convenience.rb,
lib/wwmd/viewstate/viewstate_types.rb,
lib/wwmd/viewstate/viewstate_utils.rb,
lib/wwmd/viewstate/vs_stubs/vs_list.rb,
lib/wwmd/viewstate/vs_stubs/vs_pair.rb,
lib/wwmd/viewstate/vs_stubs/vs_type.rb,
lib/wwmd/viewstate/vs_stubs/vs_unit.rb,
lib/wwmd/viewstate/vs_stubs/vs_array.rb,
lib/wwmd/viewstate/vs_stubs/vs_value.rb,
lib/wwmd/viewstate/viewstate_from_xml.rb,
lib/wwmd/viewstate/vs_stubs/vs_string.rb,
lib/wwmd/viewstate/vs_stubs/vs_triplet.rb,
lib/wwmd/viewstate/vs_stubs/vs_int_enum.rb,
lib/wwmd/viewstate/vs_stubs/vs_hashtable.rb,
lib/wwmd/viewstate/vs_stubs/vs_read_types.rb,
lib/wwmd/viewstate/vs_stubs/vs_read_value.rb,
lib/wwmd/viewstate/vs_stubs/vs_hybrid_dict.rb,
lib/wwmd/viewstate/vs_stubs/vs_sparse_array.rb,
lib/wwmd/viewstate/vs_stubs/vs_string_array.rb,
lib/wwmd/viewstate/vs_stubs/vs_stub_helpers.rb,
lib/wwmd/viewstate/vs_stubs/vs_indexed_string.rb,
lib/wwmd/viewstate/vs_stubs/vs_string_formatted.rb,
lib/wwmd/class_extensions/mixins_string_encoding.rb,
lib/wwmd/viewstate/vs_stubs/vs_binary_serialized.rb,
lib/wwmd/viewstate/viewstate_deserializer_methods.rb,
lib/wwmd/viewstate/vs_stubs/vs_indexed_string_ref.rb

Overview

Place methods for character encodings here

Defined Under Namespace

Modules: Encoding, VSStubHelpers, VSStubs, ViewStateUtils

Classes: Field, Form, FormArray, Inputs, Page, Scrape, Spider, URLParse, ViewState, WWMDConfig

Constant Summary collapse

VERSION =

:stopdoc:

"0.2.20.3"
PARSER =

:nokogiri || :hpricot

:nokogiri
# Absolute path of the directory that contains this source file, with a
# trailing File::SEPARATOR appended.
LIBPATH =
::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
# Parent directory of LIBPATH, again with a trailing File::SEPARATOR.
PATH =
::File.dirname(LIBPATH) + ::File::SEPARATOR
HDOC =
Hpricot
[
  /window\.open\s*\(([^\)]+)/i,
  /open_window\s*\(([^\)]+)/i,
  /window\.location\s*=\s*(['"][^'"]+['"])/i,
  /.*location.href\s*=\s*(['"][^'"]+['"])/i,
  /document.forms.*action\s*=\s*(['"][^'"]+['"])/i,
  /Ajax\.Request\s*\((['"][^'"]+['"])/i,
]
# Fixed string made up of quote, semicolon, brace, bracket, parenthesis and
# angle-bracket characters.
# NOTE(review): the name suggests this is submitted as a probe value when
# checking for cross-site scripting -- confirm against the callers.
XSSFISH =
"<;'\"}()[]>{"
# Default option values, keyed by symbol.
#
# Note that parse_opts seeds its result with different values for three of
# these keys (:max_redirects => 10, :timeout => 30, :scrape_warn => false).
# Which value wins when both are in play is decided by code outside this
# listing -- TODO confirm the merge order.
DEFAULTS =
{
  :base_url => "",
  :use_auth => true,
  :enable_cookies => true,
  :cookiejar => "./__cookiejar",
  :follow_location => true,
  :max_redirects => 20,
  :use_proxy => false,
  :debug => false,
  :scrape_warn => true,
  :parse => true,
  :timeout => 20,
}
ESCAPE =
{
  :url     => /[^a-zA-Z0-9\-_%]/,
  :nalnum  => /[^a-zA-Z0-9]/,
  :xss     => /[^a-zA-Z0-9=?()']/,
  :ltgt    => /[<>]/,
  :all     => /.*/,
#    :b64     => /[=+\/]/,
  :b64     => /[^a-zA-Z0-9]/,
  :none    => :none,
  :default => :default,
}
# User-Agent strings keyed by a short symbolic browser name.
#
# The :wwmd entry is the library's own identity and interpolates
# WWMD::VERSION, so VERSION must already be defined when this hash is
# evaluated.
UA =
{
  :mozilla => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1.16) Gecko/20080702 Firefox/2.0.0.16",
  :moz3 => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.0.1) Gecko/2008070206 Firefox/3.0.1",
  :ie6 => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
  :ie7 => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
  :ie8 => "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)",
  :opera => "Opera/9.20 (Windows NT 6.0; U; en)",
  :safari => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.22",
  :safari4 => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_7; en-us) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Safari/530.17",
  :wwmd => "Mozilla/5.0 (compatible; WWMD #{WWMD::VERSION}; o_hai)"
}
# Header name => value pairs. The User-Agent is the library's own UA[:wwmd]
# string; the remaining values are fixed strings.
#
# The Accept-Charset value previously began with "SO-8859-1", which is not a
# registered charset name; it now reads "ISO-8859-1".
DEFAULT_HEADERS =
{
  "User-Agent" => UA[:wwmd],
  "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
  "Accept-Language" => "en-US,en;q=0.8,en-au;q=0.6,en-us;q=0.4,en;q=0.2",
  "Accept-Encoding" => "gzip,deflate",
  "Accept-Charset" => "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
  "Keep-Alive" => "300",
  "Connection" => "keep-alive",
}
# Named groups of extra headers.
#
# :default carries no headers (nil); :utf7 and :ajax each map to a hash of
# header name => value pairs.
# NOTE(review): presumably a selected group is merged over DEFAULT_HEADERS
# when a request is built -- confirm against the callers.
HEADERS =
{
  :default => nil,
  :utf7 => {
    "Content-Type" => "application/x-www-form-urlencoded;charset=UTF-7",
    "Content-Transfer-Encoding" => "7bit",
  },
  :ajax => {
    "X-Requested-With" => "XMLHttpRequest",
    "X-Prototype-Version" => "1.5.0",
  },
}
# HTML element names, grouped into four lists.
# NOTE(review): the groups are presumably consulted by one of the html2text
# converters listed under "Defined in" -- confirm against the callers.

# Elements grouped as "inline".
InlineTags = %w[
  a abbr acronym address b bdo big cite code del dfn em font i ins kbd label
  noframes noscript q s samp small span strike strong sub sup td th tt u
  html body table
]
# Elements grouped as "block"; unlike BLOCKTAGS this list includes br and hr.
BlockTags = %w[
  blockquote br center dd div fieldset form h1 h2 h3 h4 h5 h6 hr p pre tr var
]
# List container elements.
ListTags = %w[dir dl menu ol ul]
# List item elements.
ItemTags = %w[li dt]
# HTML element names, grouped into five lists.
# NOTE(review): the groups are presumably consulted by one of the html2text
# converters listed under "Defined in" -- confirm against the callers.

# Elements grouped as "inline".
INLINETAGS = %w[
  a abbr acronym address b bdo big cite
  code del dfn em font i ins kbd label
  noframes noscript q s samp small span
  strike strong sub sup td th tt u
  html body table
]
# Elements grouped as "block"; br and hr are kept in SPECIALTAGS instead.
BLOCKTAGS = %w[
  blockquote center dd div fieldset form
  h1 h2 h3 h4 h5 h6 p pre tr var
]
# List container elements.
LISTTAGS = %w[dir dl menu ol ul]
# List item elements.
ITEMTAGS = %w[li dt]
# The two elements kept apart from the other groups.
SPECIALTAGS = %w[br hr]
# Lookup table from a one-byte token value to the symbol naming that
# ViewState type. Each trailing comment quotes the matching Token_* constant
# declaration; the classes the table was taken from are named just below.
#
# Only the listed byte values have entries (e.g. 0x0d, 0x0e and 0x11-0x13 are
# absent), so looking up any other value returns nil.
#
# NOTE(review): the two-character markers ahead of each quoted constant
# (##X, #RX, #VX) are an author's status legend that is not explained here.
# NOTE(review): 0x65 maps to :empty_byte although the quoted constant is
# Token_EmptyString -- confirm the symbol name is intentional before relying
# on it.
VIEWSTATE_TYPES =
{
# System.Web.UI.LosFormatter
# System.Web.UI.ObjectStateFormatter
#   .DeserializeValue

  0x00 => :debug,               ##X  debugging
  0x01 => :int16,               #RX  private const byte Token_Int16 = 1;
  0x02 => :int32,               #RX  private const byte Token_Int32 = 2;
  0x03 => :byte,                #RX  private const byte Token_Byte = 3;
  0x04 => :char,                #RX  private const byte Token_Char = 4;
  0x05 => :string,              ##X  private const byte Token_String = 5;
  0x06 => :date_time,           #RX  private const byte Token_DateTime = 6;
  0x07 => :double,              #RX  private const byte Token_Double = 7;
  0x08 => :single,              #RX  private const byte Token_Single = 8;
  0x09 => :color,               ##X  private const byte Token_Color = 9;
  0x0a => :known_color,         ##X  private const byte Token_KnownColor = 10;
  0x0b => :int_enum,            ##X  private const byte Token_IntEnum = 11;
  0x0c => :empty_color,         #VX  private const byte Token_EmptyColor = 12;
  0x0f => :pair,                ##X  private const byte Token_Pair = 15;
  0x10 => :triplet,             ##X  private const byte Token_Triplet = 0x10;
  0x14 => :array,               ##X  private const byte Token_Array = 20;
  0x15 => :string_array,        ##X  private const byte Token_StringArray = 0x15;
  0x16 => :list,                ##X  private const byte Token_ArrayList = 0x16;
  0x17 => :hashtable,           ##X  private const byte Token_Hashtable = 0x17
  0x18 => :hybrid_dict,         ##X  private const byte Token_HybridDictionary = 0x18;
  0x19 => :type,                ##X  private const byte Token_Type = 0x19;
  0x1b => :unit,                ##X  private const byte Token_Unit = 0x1b;
  0x1c => :empty_unit,          #VX  private const byte Token_EmptyUnit = 0x1c;
  0x1e => :indexed_string,      ##X  private const byte Token_IndexedStringAdd = 30;
  0x1f => :indexed_string_ref,  ##X  private const byte Token_IndexedString = 0x1f;
  0x28 => :string_formatted,    ##X  private const byte Token_StringFormatted = 40;
  0x29 => :typeref_add,         ##X  private const byte Token_TypeRefAdd = 0x29;
  0x2a => :typeref_add_local,   ##X  private const byte Token_TypeRefAddLocal = 0x2a;
  0x2b => :typeref,             ##X  private const byte Token_TypeRef = 0x2b;
  0x32 => :binary_serialized,   ##X  private const byte Token_BinarySerialized = 50;
  0x3c => :sparse_array,        ##X  private const byte Token_SparseArray = 60;
  0x64 => :null,                #VX  private const byte Token_Null = 100;
  0x65 => :empty_byte,          #VX  private const byte Token_EmptyString = 0x65;
  0x66 => :zeroint32,           #VX  private const byte Token_ZeroInt32 = 0x66;
  0x67 => :bool_true,           #VX  private const byte Token_True = 0x67;
  0x68 => :bool_false,          #VX  private const byte Token_False = 0x68;
}

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#consoleObject

Returns the value of attribute console.



2
3
4
# File 'lib/wwmd/wwmd_puts.rb', line 2

# Reader for the +console+ attribute: hands back whatever is currently held
# in @console (nil if it was never assigned).
def console
  return @console
end

#debugObject

Returns the value of attribute debug.



3
4
5
# File 'lib/wwmd/wwmd_puts.rb', line 3

# Reader for the +debug+ attribute: hands back whatever is currently held in
# @debug (nil if it was never assigned).
def debug
  return @debug
end

Class Method Details

.libpath(*args) ⇒ Object

Returns the library path for the module. If any arguments are given, they will be joined to the end of the library path using File.join.



40
41
42
# File 'lib/wwmd.rb', line 40

# Builds a path inside the library directory.
#
# Called without arguments it returns the LIBPATH constant itself; otherwise
# the arguments (flattened, so nested arrays are accepted) are appended to
# LIBPATH with File.join.
def self.libpath( *args )
  return LIBPATH if args.empty?

  ::File.join(LIBPATH, args.flatten)
end

.path(*args) ⇒ Object

Returns the path for the module. If any arguments are given, they will be joined to the end of the path using File.join.



48
49
50
# File 'lib/wwmd.rb', line 48

# Builds a path relative to the PATH constant.
#
# Called without arguments it returns the PATH constant itself; otherwise the
# arguments (flattened, so nested arrays are accepted) are appended to PATH
# with File.join.
def self.path( *args )
  return PATH if args.empty?

  ::File.join(PATH, args.flatten)
end

.require_all_libs_relative_to(fname, dir = nil) ⇒ Object

Utility method used to require all files ending in .rb that lie in the directory below this file that has the same name as the filename passed in. Optionally, a specific directory name can be passed in such that the filename does not have to be equivalent to the directory.



57
58
59
60
61
62
63
64
65
66
# File 'lib/wwmd.rb', line 57

# Requires every *.rb file found (recursively) under a directory that sits
# next to +fname+.
#
# fname - path of the reference file; only its directory and, when +dir+ is
#         not given, its basename without extension are used.
# dir   - optional directory name to search instead of the basename of
#         +fname+.
#
# Files are required in sorted path order. Any path matching /html2text_/ is
# skipped. Returns the sorted list of all paths that were found, including
# the skipped ones.
def self.require_all_libs_relative_to( fname, dir = nil )
  subdir  = dir || ::File.basename(fname, '.*')
  pattern = ::File.expand_path(
      ::File.join(::File.dirname(fname), subdir, '**', '*.rb'))

  candidates = Dir.glob(pattern).sort
  candidates.each do |lib|
    require lib unless lib =~ /html2text_/
  end
end

.versionObject

Returns the version string for the library.



32
33
34
# File 'lib/wwmd.rb', line 32

# Version string of the library, i.e. the VERSION constant.
def self.version
  return VERSION
end

Instance Method Details

#load_config(file) ⇒ Object



9
10
11
12
13
14
15
16
17
18
# File 'lib/wwmd/wwmd_config.rb', line 9

# Loads a YAML configuration file.
#
# file - path passed to YAML.load_file.
#
# Returns whatever YAML.load_file produces for +file+.
#
# On failure the problem is reported through putw and the process is
# terminated (SystemExit is raised) with exit status 1, so that calling
# scripts can tell a failed load from a normal run. A missing file and any
# other load error (for example malformed YAML) are reported with different
# messages; in both cases the exception's inspect output follows.
def load_config(file)
  YAML.load_file(file)
rescue Errno::ENOENT => e
  putw "config file not found #{file}"
  putw e.inspect
  exit 1
rescue => e
  putw "could not load config file #{file}"
  putw e.inspect
  exit 1
end

#parse_opts(args) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/wwmd/wwmd_config.rb', line 20

# Parses command line switches into an options hash.
#
# args - array of command line arguments. It is parsed destructively
#        (OptionParser#parse!), so recognised switches are removed from it.
#
# Returns a hash keyed by symbol. It always starts out with
# :max_redirects => 10, :timeout => 30 and :scrape_warn => false; the other
# keys appear only when the corresponding switch was given.
#
# Side effects:
# * --use_proxy sets ENV['HTTP_PROXY']. "http://" is prepended only when the
#   given value carries no scheme of its own, so both "host:port" and
#   "http://host:port" produce a usable proxy URL.
# * -h / --help prints the usage text and exits the process.
def parse_opts(args)
  # values every result starts with
  inopts = {
    :max_redirects => 10,
    :timeout       => 30,
    :scrape_warn   => false,
  }

  parser = OptionParser.new do |o|
    o.on("-p", "--password PASSWORD", "Password")     { |v| inopts[:password] = v }
    o.on("-u", "--username USERNAME", "Username")     { |v| inopts[:username] = v }
    o.on("--header_file HEADER_FILE","Header file")   { |v| inopts[:header_file] = v }
    o.on("--base_url BASE_URL","Base url")            { |v| inopts[:base_url] = v }
    o.on("--use_proxy PROXY_URL", "Use proxy at url") do |v|
      proxy = v.to_s
      # do not turn "http://host:port" into "http://http://host:port"
      proxy = "http://" + proxy unless proxy =~ %r{\A[A-Za-z][A-Za-z0-9+.\-]*://}
      ENV['HTTP_PROXY'] = proxy
      inopts[:use_proxy] = true
      inopts[:proxy_url] = v
    end
    o.on("--no_proxy","do not use proxy") do
      inopts[:use_proxy] = false
      inopts[:proxy_url] = nil
    end
    o.on("--use_auth","login before getting url")     { inopts[:use_auth] = true }
    o.on("--no_auth","no login before getting url")   { inopts[:use_auth] = false }
    o.on("--debug","debugging really doesn't work")   { inopts[:debug] = true }
    o.on_tail("-h", "--help", "Show this message") do
      puts o
      exit
    end
  end

  parser.parse!(args)
  inopts
end

#putd(*args) ⇒ Object



6
# File 'lib/wwmd/wwmd_puts.rb', line 6

# Writes +args+ with puts, but only while WWMD::debug is truthy.
# Always returns nil.
def putd(*args)
  return unless WWMD.debug

  puts(*args)
end

#putw(*args) ⇒ Object



8
# File 'lib/wwmd/wwmd_puts.rb', line 8

# Writes +args+ with puts, but only while WWMD::console is truthy.
# Always returns nil.
def putw(*args)
  return unless WWMD.console

  puts(*args)
end

#putx(*args) ⇒ Object



7
# File 'lib/wwmd/wwmd_puts.rb', line 7

# Writes +args+ with puts, but only while WWMD::console is truthy.
# Always returns nil.
# NOTE(review): as written this is gated by the same switch as putw; confirm
# whether a different switch was intended.
def putx(*args)
  return unless WWMD.console

  puts(*args)
end