Module: WWMD
- Defined in:
- lib/wwmd/page/form_array.rb,
lib/wwmd.rb,
lib/wwmd/urlparse.rb,
lib/wwmd/page/auth.rb,
lib/wwmd/page/form.rb,
lib/wwmd/page/page.rb,
lib/wwmd/viewstate.rb,
lib/wwmd/wwmd_puts.rb,
lib/wwmd/page/inputs.rb,
lib/wwmd/page/scrape.rb,
lib/wwmd/page/spider.rb,
lib/wwmd/wwmd_config.rb,
lib/wwmd/page/headers.rb,
lib/wwmd/page/helpers.rb,
lib/wwmd/page/constants.rb,
lib/wwmd/page/irb_helpers.rb,
lib/wwmd/viewstate/viewstate.rb,
lib/wwmd/page/html2text_hpricot.rb,
lib/wwmd/page/reporting_helpers.rb,
lib/wwmd/page/html2text_nokogiri.rb,
lib/wwmd/page/parsing_convenience.rb,
lib/wwmd/viewstate/viewstate_types.rb,
lib/wwmd/viewstate/viewstate_utils.rb,
lib/wwmd/viewstate/vs_stubs/vs_list.rb,
lib/wwmd/viewstate/vs_stubs/vs_pair.rb,
lib/wwmd/viewstate/vs_stubs/vs_type.rb,
lib/wwmd/viewstate/vs_stubs/vs_unit.rb,
lib/wwmd/viewstate/vs_stubs/vs_array.rb,
lib/wwmd/viewstate/vs_stubs/vs_value.rb,
lib/wwmd/viewstate/viewstate_from_xml.rb,
lib/wwmd/viewstate/vs_stubs/vs_string.rb,
lib/wwmd/viewstate/vs_stubs/vs_triplet.rb,
lib/wwmd/viewstate/vs_stubs/vs_int_enum.rb,
lib/wwmd/viewstate/vs_stubs/vs_hashtable.rb,
lib/wwmd/viewstate/vs_stubs/vs_read_types.rb,
lib/wwmd/viewstate/vs_stubs/vs_read_value.rb,
lib/wwmd/viewstate/vs_stubs/vs_hybrid_dict.rb,
lib/wwmd/viewstate/vs_stubs/vs_sparse_array.rb,
lib/wwmd/viewstate/vs_stubs/vs_string_array.rb,
lib/wwmd/viewstate/vs_stubs/vs_stub_helpers.rb,
lib/wwmd/viewstate/vs_stubs/vs_indexed_string.rb,
lib/wwmd/viewstate/vs_stubs/vs_string_formatted.rb,
lib/wwmd/class_extensions/mixins_string_encoding.rb,
lib/wwmd/viewstate/vs_stubs/vs_binary_serialized.rb,
lib/wwmd/viewstate/viewstate_deserializer_methods.rb,
lib/wwmd/viewstate/vs_stubs/vs_indexed_string_ref.rb
Overview
Place methods for character encodings here.
Defined Under Namespace
Modules: Encoding, VSStubHelpers, VSStubs, ViewStateUtils Classes: Field, Form, FormArray, Inputs, Page, Scrape, Spider, URLParse, ViewState, WWMDConfig
Constant Summary collapse
- VERSION =
:stopdoc:
"0.2.20.3"
- PARSER =
:nokogiri || :hpricot
:nokogiri
- LIBPATH =
::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
- PATH =
::File.dirname(LIBPATH) + ::File::SEPARATOR
- HDOC =
Hpricot
- LINKS_REGEXP =
[ /window\.open\s*\(([^\)]+)/i, /open_window\s*\(([^\)]+)/i, /window\.location\s*=\s*(['"][^'"]+['"])/i, /.*location.href\s*=\s*(['"][^'"]+['"])/i, /document.forms.*action\s*=\s*(['"][^'"]+['"])/i, /Ajax\.Request\s*\((['"][^'"]+['"])/i, ]
- XSSFISH =
"<;'\"}()[]>{"
- DEFAULTS =
{ :base_url => "", :use_auth => true, :enable_cookies => true, :cookiejar => "./__cookiejar", :follow_location => true, :max_redirects => 20, :use_proxy => false, :debug => false, :scrape_warn => true, :parse => true, :timeout => 20, }
- ESCAPE =
{
  :url => /[^a-zA-Z0-9\-_%]/,
  :nalnum => /[^a-zA-Z0-9]/,
  :xss => /[^a-zA-Z0-9=?()']/,
  :ltgt => /[<>]/,
  :all => /.*/,
  # :b64 => /[=+\/]/,
  :b64 => /[^a-zA-Z0-9]/,
  :none => :none,
  :default => :default,
}
- UA =
{ :mozilla => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1.16) Gecko/20080702 Firefox/2.0.0.16", :moz3 => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.0.1) Gecko/2008070206 Firefox/3.0.1", :ie6 => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)", :ie7 => "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)", :ie8 => "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)", :opera => "Opera/9.20 (Windows NT 6.0; U; en)", :safari => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_4_11; en) AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.22", :safari4 => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_7; en-us) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Safari/530.17", :wwmd => "Mozilla/5.0 (compatible; WWMD #{WWMD::VERSION}; o_hai)" }
- DEFAULT_HEADERS =
{ "User-Agent" => UA[:wwmd], "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Language" => "en-US,en;q=0.8,en-au;q=0.6,en-us;q=0.4,en;q=0.2", "Accept-Encoding" => "gzip,deflate", "Accept-Charset" => "SO-8859-1,utf-8;q=0.7,*;q=0.7", "Keep-Alive" => "300", "Connection" => "keep-alive", }
- HEADERS =
{ :default => nil, :utf7 => { "Content-Type" => "application/x-www-form-urlencoded;charset=UTF-7", "Content-Transfer-Encoding" => "7bit", }, :ajax => { "X-Requested-With" => "XMLHttpRequest", "X-Prototype-Version" => "1.5.0", }, }
- InlineTags =
['a','abbr','acronym','address','b','bdo','big','cite','code','del','dfn','em','font','i','ins','kbd','label','noframes','noscript','q','s','samp','small','span','strike','strong','sub','sup','td','th','tt','u','html','body','table']
- BlockTags =
['blockquote','br','center','dd','div','fieldset','form','h1','h2','h3', 'h4','h5','h6','hr','p','pre','tr','var',]
- ListTags =
['dir','dl','menu','ol','ul']
- ItemTags =
['li','dt']
- INLINETAGS =
['a','abbr','acronym','address','b','bdo','big','cite', 'code','del','dfn','em','font','i','ins','kbd','label', 'noframes','noscript','q','s','samp','small','span', 'strike','strong','sub','sup','td','th','tt','u', 'html','body','table']
- BLOCKTAGS =
['blockquote','center','dd','div','fieldset','form', 'h1','h2','h3','h4','h5','h6','p','pre','tr','var',]
- LISTTAGS =
['dir','dl','menu','ol','ul']
- ITEMTAGS =
['li','dt']
- SPECIALTAGS =
['br','hr']
- VIEWSTATE_TYPES =
{
  # System.Web.UI.LosFormatter
  # System.Web.UI.ObjectStateFormatter
  # .DeserializeValue
  0x00 => :debug, ##X debugging
  0x01 => :int16, #RX private const byte Token_Int16 = 1;
  0x02 => :int32, #RX private const byte Token_Int32 = 2;
  0x03 => :byte, #RX private const byte Token_Byte = 3;
  0x04 => :char, #RX private const byte Token_Char = 4;
  0x05 => :string, ##X private const byte Token_String = 5;
  0x06 => :date_time, #RX private const byte Token_DateTime = 6;
  0x07 => :double, #RX private const byte Token_Double = 7;
  0x08 => :single, #RX private const byte Token_Single = 8;
  0x09 => :color, ##X private const byte Token_Color = 9;
  0x0a => :known_color, ##X private const byte Token_KnownColor = 10;
  0x0b => :int_enum, ##X private const byte Token_IntEnum = 11;
  0x0c => :empty_color, #VX private const byte Token_EmptyColor = 12;
  0x0f => :pair, ##X private const byte Token_Pair = 15;
  0x10 => :triplet, ##X private const byte Token_Triplet = 0x10;
  0x14 => :array, ##X private const byte Token_Array = 20;
  0x15 => :string_array, ##X private const byte Token_StringArray = 0x15;
  0x16 => :list, ##X private const byte Token_ArrayList = 0x16;
  0x17 => :hashtable, ##X private const byte Token_Hashtable = 0x17
  0x18 => :hybrid_dict, ##X private const byte Token_HybridDictionary = 0x18;
  0x19 => :type, ##X private const byte Token_Type = 0x19;
  0x1b => :unit, ##X private const byte Token_Unit = 0x1b;
  0x1c => :empty_unit, #VX private const byte Token_EmptyUnit = 0x1c;
  0x1e => :indexed_string, ##X private const byte Token_IndexedStringAdd = 30;
  0x1f => :indexed_string_ref, ##X private const byte Token_IndexedString = 0x1f;
  0x28 => :string_formatted, ##X private const byte Token_StringFormatted = 40;
  0x29 => :typeref_add, ##X private const byte Token_TypeRefAdd = 0x29;
  0x2a => :typeref_add_local, ##X private const byte Token_TypeRefAddLocal = 0x2a;
  0x2b => :typeref, ##X private const byte Token_TypeRef = 0x2b;
  0x32 => :binary_serialized, ##X private const byte Token_BinarySerialized = 50;
  0x3c => :sparse_array, ##X private const byte Token_SparseArray = 60;
  0x64 => :null, #VX private const byte Token_Null = 100;
  0x65 => :empty_byte, #VX private const byte Token_EmptyString = 0x65;
  0x66 => :zeroint32, #VX private const byte Token_ZeroInt32 = 0x66;
  0x67 => :bool_true, #VX private const byte Token_True = 0x67;
  0x68 => :bool_false, #VX private const byte Token_False = 0x68;
}
Instance Attribute Summary collapse
-
#console ⇒ Object
Returns the value of attribute console.
-
#debug ⇒ Object
Returns the value of attribute debug.
Class Method Summary collapse
-
.libpath(*args) ⇒ Object
Returns the library path for the module.
-
.path(*args) ⇒ Object
Returns the lpath for the module.
-
.require_all_libs_relative_to(fname, dir = nil) ⇒ Object
Utility method used to require all files ending in .rb that lie in the directory below this file that has the same name as the filename passed in.
-
.version ⇒ Object
Returns the version string for the library.
Instance Method Summary collapse
- #load_config(file) ⇒ Object
- #parse_opts(args) ⇒ Object
- #putd(*args) ⇒ Object
- #putw(*args) ⇒ Object
- #putx(*args) ⇒ Object
Instance Attribute Details
#console ⇒ Object
Returns the value of attribute console.
2 3 4 |
# File 'lib/wwmd/wwmd_puts.rb', line 2
# Reader for the console attribute.
#
# @return [Object] the current value of @console (nil when it was never set)
def console
  @console
end
#debug ⇒ Object
Returns the value of attribute debug.
3 4 5 |
# File 'lib/wwmd/wwmd_puts.rb', line 3
# Reader for the debug attribute.
#
# @return [Object] the current value of @debug (nil when it was never set)
def debug
  @debug
end
Class Method Details
.libpath(*args) ⇒ Object
Returns the library path for the module. If any arguments are given, they will be joined to the end of the library path using File.join.
40 41 42 |
# File 'lib/wwmd.rb', line 40
# Returns the library path for the module.
#
# With no arguments the LIBPATH constant is returned unchanged. Otherwise the
# arguments are flattened and joined onto LIBPATH with File.join.
#
# @param args [Array<String, Array>] optional path components to append
# @return [String] LIBPATH, or LIBPATH joined with the given components
def self.libpath(*args)
  args.empty? ? LIBPATH : ::File.join(LIBPATH, args.flatten)
end
.path(*args) ⇒ Object
Returns the path for the module. If any arguments are given, they will be joined to the end of the path using File.join.
48 49 50 |
# File 'lib/wwmd.rb', line 48
# Returns the path for the module.
#
# With no arguments the PATH constant is returned unchanged. Otherwise the
# arguments are flattened and joined onto PATH with File.join.
#
# @param args [Array<String, Array>] optional path components to append
# @return [String] PATH, or PATH joined with the given components
def self.path(*args)
  args.empty? ? PATH : ::File.join(PATH, args.flatten)
end
.require_all_libs_relative_to(fname, dir = nil) ⇒ Object
Utility method used to require all files ending in .rb that lie in the directory below this file that has the same name as the filename passed in. Optionally, a specific directory name can be passed in such that the filename does not have to be equivalent to the directory.
57 58 59 60 61 62 63 64 65 66 |
# File 'lib/wwmd.rb', line 57
# Requires every .rb file found (recursively) in a directory that sits next
# to +fname+.
#
# The directory name defaults to the basename of +fname+ with its extension
# removed; pass +dir+ to use a differently named directory. Files are
# required in sorted path order. Any path matching /html2text_/ is skipped.
#
# @param fname [String] path of the file the directory is located relative to
# @param dir [String, nil] directory name to search instead of the default
# @return [Array<String>] the sorted list of globbed paths
def self.require_all_libs_relative_to(fname, dir = nil)
  dir ||= ::File.basename(fname, '.*')
  # Expand to an absolute glob so each match can be passed straight to require.
  search_me = ::File.expand_path(
    ::File.join(::File.dirname(fname), dir, '**', '*.rb')
  )

  Dir.glob(search_me).sort.each do |rb|
    next if rb =~ /html2text_/
    require rb
  end
end
.version ⇒ Object
Returns the version string for the library.
32 33 34 |
# File 'lib/wwmd.rb', line 32
# Returns the version string for the library.
#
# @return [String] the VERSION constant
def self.version
  VERSION
end
Instance Method Details
#load_config(file) ⇒ Object
9 10 11 12 13 14 15 16 17 18 |
# File 'lib/wwmd/wwmd_config.rb', line 9
# Loads a YAML configuration file and returns the parsed result.
#
# Any StandardError raised while loading (not only a missing file) is
# reported through putw as "config file not found", followed by the
# exception's inspect output, and then the process exits.
#
# @param file [String] path of the YAML file to load
# @return [Object] whatever YAML.load_file returns for +file+
def load_config(file)
  YAML.load_file(file)
rescue => e
  putw "config file not found #{file}"
  putw e.inspect
  exit
end
#parse_opts(args) ⇒ Object
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/wwmd/wwmd_config.rb', line 20
# Parses command-line switches out of +args+ and returns them as a Hash.
#
# The result always starts with :max_redirects => 10, :timeout => 30 and
# :scrape_warn => false; each recognised switch adds or overrides a key.
# --use_proxy additionally sets ENV['HTTP_PROXY'] to "http://" plus the
# given value. -h/--help prints the usage text and exits.
#
# @param args [Array<String>] argument list; recognised switches are removed
#   from it in place (OptionParser#parse!)
# @return [Hash{Symbol => Object}] the collected options
def parse_opts(args)
  # set defaults
  inopts = {
    :max_redirects => 10,
    :timeout => 30,
    :scrape_warn => false,
  }

  # The parser gets its own name so the block parameter below does not
  # shadow it.
  parser = OptionParser.new do |opts|
    opts.on("-p", "--password PASSWORD", "Password") { |v| inopts[:password] = v }
    opts.on("-u", "--username USERNAME", "Username") { |v| inopts[:username] = v }
    opts.on("--header_file HEADER_FILE", "Header file") { |v| inopts[:header_file] = v }
    opts.on("--base_url BASE_URL", "Base url") { |v| inopts[:base_url] = v }
    opts.on("--use_proxy PROXY_URL", "Use proxy at url") do |v|
      ENV['HTTP_PROXY'] = "http://" + v.to_s
      inopts[:use_proxy] = true
      inopts[:proxy_url] = v
    end
    opts.on("--no_proxy", "do not use proxy") do |v|
      inopts[:use_proxy] = false
      inopts[:proxy_url] = nil
    end
    opts.on("--use_auth", "login before getting url") { |v| inopts[:use_auth] = true }
    opts.on("--no_auth", "no login before getting url") { |v| inopts[:use_auth] = false }
    opts.on("--debug", "debugging really doesn't work") { |v| inopts[:debug] = true }
    opts.on_tail("-h", "--help", "Show this message") do
      puts opts
      exit
    end
  end

  parser.parse!(args)
  inopts
end
#putd(*args) ⇒ Object
6 |
# File 'lib/wwmd/wwmd_puts.rb', line 6
# Prints +args+ with puts, but only when WWMD.debug is truthy.
#
# @param args [Array<Object>] values handed to puts unchanged
# @return [nil]
def putd(*args)
  # Parenthesised splat avoids Ruby's "`*' interpreted as argument prefix" ambiguity.
  puts(*args) if WWMD.debug
end