Module: HttpTools

Included in:
ETL::Parser::ApacheCombinedLogParser
Defined in:
lib/etl/http_tools.rb

Overview

Module which has utility methods for HTTP.

Instance Method Summary collapse

Instance Method Details

#parse_uri(uri_string, options = {}) ⇒ Object

Parse a URI. If options[:prefix] is set, it is prepended to each key of the hash that is returned.



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/etl/http_tools.rb', line 111

# Parse a URI string into a hash of its components.
#
# uri_string - the URI to parse; may be nil.
# options    - :prefix => string prepended to every key of the returned hash
#              (defaults to ''). The options hash itself is never modified.
#
# Returns a hash with the (prefixed, symbolized) keys scheme, host, port,
# uri_path and domain. Every value is nil when uri_string is nil or when
# parsing fails. domain is the last two dot-separated labels of the host,
# or nil when the host is missing or has no dot.
def parse_uri(uri_string, options={})
  # Read the prefix without writing a default back into the caller's hash.
  prefix = options[:prefix] || ''

  keys = {}
  [:scheme, :host, :port, :uri_path, :domain].each do |name|
    keys[name] = "#{prefix}#{name}".to_sym
  end

  empty_hash = {}
  keys.each_value { |key| empty_hash[key] = nil }
  return empty_hash unless uri_string

  # Attempt to parse the URI; if it is not a valid URI, catch the problem
  # and return the all-nil hash instead.
  begin
    uri = URI.parse(uri_string)
    host = uri.host
    {
      keys[:scheme]   => uri.scheme,
      keys[:host]     => host,
      keys[:port]     => uri.port,
      keys[:uri_path] => uri.path,
      # Always present so the result has the same keys on every code path.
      keys[:domain]   => host && host[/\.?([^\.]+\.[^\.]+$)/, 1]
    }
  rescue StandardError
    empty_hash
  end
end

#parse_user_agent(user_agent) ⇒ Object

Parse the given user agent string

Code taken from gemtacular.com/gems/ParseUserAgent



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/etl/http_tools.rb', line 8

# Parse a User-Agent header value into browser and operating system fields.
#
# user_agent - the raw User-Agent string; nil is treated as an empty string.
#
# Returns a hash with the keys :browser, :browser_version_major,
# :browser_version_minor, :ostype, :os_version and :os. Values are stripped
# strings, or nil when the field could not be determined (for example when
# the user agent is nil, empty or '-').
#
# The browser is taken from the LAST "Name/version" token in the string,
# unless an "MSIE x.y" property is found inside the parentheses, in which
# case MSIE wins.
def parse_user_agent(user_agent)
  browser = browser_version_major = browser_version_minor = nil
  ostype = os = os_version = nil

  # Fix Opera: old Opera builds send "Opera 7.54" instead of "Opera/7.54".
  # Rewrite it so it is picked up as a regular Agent/version token below.
  useragent = user_agent.to_s.gsub(/(Opera) (\d)/i, '\1/\2')

  # Grab all Agent/version strings as 'agents'.
  agents = useragent.split(/\s+/).select { |token| token.include?('/') }

  # Cycle through the agents to set browser and version (MSIE is set later).
  agents.each do |agent|
    parts = agent.split('/')
    browser = parts[0]
    # A token such as "Foo/" has no version part; treat it as empty.
    version = parts[1].to_s
    # Do not let a minor version from an earlier token leak into this one.
    browser_version_minor = nil

    case browser
    when 'Firefox'
      browser_version_major = version.slice(0, 3)
      browser_version_minor = version.sub(browser_version_major, '').sub('.', '')
    when 'Safari'
      # Safari reports a build number; builds below 400 are mapped to '1'.
      browser_version_major = version.slice(0, 3).to_f < 400 ? '1' : '2'
    else
      browser_version_major = version.slice(0, 1)
    end
  end

  # Grab all of the properties (within parens): strip every token that lives
  # outside the outermost parentheses, then drop the parentheses themselves.
  detail = useragent
  useragent.gsub(/\((.*)\)/, '').split(/\s/).each do |part|
    detail = detail.gsub(part, '')
  end
  detail = detail.delete('()').lstrip
  properties = detail.split(/;\s+/)

  # Cycle through the properties to set known quantities.
  properties.each do |property|
    if property =~ /^Win/
      ostype = 'Windows'
      os = property
      release = property.split(/ /, 2)[1]
      if release && release =~ /^NT/
        nt_version = release.split(/ /, 2)[1]
        os_version =
          case nt_version
          when '5', '5.0' then '2000' # Windows 2000 is sent as "NT 5.0"
          when '5.1'      then 'XP'
          else nt_version
          end
      end
    end
    if property == 'Macintosh'
      ostype = 'Macintosh'
      os = property
    end
    if property =~ /OS X/
      ostype = 'Macintosh'
      os_version = 'OS X'
      os = property
    end
    if property =~ /^Linux/
      ostype = 'Linux'
      os = property
    end
    if property =~ /^MSIE/
      browser = 'MSIE'
      msie_version = property.gsub('MSIE ', '').lstrip
      browser_version_major, browser_version_minor = msie_version.split('.')
    end
  end

  result = {
    :browser => browser,
    :browser_version_major => browser_version_major,
    :browser_version_minor => browser_version_minor,
    :ostype => ostype,
    :os_version => os_version,
    :os => os,
  }
  # Normalize: nil or whitespace-only values become nil, others are stripped.
  # Done in plain Ruby so the method does not depend on ActiveSupport's blank?.
  result.each do |key, value|
    result[key] = value.to_s =~ /[^[:space:]]/ ? value.strip : nil
  end
  result
end