Class: Har

Inherits:
Object
  • Object
show all
Defined in:
lib/mu/har.rb

Constant Summary collapse

# --- HTTP header names ---
HTTP_CONTENT_LENGTH_HEADER =
'Content-Length'
HTTP_CONTENT_TYPE_HEADER =
'Content-Type'
HTTP_CONTENT_ENCODING_HEADER =
'Content-Encoding'
HTTP_TRANSFER_ENCODING_HEADER =
'Transfer-Encoding'
HTTP_CONTENT_TRANSFER_ENCODING_HEADER =
'Content-Transfer-Encoding'
# --- Content types (MIME types) ---
HTTP_FORM_CONTENT_TYPE =
'application/x-www-form-urlencoded'
HTTP_TEXT_PLAIN_CONTENT_TYPE =
'text/plain'
HTTP_TEXT_HTML_CONTENT_TYPE =
'text/html'
HTTP_TEXT_XML_CONTENT_TYPE =
'text/xml'
HTTP_APPLICATION_XML_CONTENT_TYPE =
'application/xml'
HTTP_TEXT_JSON_CONTENT_TYPE =
'text/json'
HTTP_APPLICATION_JSON_CONTENT_TYPE =
'application/json'
HTTP_TEXT_JAVASCRIPT_CONTENT_TYPE =
'text/javascript'
HTTP_APPLICATION_JAVASCRIPT_CONTENT_TYPE =
'application/x-javascript'
HTTP_TEXT_CSS_CONTENT_TYPE =
'text/css'
# Content types that #text_body? accepts as text.
HTTP_TEXT_CONTENT_TYPES =
[ HTTP_FORM_CONTENT_TYPE,
HTTP_TEXT_PLAIN_CONTENT_TYPE,
HTTP_TEXT_HTML_CONTENT_TYPE,
HTTP_TEXT_XML_CONTENT_TYPE,
HTTP_APPLICATION_XML_CONTENT_TYPE,
HTTP_TEXT_JSON_CONTENT_TYPE,
HTTP_APPLICATION_JSON_CONTENT_TYPE,
HTTP_TEXT_JAVASCRIPT_CONTENT_TYPE,
HTTP_APPLICATION_JAVASCRIPT_CONTENT_TYPE,
HTTP_TEXT_CSS_CONTENT_TYPE ]
# --- Content-Encoding / Transfer-Encoding values accepted by #build_payload ---
HTTP_GZIP_CONTENT_ENCODING =
'gzip'
HTTP_DEFLATE_CONTENT_ENCODING =
'deflate'
# 20 raw bytes starting with the gzip magic number (1f 8b); presumably the gzip
# encoding of an empty body -- not referenced by the code visible here.
HTTP_EMPTY_GZIP_BODY =
"\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x03\x03\x00\x00" +
"\x00\x00\x00\x00\x00\x00\x00"
HTTP_CHUNKED_TRANSFER_ENCODING =
'chunked'
# --- Streaming media content types ---
HTTP_VIDEO_X_MS_WMV_CONTENT_TYPE =
'video/x-ms-wmv'
# NOTE(review): the constant name says VIDEO but the value is an audio type.
HTTP_VIDEO_X_MS_WMA_CONTENT_TYPE =
'audio/x-ms-wma'
HTTP_STREAMING_CONTENT_TYPES =
[ HTTP_VIDEO_X_MS_WMV_CONTENT_TYPE,
HTTP_VIDEO_X_MS_WMA_CONTENT_TYPE ]
# Slice length used by #build_payload when it truncates a large body.
HTTP_CONTENT_SLICE_SIZE =
1024
# Table indexed by byte value (0..255) giving the frozen text that #escape
# emits for that byte: tab, CR, LF and backslash become two-character backslash
# escapes, other printable ASCII (32..126) is kept as-is, and every remaining
# byte becomes a literal backslash-x followed by two hex digits.
ESCAPES =
(0..255).map do |byte|
    if byte == 9
        "\\t".freeze
    elsif byte == 13
        "\\r".freeze
    elsif byte == 92
        "\\\\".freeze
    elsif byte == 10
        "\\n".freeze
    elsif byte >= 32 && byte <= 126
        byte.chr.freeze
    else
        ('\x%02x' % byte).freeze
    end
end

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(har_file, options, ignores) ⇒ Har

Returns a new instance of Har.



54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/mu/har.rb', line 54

# Reads the HAR file from disk and parses it as JSON into @har.
#
# @param har_file [String] path of the HAR file; read in full
# @param options  [Object] option object stored in @options and consulted by
#   the build_* methods and #get_hosts
# @param ignores  [Array<String>] patterns stored in @ignores; #get_hosts joins
#   them into a single regular expression
# @raise [StandardError] any read or parse failure is announced on stdout and
#   then re-raised unchanged
def initialize har_file, options, ignores
    @har_file = har_file
    @hosts    = {}
    @options  = options
    @ignores  = ignores
    begin
        @har = JSON.parse(File.read(@har_file))
    rescue StandardError
        # Only ordinary errors are reported here; signals and exit requests
        # propagate without the (misleading for them) parsing message.
        puts "There was an error reading the JSON har file, probably a parsing problem"
        raise
    end
end

Instance Attribute Details

#entries ⇒ Object

Returns the value of attribute entries.



52
53
54
# File 'lib/mu/har.rb', line 52

# Reader for @entries, the HAR entry list cached by #get_entries
# (nil until that method has run).
def entries; @entries; end

#har ⇒ Object

Returns the value of attribute har.



52
53
54
# File 'lib/mu/har.rb', line 52

# Reader for @har, the parsed HAR document loaded by #initialize.
def har; @har; end

#har_file ⇒ Object (readonly)

Returns the value of attribute har_file.



51
52
53
# File 'lib/mu/har.rb', line 51

# Reader for @har_file, the path handed to #initialize.
def har_file; @har_file; end

#har_hosts ⇒ Object

Returns the value of attribute har_hosts.



52
53
54
# File 'lib/mu/har.rb', line 52

# Reader for @har_hosts.
# NOTE(review): none of the methods shown here assign @har_hosts (build_hosts
# uses a local of the same name), so this presumably stays nil -- confirm.
def har_hosts; @har_hosts; end

#hosts ⇒ Object

Returns the value of attribute hosts.



52
53
54
# File 'lib/mu/har.rb', line 52

# Reader for @hosts, the hash of Host header value => scenario host label
# that #get_hosts fills in.
def hosts; @hosts; end

Instance Method Details

#build_headers(cs, headers, cookies, i) ⇒ Object

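Takes the request headers and writes one "Name: value" line per header into the client-send block. For a Cookie header it also looks up each request cookie in the responses of earlier entries; rewriting the cookie value to reference a step variable is not implemented yet.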



194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# File 'lib/mu/har.rb', line 194

# Writes one "Name: value" line per HAR header into the client-send block.
#
# For a Cookie header, every request cookie is looked up with #find_cookie in
# the responses of the entries preceding entry +i+. Mapping the cookie value to
# a variable captured in that earlier step is NOT implemented: when the
# detection pattern matches, the method raises instead of rewriting the value.
#
# @param cs      [#line] builder that receives the header lines
# @param headers [Array<Hash>] HAR headers, each with 'name' and 'value'
# @param cookies [Array<Hash>, nil] HAR request cookies; nil is treated as empty
# @param i       [Integer] index of the current entry, passed to #find_cookie
# @return [Array<Hash>] the +headers+ argument
# @raise [RuntimeError] if the (unfinished) cookie substitution would trigger
def build_headers cs, headers, cookies, i
    headers.each do |header|
        value = header['value']

        if header['name'].downcase == 'cookie'
            (cookies || []).each do |cookie|
                next if cookie.nil?

                step = find_cookie(cookie['name'], i)
                next if step.nil?

                # The cookie name is escaped so that names containing regex
                # metacharacters cannot break pattern compilation.
                # NOTE(review): the pattern is otherwise kept as it was; it
                # contains literal double quotes, so it does not match a plain
                # `name=value` Cookie header and the raise below is effectively
                # a disabled feature.
                regex = /"(#{Regexp.escape(cookie['name'].to_s)})" + "=([^;]*)"/
                if value.match regex
                    # TODO: replace `name=value` with a reference to the
                    # variable captured in the earlier client_receive step.
                    # The previous attempt called String#replace with two
                    # arguments and sat behind this raise, so it never ran.
                    raise "I KNOW THERE IS A PROBLE WITH THE code below"
                end
            end
        end

        cs.line(header['name'] + ': ' + value)
    end
end

#build_hosts(hosts) ⇒ Object

Declares the browser host (host_0) and one host entry for every server returned by #get_hosts.



258
259
260
261
262
263
264
265
266
267
268
# File 'lib/mu/har.rb', line 258

# Declares the scenario hosts: a fixed 'host_0' for the browser, followed by
# one host per entry of the hash returned by #get_hosts (label => hostname).
#
# @param hosts [#create] host builder yielded by the scenario
# @return [Hash] the hash returned by #get_hosts
def build_hosts hosts
    # TODO: the 'v4' address family is hard-coded for every host
    hosts.create 'host_0', 'v4', 'browser'

    servers = self.get_hosts @ignores, @options

    # Each pair maps the Host header value to its generated host label
    servers.each do |hostname, label|
        hosts.create(label, 'v4', hostname)
    end
end

#build_payload(cs_send, entry_count, req_res, scenario) ⇒ Object

If the payload is to be included, handle the inclusion logic here



375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
# File 'lib/mu/har.rb', line 375

# Writes the body of a HAR request or response into a send step, wrapped in
# the fields its Transfer-Encoding / Content-Encoding headers call for.
#
# The body text comes from req_res['content']['text'] when a 'content' key is
# present, otherwise from req_res['postData']['text']. Nothing is written when
# the text is nil, or when @options.strip_large_content is set and there is no
# 'content' key.
#
# With @options.strip_large_content the body is written as a hex string inside
# a repeat field: content larger than @options.large_content_size is cut to its
# first HTTP_CONTENT_SLICE_SIZE characters and repeated enough times to cover
# the recorded size; smaller content is written once in full.
#
# @param cs_send     [#literal, #literal_no_format] send-step builder
# @param entry_count [Integer] not used by this method
# @param req_res     [Hash] HAR request or response hash (needs 'headers')
# @param scenario    [Object] not used by this method
# @raise [NotImplementedError] for a Transfer-Encoding other than chunked, or a
#   Content-Encoding other than gzip/deflate
def build_payload(cs_send, entry_count, req_res, scenario)
    # Responses carry their body under 'content', requests under 'postData'
    has_content = req_res.has_key?('content')
    payload = has_content ? req_res['content']['text'] : req_res['postData']['text']
    return if payload.nil?

    content_encoding  = false
    transfer_encoding = false

    # Header names are case-insensitive in HTTP, so compare them lower-cased
    req_res['headers'].each do |header|
        case header['name'].to_s.downcase
        when 'transfer-encoding'
            unless header['value'] == HTTP_CHUNKED_TRANSFER_ENCODING
                raise NotImplementedError, "Transfer-Encoding: #{header['value']}"
            end
            transfer_encoding = true
        when 'content-encoding'
            unless header['value'] == HTTP_GZIP_CONTENT_ENCODING or header['value'] == HTTP_DEFLATE_CONTENT_ENCODING
                raise NotImplementedError, "Content-Encoding: #{header['value']}"
            end
            content_encoding = true
        end
    end

    strip = @options.strip_large_content

    # Stripping only applies to 'content'. Bail out before any wrapper field
    # is opened so the generated output never has unbalanced brackets.
    return if strip && !has_content

    # http chunked
    if transfer_encoding
        # Request bodies have no recorded size; use the payload's byte length
        chunk_size = has_content ? req_res['content']['size'] : payload.bytesize
        cs_send.literal_no_format("http_chunk_encode(chunk_size: #{chunk_size}) [")
    end

    # gzip only for now
    cs_send.literal_no_format("gzip_compress[") if content_encoding

    if strip
        size = req_res['content']['size']
        if size > @options.large_content_size
            # Keep only the first slice and repeat it to cover the full size
            block = payload[0, HTTP_CONTENT_SLICE_SIZE]
            count, remainder = size.divmod(HTTP_CONTENT_SLICE_SIZE)
            # TODO: a partial trailing slice is rounded up to a whole repeat
            count += 1 if remainder > 0
        else
            # The whole body is written, so it must appear exactly once
            block = payload
            count = 1
        end

        # Open the repeat field and write the block as a hex ("0h") string
        cs_send.literal_no_format("repeat(count: %d) [" % [count])
        cs_send.literal_no_format("\"0h")
        block.each_byte do |byte|
            cs_send.literal_no_format("%02x" % byte)
        end
        cs_send.literal_no_format("\"")

        # Close repeat field
        cs_send.literal_no_format("]")
    else
        # A payload containing #{...} would be read as variable interpolation
        # in the generated scenario, so the string is broken between the '#'
        # and the '{'.
        if payload =~ /#\{.*\}/
            payload = payload.split(/#\{/).join("#\"\n\"{")
        end
        cs_send.literal("\"" + escape(payload) + "\"")
    end

    # Close content encoding, if needed
    cs_send.literal_no_format("]") if content_encoding

    # Close transfer encoding, if needed
    cs_send.literal_no_format("]") if transfer_encoding
end

#build_postdata(cs, req, entry_count, scenario) ⇒ Object



228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
# File 'lib/mu/har.rb', line 228

# Writes the request body for an entry that has 'postData'.
#
# When postData lists form params, a Content-Length header bound to the body
# struct is written, followed by a blank line and an "&"-delimited struct with
# one `name=<percent-encoded value>` item per param. When there are no params
# (the key is missing or the list is empty) the raw payload is written through
# #build_payload, unless @options.ignore_payload is set.
#
# @param cs          [Object] client-send builder (header, line, block, string,
#   length_string)
# @param req         [Hash] HAR request hash containing 'postData'
# @param entry_count [Integer] step counter used to name the body struct
# @param scenario    [Object] passed through to #build_payload
def build_postdata cs, req, entry_count, scenario
    # 'params' may be absent when the HAR only recorded the raw text
    params = req['postData']['params'] || []

    if params.length > 0
        cs.header('Content-Length') do
            cs.length_string({'of' => "body_#{entry_count}"})
        end
        cs.line()

        cs.block("body_#{entry_count} = struct [", ']') do
            cs.block('dsv(delimiter: "&") [',']') do
                params.each do |param|
                    cs.block('struct [',']') do
                        cs.string(param['name'] + '=')
                        cs.block('uri_percent_encode [',']') do
                            # Going through bytes/inspect is so strange unicode
                            # characters come out correctly escaped; to_s lets
                            # a param without a value become an empty string.
                            cs.string Regexp.escape(param['value'].to_s.bytes.to_a.map(&:chr).join.inspect)
                        end
                    end
                end
            end
        end
    else
        unless @options.ignore_payload
            cs.line()
            build_payload(cs, entry_count, req, scenario)
        end
    end
end

#build_steps(steps, scenario) ⇒ Object



270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
# File 'lib/mu/har.rb', line 270

# Builds the scenario steps for every HAR entry that #get_hosts annotated with
# a host: a transport, a client_send with the request, a server_receive, a
# server_send with the response, and a client_receive that asserts the status
# code and captures response cookies into variables.
#
# Entries without entry['musl'] or without a musl 'host' are skipped. Steps are
# numbered by a counter of emitted entries, while #build_headers receives the
# entry's index in the HAR entry list.
#
# @param steps    [Object] step builder (xport, comment, client_send,
#   server_receive, server_send, client_receive)
# @param scenario [Object] passed through to the payload builders
def build_steps steps, scenario
    entry_count = 0

    get_entries.each_with_index do |entry, i|
        next if entry.nil?

        m = entry['musl']
        next if m.nil? || m['host'].nil?

        req = entry['request']
        res = entry['response']

        # Create the xport options in the scenario
        xopts  = { 'src' => '&host_0', 'dst' => "&#{m['host']}" }
        xklass = m['url_object'].scheme === 'http' ? 'tcp' : 'ssl'
        xopts['dst_port'] = m['url']['port']
        xklass = 'tcp' if @options.skip_ssl_passthru && xklass == 'ssl'
        steps.xport("xport#{entry_count}", xklass, xopts)

        # client_send comments for each entry
        steps.comment(req["method"] + ' ' + m['url']['pathname'] + ' ' + req['httpVersion'])

        # main client_send lines for each entry
        steps.client_send("cs#{entry_count}", "xport#{entry_count}") do |cs|
            cs.line(req['method'] + ' ' + m['url']['pathname'] + m['url']['search'] + m['url']['hash'] + ' ' + req['httpVersion'])
            # For each header
            build_headers cs, req['headers'], req['cookies'], i

            # For building the form post params block
            if req.has_key?('postData')
                build_postdata cs, req, entry_count, scenario
            else
                cs.line('Content-Length: 0')
            end
            cs.line()

            # For building the client content payload block into the scenario
            if !@options.ignore_payload && req.has_key?('content')
                build_payload(cs, entry_count, req, scenario)
            end
        end

        # The server side is always emitted; skipping it for the --endpoint
        # option is not enabled.
        steps.server_receive("sr#{entry_count}", "cs#{entry_count}") do ||
        end

        # For adding comment headers for the server side
        status_line = res['httpVersion'] + ' ' + "#{res['status']}" + ' ' + res['statusText']
        steps.comment(status_line)

        # Build server_send portion
        steps.server_send("ss#{entry_count}", "xport#{entry_count}") do |ss|
            ss.line(status_line)
            res['headers'].each do |header|
                ss.line(header['name'] + ': ' + header['value'])
            end
            ss.line()

            # For building the server content payload block into the scenario
            if !@options.ignore_payload && res.has_key?('content')
                build_payload ss, entry_count, res, scenario
            end
        end

        # With the endpoint option the client receives from its own 'cs' step
        # instead of the 'ss' server-send step
        receive_side = @options.endpoint ? 'cs' : 'ss'

        # Build client_receive portion
        steps.client_receive("cr#{entry_count}", "#{receive_side}#{entry_count}") do |cr|
            cr.assertions do |as|
                http_version = Regexp.escape(res['httpVersion']).gsub(/\//, "\\/")
                as.create("/#{http_version} (\\d+)/ == " + "#{res['status']}")
            end

            if res['cookies'] && res['cookies'].length > 0
                cr.variables do |vs|
                    seen = {}
                    res['cookies'].each do |cookie|
                        # Protect against empty cookie['name']
                        next if cookie['name'] == ''
                        # Characters such as | ( ) [ ] { } + \ ^ $ * ? . must be
                        # escaped inside the capture pattern. The escaped form
                        # is kept in a local so the HAR data (and later
                        # #find_cookie lookups by name) are left untouched.
                        escaped_name = Regexp.escape(cookie['name'])
                        musl_variable = '@' + escaped_name.gsub(/[^a-zA-Z0-9_]/, '_') + ' = ' + '/' + escaped_name + '=([^;]*)' + '/:1'
                        # Make sure a variable is not declared twice
                        next if seen.has_key?(musl_variable)
                        seen[musl_variable] = cookie
                        vs.create(musl_variable)
                    end
                end
            end
        end

        entry_count += 1
    end
end

#escape(input, escapes = nil) ⇒ Object

Takes input and a table that maps ascii codes to their representation



508
509
510
511
512
513
514
515
516
517
518
# File 'lib/mu/har.rb', line 508

# Translates +input+ byte by byte through a lookup table and concatenates the
# results.
#
# @param input   [String] text to escape
# @param escapes [Array<String>, nil] table indexed by byte value; ESCAPES is
#   used when nil (or false) is given
# @return [String] the escaped text
def escape input, escapes=nil
    table = escapes || ESCAPES
    input.each_byte.map { |byte| table[byte] }.join
end

#find_cookie(name, step) ⇒ Object

Finds a specific cookie from the response cookies



170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/mu/har.rb', line 170

# Searches the responses of the entries before +step+, nearest first, for a
# cookie with the given name.
#
# @param name [String] cookie name to look for
# @param step [Integer] index of the current entry in @entries
# @return [Integer, nil] index of the closest earlier entry whose response set
#   that cookie, or nil when there is none (always nil for step 0)
def find_cookie name, step
    return if step == 0

    (step - 1).downto(0) do |idx|
        jar = @entries[idx]['response']['cookies']
        next unless jar && jar.length > 0

        return idx if jar.any? { |cookie| !cookie.nil? && cookie['name'] === name }
    end

    nil
end

#generate(ios) ⇒ Object



73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/mu/har.rb', line 73

# Builds the scenario through MuSL::Maker: hosts are declared first via
# #build_hosts, then the steps via #build_steps.
#
# @param ios [Object] not used by this method
# @return [Object] whatever MuSL::Maker.create returns
def generate ios
    MuSL::Maker.create do |scenario|
        # Hosts have to be declared before the steps that reference them
        scenario.hosts { |host| self.build_hosts host }
        scenario.steps { |step| self.build_steps step, scenario }
    end
end

#get_entries ⇒ Object

Return a list of entries from the har file



69
70
71
# File 'lib/mu/har.rb', line 69

# Caches the HAR entry list (@har['log']['entries']) in @entries and returns it.
def get_entries
    log = @har['log']
    @entries = log['entries']
end

#get_hosts(ignores, options) ⇒ Object

Return a list of hosts from the har file



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/mu/har.rb', line 88

# Walks the HAR entries, annotates each one with entry['musl'] (parsed URL
# pieces and the scenario host label), and collects the distinct hosts.
#
# Each new Host header value gets the next label 'host_1', 'host_2', ...
# ('host_0' is declared for the browser by #build_hosts). Entries whose
# response mime type matches one of +ignores+ are skipped entirely when
# options.ignore is set; entries without a Host header get a 'musl' hash
# without a 'host' key.
#
# @param ignores [Array<String>] regex fragments, joined with '|'
# @param options [#ignore] option object; +ignore+ enables the mime-type filter
# @return [Hash{String=>String}] @hosts, mapping Host header value => label
# @raise [StandardError] any failure is printed (message and backtrace) and
#   re-raised unchanged
def get_hosts ignores, options
    # Compiled per call: the previous /o literal kept the first pattern it
    # ever saw for the lifetime of the process.
    ignore_pattern = options.ignore ? Regexp.new(ignores.join('|')) : nil

    begin
        hosts_seen = {}
        host_count = 0

        @har["log"]["entries"].each do |entry|
            # Check our command line exclusions (css, js, images, ...)
            if ignore_pattern
                next if entry['response']['content']['mimeType'] =~ ignore_pattern
            end

            m        = entry["musl"] = {}
            m["url"] = {}
            raw_url  = entry["request"]["url"]

            # Some recorded URLs do not parse, e.g. ones with {} in the query:
            # http://online.wsj.com/api-video/get_video_info.asp?guid={65EDE7D2-9261-4C47-BB01-DB841CBA209A}&fields=all
            # In that rare case the URL is escaped for parsing and the query
            # and fragment are unescaped again afterwards.
            begin
                uri      = URI.parse(raw_url)
                search   = uri.query    || ''
                fragment = uri.fragment || ''
            rescue StandardError
                # URI.escape/URI.unescape no longer exist on current Ruby; the
                # RFC 2396 parser still provides the same escaping rules.
                legacy   = URI::RFC2396_Parser.new
                uri      = URI.parse(legacy.escape(raw_url))
                search   = legacy.unescape(uri.query    || '')
                fragment = legacy.unescape(uri.fragment || '')
            end

            path = uri.path
            m["url_object"]      = uri
            # URI#query has no leading '?', so add it back when there is a query
            m["url"]["search"]   = search == '' ? '' : "?" + search
            m["url"]["hash"]     = fragment
            m['url']['port']     = uri.port || (uri.scheme === 'http' ? 80 : 443)
            # A URL without a path still needs '/' as its request target
            m['url']['pathname'] = (path.nil? || path.empty?) ? '/' : path

            # The first Host request header names the server for this entry
            host_header = entry['request']['headers'].find do |header|
                header['name'].downcase === 'host'
            end
            host = host_header && host_header['value']
            # No usable Host header: leave the entry without a musl 'host'
            next if host.nil?

            # Number hosts in order of first appearance, starting at 1
            unless hosts_seen.has_key?(host)
                host_count += 1
                hosts_seen[host] = host_count
            end

            label        = 'host_' + hosts_seen[host].to_s
            m[host]      = label
            @hosts[host] = label
            m['host']    = label

            # TODO: record which entry sets each response cookie so that
            # cookies can be looked up without rescanning earlier entries.
        end
    rescue StandardError => e
        puts e.message
        puts e.backtrace
        raise
    end

    return @hosts
end

#text_body? ⇒ Boolean

TO-DO: Implement this check

Returns:

  • (Boolean)


482
483
484
485
486
# File 'lib/mu/har.rb', line 482

# True when @content_type is one of HTTP_TEXT_CONTENT_TYPES and neither
# @content_transfer_encoding nor @mu_content_transfer_encoding is 'binary'.
# NOTE(review): none of the methods shown here assign these instance variables
# (the summary marks this check as still to be implemented).
#
# @return [Boolean]
def text_body?
    HTTP_TEXT_CONTENT_TYPES.include?(@content_type) &&
        @content_transfer_encoding.to_s != 'binary' &&
        @mu_content_transfer_encoding.to_s != 'binary'
end