Class: Ronin::URL

Inherits:
Object
  • Object
show all
Includes:
DataMapper::Timestamps, Model, Model::Importable
Defined in:
lib/ronin/url.rb

Overview

Represents URLs that can be stored in the Database.

Constant Summary collapse

SCHEMES =

Mapping of URL scheme names to their URI classes.

{
  'https' => ::URI::HTTPS,
  'http' => ::URI::HTTP,
  'ftp' => ::URI::FTP
}

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Model::Importable

included

Methods included from Model

included

Class Method Details

.[](url) ⇒ URL?

Searches for a URL.

Parameters:

  • url (URI::HTTP, String)

    The URL to search for.

Returns:

  • (URL, nil)

    The matching URL.

Since:

  • 1.0.0



264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# File 'lib/ronin/url.rb', line 264

#
# Looks up the first stored URL matching the given URL.
#
# @param [URI::HTTP, String, Integer] url
#   The URL to search for. An Integer is passed straight through to
#   `super`.
#
# @return [URL, nil]
#   The first result of the assembled query.
#
def self.[](url)
  if url.kind_of?(Integer)
    return super(url)
  end

  # anything that is not already a URI is parsed from its String form
  uri = if url.kind_of?(::URI) then url
        else                        ::URI.parse(url.to_s)
        end

  # base conditions: scheme, host name, normalized path and fragment
  matches = all(
    'scheme.name' => uri.scheme,
    'host_name.address' => uri.host,
    :path => normalized_path(uri),
    :fragment => uri.fragment
  )

  # narrow by port number, when the URI has one
  matches = matches.all('port.number' => uri.port) if uri.port

  if uri.query
    # narrow the query once per name/value pair of the query string
    matches = URI::QueryParams.parse(uri.query).inject(matches) do |scope,(name,value)|
      scope.all(
        'query_params.name.name' => name,
        'query_params.value' => value
      )
    end
  end

  matches.first
end

.directory(sub_dir) ⇒ Array<URL>

Searches for all URLs sharing a common sub-directory.

Parameters:

  • sub_dir (String)

    The sub-directory to search for.

Returns:

  • (Array<URL>)

    The URLs with the common sub-directory.

Since:

  • 1.0.0



196
197
198
# File 'lib/ronin/url.rb', line 196

#
# Finds every URL whose path is the given sub-directory, or lies
# beneath it.
#
# @param [String] sub_dir
#   The sub-directory to search for.
#
# @return [Array<URL>]
#   The union of the exact-path matches and the `sub_dir/%` matches.
#
def self.directory(sub_dir)
  # URLs pointing at the directory itself
  exact = all(:path => sub_dir)
  # URLs pointing at anything underneath the directory
  nested = all(:path.like => "#{sub_dir}/%")

  exact | nested
end

.extension(ext) ⇒ Array<URL>

Searches for all URLs with a common file-extension.

Parameters:

  • ext (String)

    The file extension to search for.

Returns:

  • (Array<URL>)

    The URLs with the common file-extension.

Since:

  • 1.0.0



213
214
215
# File 'lib/ronin/url.rb', line 213

#
# Searches for all URLs whose path ends with the given file extension.
#
# @param [String] ext
#   The file extension to search for, without the leading dot.
#
# @return [Array<URL>]
#   The URLs with the common file-extension.
#
def self.extension(ext)
  # The pattern contains a `%` wildcard, so it has to go through the
  # `like` operator (as the sub-directory search does); a plain `:path`
  # condition would only match a path literally equal to "%.ext".
  all(:path.like => "%.#{ext}")
end

.extract(text) {|url| ... } ⇒ Array<URL>

Extracts URLs from the given text.

Parameters:

  • text (String)

    The text to parse.

Yields:

  • (url)

    The given block will be passed each extracted URL.

Yield Parameters:

  • url (URL)

    An extracted URL from the text.

Returns:

  • (Array<URL>)

    If no block is given, an Array of the extracted URLs is returned.

See Also:

Since:

  • 1.3.0



104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/ronin/url.rb', line 104

#
# Extracts URLs from the given text.
#
# @param [String] text
#   The text to scan.
#
# @yield [url]
#   Each extracted URL is passed to the given block.
#
# @yieldparam [URL] url
#   The result of {from} for one extracted URI.
#
# @return [Array<URL>, nil]
#   Without a block, an Array of the extracted URLs; with a block, `nil`.
#
def self.extract(text)
  unless block_given?
    # re-enter this method through an Enumerator and collect the yields
    return enum_for(:extract,text).to_a
  end

  ::URI.extract(text) do |candidate|
    parsed = begin
               ::URI.parse(candidate)
             rescue URI::InvalidURIError
               # URI.extract can find URIs that URI.parse cannot handle
               nil
             end

    # skip the candidates that failed to parse
    next unless parsed

    yield from(parsed)
  end

  nil
end

.from(uri) ⇒ URL

Creates a new URL.

Parameters:

  • uri (URI::HTTP)

    The URI to create the URL from.

Returns:

  • (URL)

    The new URL.

Since:

  • 1.0.0



309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# File 'lib/ronin/url.rb', line 309

#
# Finds or builds the URL record corresponding to a URI object.
#
# @param [URI::HTTP] uri
#   The URI to create the URL from.
#
# @return [URL]
#   The result of `first_or_new` for the attributes taken from the URI.
#
def self.from(uri)
  # look up (or build) the records the URL refers to
  scheme    = URLScheme.first_or_new(:name => uri.scheme)
  host_name = HostName.first_or_new(:address => uri.host)
  port      = uri.port ? TCPPort.first_or_new(:number => uri.port) : nil

  path     = normalized_path(uri)
  fragment = uri.fragment

  # query params are only read from URIs that expose `query_params`
  query_params = if uri.respond_to?(:query_params)
                   uri.query_params.map do |name,value|
                     {
                       :name => URLQueryParamName.first_or_new(:name => name),
                       :value => value
                     }
                   end
                 else
                   []
                 end

  first_or_new(
    :scheme => scheme,
    :host_name => host_name,
    :port => port,
    :path => path,
    :fragment => fragment,
    :query_params => query_params
  )
end

.hosts(names) ⇒ Array<URL>

Searches for URLs with specific host name(s).

Parameters:

  • names (Array<String>, String)

    The host name(s) to search for.

Returns:

  • (Array<URL>)

    The matching URLs.

Since:

  • 1.0.0



162
163
164
# File 'lib/ronin/url.rb', line 162

#
# Searches for URLs with specific host name(s).
#
# @param [Array<String>, String] names
#   The host name(s) to search for.
#
# @return [Array<URL>]
#   The matching URLs.
#
def self.hosts(names)
  # The relation is called `host_name` (see the lookup in `[]` and the
  # attributes set by `from`); `host` is only an instance reader that
  # returns the address String, so it cannot be used as a query path.
  all('host_name.address' => names)
end

.http ⇒ Array<URL>

Searches for all URLs using HTTP.

Returns:

  • (Array<URL>)

    The matching URLs.

Since:

  • 1.0.0



131
132
133
# File 'lib/ronin/url.rb', line 131

#
# Searches for all URLs whose scheme name is `http`.
#
# @return [Array<URL>]
#   The matching URLs.
#
def self.http
  conditions = {'scheme.name' => 'http'}

  all(conditions)
end

.https ⇒ Array<URL>

Searches for all URLs using HTTPS.

Returns:

  • (Array<URL>)

    The matching URLs.

Since:

  • 1.0.0



145
146
147
# File 'lib/ronin/url.rb', line 145

#
# Searches for all URLs whose scheme name is `https`.
#
# @return [Array<URL>]
#   The matching URLs.
#
def self.https
  conditions = {'scheme.name' => 'https'}

  all(conditions)
end

.normalized_path(uri) ⇒ String? (protected)

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.

Normalizes the path of a URI.

Parameters:

  • uri (URI)

    The URI containing the path.

Returns:

  • (String, nil)

    The normalized path.

Since:

  • 1.0.0



501
502
503
504
505
506
507
508
509
510
511
512
513
# File 'lib/ronin/url.rb', line 501

#
# Normalizes the path of a URI.
#
# @param [URI] uri
#   The URI containing the path.
#
# @return [String, nil]
#   `'/'` for an HTTP (or HTTPS) URI with an empty path, otherwise the
#   URI's own path.
#
def self.normalized_path(uri)
  # only HTTP-family URIs get their empty path mapped to '/'
  http_without_path = uri.kind_of?(URI::HTTP) && uri.path.empty?

  http_without_path ? '/' : uri.path
end

.parse(url) ⇒ URL

Parses the URL.

Parameters:

  • url (String)

    The raw URL to parse.

Returns:

  • (URL)

    The parsed URL.

See Also:

Since:

  • 1.0.0



358
359
360
# File 'lib/ronin/url.rb', line 358

#
# Parses a raw URL and hands the resulting URI to {from}.
#
# @param [String] url
#   The raw URL to parse.
#
# @return [URL]
#   The URL returned by {from}.
#
def self.parse(url)
  uri = ::URI.parse(url)

  from(uri)
end

.ports(numbers) ⇒ Array<URL>

Searches for URLs with the specific port number(s).

Parameters:

  • numbers (Array<Integer>, Integer)

    The port numbers to search for.

Returns:

  • (Array<URL>)

    The matching URLs.

Since:

  • 1.0.0



179
180
181
# File 'lib/ronin/url.rb', line 179

#
# Searches for URLs with the specific port number(s).
#
# @param [Array<Integer>, Integer] numbers
#   The port numbers to search for.
#
# @return [Array<URL>]
#   The matching URLs.
#
def self.ports(numbers)
  conditions = {'port.number' => numbers}

  all(conditions)
end

.query_param(name) ⇒ Array<URL>

Searches for URLs with the given query param.

Parameters:

  • name (Array<String>, String)

    The query param name to search for.

Returns:

  • (Array<URL>)

    The URLs with the given query param.

Since:

  • 1.0.0



230
231
232
# File 'lib/ronin/url.rb', line 230

#
# Searches for URLs that have a query param with the given name(s).
#
# @param [Array<String>, String] name
#   The query param name to search for.
#
# @return [Array<URL>]
#   The URLs with the given query param.
#
def self.query_param(name)
  conditions = {'query_params.name.name' => name}

  all(conditions)
end

.query_value(value) ⇒ Array<URL>

Searches for all URLs with a given query param value.

Parameters:

  • value (Array<String>, String)

    The query param value to search for.

Returns:

  • (Array<URL>)

    The URLs with the given query param value.

Since:

  • 1.0.0



247
248
249
# File 'lib/ronin/url.rb', line 247

#
# Searches for all URLs that have a query param with the given value(s).
#
# @param [Array<String>, String] value
#   The query param value to search for.
#
# @return [Array<URL>]
#   The URLs with the given query param value.
#
def self.query_value(value)
  conditions = {'query_params.value' => value}

  all(conditions)
end

Instance Method Details

#host ⇒ String

The host name of the URL.

Returns:

  • (String)

    The address of the host name.

Since:

  • 1.0.0



372
373
374
# File 'lib/ronin/url.rb', line 372

#
# The host name of the URL.
#
# @return [String]
#   The address of the associated host name.
#
def host
  name_record = host_name

  name_record.address
end

#port_number ⇒ Integer?

The port number used by the URL.

Returns:

  • (Integer, nil)

    The port number.

Since:

  • 1.0.0



386
387
388
# File 'lib/ronin/url.rb', line 386

#
# The port number used by the URL.
#
# @return [Integer, nil]
#   The number of the associated port, or `nil` when the URL has no
#   port.
#
def port_number
  return nil unless self.port

  self.port.number
end

#query_string ⇒ String

Dumps the URL query params into a URI query string.

Returns:

  • (String)

    The URI query string.

Since:

  • 1.0.0



400
401
402
403
404
405
406
407
408
# File 'lib/ronin/url.rb', line 400

#
# Dumps the URL query params into a URI query string.
#
# @return [String]
#   The result of `URI::QueryParams.dump` for the name => value Hash
#   built from the query params.
#
def query_string
  # when a name occurs more than once, the last value wins
  pairs = self.query_params.inject({}) do |hash,param|
    hash[param.name] = param.value
    hash
  end

  URI::QueryParams.dump(pairs)
end

#query_string=(query) ⇒ String

Sets the query params of the URL.

Parameters:

  • query (String)

    The query string to parse.

Returns:

  • (String)

    The given query string.

Since:

  • 1.0.0



423
424
425
426
427
428
429
430
431
432
433
434
# File 'lib/ronin/url.rb', line 423

#
# Replaces the query params of the URL with those parsed from a query
# string.
#
# @param [String] query
#   The query string to parse.
#
# @return [String]
#   The given query string.
#
def query_string=(query)
  # drop the existing params before adding the parsed ones
  self.query_params.clear

  parsed = URI::QueryParams.parse(query)

  parsed.each do |param_name,param_value|
    # find or build the name record for this param
    name_record = URLQueryParamName.first_or_new(:name => param_name)

    self.query_params.new(:name => name_record, :value => param_value)
  end

  query
end

#to_s ⇒ String

Converts the URL to a String.

Returns:

  • (String)

    The string form of the URL.

Since:

  • 1.0.0



482
483
484
# File 'lib/ronin/url.rb', line 482

#
# Converts the URL to a String.
#
# @return [String]
#   The String form of the URI built by {#to_uri}.
#
def to_s
  uri = to_uri

  uri.to_s
end

#to_uri ⇒ URI::HTTP, URI::HTTPS

Builds a URI object from the URL.

Returns:

  • (URI::HTTP, URI::HTTPS)

    The URI object created from the URL attributes.

Since:

  • 1.0.0



446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
# File 'lib/ronin/url.rb', line 446

#
# Builds a URI object from the URL.
#
# @return [URI::HTTP, URI::HTTPS]
#   The URI object created from the URL attributes. Schemes missing
#   from `SCHEMES` are built with `URI::Generic`.
#
def to_uri
  # pick the URI class for the scheme, falling back to URI::Generic
  url_class = SCHEMES.fetch(self.scheme.name,::URI::Generic)

  # host and port are optional; leave them nil when not set
  host = self.host_name && self.host_name.address
  port = self.port && self.port.number

  # only emit a query string when there are query params
  query = self.query_string unless self.query_params.empty?

  components = {
    :scheme => self.scheme.name,
    :host => host,
    :port => port,
    :path => self.path,
    :query => query,
    :fragment => self.fragment
  }

  url_class.build(components)
end