Module: Iudex::Worker::FetchHelper

Includes:
Core::Filters, HTTP
Included in:
FilterChainFactory
Defined in:
lib/iudex-worker/fetch_helper.rb

Instance Method Summary collapse

Instance Method Details

#accept_header(types) ⇒ Object



121
122
123
124
125
126
127
128
129
# File 'lib/iudex-worker/fetch_helper.rb', line 121

# Build an HTTP Accept header value from grouped MIME types.
#
# types:: Array of Arrays of MIME type Strings, with the groups
#         ordered from most to least preferred.
#
# The first group is emitted without a q parameter (implicit
# q=1.0). Each following group is tagged with a q value 0.1 lower
# than the group before it, never dropping below 0.1. Returns the
# comma-joined header value as a String.
def accept_header( types )
  ranked = types.each_with_index.map do |tgrp, idx|
    next tgrp if idx.zero?

    # Work in integer tenths: repeatedly subtracting 0.1 from a Float
    # drifts (e.g. 0.7000000000000001), which is not a valid qvalue.
    # Floor at 0.1 because q=0 would mean "not acceptable".
    tenths = [ 10 - idx, 1 ].max
    tgrp.map { |m| "#{m};q=0.#{tenths}" }
  end
  ranked.flatten.join( ',' )
end

#accept_list(types) ⇒ Object



131
132
133
# File 'lib/iudex-worker/fetch_helper.rb', line 131

# Collapse the grouped MIME types into one flat list, discarding the
# preference grouping. Returns the result of types.flatten.
def accept_list( types )
  flat = types.flatten
  flat
end

#call_if(v, *args) ⇒ Object



135
136
137
138
139
140
141
142
143
# File 'lib/iudex-worker/fetch_helper.rb', line 135

# Resolve an option value that may have been given indirectly.
#
# v::    A Proc or Method (invoked with args), a Symbol (sent to self
#        with args), or any other object (returned unchanged).
# args:: Arguments forwarded to the Proc, Method, or named method.
def call_if( v, *args )
  case v
  when Proc, Method then v.call( *args )
  when Symbol       then send( v, *args )
  else v
  end
end

#create_content_fetcher(*args) ⇒ Object

Create a ContentFetcher including a filter chain to receive the fetch result.

Options

Options support literal values, or a Proc, Method, or a Symbol to self send unless otherwise noted.

:types

An Array or table of MIME types, used to build the Accept header in the default :request_headers and to restrict which results are accepted. (Default: #page_mime_types)

:client

The Java::iudex.http.HTTPClient implementation to use (Default: :http_client)

:user_agent

The HTTP User-Agent for the default :request_headers. A Proc, Method, or Symbol given here is called without arguments. (Default: #http_user_agent)

:visit_counter

The Java::iudex.core.VisitCounter implementation. (Default: :visit_counter)

:executor

The java.util.concurrent.Executor to use for running the receiver filter chain. (Default: :executor)

:request_headers

HTTP Request headers as Array<iudex.http.Header>. A Proc, Method, or Symbol given here receives the options Hash as its parameter. (Default: #http_request_headers)

All options (including the required :filters option, and :listener with default :main) are also passed to self.create_chain for creating the receiver filter chain

The positional parameters equivalent to ( :types, :filters, :listener ) as defined above are also supported, but deprecated.



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/iudex-worker/fetch_helper.rb', line 62

# Create a ContentFetcher, including a filter chain to receive the
# fetch result.
#
# args:: An optional trailing options Hash, optionally preceded by
#        the deprecated positional parameters
#        ( types, filters, listener ).
#
# Option values are resolved through call_if. Options that are not
# given fall back to these defaults: :types => :page_mime_types,
# :listener => :main, :client => :http_client,
# :user_agent => :http_user_agent, :visit_counter => :visit_counter,
# :executor => :executor, :request_headers => :http_request_headers.
# The complete options Hash is also passed to create_chain.
#
# Returns the configured ContentFetcher.
def create_content_fetcher( *args )
  opts = args.last.is_a?( Hash ) ? args.pop.dup : {}

  opts[ :types ]    ||= args.shift
  opts[ :filters  ] ||= args.shift
  opts[ :listener ] ||= args.shift

  # With no positional argument, the assignments above store an
  # explicit nil. Hash#merge would let that nil override the default,
  # so drop nil placeholders for the keys that have one.
  [ :types, :listener ].each do |key|
    opts.delete( key ) if opts[ key ].nil?
  end

  opts = { :types           => :page_mime_types,
           :listener        => :main,
           :client          => :http_client,
           :user_agent      => :http_user_agent,
           :visit_counter   => :visit_counter,
           :executor        => :executor,
           :request_headers => :http_request_headers
         }.merge( opts )

  cf = ContentFetcher.new( call_if( opts[ :client ] ),
                           call_if( opts[ :visit_counter ] ),
                           create_chain( opts ) )

  cf.executor = call_if( opts[ :executor ] )

  # A '*/*' entry accepts everything, so only restrict the accepted
  # content types when it is absent.
  alist = accept_list( call_if( opts[ :types ] ) )
  unless alist.include?( '*/*' )
    cf.accepted_content_types = ContentTypeSet.new( alist )
  end

  # The request headers callable receives the full options Hash.
  cf.request_headers = call_if( opts[ :request_headers ], opts )
  cf
end

#feed_mime_types ⇒ Object



105
106
107
108
109
110
111
112
113
# File 'lib/iudex-worker/fetch_helper.rb', line 105

# MIME types accepted for feeds, as an Array of groups. The groups
# are listed in descending order of preference.
def feed_mime_types
  [
    [ 'application/atom+xml', 'application/rss+xml' ],
    [ 'application/rdf+xml',  'application/xml' ],
    [ 'text/xml' ],
    [ 'text/*' ],
    [ '*/*' ]
  ]
end

#http_request_headers(opts) ⇒ Object



93
94
95
96
97
# File 'lib/iudex-worker/fetch_helper.rb', line 93

# Build the default request headers from the options Hash.
#
# opts:: Options Hash. :user_agent and :types are read from it and
#        resolved through call_if.
#
# Returns an Array of two Header instances: User-Agent, then Accept
# (the latter built by accept_header from the resolved types).
def http_request_headers( opts )
  agent  = call_if( opts[ :user_agent ] )
  accept = accept_header( call_if( opts[ :types ] ) )

  [ Header.new( 'User-Agent', agent ),
    Header.new( 'Accept',     accept ) ]
end

#http_user_agent ⇒ Object



99
100
101
102
103
# File 'lib/iudex-worker/fetch_helper.rb', line 99

# The default HTTP User-Agent String, which embeds
# Iudex::Worker::VERSION and the project URL.
def http_user_agent
  "Mozilla/5.0 (compatible; Iudex #{Iudex::Worker::VERSION}; +http://gravitext.com/iudex)"
end

#page_mime_types ⇒ Object



115
116
117
118
119
# File 'lib/iudex-worker/fetch_helper.rb', line 115

# MIME types accepted for pages, as an Array of groups. accept_header
# gives earlier groups a higher q value than later ones.
def page_mime_types
  [
    [ 'application/xhtml+xml', 'text/html' ],
    [ 'application/xml' ],
    [ 'text/*' ]
  ]
end