Class: Glib::JsonCrawler::Router

Inherits:
Object
  • Object
show all
Defined in:
lib/glib/json_crawler/router.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Router

Returns a new instance of Router.



44
45
46
47
48
49
50
51
52
53
# File 'lib/glib/json_crawler/router.rb', line 44

# Builds a router with empty crawl state: no pages on the stacks, an empty
# log, no recorded read-only actions and a fresh traversal visitor.
def initialize
  # Parallel stacks of the specs/URLs of the pages currently being visited.
  @page_urls = []
  @page_specs = []

  # Accumulated log text and the nesting level used to indent it.
  @logger = ''
  @depth = -1

  @read_only_actions = Set.new
  @visitor = Glib::Json::Traversal::Visitor.new(crawler_test: true)

  # Keep a host that was assigned before this ran; otherwise fall back to the
  # default Rails development host.
  @host ||= 'localhost:3000'
end

Instance Attribute Details

#deferred_actions ⇒ Object (readonly)

Returns the value of attribute deferred_actions.



4
5
6
# File 'lib/glib/json_crawler/router.rb', line 4

# Read-only accessor for the @deferred_actions instance variable.
#
# NOTE(review): the constructor shown for this class does not assign
# @deferred_actions, so this may return nil — confirm where it is set.
#
# @return [Object] the current value of @deferred_actions
def deferred_actions
  @deferred_actions
end

#host ⇒ Object

Returns the value of attribute host.



5
6
7
# File 'lib/glib/json_crawler/router.rb', line 5

# Accessor for the host that crawled URLs are matched against.
# The constructor defaults it to 'localhost:3000' when nothing was assigned.
#
# @return [Object] the current value of @host
def host
  @host
end

#last_log ⇒ Object (readonly)

Returns the value of attribute last_log.



4
5
6
# File 'lib/glib/json_crawler/router.rb', line 4

# Read-only accessor for the most recent log line built by #log
# (without indentation or trailing newline).
#
# @return [String, nil] the current value of @last_log; nil until #log has run
def last_log
  @last_log
end

#logger ⇒ Object (readonly)

Returns the value of attribute logger.



4
5
6
# File 'lib/glib/json_crawler/router.rb', line 4

# Read-only accessor for the accumulated log text. Despite the name this is a
# String: the constructor sets it to '' and #log appends one line per call.
#
# @return [String] the current value of @logger
def logger
  @logger
end

#read_only_actions ⇒ Object (readonly)

Returns the value of attribute read_only_actions.



4
5
6
# File 'lib/glib/json_crawler/router.rb', line 4

# Read-only accessor for the collection of recorded actions. The constructor
# initializes it to an empty Set and #process_action adds [action, url] pairs.
#
# @return [Set] the current value of @read_only_actions
def read_only_actions
  @read_only_actions
end

Instance Method Details

#_puts(text) ⇒ Object



19
20
21
# File 'lib/glib/json_crawler/router.rb', line 19

# Prints +text+ to standard output, indented by two spaces per nesting level.
#
# @param text [#to_s] the message to print
# @return [nil] the result of Kernel#puts
def _puts(text)
  # @depth starts at -1 and String#* raises ArgumentError for a negative
  # count, so a negative depth is treated as "no indentation".
  indent = '  ' * [@depth.to_i, 0].max
  puts "#{indent}#{text}"
end

#allowed?(url) ⇒ Boolean

Returns:

  • (Boolean)


172
173
174
175
# File 'lib/glib/json_crawler/router.rb', line 172

# Tells whether +url+ may be crawled: it must contain the configured host
# followed by at least one more character, and must not end in ".pdf".
#
# @param url [String, nil] the candidate URL
# @return [Boolean] true when the URL is on the host and is not a PDF link
def allowed?(url)
  # The host is escaped so that characters such as "." only match themselves.
  # A regexp literal is used because inside a double-quoted string "\." turns
  # into a bare ".", which made the lookbehind reject any URL ending in
  # "<any character>pdf" (for example ".../mypdf").
  pattern = /#{Regexp.escape(host.to_s)}.+(?<!\.pdf)$/
  pattern.match?(url)
end

#assert_target_ids_exist(args) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
# File 'lib/glib/json_crawler/router.rb', line 23

# Registers the target id(s) named in +args+ with the visitor so that their
# presence within the page can be checked later.
#
# 'targetIds' takes precedence; 'targetId' is only consulted when 'targetIds'
# is missing or falsy. Ids are deferred without an associated action crawler.
#
# @param args [Hash] action parameters, optionally holding 'targetIds' or 'targetId'
def assert_target_ids_exist(args)
  ids = args['targetIds']

  if ids
    ids.each { |id| @visitor.defer_action(nil, id) }
  elsif (single_id = args['targetId'])
    @visitor.defer_action(nil, single_id)
  end
end

#begin_page(spec, url) ⇒ Object



152
153
154
155
156
# File 'lib/glib/json_crawler/router.rb', line 152

# Marks the start of a page: pushes its spec and URL onto the page stacks and
# notifies the visitor.
#
# @param spec [Object] the page spec being entered
# @param url [Object] the URL the page was loaded from
# @return [Object] whatever the visitor's begin_page returns
def begin_page(spec, url)
  @page_specs.push(spec)
  @page_urls.push(url)
  @visitor.begin_page(spec)
end

#crawl_multiple(views, block) ⇒ Object



148
149
150
# File 'lib/glib/json_crawler/router.rb', line 148

# Hands +views+ and +block+ to the visitor's traverse_multiple.
#
# @param views [Object] the views to traverse
# @param block [Object] passed through as a regular argument (not as a Ruby block)
# @return [Object] whatever traverse_multiple returns
def crawl_multiple(views, block)
  @visitor.traverse_multiple(views, block)
end

#end_page(spec) ⇒ Object



158
159
160
161
162
# File 'lib/glib/json_crawler/router.rb', line 158

# Marks the end of a page: drops the top entry from both page stacks and
# notifies the visitor.
#
# @param spec [Object] the page spec being left
# @return [Object] whatever the visitor's end_page returns
def end_page(spec)
  [@page_specs, @page_urls].each(&:pop)
  @visitor.end_page(spec)
end

#follow(http, target_routers) ⇒ Object



131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/glib/json_crawler/router.rb', line 131

# Re-requests every read-only action recorded by the given router(s).
#
# The actions of all routers are merged into one Set (duplicates collapse),
# the nesting depth is raised by one, and each [action, url] pair is fetched
# through +http+. The depth is not lowered again afterwards.
#
# @param http [#get] client used to perform the requests
# @param target_routers [Object, Array] one router or a list of routers, each
#   responding to read_only_actions
# @return [Set] the merged set of actions that were requested
def follow(http, target_routers)
  routers = target_routers.is_a?(Array) ? target_routers : [target_routers]
  target_actions = routers.reduce(Set.new) do |merged, router|
    merged + router.read_only_actions
  end

  @depth += 1
  target_actions.each { |action, url| http.get(url, action, {}, false) }
end

#last_form ⇒ Object



127
128
129
# File 'lib/glib/json_crawler/router.rb', line 127

# Returns the last entry of the collection exposed by the visitor's #forms.
#
# @return [Object] the result of @visitor.forms.last
def last_form
  @visitor.forms.last
end

#log(action, url, response = nil) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
# File 'lib/glib/json_crawler/router.rb', line 7

# Records one crawl step in the in-memory log.
#
# Builds "action :: code :: url" (the code is left out when no response is
# given), stores it as the last log line, and appends it to the accumulated
# log text indented by two spaces per nesting level.
#
# @param action [Object] name of the action that was performed
# @param url [Object] URL the action targeted; omitted from the line when nil
# @param response [#code, nil] response whose code is included when present
# @return [String] the accumulated log text after appending
def log(action, url, response = nil)
  status = response.present? ? response.code : nil
  @last_log = [action, status, url].compact.join(' :: ')

  # puts @last_log

  # @depth starts at -1 and String#* raises ArgumentError for a negative
  # count, so a negative depth is treated as "no indentation".
  indent = '  ' * [@depth.to_i, 0].max
  @logger += "#{indent}#{@last_log}\n"
end

#page_spec ⇒ Object



164
165
166
# File 'lib/glib/json_crawler/router.rb', line 164

# Returns the spec of the page currently being visited, i.e. the entry most
# recently pushed by #begin_page and not yet popped by #end_page.
#
# @return [Object, nil] the top of @page_specs, or nil when it is empty
def page_spec
  @page_specs.last
end

#page_url ⇒ Object



168
169
170
# File 'lib/glib/json_crawler/router.rb', line 168

# Returns the URL of the page currently being visited, i.e. the entry most
# recently pushed by #begin_page and not yet popped by #end_page.
#
# @return [Object, nil] the top of @page_urls, or nil when it is empty
def page_url
  @page_urls.last
end

#process_action(http, spec) ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/glib/json_crawler/router.rb', line 76

# Dispatches a single action spec to the matching crawler.
#
# Target ids named by the spec are registered first. Then, depending on the
# action name, the corresponding JsonCrawler handler is instantiated, or the
# action is only logged. Navigation, allowed window/dialog opens and unknown
# actions (except delete/oauth) are remembered as [action, url] pairs in
# @read_only_actions. The nesting depth is raised by one while dispatching.
#
# @param http [Object] client handed to the instantiated crawlers
# @param spec [Hash, nil] action parameters; must contain an 'action' key
# @return [Integer, nil] the restored depth, or nil when nothing was dispatched
# @raise [KeyError] when +spec+ has no 'action' key
def process_action(http, spec)
  # A missing spec carries neither target ids nor an action. Without this
  # guard the nil would reach assert_target_ids_exist and raise NoMethodError.
  return if spec.nil?

  action = spec.fetch('action')
  params = spec

  assert_target_ids_exist(params)

  return unless action.present?

  @depth += 1
  case action
  when 'initiate_navigation'
    @read_only_actions.add([action, params['url']])
    JsonCrawler::NavInitiate.new(http, params, action)
  when 'runMultiple-v1', 'runMultiple'
    JsonCrawler::RunMultiple.new(http, params, action)
  when 'windows/open-v1', 'dialogs/open-v1', 'windows/reload-v1', 'windows/open',
       'dialogs/open', 'windows/reload', 'windows/openWeb', 'windows/openWeb-v1'
    if allowed?(params['url'])
      @read_only_actions.add([action, params['url']])
      JsonCrawler::WindowsOpen.new(http, params, action)
    else
      # URLs rejected by allowed? are logged but not followed.
      log(action, params['url'])
    end
  when 'dialogs/show-v1', 'dialogs/show', 'popovers/show-v1', 'popovers/show'
    JsonCrawler::DialogsShow.new(http, params, action)
  when 'sheets/select-v1', 'sheets/select'
    JsonCrawler::Menu.new(http, params, action)
  when 'http/post-v1', 'http/post'
    JsonCrawler::ActionHttp.new(:post, http, params, action)
  when 'forms/submit-v1', 'forms/submit'
    # forms = @visitor.forms
    # JsonCrawler::FormsSubmit.new(http, params, forms.last)
    JsonCrawler::FormsSubmit.new(http, params)
  when 'dialogs/alert-v1', 'dialogs/alert'
    JsonCrawler::DialogsAlert.new(http, params, action)
  when 'dialogs/close-v1', 'dialogs/close', 'popovers/close', 'popovers/close-v1'
    JsonCrawler::DialogsClose.new(http, params, action)
  else
    # Unknown actions are recorded for later replay, except delete and oauth.
    unless %w[http/delete-v1 dialogs/oauth-v1 http/delete dialogs/oauth].include?(action)
      @read_only_actions.add([action, params['url']])
    end
    log(action, params['url'])
  end
  @depth -= 1
end

#should_defer_crawl?(action_crawler, args) ⇒ Boolean

Returns:

  • (Boolean)


35
36
37
38
39
40
41
42
# File 'lib/glib/json_crawler/router.rb', line 35

# Defers +action_crawler+ when the action names a 'targetId'.
#
# When +args+ has a truthy 'targetId', the crawler is registered with the
# visitor under that id and true is returned; otherwise nothing is registered.
#
# @param action_crawler [Object] the crawler to defer
# @param args [Hash] action parameters, optionally holding 'targetId'
# @return [Boolean] true when the crawl was deferred, false otherwise
def should_defer_crawl?(action_crawler, args)
  target_id = args['targetId']
  return false unless target_id

  @visitor.defer_action(action_crawler, target_id)
  true
end

#step(http, args) ⇒ Object



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/glib/json_crawler/router.rb', line 55

# Processes one node encountered while crawling.
#
# A 'fields/submit' view is handed to JsonCrawler::FormsSubmit and the method
# returns immediately. Otherwise, for a Hash that is not marked
# rel="nofollow", its 'onClick' action (if any) is processed. Finally the
# recorded read-only actions are re-ordered by URL.
#
# @param http [Object] client handed on to the crawlers
# @param args [Hash, Object] the node; non-Hash values carry nothing to follow
# @return [Set, nil] the re-sorted read-only actions, or nil after a form submit
def step(http, args)
  # TODO: Refactor
  # Check the type before indexing: args['view'] raises for arrays and nil,
  # which previously happened before the Hash guard below could apply.
  hash_args = args.is_a?(Hash)
  view = hash_args ? args['view'] : nil

  case view
  when 'fields/submit-v1', 'fields/submit'
    @depth += 1
    # forms = @visitor.forms
    # JsonCrawler::FormsSubmit.new(http, args, forms.last)
    JsonCrawler::FormsSubmit.new(http, args)
    @depth -= 1
    return
  end

  if hash_args && args['rel'] != 'nofollow'
    on_click = args.fetch('onClick', nil)
    process_action(http, on_click) if on_click
  end

  # Entries are [action, url] pairs; keep them ordered by URL.
  @read_only_actions.replace(@read_only_actions.sort_by { |entry| entry[1].to_s })
end