Class: Husc

Inherits:
Object
  • Object
show all
Defined in:
lib/husc.rb,
lib/husc/version.rb

Defined Under Namespace

Classes: CrawlArray, Error

Constant Summary collapse

VERSION =
"0.4.1"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url = nil, doc: nil, html: nil, user_agent: nil, request_headers: nil, timeout: 10) ⇒ Husc

Returns a new instance of Husc.



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/husc.rb', line 39

# Creates the Mechanize agent, applies the optional settings, and loads the
# initial content from the first source given (url, then doc, then html).
#
# @param url [String, nil] when non-nil, fetched through #get
# @param doc [Object, nil] when url is nil and doc is non-nil, stored as @doc;
#   must respond to #to_html (presumably a Nokogiri node -- confirm)
# @param html [Object, nil] used only when both url and doc are nil; passed to
#   update_params and stored as @html
# @param user_agent [String, nil] assigned to the agent when non-nil
# @param request_headers [Hash, nil] assigned to the agent when non-nil
# @param timeout [Numeric] assigned to the agent's read_timeout
# @return [Husc] a new instance of Husc
def initialize(url = nil, doc: nil, html: nil, user_agent: nil, request_headers: nil, timeout: 10)
  ## -----*----- Constructor -----*----- ##
  @agent = Mechanize.new
  @agent.keep_alive = false
  unless user_agent.nil?
    @agent.user_agent = user_agent
  end
  unless request_headers.nil?
    @agent.request_headers = request_headers
  end
  @agent.read_timeout = timeout

  if url.nil? && doc.nil?
    # Neither a URL nor a parsed node was given: fall back to the html argument.
    update_params(html)
    @html = html
  elsif url.nil?
    # A parsed node was given: wrap it directly.
    @html = doc.to_html
    @doc = doc
    table_to_hash
  else
    # A URL always wins over doc and html.
    get(url)
  end

  # Start with an empty queue of pending form parameters.
  @params = []
end

Instance Attribute Details

#code ⇒ Object (readonly)

Returns the value of attribute code.



12
13
14
# File 'lib/husc.rb', line 12

# Reader for @code, which #get assigns after a request.
#
# @return [Object] the current value of @code (nil if it was never assigned)
def code
  @code
end

#html ⇒ Object (readonly)

Returns the value of attribute html.



12
13
14
# File 'lib/husc.rb', line 12

# Reader for @html, which the constructor assigns from doc.to_html or from
# the html argument.
#
# @return [Object] the current value of @html (nil if it was never assigned)
def html
  @html
end

#params ⇒ Object (readonly)

Returns the value of attribute params.



12
13
14
# File 'lib/husc.rb', line 12

# Reader for @params, the list of pending form parameters: #send appends one
# Hash per call, and the constructor and #submit reset it to an empty Array.
#
# @return [Array<Hash>] the current value of @params
def params
  @params
end

#tables ⇒ Object (readonly)

Returns the value of attribute tables.



12
13
14
# File 'lib/husc.rb', line 12

# Reader for @tables.
#
# NOTE(review): no visible method assigns @tables; presumably table_to_hash
# populates it -- confirm against the rest of lib/husc.rb.
#
# @return [Object] the current value of @tables (nil if it was never assigned)
def tables
  @tables
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



12
13
14
# File 'lib/husc.rb', line 12

# Reader for @url, which #get sets to the URL it was asked to fetch.
#
# @return [Object] the current value of @url (nil if it was never assigned)
def url
  @url
end

Instance Method Details

#attr(name) ⇒ Object



198
199
200
201
202
203
204
205
206
# File 'lib/husc.rb', line 198

# Looks up an attribute on the wrapped node.
#
# @param name [Object] attribute name, passed straight to @doc.attr
# @return [Object] the attribute value, or '' when the lookup yields nil
def attr(name)
  ## -----*----- Fetch a node attribute -----*----- ##
  value = @doc.attr(name)
  value.nil? ? '' : value
end

#css(locator, single = false) ⇒ Object



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'lib/husc.rb', line 155

# Selects elements below the wrapped node with a CSS selector.
#
# Every match is wrapped in its own Husc instance (built with doc: node).
#
# @param locator [String] CSS selector passed to @doc.css
# @param single [Boolean] when truthy, return only the first match
# @return [CrawlArray, Husc] all matches as a CrawlArray; with single, the
#   first match, or an empty CrawlArray when nothing matched
def css(locator, single = false)
  ## -----*----- Fetch elements from the HTML by CSS selector -----*----- ##
  wrapped = @doc.css(locator).map { |node| Husc.new(doc: node) }
  matches = CrawlArray.new(wrapped)

  # Multiple nodes
  return matches unless single

  # Single node
  head = matches[0]
  head.nil? ? CrawlArray.new() : head
end

#get(url) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/husc.rb', line 61

# Navigates to +url+ and reloads the instance state from the response.
#
# On an HTTP error status (Mechanize::ResponseCodeError) the status code is
# recorded in @code and the error page carried by the exception is still
# processed. On a connection-level failure (Net::HTTP::Persistent::Error)
# the error is printed, @code is cleared, and nothing else is updated.
#
# @param url [String] address to fetch; stored in @url
# @return [Object, nil] whatever update_params returns, or nil when the
#   connection failed and there is no page to process
def get(url)
  ## -----*----- Page navigation -----*----- ##
  @url = url
  begin
    page = @agent.get(@url)
    @code = page.code
  rescue Mechanize::ResponseCodeError => e
    # The exception carries both the status code and the error page; keep
    # the code (not the body) in @code and carry on with the error page.
    page = e.page
    @code = e.response_code
  rescue Net::HTTP::Persistent::Error => e
    puts e
    # No response was received: drop any stale status and stop here rather
    # than calling methods on a nil page.
    @code = nil
    return nil
  end
  return nil if page.nil?

  update_params(page.content.toutf8)
end

#inner_html(shaping = true) ⇒ Object



180
181
182
183
184
185
186
187
# File 'lib/husc.rb', line 180

# Returns the markup inside the wrapped node.
#
# @param shaping [Boolean] when truthy, the markup is passed through
#   shaping_string before being returned
# @return [Object] @doc.inner_html, optionally processed by shaping_string
def inner_html(shaping = true)
  ## -----*----- Fetch the HTML inside the tag -----*----- ##
  markup = @doc.inner_html
  shaping ? shaping_string(markup) : markup
end

#inner_text(shaping = true) ⇒ Object



171
172
173
174
175
176
177
178
# File 'lib/husc.rb', line 171

# Returns the text inside the wrapped node.
#
# @param shaping [Boolean] when truthy, the text is passed through
#   shaping_string before being returned
# @return [Object] @doc.inner_text, optionally processed by shaping_string
def inner_text(shaping = true)
  ## -----*----- Fetch the text inside the tag -----*----- ##
  content = @doc.inner_text
  shaping ? shaping_string(content) : content
end

#send(opts) ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
# File 'lib/husc.rb', line 76

# Queues one set of form data to be applied by the next #submit.
#
# Keys are converted to symbols and stored as a new Hash at the end of
# @params. #submit treats the remaining keys as the criteria that locate
# the form control.
#
# @param opts [Hash] locator criteria plus one of the action keys below
# @return [Hash] the symbol-keyed copy of +opts+ (a different object from
#   the Hash stored in @params)
def send(opts)
  ## -----*----- Specify form data -----*----- ##
  #
  # Text, numbers, etc. => specify value (String)
  # Checkbox            => specify check (Bool)
  # File upload         => specify file (String)
  # Button click        => specify click (Bool)
  entry = {}
  @params << entry
  normalized = opts.map { |key, value| [key.to_sym, value] }.to_h
  normalized.each_pair { |key, value| entry[key] = value }
  normalized
end

#submit(opts) ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/husc.rb', line 88

# Applies the queued form data to a form on the current page and submits it.
#
# Each Hash queued in @params is consumed as follows: the action key
# (:value, :check, :file or :click) is removed, and the remaining keys are
# used as the criteria that locate the control on the form. Controls that
# cannot be found are skipped silently.
#
# @param opts [Integer, Hash] an index into the page's forms, or criteria
#   (string or symbol keys) passed to the page's #form lookup
# @return [Array, nil] the emptied parameter queue, or nil when no matching
#   form exists (in which case nothing is submitted and @params is kept)
def submit(opts)
  ## -----*----- Submit a form -----*----- ##
  # Select the form. The Integer check has to come before the key
  # normalization: an Integer has no #map, so normalizing first made the
  # index form of this call raise NoMethodError.
  if opts.kind_of?(Integer)
    form = @agent.page.forms[opts]
  else
    criteria = opts.map { |k, v| [k.to_sym, v] }.to_h
    form = @agent.page.form(**criteria)
  end
  return if form.nil?
  button = nil

  @params.each do |param|
    # Text, numbers, etc.
    if param.include?(:value) && !param.include?(:check)
      value = param.delete(:value)
      next if value.nil?
      field = form.field_with(**param)
      field.value = value unless field.nil?
    end

    # Checkbox (a :value key, if present, stays in the lookup criteria)
    if param.include?(:check)
      check = param.delete(:check)
      next if check.nil?
      checkbox = form.checkbox_with(**param)
      unless checkbox.nil?
        check ? checkbox.check : checkbox.uncheck
      end
    end

    # File upload (skipped when the local file does not exist)
    if param.include?(:file)
      file = param.delete(:file)
      next if file.nil? || !File.exist?(file)
      upload = form.file_upload_with(**param)
      upload.file_name = file unless upload.nil?
    end

    # Button click (the last matching button wins)
    if param.include?(:click)
      click = param.delete(:click)
      next unless click
      clicked = form.button_with(**param)
      button = clicked unless clicked.nil?
    end
  end

  response = @agent.submit(form, button)
  update_params(response.content.toutf8)
  @params = []
end

#text(shaping = true) ⇒ Object



189
190
191
192
193
194
195
196
# File 'lib/husc.rb', line 189

# Returns the text of the wrapped node with other tags stripped.
#
# @param shaping [Boolean] when truthy, the text is passed through
#   shaping_string before being returned
# @return [Object] @doc.text, optionally processed by shaping_string
def text(shaping = true)
  ## -----*----- Fetch the text inside the tag (other tags removed) -----*----- ##
  content = @doc.text
  shaping ? shaping_string(content) : content
end

#xpath(locator, single = false) ⇒ Object



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/husc.rb', line 139

# Selects elements below the wrapped node with an XPath expression.
#
# Every match is wrapped in its own Husc instance (built with doc: node).
#
# @param locator [String] XPath expression passed to @doc.xpath
# @param single [Boolean] when truthy, return only the first match
# @return [CrawlArray, Husc] all matches as a CrawlArray; with single, the
#   first match, or an empty CrawlArray when nothing matched
def xpath(locator, single = false)
  ## -----*----- Fetch elements from the HTML by XPath -----*----- ##
  wrapped = @doc.xpath(locator).map { |node| Husc.new(doc: node) }
  matches = CrawlArray.new(wrapped)

  # Multiple nodes
  return matches unless single

  # Single node
  head = matches[0]
  head.nil? ? CrawlArray.new() : head
end