Module: Timelog4r::HTML_Parser

Defined in:
lib/timelog4r/html_parser.rb

Defined Under Namespace

Classes: ParseError

Instance Method Summary

Instance Method Details

#has_child_entry?(entry_element) ⇒ Boolean

Returns:

  • (Boolean)


36
37
38
# File 'lib/timelog4r/html_parser.rb', line 36

# Placeholder predicate: there is no implementation yet, so every call
# returns nil (falsy) and entry_element is never inspected.
#
# @param entry_element [Object] currently unused
# @return [nil]
def has_child_entry?(entry_element)
  nil
end

#has_parent_entry?(entry_element) ⇒ Boolean

Returns:

  • (Boolean)


32
33
34
# File 'lib/timelog4r/html_parser.rb', line 32

# Placeholder predicate: there is no implementation yet, so every call
# returns nil (falsy) and entry_element is never inspected.
#
# @param entry_element [Object] currently unused
# @return [nil]
def has_parent_entry?(entry_element)
  nil
end

#is_group?(entry_element) ⇒ Boolean

Returns:

  • (Boolean)


27
28
29
30
# File 'lib/timelog4r/html_parser.rb', line 27

# Unfinished predicate: it only names the selector for a group link and
# returns that selector string, so the result is always truthy and
# entry_element is never inspected.
# NOTE(review): presumably meant to look the selector up in entry_element —
# confirm before relying on the return value.
#
# @param entry_element [Object] currently unused
# @return [String] the selector string
def is_group?(entry_element)
  selector = 'span[@class="time"]/a[@class="name"]'
  selector
end

#parse_author(author_element) ⇒ Object



184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# File 'lib/timelog4r/html_parser.rb', line 184

# Builds an author Hash from an author link element.
#
# The user id is the text captured between "//" and ".timelog.jp" in the
# element's +href+ (nil when the href is missing or does not match). The
# name is the element's inner text with its first and last characters
# dropped.
#
# @param author_element [#attr, #inner_text] link element for the author
# @return [Hash, false] a Hash with :user_id and :name, or false when a
#   ParseError is raised while reading the element
def parse_author(author_element)
  author = {}
  begin
    # to_s keeps an element without an href from raising NoMethodError;
    # String#[] with a capture index avoids the $1 global.
    href = author_element.attr('href').to_s
    author[:user_id] = href[/\/\/(.+)\.timelog\.jp/, 1]
    author[:name] = author_element.inner_text[1..-2]
  rescue ParseError => e
    p e
    return false
  else
    return author
  end
end

#parse_children_entries(entries_element) ⇒ Object



40
41
42
# File 'lib/timelog4r/html_parser.rb', line 40

# Placeholder: there is no implementation yet, so every call returns nil
# and entries_element is never inspected.
#
# @param entries_element [Object] currently unused
# @return [nil]
def parse_children_entries(entries_element)
  nil
end

#parse_entry(state_element) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# File 'lib/timelog4r/html_parser.rb', line 44

# Parses a single timeline entry element into a Hash.
#
# Keys set on success, in order: :memo_text, :memo_id, :permission
# (:public, :friend or nil), :modified (Time or nil), :permalink (URI),
# :author, :in_group, :star (a one-element Array holding {:count => n})
# and :res_count. :reply_to and :tag are added only when the matching
# elements are present.
#
# The memo, permalink, permission, author, group and res-count elements
# are all required; when one is missing a ParseError is raised internally,
# printed with +p+, and nil is returned.
#
# @param state_element [#at, #search] element wrapping one entry
# @return [Hash, nil] the parsed entry, or nil when a required element is
#   missing
def parse_entry(state_element)
  permalink_element = 'span[@class="time"]/a[3]'
  memo_element = 'h3'
  author_element = 'span[@class="time"]/a[@class="name"]'
  group_element = 'h3/a[@class="name"]'
  permission_element = 'img[@class="icn_left"]'
  has_star_element = 'img[@src="http://img.timelog.jp/star.gif"]'
  res_count_element = 'img[@src="http://img.timelog.jp/comment.gif"]'
  tag_list_element = 'span[@class="tag"]'
  reply_to_element = 'h3/a'
  host_pattern = /\/\/(.+)\.timelog\.jp/

  begin
    result = {}

    memo = state_element.at(memo_element)
    raise ParseError.new('not found memo_text.') unless memo
    result[:memo_text] = memo.inner_text

    # The permalink anchor feeds :memo_id, :modified and :permalink, so it
    # is looked up and checked once.
    permalink = state_element.at(permalink_element)
    raise ParseError.new('not found memo_id.') unless permalink
    href = permalink.attr('href').to_s
    result[:memo_id] = href[7..-1]

    permission = state_element.at(permission_element)
    raise ParseError.new('not found permission.') unless permission
    kind = permission.attr('src').to_s[/icon_(public|friend)\.gif/, 1]
    result[:permission] = kind ? kind.to_sym : nil

    # The separator is matched lazily: a greedy ".+" swallows the leading
    # digit of a two-digit hour ("13:45" would be read as "3:45").
    stamp = permalink.inner_text.match(/(\d+\/\d+).+?(\d+:\d+)/)
    result[:modified] = if stamp
      Time.parse("#{Time.now.year}/#{stamp[1]} #{stamp[2]}")
    else
      nil
    end

    result[:permalink] = URI.parse('http://timelog.jp/' + href)

    author = state_element.at(author_element)
    raise ParseError.new('not found author.') unless author
    result[:author] = parse_author(author)

    group = state_element.at(group_element)
    raise ParseError.new('not found group.') unless group
    result[:in_group] = parse_group(group)

    # Anchors carrying a class or target attribute are not reply links.
    links = state_element.search(reply_to_element).reject do |link|
      link.has_attribute?('class') || link.has_attribute?('target')
    end
    unless links.empty?
      link = links.last
      result[:reply_to] = {
        :author => {
          :user_id => link.attr('href').to_s[host_pattern, 1],
          :screen_name => link.inner_text
        }
      }
    end

    # result[:todo] not supported.
    tags = state_element.at(tag_list_element)
    result[:tag] = parse_tag_list(tags) if tags

    # String#[] returns the matched digits themselves; the pattern has no
    # capture group, so $1 would always be nil and the count always 0.
    star = state_element.at(has_star_element)
    star_count = star ? star.attr('alt').to_s[/\d+/].to_i : 0
    result[:star] = [{:count => star_count}]

    res = state_element.at(res_count_element)
    raise ParseError.new('not found res count.') unless res
    result[:res_count] = res.inner_text.to_s[/\d+/].to_i

    memo_text = result[:memo_text].to_s

    # reject group name (a String pattern is matched literally, so names
    # containing regexp metacharacters such as "+" are handled).
    group_name = result[:in_group] && result[:in_group][:name]
    memo_text = memo_text.sub(group_name, '') if group_name

    # reject reply name.
    memo_text = memo_text.sub(/(\s>\s\w+).+$/, '')

    # reject tags.
    memo_text = memo_text.gsub(/(\s\[.+\])$/, '')

    # Store the trimmed text; previously the trimmed copy was discarded.
    result[:memo_text] = memo_text.strip
  rescue ParseError => e
    p e
    return nil
  else
    return result
  end
end

#parse_group(group_element) ⇒ Object



201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
# File 'lib/timelog4r/html_parser.rb', line 201

# Builds a group Hash from a group link element.
#
# The group id is the text captured between "//" and ".timelog.jp" in the
# element's +href+ (nil when the href is missing or does not match). The
# name is the element's inner text.
#
# @param group_element [#attr, #inner_text] link element for the group
# @return [Hash, nil] a Hash with :group_id and :name, or nil when a
#   ParseError is raised while reading the element
def parse_group(group_element)
  group = {}
  begin
    # to_s keeps an element without an href from raising NoMethodError;
    # String#[] with a capture index avoids the $1 global.
    href = group_element.attr('href').to_s
    group[:group_id] = href[/\/\/(.+)\.timelog\.jp/, 1]
    group[:name] = group_element.inner_text
  rescue ParseError => e
    p e
    return nil
  else
    return group
  end
end

#parse_profile(profile_element) ⇒ Object



275
276
277
# File 'lib/timelog4r/html_parser.rb', line 275

# Placeholder: there is no implementation yet, so every call returns nil
# and profile_element is never inspected.
#
# @param profile_element [Object] currently unused
# @return [nil]
def parse_profile(profile_element)
  nil
end

#parse_tag(tag_element) ⇒ Object



253
254
255
# File 'lib/timelog4r/html_parser.rb', line 253

# Placeholder: there is no implementation yet, so every call returns nil
# and tag_element is never inspected.
#
# @param tag_element [Object] currently unused
# @return [nil]
def parse_tag(tag_element)
  nil
end

#parse_tag_list(tag_list_element) ⇒ Object



257
258
259
260
261
262
263
264
265
# File 'lib/timelog4r/html_parser.rb', line 257

# Collects the text of every anchor found under a tag-list element.
#
# @param tag_list_element [#search] element that is searched for 'a' nodes
# @return [Array] the inner_text of each matched anchor, in search order
def parse_tag_list(tag_list_element)
  anchors = tag_list_element.search('a')
  anchors.map { |anchor| anchor.inner_text }
end

#parse_timeline(timeline_element) ⇒ Object



218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# File 'lib/timelog4r/html_parser.rb', line 218

# Parses the HTML of a timeline page into a feed-like Hash.
#
# The markup is wrapped in a Mechanize::Page (built with @agent) so it can
# be queried. The result carries :title (text of the page's title
# element), :link, :modified (the current time) and :entries (one
# parse_entry result per 'div#list_1/li' node under 'ul#timeline').
#
# @param timeline_element [String] body handed to Mechanize::Page.new
# @return [Hash, nil] the parsed timeline, or nil (after printing the
#   error with +p+) when the timeline or its entries cannot be found
def parse_timeline(timeline_element)
  page = Mechanize::Page.new(
    URI.parse('http://timelog.jp/home/'),
    {'content-type' => 'text/html'},
    timeline_element,
    '200',
    @agent
  )

  feed = {
    :title => page.at('title').inner_text,
    :link => URI.parse('http://timelog.jp/'),
    :modified => Time.now
  }

  begin
    list = page.search('ul#timeline')
    if list.empty?
      raise ParseError.new('not found timeline element.')
    end

    items = list.search('div#list_1/li')
    if items.empty?
      raise ParseError.new('not found entry elements.')
    end

    feed[:entries] = items.map { |item| parse_entry(item) }
  rescue ParseError => e
    p e
    return nil
  else
    return feed
  end
end

#parse_user(user_element) ⇒ Object



267
268
269
# File 'lib/timelog4r/html_parser.rb', line 267

# Placeholder: there is no implementation yet, so every call returns nil
# and user_element is never inspected.
#
# @param user_element [Object] currently unused
# @return [nil]
def parse_user(user_element)
  nil
end

#parse_user_list(user_list_element) ⇒ Object



271
272
273
# File 'lib/timelog4r/html_parser.rb', line 271

# Placeholder: there is no implementation yet, so every call returns nil
# and user_list_element is never inspected.
#
# @param user_list_element [Object] currently unused
# @return [nil]
def parse_user_list(user_list_element)
  nil
end

#permission_to_sym(string) ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/timelog4r/html_parser.rb', line 10

# Translates a permission code string into a symbol.
#
# '0' maps to :public, '1' to :friends_only and '2' to :private; any other
# value (including non-strings) maps to :unknown.
#
# @param string [String] permission code
# @return [Symbol]
def permission_to_sym(string)
  table = [
    ['0', :public],
    ['1', :friends_only],
    ['2', :private]
  ]
  # Compare with the literal code as receiver, as the case/when form did.
  hit = table.find { |code, _label| code == string }
  hit ? hit.last : :unknown
end

#tags_to_a(tag_string) ⇒ Object



23
24
25
# File 'lib/timelog4r/html_parser.rb', line 23

# Placeholder: there is no implementation yet, so every call returns nil
# and tag_string is never inspected.
#
# @param tag_string [Object] currently unused
# @return [nil]
def tags_to_a(tag_string)
  nil
end