17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
|
# File 'lib/legacy.rb', line 17
# Parses an opinion-list HTML page into an array of per-opinion hashes.
#
# Every <p> under "#submission" whose text contains "Before" is treated as one
# opinion entry. It is paired, by position, with the <table> under
# "#submission" at the same index, which supplies the docket details.
# An entry is left out of the result when there is no table at its index, or
# when its docket number or docket link is missing/blank.
#
# NOTE(review): strip_both_ends, to_date and blank? are not defined in this
# block; presumably they are project / ActiveSupport extensions -- confirm.
#
# @param doc [String] HTML of the opinion list page (whatever
#   Nokogiri::HTML.parse accepts)
# @return [Array<Hash>] one hash per kept entry with the keys :author_string,
#   :opinion_urls (Hash keyed by "pdf" / "html" / "wpd"), :disposition,
#   :panel_string, :release_date, :docket_no and :docket_page_url.
#   :case_style is set when the last docket cell splits on "--" into one or
#   two parts; :origin is set only when it splits into exactly two.
# @raise [NoMethodError] if "#submission" has no <p>, or a kept entry has no
#   <a> or <span>, or its docket table has no <td> (nil from first/last)
def self.parse_opinion_list(doc)
  data = Nokogiri::HTML.parse(doc)
  submission = data.search("#submission")
  paragraphs = submission.search("p")
  targets = paragraphs.select { |para| para.inner_text.match(/Before/) }
  docket_targets = submission.search("table")

  # The release date is the text following "Opinions" in the first paragraph.
  date_text = paragraphs.first.inner_text.split("Opinions").last
  release_date = date_text.strip_both_ends.to_date

  results = []
  targets.each_with_index do |target, index|
    # Entries are matched to docket tables by position; no table, no entry.
    docket = docket_targets[index]
    next if docket.nil?

    # Resolve the docket link first: a table without an <a> yields nil here
    # and the entry is skipped instead of raising on nil["href"].
    docket_links = docket.search("a")
    docket_link = docket_links.first
    docket_no = docket_links.inner_text
    docket_page_url = docket_link && docket_link["href"]
    next if docket_no.blank? || docket_page_url.blank?

    links = target.search("a")
    opinion_urls = {}
    links.each do |link|
      href = link["href"]
      # Anchors without an href (e.g. named anchors) cannot be opinion links.
      next if href.nil?

      label = link.inner_text.downcase
      if href.downcase.match(/pdfopinion/) || label.match(/pdf/)
        opinion_urls["pdf"] = href
      elsif href.downcase.match(/htmlopinion/)
        opinion_urls["html"] = href
      elsif label.match(/wpd/)
        opinion_urls["wpd"] = href
      end
    end

    result = {}
    # The text of the first link in the entry is used as the author string.
    result[:author_string] = links.first.inner_text.strip_both_ends
    result[:opinion_urls] = opinion_urls
    result[:disposition] = target.search("span").first.inner_text.strip_both_ends
    result[:panel_string] = target.search("b").inner_text.strip_both_ends
    result[:release_date] = release_date

    # Last docket cell reads "case style--origin"; the origin part is optional.
    parts = docket.search("td").last.inner_text.split("--")
    case parts.size
    when 1
      result[:case_style] = parts.first
    when 2
      result[:case_style], result[:origin] = parts
    end

    result[:docket_no] = docket_no
    result[:docket_page_url] = docket_page_url
    results << result
  end
  results
end
|