16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
|
# File 'lib/tames.rb', line 16
# Parses an opinion-list HTML page into one hash per opinion row.
#
# Rows are taken from +tr+ elements with class +rgRow+ and +rgAltRow+ inside
# tables with class +rgMasterTable+. All +rgRow+ rows are listed before the
# +rgAltRow+ rows, so the result is not in page order.
#
# Each result hash always carries:
# * +:release_date+  - value returned by +date_from_oddball+ for the text of the
#   second +span+ found under +span.TitleBlue+ (the same value for every row)
# * +:opinion_urls+  - Hash of link hrefs keyed by "pdf", "html", "wpd", "doc",
#   or the downcased link text for any other link
# * +:panel_string+  - comma-joined text fragments of the last cell that match
#   /Ju[ds]/
# * +:disposition+   - downcased, trimmed text of the second-to-last cell
#
# and, when present in the row:
# * +:docket_no+ / +:docket_page_url+ - text and href of a link whose text
#   contains four consecutive digits
# * +:author_string+ - text of the first +td+ found under a +div+
# * +:case_style+ / +:origin+ - the third-to-last cell split on "--"
#
# @param doc the HTML document, handed unchanged to Nokogiri::HTML.parse
# @return [Array<Hash>] one hash per row, as described above
# @raise [NoMethodError] if the page has no second span under span.TitleBlue,
#   or if a row has fewer than three +td+ cells
def self.parse_opinion_list(doc)
  data = Nokogiri::HTML.parse(doc)

  # Look the master table up once; both row classes are searched within it.
  master_table = data.search("table[class=rgMasterTable]")
  targets = master_table.search("tr[class=rgRow]") + master_table.search("tr[class=rgAltRow]")

  raw_release_date = data.search("span.TitleBlue").search("span")[1].inner_text
  release_date = self.date_from_oddball(raw_release_date.strip_both_ends)

  results = []
  targets.each do |row|
    result = {}
    result[:release_date] = release_date
    result[:opinion_urls] = {}

    row.search("a").each do |link|
      link_text = link.inner_text
      href = link["href"]

      # A link whose text contains four digits in a row is the docket link.
      if link_text =~ /\d\d\d\d/
        result[:docket_no] = link_text
        result[:docket_page_url] = href
        next
      end

      # Any other link is an opinion document; key it by its format.
      label = link_text.downcase
      url_key =
        case label
        when /pdf/ then "pdf"
        when /htm/ then "html"
        when /wpd/ then "wpd"
        when /doc/ then "doc"
        else
          # NOTE(review): as written, both gsub calls replace "" with "" and
          # have no effect; the intended patterns may have been lost. Confirm
          # against the upstream file before removing them.
          label.gsub("","").gsub("","").strip_both_ends
        end
      result[:opinion_urls][url_key] = href
    end

    author_cell = row.search("div").search("td").first
    result[:author_string] = author_cell.inner_text if author_cell

    # The last three cells are addressed from the end of the row.
    cells = row.search("td")

    # Split the raw cell markup on tag delimiters and keep the fragments that
    # match /Ju[ds]/.
    panel_members = cells[-1].to_html.split(/[<>]/).grep(/Ju[ds]/)
    # NOTE(review): this gsub replaces a space with a space as written; the
    # original pattern may have been a non-breaking space or entity. Confirm.
    result[:panel_string] = panel_members.join(",").gsub(" "," ")

    result[:disposition] = cells[-2].inner_text.downcase.strip_both_ends

    source = cells[-3].inner_text
    parts = source.split("--")
    case parts.size
    when 0
      # Empty cell: nothing to record.
    when 1
      result[:case_style] = parts[0].strip_both_ends
    when 2
      result[:case_style] = parts[0].strip_both_ends
      result[:origin] = parts[1].strip_both_ends
    else
      # More than one "--": which one separates style from origin is
      # ambiguous, so keep the whole text rather than dropping the row's data.
      result[:case_style] = source.strip_both_ends
    end

    results << result
  end
  results
end
|