Class: ECFS::SolrScrapeQuery

Inherits:
Object
  • Object
show all
Defined in:
lib/ecfs/solr_scrape_query.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#after_scrape ⇒ Object

Returns the value of attribute after_scrape.



10
11
12
# File 'lib/ecfs/solr_scrape_query.rb', line 10

# Reader for the after_scrape attribute.
#
# @return [Object, nil] whatever is currently stored in @after_scrape
def after_scrape; @after_scrape; end

#docket_number ⇒ Object

Returns the value of attribute docket_number.



8
9
10
# File 'lib/ecfs/solr_scrape_query.rb', line 8

# Reader for the docket_number attribute.
#
# @return [Object, nil] whatever is currently stored in @docket_number
def docket_number; @docket_number; end

#received_min_date ⇒ Object

Returns the value of attribute received_min_date.



9
10
11
# File 'lib/ecfs/solr_scrape_query.rb', line 9

# Reader for the received_min_date attribute.
#
# @return [Object, nil] whatever is currently stored in @received_min_date
def received_min_date; @received_min_date; end

Instance Method Details

#filing_to_citation(filing) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/ecfs/solr_scrape_query.rb', line 12

# Builds a short citation string for a filing from its type and filer name.
#
# Three filing types get a fixed wording; any other type is lower-cased,
# capitalized and followed by "of <filer>". A missing or blank type falls back
# to the generic label "Filing" instead of raising or producing " of <filer>".
#
# @param filing [Hash] read via the string keys 'type_of_filing' and 'name_of_filer'
# @return [String] the citation text
def filing_to_citation(filing)
  filer = filing['name_of_filer']
  # to_s so a nil type takes the blank-type branch rather than raising NoMethodError.
  type = filing['type_of_filing'].to_s

  case type
  when "COMMENT"           then "Comments of #{filer}"
  when "REPLY TO COMMENTS" then "Reply Comments of #{filer}"
  when "NOTICE OF EXPARTE" then "#{filer} Ex Parte Letter"
  when /\A\s*\z/           then "Filing of #{filer}"
  else "#{type.downcase.capitalize} of #{filer}"
  end
end

#filings_from_docket_number(docket_number, start = 0, received_min_date = nil, after_scrape = nil) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/ecfs/solr_scrape_query.rb', line 31

# Fetches one page of ECFS search results for a docket and converts every
# result row into a filing hash.
#
# A page without a "Showing results" banner yields a total of 0, and a page
# without a results table yields no filings, so an empty search no longer
# raises NoMethodError.
#
# @param docket_number [String] interpolated into the `proceeding` query parameter
# @param start [Integer] interpolated into the `start` query parameter
# @param received_min_date [String, nil] when given, appended as `received.minDate`
# @param after_scrape [#call, nil] when given, called once with the filings parsed from this page
# @return [Array(Array<Hash>, Integer)] the filings on this page and the total
#   read from the "Showing results" banner
def filings_from_docket_number(docket_number, start=0, received_min_date=nil, after_scrape=nil)
  search_url = "http://apps.fcc.gov/ecfs/solr/search?sort=dateRcpt&proceeding=#{docket_number}&dir=asc&start=#{start}"
  search_url << "&received.minDate=#{received_min_date}" if received_min_date

  # One-second pause before every request; presumably throttling to be polite
  # to the remote server.
  sleep(1)
  page = Mechanize.new.get(search_url)

  banner = page.search('div').find { |div| div.text.start_with?("Showing results") }
  # The total is whatever follows "of " in the banner text.
  # NOTE(review): commas are stripped in case the site prints thousands
  # separators; the exact banner format should be confirmed against a live page.
  total = banner ? banner.text.split('of ')[1].to_s.delete(',').to_i : 0

  table = page.search('div.dataTable table').first
  # The first <tr> is dropped, as it is treated as the header row.
  rows = table ? table.search('tr').drop(1) : []

  filings = rows.map do |row|
    columns = row.search('td')

    # The filing id is taken from the link in the filer-name cell.
    id = columns[1].search('a').first.attributes['href'].value.split('?id=')[1]

    filing = {
      'docket_number' => columns[0].text.strip,
      'name_of_filer' => columns[1].text.strip,
      'type_of_filing' => columns[3].text.strip,
      'url' => "http://apps.fcc.gov/ecfs/comment/view?id=#{id}",
      'date_received' => columns[2].text.strip,
      'pages' => columns[4].text.strip.to_i
    }
    filing['citation'] = filing_to_citation(filing)
    filing
  end

  after_scrape.call(filings) if after_scrape

  return filings, total
end

#get ⇒ Object



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/ecfs/solr_scrape_query.rb', line 83

# Scrapes every results page for @docket_number and returns all filings.
#
# The first request (offset 0) supplies the total result count; the remaining
# pages are then requested at offsets 20, 40, ... below that total.
# @received_min_date and @after_scrape are forwarded unchanged to each
# filings_from_docket_number call.
#
# @return [Array<Hash>] the filings from all pages, in request order
def get
  # Offsets advance in steps of 20, the number of results each request is
  # assumed to return.
  page_size = 20

  first_page_of_filings, total = filings_from_docket_number(@docket_number, 0, @received_min_date, @after_scrape)

  # Copy so the array returned by the first request is not mutated below.
  filings = first_page_of_filings.dup

  # to_i guards against a nil total, which would otherwise make the range endless.
  (page_size...total.to_i).step(page_size) do |offset|
    filings.concat(filings_from_docket_number(@docket_number, offset, @received_min_date, @after_scrape).first)
  end

  filings
end