Class: Pollex::Source
- Inherits:
- PollexObject (inheritance chain: Object → PollexObject → Pollex::Source)
- Extended by:
- PollexClass
- Defined in:
- lib/pollex/source.rb
Overview
A source of entries in Pollex.
Instance Attribute Summary collapse
-
#code ⇒ Object
Returns the value of attribute code.
-
#count ⇒ Integer
Number of Entries belonging to this Source.
-
#name ⇒ String
Full name of this Source.
-
#path ⇒ Object
Returns the value of attribute path.
-
#reference ⇒ String
Reference information for this Source.
Attributes included from PollexClass
Class Method Summary collapse
-
.all ⇒ Array<Source>
Returns all Sources in Pollex.
-
.count ⇒ Integer
Counts the number of Sources within Pollex.
Instance Method Summary collapse
-
#entries ⇒ Array<Entry>
Returns all Entries belonging to this Source.
-
#grammar ⇒ Hash
Returns grammatical information for this source, used for intelligently parsing the descriptions of entries from this source.
Methods included from PollexClass
Methods inherited from PollexObject
Constructor Details
This class inherits a constructor from Pollex::PollexObject
Instance Attribute Details
#code ⇒ Object
Returns the value of attribute code.
6 7 8 |
# File 'lib/pollex/source.rb', line 6
#
# Reader for the code attribute.
#
# @return [Object] the current value of @code
def code
  @code
end
#count ⇒ Integer
Returns number of Entries belonging to this Source.
39 40 41 |
# File 'lib/pollex/source.rb', line 39
#
# Number of Entries belonging to this Source.
#
# The result is memoized in @count. When @count has not been set, the value
# is derived from #entries rather than from the @entries instance variable:
# @entries is only populated once #entries has been called, so reading it
# directly would raise NoMethodError on nil for an object whose entries
# have not been loaded yet.
#
# @return [Integer] number of entries
def count
  @count ||= entries.count
end
#name ⇒ String
Returns full name of this Source.
25 26 27 28 29 |
# File 'lib/pollex/source.rb', line 25
#
# Full name of this Source.
#
# Returns the memoized @name when present; otherwise asks the Scraper for
# the 'h1/text()' node of the page at @path and keeps the part captured
# between "Entries from " and " in Pollex-Online".
#
# @return [String] full name of this Source
def name
  return @name if @name

  # Pulls the source name out of the page heading text.
  heading_to_name = lambda do |heading|
    heading.match('Entries from (.*) in Pollex-Online')[1]
  end

  scraped = Scraper.instance.get(@path, [[:name, 'h1/text()', heading_to_name]])
  @name = scraped[:name]
end
#path ⇒ Object
Returns the value of attribute path.
6 7 8 |
# File 'lib/pollex/source.rb', line 6
#
# Reader for the path attribute.
#
# @return [Object] the current value of @path
def path
  @path
end
#reference ⇒ String
Returns reference information for this Source.
32 33 34 35 36 |
# File 'lib/pollex/source.rb', line 32
#
# Reference information for this Source.
#
# Returns the memoized @reference when present; otherwise asks the Scraper
# for the text of the <p class="ref"> node of the page at @path. The
# scraped value is requested and read back under the :name key.
#
# @return [String] reference information for this Source
def reference
  return @reference if @reference

  scraped = Scraper.instance.get(@path, [[:name, "p[@class='ref']/text()"]])
  @reference = scraped[:name]
end
Class Method Details
.all ⇒ Array<Source>
Returns all Sources in Pollex.
124 125 126 127 128 129 130 131 132 |
# File 'lib/pollex/source.rb', line 124
#
# Returns all Sources in Pollex.
#
# The list is fetched through the Scraper from "/source/" on first use and
# memoized in @sources on the class.
#
# @return [Array<Source>] every Source listed in Pollex
def self.all
  return @sources if @sources

  # Attribute name => XPath (relative to each row) it is read from.
  columns = [
    [:code,      'td[1]/a/text()'],
    [:path,      'td[1]/a/@href'],
    [:name,      'td[2]/a/text()'],
    [:count,     'td[3]/text()'],
    [:reference, 'td[4]/text()']
  ]

  @sources = Scraper.instance.get_all(Source, "/source/", columns)
end
.count ⇒ Integer
Counts the number of Sources within Pollex.
136 137 138 |
# File 'lib/pollex/source.rb', line 136
#
# Counts the number of Sources within Pollex.
#
# @return [Integer] size of the collection returned by .all
def self.count
  all.count
end
Instance Method Details
#entries ⇒ Array<Entry>
Returns all Entries belonging to this Source.
12 13 14 15 16 17 18 19 20 21 22 |
# File 'lib/pollex/source.rb', line 12
#
# Returns all Entries belonging to this Source.
#
# The entries are fetched through the Scraper from the page at @path on
# first use and memoized in @entries. Every entry is additionally given
# this Source's @code and @path as :source_code and :source_path.
#
# @return [Array<Entry>] entries belonging to this Source
def entries
  return @entries if @entries

  # Attribute name, XPath relative to each row (nil when the value does not
  # come from the page), and an optional lambda supplying the value.
  fields = [
    [:language_name, 'td[1]/a/text()'],
    [:language_path, 'td[1]/a/@href'],
    [:source_code,   nil, lambda { |_| @code }],
    [:source_path,   nil, lambda { |_| @path }],
    [:reflex,        'td[2]/text()'],
    [:description,   'td[3]/text()'],
    [:flag,          "td[3]/span[@class='flag']/text()"]
  ]

  @entries = Scraper.instance.get_all(Entry, @path, fields)
end
#grammar ⇒ Hash
Returns grammatical information for this source, used for intelligently parsing the descriptions of entries from this source.
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
# File 'lib/pollex/source.rb', line 48
#
# Returns grammatical information for this source, used for parsing the
# descriptions of entries from this source.
#
# Each of the four settings starts from a default and is overridden
# according to the source code held in @code.
#
# @return [Hash] with the keys
#   :language         => 'en' (default), 'es', 'fr' or 'de'
#   :dividers         => String or Regexp the descriptions are split on
#                        (default: comma and semicolon)
#   :trim_expressions => Regexp of expressions to trim, '' for none
#   :trim_after       => String after which everything is trimmed,
#                        nil for none
def grammar
  # Language of the source; English unless listed below.
  language =
    case @code
    when 'Cnt', 'Bxn', 'Egt', 'Fts'
      'es' # Spanish-language sources
    when 'Aca', 'Bgn', 'Btn', 'Hmn', 'Rch', 'Dln', 'Gzl', 'Jnu', 'Jsn',
         'Rve', 'Lvs', 'Lch', 'Lmt', 'Myr', 'Mfr', 'Rdl', 'Sgs'
      'fr' # French-language sources
    when 'Ths'
      'de' # German-language sources
    else
      'en'
    end

  # Where descriptions are split; comma and semicolon unless listed below.
  dividers =
    case @code
    when 'Aca', 'Bxn', 'Jsn', 'Mtu', 'Grn'
      /(,|;|\. )/ # comma, semicolon, period
    when 'Atn', 'Bwh', 'Hmn', 'Crk', 'Hdy', 'Smt', 'Rkj'
      # Don't split at all (nil doesn't work as a divider). Single-quoted,
      # so this is the two characters backslash and n, not a newline.
      '\n'
    when 'Bgn', 'Bst', 'Brn', 'Gms', 'Tmo'
      '.' # period
    when 'Bkr', 'Bgs'
      /(,|\. )/ # comma, period
    when 'Bge', 'Bck', 'Cbl', 'Chn', 'Cdn', 'Dvs', 'Dnr', 'Dln', 'Dye',
         'Ebt', 'Egt', 'Fbg', 'Fth', 'Fox', 'Fts', 'Hzd', 'Hry', 'Hvn',
         'Hnh', 'Fny', 'Mta', 'Myr', 'Mtx', 'Mnr', 'Mbg', 'Kvt', 'Ply',
         'Mka', 'Sby', 'Sve', 'Sta', 'Sma', 'Sks', 'Tbs', 'Tgr', 'Whe',
         'Whr', 'Rmn', 'Wms', 'Ykr'
      ';' # semicolon
    when 'Drd', 'Hbn', 'Mkn', 'Rdl', 'Bke'
      /(;|\. )/ # semicolon, period
    else
      /[,;]/
    end

  # Expressions removed from descriptions; nothing unless listed below.
  trim_expressions =
    case @code
    when 'McP', 'Dsn', 'Gzl', 'Sby', 'Sph'
      /\(.*\)/ # all (parenthetical expressions)
    when 'Cnt', 'Aca', 'Bse', 'Hmn', 'Cbl', 'Cpl', 'Crn', 'Chn', 'Chl',
         'Cwd', 'Clk', 'Cek', 'Crk', 'Dvs', 'Dtn', 'Dnr', 'Dty', 'Fth',
         'Fox', 'Fts', 'Gmd', 'McC', 'Hwd', 'Ivs', 'Lmt', 'Lvs', 'Lbr',
         'Mar', 'Mta', 'Myr', 'McE', 'Mnr', 'Mfr', 'Mtu', 'Gty', 'Ply',
         'Rby', 'Mka', 'Sve', 'Shd', 'Sma', 'Stn', 'Sks', 'Tgr', 'Whe',
         'Mke', 'Whr'
      # parenthetical expressions that are <= 4 chars or contain numbers
      /\((.{0,4}|.*[0-9].*)\)/
    when 'Stz', 'Bck'
      /\(.*[0-9].*\)/ # parenthetical expressions that contain numbers
    when 'Kch', 'Ray'
      /\[.*\]/ # all [bracketed expressions]
    when 'Rsr'
      /".*"/ # all "expressions in quotes"
    else
      ''
    end

  # Marker after which the rest is dropped; none unless listed below.
  trim_after =
    case @code
    when 'Btl', 'Bck', 'Chl', 'McC', 'Hpr', 'Mbg', 'Wte'
      '.' # everything after a period
    when 'Shd'
      '=' # everything after an equals sign
    end

  {
    :language => language,
    :dividers => dividers,
    :trim_expressions => trim_expressions,
    :trim_after => trim_after
  }
end