Class: Pollex::Source

Inherits:
PollexObject show all
Extended by:
PollexClass
Defined in:
lib/pollex/source.rb

Overview

A source of entries in Pollex.

Instance Attribute Summary collapse

Attributes included from PollexClass

#inspectables

Class Method Summary collapse

Instance Method Summary collapse

Methods included from PollexClass

attr_inspector

Methods inherited from PollexObject

#initialize, #inspect

Constructor Details

This class inherits a constructor from Pollex::PollexObject

Instance Attribute Details

#code ⇒ Object

Returns the value of attribute code.



6
7
8
# File 'lib/pollex/source.rb', line 6

# Reader for the +code+ attribute.
#
# @return [Object] the current value of @code (nil when it has not been set)
def code
  instance_variable_get(:@code)
end

#count ⇒ Integer

Returns number of Entries belonging to this Source.

Returns:

  • (Integer)

    number of Entries belonging to this Source



39
40
41
# File 'lib/pollex/source.rb', line 39

# Returns number of Entries belonging to this Source.
#
# A value already stored in @count is returned as-is. Otherwise the entries
# are obtained through #entries (which loads them on first use) and counted.
# Reading @entries directly here would raise NoMethodError on nil whenever
# #entries had not been called beforehand.
#
# @return [Integer] number of Entries belonging to this Source
def count
  @count ||= entries.count
end

#name ⇒ String

Returns full name of this Source.

Returns:

  • (String)

    full name of this Source



25
26
27
28
29
# File 'lib/pollex/source.rb', line 25

# Full name of this Source, fetched on first use and then cached in @name.
#
# The name is requested from the scraper for the page at @path: the h1 text
# is matched against "Entries from (.*) in Pollex-Online" and the captured
# group is kept.
#
# @return [String] full name of this Source
def name
  return @name if @name

  extract_name = lambda do |heading|
    heading.match('Entries from (.*) in Pollex-Online')[1]
  end
  scraped = Scraper.instance.get(@path, [[:name, 'h1/text()', extract_name]])
  @name = scraped[:name]
end

#path ⇒ Object

Returns the value of attribute path.



6
7
8
# File 'lib/pollex/source.rb', line 6

# Reader for the +path+ attribute.
#
# @return [Object] the current value of @path (nil when it has not been set)
def path
  instance_variable_get(:@path)
end

#reference ⇒ String

Returns reference information for this Source.

Returns:

  • (String)

    reference information for this Source



32
33
34
35
36
# File 'lib/pollex/source.rb', line 32

# Reference information for this Source, fetched on first use and then
# cached in @reference.
#
# The text is requested from the scraper for the page at @path using the
# XPath p[@class='ref']/text(). The result is stored under the :name key of
# the returned hash, which is the key this method asks for.
#
# @return [String] reference information for this Source
def reference
  return @reference if @reference

  fields = [[:name, "p[@class='ref']/text()"]]
  @reference = Scraper.instance.get(@path, fields)[:name]
end

Class Method Details

.all ⇒ Array<Source>

Returns all Sources in Pollex.

Returns:

  • (Array<Source>)

    array of Sources in Pollex



124
125
126
127
128
129
130
131
132
# File 'lib/pollex/source.rb', line 124

# Returns all Sources in Pollex.
#
# The list is requested from the scraper for "/source/" on the first call
# and cached in @sources on the receiver, so later calls return the same
# object. Each attribute is paired with the XPath it is read from.
#
# @return [Array<Source>] array of Sources in Pollex
def self.all
  return @sources if @sources

  # Attribute => XPath; insertion order is kept when converted to pairs.
  columns = {
    code: 'td[1]/a/text()',
    path: 'td[1]/a/@href',
    name: 'td[2]/a/text()',
    count: 'td[3]/text()',
    reference: 'td[4]/text()'
  }.to_a
  @sources = Scraper.instance.get_all(Source, "/source/", columns)
end

.count ⇒ Integer

Counts the number of Sources within Pollex

Returns:

  • (Integer)

    number of Sources in Pollex



136
137
138
# File 'lib/pollex/source.rb', line 136

# Counts the number of Sources within Pollex.
#
# Delegates to .all and counts its result.
#
# @return [Integer] number of Sources in Pollex
def self.count
  sources = all
  sources.count
end

Instance Method Details

#entries ⇒ Array<Entry>

Returns all Entries belonging to this Source

Returns:

  • (Array<Entry>)

    array of Entries belonging to this Source



12
13
14
15
16
17
18
19
20
21
22
# File 'lib/pollex/source.rb', line 12

# Returns all Entries belonging to this Source.
#
# The entries are requested from the scraper for the page at @path on the
# first call and cached in @entries. The :source_code and :source_path
# fields have no XPath; their lambdas supply this Source's own @code and
# @path instead.
#
# @return [Array<Entry>] array of Entries belonging to this Source
def entries
  return @entries if @entries

  # Both lambdas ignore their argument and read the ivar when called.
  own_code = ->(_row) { @code }
  own_path = ->(_row) { @path }

  fields = [
    [:language_name, 'td[1]/a/text()'],
    [:language_path, 'td[1]/a/@href'],
    [:source_code, nil, own_code],
    [:source_path, nil, own_path],
    [:reflex, 'td[2]/text()'],
    [:description, 'td[3]/text()'],
    [:flag, "td[3]/span[@class='flag']/text()"]
  ]
  @entries = Scraper.instance.get_all(Entry, @path, fields)
end

#grammar ⇒ Hash

Returns grammatical information for this source, used for intelligently parsing the descriptions of entries from this source

Returns:

  • (Hash)

    grammatical information pertaining to the descriptions of this source's entries

See Also:



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/pollex/source.rb', line 48

# Returns grammatical information for this source, used for parsing the
# descriptions of entries from this source.
#
# Four settings are chosen independently from the source code in @code.
# Each falls back to a default when the code is not listed for it:
#
# * :language         - 'en' unless the source is listed as 'es', 'fr' or 'de'
# * :dividers         - String or Regexp to split on; default /[,;]/
# * :trim_expressions - Regexp of expressions to trim; default '' (none)
# * :trim_after       - String after which text is trimmed; default nil (none)
#
# @return [Hash] the four settings keyed by the symbols above
def grammar
  language =
    case @code
    when *%w[Cnt Bxn Egt Fts]
      'es' # Spanish-language sources
    when *%w[Aca Bgn Btn Hmn Rch Dln Gzl Jnu Jsn Rve Lvs Lch Lmt Myr Mfr Rdl Sgs]
      'fr' # French-language sources
    when 'Ths'
      'de' # German-language sources
    else
      'en' # default language: English
    end

  dividers =
    case @code
    when *%w[Aca Bxn Jsn Mtu Grn]
      /(,|;|\. )/ # comma, semicolon, period
    when *%w[Atn Bwh Hmn Crk Hdy Smt Rkj]
      '\n' # don't split at all (nil doesn't work as a divider)
    when *%w[Bgn Bst Brn Gms Tmo]
      '.' # period
    when *%w[Bkr Bgs]
      /(,|\. )/ # comma, period
    when *%w[
      Bge Bck Cbl Chn Cdn Dvs Dnr Dln Dye Ebt Egt Fbg Fth Fox Fts Hzd Hry Hvn
      Hnh Fny Mta Myr Mtx Mnr Mbg Kvt Ply Mka Sby Sve Sta Sma Sks Tbs Tgr Whe
      Whr Rmn Wms Ykr
    ]
      ';' # semicolon
    when *%w[Drd Hbn Mkn Rdl Bke]
      /(;|\. )/ # semicolon, period
    else
      /[,;]/ # default: split on comma and semicolon
    end

  trim_expressions =
    case @code
    when *%w[McP Dsn Gzl Sby Sph]
      /\(.*\)/ # all (parenthetical expressions)
    when *%w[
      Cnt Aca Bse Hmn Cbl Cpl Crn Chn Chl Cwd Clk Cek Crk Dvs Dtn Dnr Dty Fth
      Fox Fts Gmd McC Hwd Ivs Lmt Lvs Lbr Mar Mta Myr McE Mnr Mfr Mtu Gty Ply
      Rby Mka Sve Shd Sma Stn Sks Tgr Whe Mke Whr
    ]
      /\((.{0,4}|.*[0-9].*)\)/ # parentheticals <= 4 chars or containing numbers
    when *%w[Stz Bck]
      /\(.*[0-9].*\)/ # parentheticals containing numbers
    when *%w[Kch Ray]
      /\[.*\]/ # all [bracketed expressions]
    when 'Rsr'
      /".*"/ # all "expressions in quotes"
    else
      '' # default: don't trim any expressions
    end

  trim_after =
    case @code
    when *%w[Btl Bck Chl McC Hpr Mbg Wte]
      '.' # everything after a period
    when 'Shd'
      '=' # everything after an equals sign
    end # no match leaves nil: don't trim any trailing text

  {
    language: language,
    dividers: dividers,
    trim_expressions: trim_expressions,
    trim_after: trim_after
  }
end