Class: Rsssf::Page
- Inherits:
-
Object
show all
- Includes:
- Utils
- Defined in:
- lib/rsssf/page.rb
Overview
note:
an rsssf page may contain:
many leagues, cups
- tables, schedules (rounds), notes, etc.
an rsssf page MUST be in plain text (.txt); utf-8 character encoding is assumed
Constant Summary
collapse
- LEAGUE_ROUND_REGEX =
/\b
Round
\b/ix
- CUP_ROUND_REGEX =
/\b(
Round |
1\/8\sFinals |
1\/16\sFinals |
Quarterfinals |
Semifinals |
Final
)\b/ix
Class Method Summary
collapse
Instance Method Summary
collapse
Methods included from Utils
#archive_dir_for_year, #year_from_file, #year_from_name, #year_to_season
Constructor Details
#initialize(txt) ⇒ Page
Returns a new instance of Page.
46
47
48
|
# File 'lib/rsssf/page.rb', line 46
# Builds a page around an already-loaded text.
#
# @param txt [String] the page text; stored as-is in @txt
def initialize(txt)
  @txt = txt
end
|
Class Method Details
.from_file(path) ⇒ Object
37
38
39
40
|
# File 'lib/rsssf/page.rb', line 37
# Builds a page from a text file on disk.
#
# The file is read with File.read_utf8 and the resulting text is handed
# to from_string.
#
# @param path [String] path of the text file to read
# @return [Object] whatever from_string returns for the file's text
def self.from_file( path )
  txt = File.read_utf8( path )
  self.from_string( txt )
end
|
.from_string(txt) ⇒ Object
42
43
44
|
# File 'lib/rsssf/page.rb', line 42
# Builds a page directly from a string.
#
# @param txt [String] the page text
# @return [Object] the result of calling new with txt
def self.from_string(txt)
  new(txt)
end
|
.from_url(src) ⇒ Object
31
32
33
34
|
# File 'lib/rsssf/page.rb', line 31
# Builds a page from a remote source.
#
# The text is obtained from a fresh PageFetcher via #fetch and then
# passed on to from_string.
#
# @param src [String] the location handed to PageFetcher#fetch
# @return [Object] whatever from_string returns for the fetched text
def self.from_url(src)
  from_string(PageFetcher.new.fetch(src))
end
|
Instance Method Details
#build_stat ⇒ Object
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
|
# File 'lib/rsssf/page.rb', line 215
# Collects summary statistics about the page text into a PageStat record.
#
# The page text (@txt) is scanned for:
# - a "source: <url>" reference (first match, case-insensitive)
# - an "author(s): ... last updated: <day> <month> <year>" block
# - section headings (lines containing "#### <title>")
#
# basename, year and season are derived from the path of the source url
# (via year_from_name and year_to_season). When no source reference is
# found these three fields are left nil instead of raising while trying
# to parse a missing url.
#
# Progress and warnings are printed to stdout.
#
# @return [PageStat] record with source, basename, year, season, authors,
#   last_updated, line_count, char_count and sections filled in
def build_stat
  source       = nil
  authors      = nil
  last_updated = nil

  if @txt =~ /source: ([^ \n]+)/im
    source = $1.to_s
    puts "source: >#{source}<"
  end

  if @txt =~ /authors?:\s+(.+?)\s+last updated:\s+(\d{1,2} [a-z]{3,10} \d{4})/im
    # copy both captures before running any further regex
    raw_authors  = $1.to_s
    last_updated = $2.to_s
    authors = raw_authors.strip.gsub( /\s+/, ' ' )   # collapse whitespace runs
    authors = authors.gsub( /[ ]*,[ ]*/, ', ' )      # no space before, one space after commas
    puts "authors: >#{authors}<"
    puts "last updated: >#{last_updated}<"
  end

  puts "*** !!! missing source" if source.nil?
  puts "*** !!! missing authors n last updated" if authors.nil? || last_updated.nil?

  sections   = []
  line_count = 0
  @txt.each_line do |line|
    line_count += 1
    if line =~ /####\s+(.+)/
      heading = $1
      puts " found section >#{heading}<"
      sections << heading.strip
    end
  end

  basename = nil
  year     = nil
  season   = nil
  if source
    # e.g. .../eng2015.html => basename "eng2015"
    path     = URI.parse( source ).path
    extname  = File.extname( path )
    basename = File.basename( path, extname )
    year     = year_from_name( basename )
    season   = year_to_season( year )
  end

  rec = PageStat.new
  rec.source       = source
  rec.basename     = basename
  rec.year         = year
  rec.season       = season
  rec.authors      = authors
  rec.last_updated = last_updated
  rec.line_count   = line_count
  rec.char_count   = @txt.size
  rec.sections     = sections
  rec
end
|
#find_schedule(opts = {}) ⇒ Object
change to build_schedule - why? why not???
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
|
# File 'lib/rsssf/page.rb', line 64
# Extracts the match schedule (the round blocks) of one league or cup from
# the page text and wraps it in a Schedule.
#
# The page text (@txt) is scanned line by line through these stages:
# 1. skip lines until the league/cup header is found (only when
#    opts[:header] is given; otherwise this stage is skipped)
# 2. skip lines until the first round header is found; give up when a
#    horizontal rule ("=-=-=-=") or a bold "**...**" header comes first
# 3. inside a round: copy lines up to and including the first blank line
#    that follows at least one non-blank body line (blank lines directly
#    after the round header are dropped)
# 4. between rounds: a new round header starts the next round; right after
#    a blank line a date marker like "[Aug 8]" or a "First Legs" /
#    "Second Legs" line continues the current round; a horizontal rule or
#    bold header ends the schedule; anything else is skipped
#
# Progress is printed to stdout.
#
# @param opts [Hash] options
# @option opts [String] :header title of the league/cup section to look
#   for; matched case-insensitively as a "##".."####" heading or as
#   "**title**". The value is interpolated into a regex unescaped (only
#   spaces are rewritten to \s).
# @option opts [Boolean] :cup when truthy use CUP_ROUND_REGEX to detect
#   round headers, otherwise LEAGUE_ROUND_REGEX
# @return [Schedule] built via Schedule.from_string from the collected
#   text, with #rounds set to the number of round headers found
def find_schedule( opts={} )
  new_txt     = ''
  round_count = 0

  header = opts[:header]
  if header
    league_header_found = false

    # the regex below is free-form (x flag), so literal spaces in the
    # header would be ignored; turn them into \s
    header_esc = header.gsub( ' ', '\s' )

    # NOTE: the leading ^ binds to the first alternative only
    header_regex = /^
                     ([#]{2,4}\s+(#{header_esc}))
                       |
                     (\*{2}(#{header_esc})\*{2})
                   /ix
  else
    # no header requested: treat the whole text as one league/cup
    league_header_found = true
    header_regex = /^---dummy---$/
  end

  if opts[:cup]
    round_regex = CUP_ROUND_REGEX
  else
    round_regex = LEAGUE_ROUND_REGEX
  end

  first_round_header_found = false
  round_header_found       = false   # true while inside a round block
  round_body_found         = false   # true once a round has a non-blank line
  blank_found              = false   # true right after a round block was closed by a blank line

  @txt.each_line do |line|
    if !league_header_found
      if line =~ header_regex
        puts "!!! bingo - found header >#{line}<"
        league_header_found = true
        title = line.gsub( /[#*]/, '' ).strip   # drop the heading/bold markers
        new_txt << "## #{title}\n\n"
      else
        puts " searching for header >#{header}<; skipping line >#{line}<"
        next
      end
    elsif !first_round_header_found
      if line =~ round_regex
        puts "!!! bingo - found first round >#{line}<"
        round_count += 1
        first_round_header_found = true
        round_header_found       = true
        round_body_found         = false
        new_txt << line
      elsif line =~ /^=-=-=-=/
        puts "*** no rounds found; hit section marker (horizontal rule)"
        break
      elsif line =~ /^\*{2}[^*]+\*{2}/
        puts "*** no rounds found; hit section/stage header: #{line}"
        break
      else
        puts " searching for first round; skipping line >#{line}<"
        next
      end
    elsif round_header_found
      if line =~ /^\s*$/
        if round_body_found
          # blank line after the round body closes the block
          round_header_found = false
          blank_found        = true
          new_txt << line
        else
          # blank line(s) directly after the round header
          next
        end
      else
        round_body_found = true
        new_txt << line
      end
    else
      if line =~ /^\s*$/
        next
      elsif line =~ round_regex
        puts "!!! bingo - found new round >#{line}<"
        round_count += 1
        round_header_found = true
        round_body_found   = false
        blank_found        = false
        new_txt << line
      elsif blank_found && line =~ /\[[a-z]{3} \d{1,2}\]/i
        puts "!!! bingo - continue round >#{line}<"
        round_header_found = true
        blank_found        = false
        new_txt << line
      elsif blank_found && line =~ /First Legs|Second Legs/i
        puts "!!! bingo - continue round >#{line}<"
        round_header_found = true
        blank_found        = false
        new_txt << line
      elsif line =~ /=-=-=-=/
        puts "!!! stop schedule; hit section marker (horizontal rule)"
        break
      elsif line =~ /^\*{2}[^*]+\*{2}/
        puts "!!! stop schedule; hit section/stage header: #{line}"
        break
      else
        blank_found = false
        puts "skipping line in schedule >#{line}<"
        next
      end
    end
  end

  schedule = Schedule.from_string( new_txt )
  schedule.rounds = round_count
  schedule
end
|
#save(path) ⇒ Object
281
282
283
284
285
|
# File 'lib/rsssf/page.rb', line 281
# Writes the page text (@txt) to the given file, creating the file or
# replacing its previous content.
#
# @param path [String] destination file path
# @return [Integer] number of bytes written
def save(path)
  File.write(path, @txt)
end
|