Class: SportDb::Package

Inherits:
Object
  • Object
show all
Defined in:
lib/sportdb/formats/package.rb

Direct Known Subclasses

DirPackage, ZipPackage

Constant Summary collapse

CONF_RE =

todo/fix: make all regexes case-insensitive with /i option - why? why not?

             e.g. .TXT and .txt
yes!! use /i option!!!!!
%r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
    \.conf\.txt$
}x
LEAGUES_RE =

leagues.txt or leagues_en.txt

remove support for en.leagues.txt - why? why not?
%r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
   (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.leagues.txt
    leagues
      (?:_[a-z0-9_-]+)?
    \.txt$
}x
SEASONS_RE =

seasons.txt or seasons_en.txt

remove support for br.seasons.txt - why? why not?
%r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
   (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.seasons.txt
    seasons
      (?:_[a-z0-9_-]+)?
    \.txt$
}x
CLUBS_RE =

clubs.txt or clubs_en.txt

remove support for en.clubs.txt - why? why not?
%r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
   (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.txt
    clubs
      (?:_[a-z0-9_-]+)?
    \.txt$
}x
CLUBS_WIKI_RE =
%r{  (?:^|/)               # beginning (^) or beginning of path (/)
    (?:[a-z]{1,4}\.)?   # optional country code/key e.g. eng.clubs.wiki.txt
   clubs
     (?:_[a-z0-9_-]+)?
   \.wiki\.txt$
}x
CLUBS_PROPS_RE =

todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?

%r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
  (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.props.txt
    clubs
      (?:_[a-z0-9_-]+)?
    \.props\.txt$
}x
CLUB_PROPS_RE =

add alias for now (fix later - why? why not?)

CLUBS_PROPS_RE
CLUBS_HISTORY_RE =
%r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
  (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.history.txt
    clubs
      (?:_[a-z0-9_-]+)?
    \.history\.txt$
}x
TEAMS_RE =

teams.txt or teams_history.txt

%r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                  teams
                    (?:_[a-z0-9_-]+)?
                  \.txt$
}x
SEASON_RE =

todo/fix: rename SEASON_RE to SEASON_KEY_RE (to avoid confusion with SEASONS_RE for the seasons datafile?) - why? why not? Matches a season folder:

          e.g. /2019-20   or
year-only e.g. /2019      or
               /2016--france
%r{ (?:
     \d{4}-\d{2}
   | \d{4}(--[a-z0-9_-]+)?
  )
}x
SEASON =

“inline” helper for embedding in other regexes - keep? why? why not?

SEASON_RE.source
MATCH_RE =

note: if pattern includes directory add here

(otherwise move to more "generic" datafile) - why? why not?
%r{ (?: ^|/ )      # beginning (^) or beginning of path (/)
       #{SEASON}
     /[a-z0-9_-]+\.txt$  ## txt e.g /1-premierleague.txt
}x
MATCH_CSV_RE =
%r{ (?: ^|/ )      # beginning (^) or beginning of path (/)
     #{SEASON}
   /[a-z0-9_.-]+\.csv$  ## note: allow dot (.) too e.g /eng.1.csv
}x
CSV_RE =

add “generic” pattern to find all csv datafiles

%r{ (?: ^|/ )
    [a-z0-9_.-]+\.csv$  ## note: allow dot (.) too e.g /eng.1.csv
}x

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path_or_pack) ⇒ Package

Returns a new instance of Package.



216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
# File 'lib/sportdb/formats/package.rb', line 216

## Sets up the package around a "low-level" delegate package (@pack).
##
## path_or_pack - either a Datafile::Package (used as-is as the delegate)
##                or a path to a directory or to a file with a .zip extension.
##
## Prints an error message and terminates the process (exit 1) if the path
## does not exist or is neither a directory nor a .zip file.
## The include/exclude filters start out unset (nil).
def initialize( path_or_pack )
  @include = nil
  @exclude = nil

  ## already a package? use it as the delegate - nothing else to do
  if path_or_pack.is_a?( Datafile::Package )
    @pack = path_or_pack
    return
  end

  path = path_or_pack    ## otherwise assume it's a (string) path

  unless File.exist?( path )  ## file or directory
    puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
    exit 1
  end

  @pack = if File.directory?( path )
            Datafile::DirPackage.new( path )     ## delegate to "generic" package
          elsif File.file?( path ) && File.extname( path ) == '.zip'  # note: extname includes dot (.) eg .zip
            Datafile::ZipPackage.new( path )
          else
            puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
            exit 1
          end
end

Instance Attribute Details

#excludeObject

attr_reader :pack ## allow access to embedded (“low-level”) delegate package (or hide!?) - why? why not?



185
186
187
# File 'lib/sportdb/formats/package.rb', line 185

## Reader for the exclude filter (returns @exclude; nil if none is set).
def exclude; @exclude; end

#includeObject

attr_reader :pack ## allow access to embedded (“low-level”) delegate package (or hide!?) - why? why not?



185
186
187
# File 'lib/sportdb/formats/package.rb', line 185

## Reader for the include filter (returns @include; nil if none is set).
def include; @include; end

Class Method Details

.find(path, pattern) ⇒ Object

move class-level “static” finders to DirPackage (do NOT work for now for zip packages) - why? why not?



106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/sportdb/formats/package.rb', line 106

## Collects all files below path whose (full) path matches pattern.
##
## path    - root directory; searched recursively via Dir.glob
## pattern - anything responding to match (e.g. a Regexp); gets tested
##           against every candidate path
##
## Returns an array with the matching paths (empty if nothing matches).
##
## note: no longer dumps the candidate / result lists to stdout
##       (left-over debug pp calls removed)
def self.find( path, pattern )
  ## check all files with an extension
  ## note: incl. files starting with dot (.) as candidates (normally excluded with just *)
  candidates = Dir.glob( "#{path}/**/{*,.*}.*" )

  candidates.select { |candidate| pattern.match( candidate ) }
end

.find_clubs(path, pattern: CLUBS_RE) ⇒ Object



125
# File 'lib/sportdb/formats/package.rb', line 125

## Finds clubs datafiles below path; delegates to find using pattern
## (defaults to CLUBS_RE).
def self.find_clubs( path, pattern: CLUBS_RE )
  find( path, pattern )
end

.find_clubs_history(path, pattern: CLUBS_HISTORY_RE) ⇒ Object



127
# File 'lib/sportdb/formats/package.rb', line 127

## Finds clubs history datafiles below path; delegates to find using pattern
## (defaults to CLUBS_HISTORY_RE).
def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE )
  find( path, pattern )
end

.find_clubs_wiki(path, pattern: CLUBS_WIKI_RE) ⇒ Object



126
# File 'lib/sportdb/formats/package.rb', line 126

## Finds clubs wiki datafiles below path; delegates to find using pattern
## (defaults to CLUBS_WIKI_RE).
def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE )
  find( path, pattern )
end

.find_conf(path, pattern: CONF_RE) ⇒ Object



141
# File 'lib/sportdb/formats/package.rb', line 141

## Finds conf datafiles below path; delegates to find using pattern
## (defaults to CONF_RE).
def self.find_conf( path, pattern: CONF_RE )
  find( path, pattern )
end

.find_leagues(path, pattern: LEAGUES_RE) ⇒ Object



134
# File 'lib/sportdb/formats/package.rb', line 134

## Finds leagues datafiles below path; delegates to find using pattern
## (defaults to LEAGUES_RE).
def self.find_leagues( path, pattern: LEAGUES_RE )
  find( path, pattern )
end

.find_match(path, format: 'txt') ⇒ Object



144
145
146
147
148
149
150
# File 'lib/sportdb/formats/package.rb', line 144

## Finds match datafiles below path.
##
## format - 'csv' selects MATCH_CSV_RE; anything else uses MATCH_RE
##          (that is, always assume txt for now)
def self.find_match( path, format: 'txt' )
  pattern = format == 'csv' ? MATCH_CSV_RE : MATCH_RE
  find( path, pattern )
end

.find_seasons(path, pattern: SEASONS_RE) ⇒ Object



137
# File 'lib/sportdb/formats/package.rb', line 137

## Finds seasons datafiles below path; delegates to find using pattern
## (defaults to SEASONS_RE).
def self.find_seasons( path, pattern: SEASONS_RE )
  find( path, pattern )
end

.find_teams(path, pattern: TEAMS_RE) ⇒ Object



122
# File 'lib/sportdb/formats/package.rb', line 122

## Finds teams datafiles below path; delegates to find using pattern
## (defaults to TEAMS_RE).
def self.find_teams( path, pattern: TEAMS_RE )
  find( path, pattern )
end

.match_clubs(path) ⇒ Object Also known as: match_clubs?, clubs?



129
# File 'lib/sportdb/formats/package.rb', line 129

## Tests path against CLUBS_RE.
## Returns whatever CLUBS_RE.match returns (MatchData on a hit, otherwise nil).
def self.match_clubs( path )
  CLUBS_RE.match( path )
end

.match_clubs_history(path) ⇒ Object Also known as: match_clubs_history?, clubs_history?



131
# File 'lib/sportdb/formats/package.rb', line 131

## Tests path against CLUBS_HISTORY_RE.
## Returns whatever CLUBS_HISTORY_RE.match returns (MatchData on a hit, otherwise nil).
def self.match_clubs_history( path )
  CLUBS_HISTORY_RE.match( path )
end

.match_clubs_props(path, pattern: CLUBS_PROPS_RE) ⇒ Object Also known as: match_club_props, match_club_props?, club_props?, match_clubs_props?, clubs_props?



132
# File 'lib/sportdb/formats/package.rb', line 132

## Tests path against pattern (defaults to CLUBS_PROPS_RE).
## Returns whatever pattern.match returns (MatchData on a hit, otherwise nil, for a Regexp).
def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE )
  pattern.match( path )
end

.match_clubs_wiki(path) ⇒ Object Also known as: match_clubs_wiki?, clubs_wiki?



130
# File 'lib/sportdb/formats/package.rb', line 130

## Tests path against CLUBS_WIKI_RE.
## Returns whatever CLUBS_WIKI_RE.match returns (MatchData on a hit, otherwise nil).
def self.match_clubs_wiki( path )
  CLUBS_WIKI_RE.match( path )
end

.match_conf(path) ⇒ Object Also known as: match_conf?, conf?



142
# File 'lib/sportdb/formats/package.rb', line 142

## Tests path against CONF_RE.
## Returns whatever CONF_RE.match returns (MatchData on a hit, otherwise nil).
def self.match_conf( path )
  CONF_RE.match( path )
end

.match_leagues(path) ⇒ Object Also known as: match_leagues?, leagues?



135
# File 'lib/sportdb/formats/package.rb', line 135

## Tests path against LEAGUES_RE.
## Returns whatever LEAGUES_RE.match returns (MatchData on a hit, otherwise nil).
def self.match_leagues( path )
  LEAGUES_RE.match( path )
end

.match_seasons(path) ⇒ Object Also known as: match_seasons?, seasons?



138
# File 'lib/sportdb/formats/package.rb', line 138

## Tests path against SEASONS_RE.
## Returns whatever SEASONS_RE.match returns (MatchData on a hit, otherwise nil).
def self.match_seasons( path )
  SEASONS_RE.match( path )
end

.match_teams(path) ⇒ Object Also known as: match_teams?, teams?



123
# File 'lib/sportdb/formats/package.rb', line 123

## Tests path against TEAMS_RE.
## Returns whatever TEAMS_RE.match returns (MatchData on a hit, otherwise nil).
def self.match_teams( path )
  TEAMS_RE.match( path )
end

Instance Method Details

#each(pattern:, &blk) ⇒ Object



241
242
243
244
245
246
# File 'lib/sportdb/formats/package.rb', line 241

## Walks all entries of the delegate package (@pack) matching pattern and
## passes on every entry that gets through filter (include/exclude filters)
## to the given block.
##
## Returns whatever @pack.each returns.
def each( pattern:, &blk )
  @pack.each( pattern: pattern ) do |entry|
    blk.call( entry )  if filter( entry )
  end
end

#each_club_props(&blk) ⇒ Object



259
# File 'lib/sportdb/formats/package.rb', line 259

## Passes the block on to each using the CLUB_PROPS_RE pattern.
def each_club_props( &blk )
  each( pattern: CLUB_PROPS_RE, &blk )
end

#each_clubs(&blk) ⇒ Object



262
# File 'lib/sportdb/formats/package.rb', line 262

## Passes the block on to each using the CLUBS_RE pattern.
def each_clubs( &blk )
  each( pattern: CLUBS_RE, &blk )
end

#each_clubs_history(&blk) ⇒ Object



264
# File 'lib/sportdb/formats/package.rb', line 264

## Passes the block on to each using the CLUBS_HISTORY_RE pattern.
def each_clubs_history( &blk )
  each( pattern: CLUBS_HISTORY_RE, &blk )
end

#each_clubs_wiki(&blk) ⇒ Object



263
# File 'lib/sportdb/formats/package.rb', line 263

## Passes the block on to each using the CLUBS_WIKI_RE pattern.
def each_clubs_wiki( &blk )
  each( pattern: CLUBS_WIKI_RE, &blk )
end

#each_conf(&blk) ⇒ Object



248
# File 'lib/sportdb/formats/package.rb', line 248

## Passes the block on to each using the CONF_RE pattern.
def each_conf( &blk )
  each( pattern: CONF_RE, &blk )
end

#each_csv(&blk) ⇒ Object



257
# File 'lib/sportdb/formats/package.rb', line 257

## Passes the block on to each using the ("generic") CSV_RE pattern.
def each_csv( &blk )
  each( pattern: CSV_RE, &blk )
end

#each_leagues(&blk) ⇒ Object



261
# File 'lib/sportdb/formats/package.rb', line 261

## Passes the block on to each using the LEAGUES_RE pattern.
def each_leagues( &blk )
  each( pattern: LEAGUES_RE, &blk )
end

#each_match(format: 'txt', &blk) ⇒ Object



249
250
251
252
253
254
255
# File 'lib/sportdb/formats/package.rb', line 249

## Passes the block on to each using the match datafile pattern for format.
##
## format - 'csv' selects MATCH_CSV_RE; anything else uses MATCH_RE
def each_match( format: 'txt', &blk )
  pattern = format == 'csv' ? MATCH_CSV_RE : MATCH_RE
  each( pattern: pattern, &blk )
end

#each_match_csv(&blk) ⇒ Object



256
# File 'lib/sportdb/formats/package.rb', line 256

## Passes the block on to each using the MATCH_CSV_RE pattern.
def each_match_csv( &blk )
  each( pattern: MATCH_CSV_RE, &blk )
end

#each_seasons(&blk) ⇒ Object



266
# File 'lib/sportdb/formats/package.rb', line 266

## Passes the block on to each using the SEASONS_RE pattern.
def each_seasons( &blk )
  each( pattern: SEASONS_RE, &blk )
end

#filter(entry) ⇒ Object



199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# File 'lib/sportdb/formats/package.rb', line 199

## Decides if entry should be kept (true) or skipped (false).
##
## If an include filter is set, it alone decides: the entry is kept only
## if it matches (the exclude filter is NOT consulted in this case).
## Otherwise the entry is kept unless an exclude filter is set and matches.
def filter( entry )
  ## todo/check: is include a reserved keyword????
  ## todo/check: check for exclude here too - why? why not?
  return (filter_clause( @include, entry ) ? true : false)  if @include

  !( @exclude && filter_clause( @exclude, entry ) )
end

#filter_clause(filter, entry) ⇒ Object

Private helper: like select, returns true to keep an entry and false to skip it.



188
189
190
191
192
193
194
195
196
197
# File 'lib/sportdb/formats/package.rb', line 188

## Applies a single filter to entry.
##
## filter - String: true if entry.name contains it
##          Regexp: true if it matches entry.name
##          anything else: assumed to be callable; gets called with entry
##          and its result is returned as-is
def filter_clause( filter, entry )
  case filter
  when String then !entry.name.index( filter ).nil?
  when Regexp then !filter.match( entry.name ).nil?
  else
    ## todo/check: pass in entry (and NOT entry.name) - why? why not?
    filter.call( entry )
  end
end

#match(format: 'txt') ⇒ Object Also known as: matches

return all match datafile entries



270
271
272
# File 'lib/sportdb/formats/package.rb', line 270

## Collects all entries yielded by each_match (for the given format)
## into an array and returns it.
def match( format: 'txt' )
  entries = []
  each_match( format: format ) { |entry| entries << entry }
  entries
end

#match_by_season(format: 'txt', start: nil) ⇒ Object

Groups all match datafile entries by (normalized) season key, sorted with the newest season first.



302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
# File 'lib/sportdb/formats/package.rb', line 302

## Groups all match datafile entries by season.
##
## format - passed on to match
## start  - optional start season; entries of seasons with an earlier
##          start year get skipped
##
## Returns an array of [season key, entries] pairs sorted by season key,
## latest / newest first (and oldest last).
def match_by_season( format: 'txt', start: nil )   ## change/rename to by_season_key - why? why not?

  ## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not?

  ##  note: folds all same seasons (even if in different directories)
  ##     into the same datafile list e.g.
  ##   ["1957/58",
  ##     ["1950s/1957-58/1-division1.csv",
  ##      "1950s/1957-58/2-division2.csv"]],
  ## and
  ##   ["1957/58",
  ##      ["archives/1950s/1957-58/1-division1.csv",
  ##       "archives/1950s/1957-58/2-division2.csv"]],
  ##  end up together - why? why not?

  ####
  # Example package:
  # [["2012/13", ["2012-13/1-proleague.csv"]],
  #  ["2013/14", ["2013-14/1-proleague.csv"]],
  #  ["2014/15", ["2014-15/1-proleague.csv"]]]

  ## todo/fix:  (re)use a more generic filter instead of start for start of season only
  ##   e.g. a "generic" filter_season( clause, season_key ) helper
  ##   or better filter = SeasonFilter.new( clause )
  ##             filter.skip? filter.include?( season_key )?
  ##             filter.before?( season_key )  etc.
  ##              find some good method names!!!!
  first_season = start ? Season( start ) : nil

  by_season = match( format: format ).each_with_object( {} ) do |entry, seasons|
    ## note: assume last directory in datafile path is the season part/key
    season_dir = File.basename( File.dirname( entry.name ))
    season     = Season.parse( season_dir )  ## normalize season

    ## skip if this season starts before the start season
    next if first_season && first_season.start_year > season.start_year

    (seasons[ season.key ] ||= []) << entry
  end

  ##  todo/fix:  - add sort entries by name - why? why not?
  ## note: assume 1-,2- etc. gets us back sorted leagues
  ##  - use sort. (will not sort by default?)

  ## return as array (or keep hash) - why? why not?
  by_season.to_a.sort { |(left_key, _), (right_key, _)| right_key <=> left_key }
end

#match_by_season_dir(format: 'txt') ⇒ Object

todo/check: rename/change to match_by_dir - why? why not?

still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?


278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
# File 'lib/sportdb/formats/package.rb', line 278

## Groups all match datafile entries by their directory path.
##
## Returns an array of [directory, entries] pairs (in order of first
## appearance) e.g.
##
## [["1950s/1956-57",
##    ["1950s/1956-57/1-division1.csv",
##     "1950s/1956-57/2-division2.csv",
##     "1950s/1956-57/3a-division3n.csv",
##     "1950s/1956-57/3b-division3s.csv"]],
##   ...]
def match_by_season_dir( format: 'txt' )
  ##  todo/fix:  - add sort entries by name - why? why not?
  ## note: assume 1-,2- etc. gets us back sorted leagues
  ##  - use sort. (will not sort by default?)

  match( format: format )
    .group_by { |entry| File.dirname( entry.name ) }
    .to_a    ## return as array (or keep hash) - why? why not?
end