Class: SportDb::Package

Inherits:
Object
  • Object
show all
Defined in:
lib/sportdb/formats/package.rb

Direct Known Subclasses

DirPackage, ZipPackage

Constant Summary collapse

CONF_RE =

todo/fix: make all regexes case-insensitive with /i option - why? why not?

             e.g. .TXT and .txt
yes!! use /i option!!!!!
%r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
    \.conf\.txt$
}x
LEAGUES_RE =

leagues.txt or leagues_en.txt

remove support for en.leagues.txt - why? why not?
%r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
   (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.leagues.txt
    leagues
      (?:_[a-z0-9_-]+)?
    \.txt$
}x
SEASONS_RE =

seasons.txt or seasons_en.txt

remove support for br.seasons.txt - why? why not?
%r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
   (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.seasons.txt
    seasons
      (?:_[a-z0-9_-]+)?
    \.txt$
}x
GROUNDS_RE =

de.stadiums.txt or stadiums.txt or stadiums_de.txt

%r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
   (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.stadiums.txt
    stadiums
      (?:_[a-z0-9_-]+)?
    \.txt$
}x
PLAYERS_RE =
%r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
   (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.players.txt
    players
      (?:_[a-z0-9_-]+)?
    \.txt$
}x
CLUBS_RE =

clubs.txt or clubs_en.txt

remove support for en.clubs.txt - why? why not?
%r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
   (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.txt
    clubs
      (?:_[a-z0-9_-]+)?
    \.txt$
}x
CLUBS_WIKI_RE =
%r{  (?:^|/)               # beginning (^) or beginning of path (/)
    (?:[a-z]{1,4}\.)?   # optional country code/key e.g. eng.clubs.wiki.txt
   clubs
     (?:_[a-z0-9_-]+)?
   \.wiki\.txt$
}x
CLUBS_PROPS_RE =

todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?

%r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
  (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.props.txt
    clubs
      (?:_[a-z0-9_-]+)?
    \.props\.txt$
}x
CLUB_PROPS_RE =

add alias for now (fix later - why? why not?)

CLUBS_PROPS_RE
CLUBS_HISTORY_RE =
%r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
  (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.history.txt
    clubs
      (?:_[a-z0-9_-]+)?
    \.history\.txt$
}x
TEAMS_RE =

teams.txt or teams_history.txt

%r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                  teams
                    (?:_[a-z0-9_-]+)?
                  \.txt$
}x
SEASON_RE =

todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!! season folder:

          e.g. /2019-20   or
year-only e.g. /2019      or
               /2016--france
%r{ (?:
     \d{4}-\d{2}
   | \d{4}(--[a-z0-9_-]+)?
  )
}x
SEASON =

“inline” helper for embedding in other regexes - keep? why? why not?

SEASON_RE.source
MATCH_RE =

note: if pattern includes directory add here

(otherwise move to more "generic" datafile) - why? why not?
%r{ (?: ^|/ )      # beginning (^) or beginning of path (/)
       #{SEASON}
     /[a-z0-9_-]+\.txt$  ## txt e.g /1-premierleague.txt
}x
MATCH_CSV_RE =
%r{ (?: ^|/ )      # beginning (^) or beginning of path (/)
     #{SEASON}
   /[a-z0-9_.-]+\.csv$  ## note: allow dot (.) too e.g /eng.1.csv
}x
CSV_RE =

add “generic” pattern to find all csv datafiles

%r{ (?: ^|/ )
    [a-z0-9_.-]+\.csv$  ## note: allow dot (.) too e.g /eng.1.csv
}x

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path_or_pack) ⇒ Package

Returns a new instance of Package.



233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/sportdb/formats/package.rb', line 233

## Builds a package wrapper around a "generic" (low-level) datafile package.
##
##  path_or_pack - either a ready-made Datafile::Package (used as-is)
##                 or a (string) path to a directory or to a .zip file
##
##  note: a missing path or an unsupported file type prints an error
##        message and terminates the process with exit status 1
def initialize( path_or_pack )
  @include = nil   ## no include / exclude filter set by default
  @exclude = nil

  if path_or_pack.is_a?( Datafile::Package )
    @pack = path_or_pack
  else   ## assume it's a (string) path
    path = path_or_pack
    if !File.exist?( path )  ## file or directory
      puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
      exit 1
    end

    if File.directory?( path )
      @pack = Datafile::DirPackage.new( path )     ## delegate to "generic" package
    elsif File.file?( path ) && File.extname( path ).downcase == '.zip'
      ## note: extname includes the dot (.) e.g. .zip;
      ##       compare case-insensitive to accept .ZIP / .Zip too
      @pack = Datafile::ZipPackage.new( path )
    else
      puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
      exit 1
    end
  end
end

Instance Attribute Details

#exclude ⇒ Object

attr_reader :pack ## allow access to embedded (“low-level”) delegate package (or hide!?) - why? why not?



202
203
204
# File 'lib/sportdb/formats/package.rb', line 202

## reader for the exclude filter kept in @exclude
def exclude; @exclude; end

#include ⇒ Object

attr_reader :pack ## allow access to embedded (“low-level”) delegate package (or hide!?) - why? why not?



202
203
204
# File 'lib/sportdb/formats/package.rb', line 202

## reader for the include filter kept in @include
def include; @include; end

Class Method Details

.find(path, pattern) ⇒ Object

move class-level “static” finders to DirPackage (do NOT work for now for zip packages) - why? why not?



123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/sportdb/formats/package.rb', line 123

## Collects all datafile paths below path that match pattern.
##
##  path    - root directory to search (recursively)
##  pattern - anything responding to match (e.g. a Regexp)
##
##  returns an array with the matching paths
##  note: prints (pp) the candidates and the matches along the way
def self.find( path, pattern )
  ## candidates are all files with a dot (.) in the name
  ## note: incl. files starting with dot (.) (normally excluded with just *)
  candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
  pp candidates

  datafiles = candidates.select { |candidate| pattern.match( candidate ) }
  pp datafiles
  datafiles
end

.find_clubs(path, pattern: CLUBS_RE) ⇒ Object



142
# File 'lib/sportdb/formats/package.rb', line 142

## hands path and pattern (CLUBS_RE unless overridden) on to find
def self.find_clubs( path, pattern: CLUBS_RE )
  find( path, pattern )
end

.find_clubs_history(path, pattern: CLUBS_HISTORY_RE) ⇒ Object



144
# File 'lib/sportdb/formats/package.rb', line 144

## hands path and pattern (CLUBS_HISTORY_RE unless overridden) on to find
def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE )
  find( path, pattern )
end

.find_clubs_wiki(path, pattern: CLUBS_WIKI_RE) ⇒ Object



143
# File 'lib/sportdb/formats/package.rb', line 143

## hands path and pattern (CLUBS_WIKI_RE unless overridden) on to find
def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE )
  find( path, pattern )
end

.find_conf(path, pattern: CONF_RE) ⇒ Object



158
# File 'lib/sportdb/formats/package.rb', line 158

## hands path and pattern (CONF_RE unless overridden) on to find
def self.find_conf( path, pattern: CONF_RE )
  find( path, pattern )
end

.find_leagues(path, pattern: LEAGUES_RE) ⇒ Object



151
# File 'lib/sportdb/formats/package.rb', line 151

## hands path and pattern (LEAGUES_RE unless overridden) on to find
def self.find_leagues( path, pattern: LEAGUES_RE )
  find( path, pattern )
end

.find_match(path, format: 'txt') ⇒ Object



161
162
163
164
165
166
167
# File 'lib/sportdb/formats/package.rb', line 161

## Finds match datafiles below path.
##
##  format - 'csv' selects MATCH_CSV_RE;
##           anything else is treated as txt for now (MATCH_RE)
def self.find_match( path, format: 'txt' )
  pattern = format == 'csv' ? MATCH_CSV_RE : MATCH_RE
  find( path, pattern )
end

.find_seasons(path, pattern: SEASONS_RE) ⇒ Object



154
# File 'lib/sportdb/formats/package.rb', line 154

## hands path and pattern (SEASONS_RE unless overridden) on to find
def self.find_seasons( path, pattern: SEASONS_RE )
  find( path, pattern )
end

.find_teams(path, pattern: TEAMS_RE) ⇒ Object



139
# File 'lib/sportdb/formats/package.rb', line 139

## hands path and pattern (TEAMS_RE unless overridden) on to find
def self.find_teams( path, pattern: TEAMS_RE )
  find( path, pattern )
end

.match_clubs(path) ⇒ Object Also known as: match_clubs?, clubs?



146
# File 'lib/sportdb/formats/package.rb', line 146

## matches path against CLUBS_RE; returns the match result (nil if no match)
def self.match_clubs( path )
  CLUBS_RE.match( path )
end

.match_clubs_history(path) ⇒ Object Also known as: match_clubs_history?, clubs_history?



148
# File 'lib/sportdb/formats/package.rb', line 148

## matches path against CLUBS_HISTORY_RE; returns the match result (nil if no match)
def self.match_clubs_history( path )
  CLUBS_HISTORY_RE.match( path )
end

.match_clubs_props(path, pattern: CLUBS_PROPS_RE) ⇒ Object Also known as: match_club_props, match_club_props?, club_props?, match_clubs_props?, clubs_props?



149
# File 'lib/sportdb/formats/package.rb', line 149

## matches path against pattern (CLUBS_PROPS_RE unless overridden);
##   returns the match result (nil if no match)
def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE )
  pattern.match( path )
end

.match_clubs_wiki(path) ⇒ Object Also known as: match_clubs_wiki?, clubs_wiki?



147
# File 'lib/sportdb/formats/package.rb', line 147

## matches path against CLUBS_WIKI_RE; returns the match result (nil if no match)
def self.match_clubs_wiki( path )
  CLUBS_WIKI_RE.match( path )
end

.match_conf(path) ⇒ Object Also known as: match_conf?, conf?



159
# File 'lib/sportdb/formats/package.rb', line 159

## matches path against CONF_RE; returns the match result (nil if no match)
def self.match_conf( path )
  CONF_RE.match( path )
end

.match_leagues(path) ⇒ Object Also known as: match_leagues?, leagues?



152
# File 'lib/sportdb/formats/package.rb', line 152

## matches path against LEAGUES_RE; returns the match result (nil if no match)
def self.match_leagues( path )
  LEAGUES_RE.match( path )
end

.match_seasons(path) ⇒ Object Also known as: match_seasons?, seasons?



155
# File 'lib/sportdb/formats/package.rb', line 155

## matches path against SEASONS_RE; returns the match result (nil if no match)
def self.match_seasons( path )
  SEASONS_RE.match( path )
end

.match_teams(path) ⇒ Object Also known as: match_teams?, teams?



140
# File 'lib/sportdb/formats/package.rb', line 140

## matches path against TEAMS_RE; returns the match result (nil if no match)
def self.match_teams( path )
  TEAMS_RE.match( path )
end

Instance Method Details

#each(pattern:, &blk) ⇒ Object



258
259
260
261
262
263
# File 'lib/sportdb/formats/package.rb', line 258

## Walks all entries of the embedded package (@pack) matching pattern
##   and calls the block for every entry that passes filter
##   (that is, the include/exclude filters).
##
##  returns whatever @pack.each returns
def each( pattern:, &blk )
  @pack.each( pattern: pattern ) { |entry| blk.call( entry ) if filter( entry ) }
end

#each_club_props(&blk) ⇒ Object



276
# File 'lib/sportdb/formats/package.rb', line 276

## runs each with CLUB_PROPS_RE, passing the block through
def each_club_props( &blk )
  each( pattern: CLUB_PROPS_RE, &blk )
end

#each_clubs(&blk) ⇒ Object



279
# File 'lib/sportdb/formats/package.rb', line 279

## runs each with CLUBS_RE, passing the block through
def each_clubs( &blk )
  each( pattern: CLUBS_RE, &blk )
end

#each_clubs_history(&blk) ⇒ Object



281
# File 'lib/sportdb/formats/package.rb', line 281

## runs each with CLUBS_HISTORY_RE, passing the block through
def each_clubs_history( &blk )
  each( pattern: CLUBS_HISTORY_RE, &blk )
end

#each_clubs_wiki(&blk) ⇒ Object



280
# File 'lib/sportdb/formats/package.rb', line 280

## runs each with CLUBS_WIKI_RE, passing the block through
def each_clubs_wiki( &blk )
  each( pattern: CLUBS_WIKI_RE, &blk )
end

#each_conf(&blk) ⇒ Object



265
# File 'lib/sportdb/formats/package.rb', line 265

## runs each with CONF_RE, passing the block through
def each_conf( &blk )
  each( pattern: CONF_RE, &blk )
end

#each_csv(&blk) ⇒ Object



274
# File 'lib/sportdb/formats/package.rb', line 274

## runs each with the "generic" CSV_RE, passing the block through
def each_csv( &blk )
  each( pattern: CSV_RE, &blk )
end

#each_grounds(&blk) ⇒ Object



286
# File 'lib/sportdb/formats/package.rb', line 286

## runs each with GROUNDS_RE, passing the block through
def each_grounds( &blk )
  each( pattern: GROUNDS_RE, &blk )
end

#each_leagues(&blk) ⇒ Object



278
# File 'lib/sportdb/formats/package.rb', line 278

## runs each with LEAGUES_RE, passing the block through
def each_leagues( &blk )
  each( pattern: LEAGUES_RE, &blk )
end

#each_match(format: 'txt', &blk) ⇒ Object



266
267
268
269
270
271
272
# File 'lib/sportdb/formats/package.rb', line 266

## Runs each over the match datafiles, passing the block through.
##
##  format - 'csv' selects MATCH_CSV_RE; anything else uses MATCH_RE (txt)
def each_match( format: 'txt', &blk )
  pattern = format == 'csv' ? MATCH_CSV_RE : MATCH_RE
  each( pattern: pattern, &blk )
end

#each_match_csv(&blk) ⇒ Object



273
# File 'lib/sportdb/formats/package.rb', line 273

## runs each with MATCH_CSV_RE, passing the block through
def each_match_csv( &blk )
  each( pattern: MATCH_CSV_RE, &blk )
end

#each_players(&blk) ⇒ Object



287
# File 'lib/sportdb/formats/package.rb', line 287

## runs each with PLAYERS_RE, passing the block through
def each_players( &blk )
  each( pattern: PLAYERS_RE, &blk )
end

#each_seasons(&blk) ⇒ Object



283
# File 'lib/sportdb/formats/package.rb', line 283

## runs each with SEASONS_RE, passing the block through
def each_seasons( &blk )
  each( pattern: SEASONS_RE, &blk )
end

#filter(entry) ⇒ Object



216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# File 'lib/sportdb/formats/package.rb', line 216

## Decides if entry is kept (true) or skipped (false).
##
##  - with an include filter set: keep only entries matching it
##      (the exclude filter is NOT consulted in this case;
##       todo/check: check for exclude here too - why? why not?)
##  - otherwise: keep everything except entries matching the exclude filter
##
##  note: always returns a strict true or false
def filter( entry )
  if @include   ## todo/check: is include a reserved keyword????
    filter_clause( @include, entry ) ? true : false
  else
    !(@exclude && filter_clause( @exclude, entry ))
  end
end

#filter_clause(filter, entry) ⇒ Object

private helpers - like select returns true for keeping and false for skipping entry



205
206
207
208
209
210
211
212
213
214
# File 'lib/sportdb/formats/package.rb', line 205

## Checks a single filter against entry
##   - like select returns true for keeping and false for skipping entry.
##
##  filter - String:  true if entry.name contains the string
##           Regexp:  true if entry.name matches the regex
##           otherwise assumed to be callable: gets the entry itself
##             and its result is returned as is
##           todo/check: pass in entry (and NOT entry.name) - why? why not?
def filter_clause( filter, entry )
  case filter
  when String then entry.name.include?( filter )
  when Regexp then filter.match?( entry.name )
  else             filter.call( entry )
  end
end

#match(format: 'txt') ⇒ Object Also known as: matches

return all match datafile entries



290
291
292
# File 'lib/sportdb/formats/package.rb', line 290

## returns all match datafile entries (as collected via each_match) in an array
def match( format: 'txt' )
  entries = []
  each_match( format: format ) { |entry| entries.push( entry ) }
  entries
end

#match_by_season(format: 'txt', start: nil) ⇒ Object

method match_by_season_dir



322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
# File 'lib/sportdb/formats/package.rb', line 322

## Groups all match datafile entries by (normalized) season key.
##
##  format - passed on to match (e.g. 'txt' or 'csv')
##  start  - optional first season; entries of seasons with an earlier
##           start year get skipped
##
##  returns an array of [season_key, entries] pairs - latest season first, e.g.
##    [["2017/18", ["2017-18/1-proleague.csv"]],
##     ["2016/17", ["2016-17/1-proleague.csv"]],
##     ["2015/16", ["2015-16/1-proleague.csv"]]]
##
##  note: same seasons get folded into one list
##        even if found in different directories, e.g.
##          1950s/1957-58/1-division1.csv      and
##          archives/1950s/1957-58/1-division1.csv
##        both end up under "1957/58" - should be together - why? why not?
##
##  todo/note: in the future - season might be anything
##        (e.g. part of a filename and NOT a directory) - why? why not?
##  todo/fix: (re)use a more generic filter instead of start for start of season only
##        e.g. filter_season( clause, season_key )
##        or better filter = SeasonFilter.new( clause )
##             with filter.skip?, filter.include?( season_key ),
##                  filter.before?( season_key ) etc.
##             find some good method names!!!!
##  change/rename to by_season_key - why? why not?
def match_by_season( format: 'txt', start: nil )
  first_season = start ? Season( start ) : nil

  ## note: missing season keys start out with an empty list
  by_season = Hash.new { |hash, key| hash[ key ] = [] }

  match( format: format ).each do |entry|
    ## note: assume last directory in datafile path is the season part/key
    season_dir = File.basename( File.dirname( entry.name ))
    season     = Season.parse( season_dir )  ## normalize season

    ## skip if this season starts before the start season
    next if first_season && first_season.start_year > season.start_year

    by_season[ season.key ] << entry
  end

  ##  todo/fix:  - add sort entries by name - why? why not?
  ## note: assume 1-,2- etc. gets us back sorted leagues
  ##  - use sort. (will not sort by default?)

  ## sort by season key - latest / newest first (and oldest last)
  ##   note: returns an array of pairs (or keep hash) - why? why not?
  by_season.sort { |(l_key, _), (r_key, _)| r_key <=> l_key }
end

#match_by_season_dir(format: 'txt') ⇒ Object

todo/check: rename/change to match_by_dir - why? why not?

still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?


298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
# File 'lib/sportdb/formats/package.rb', line 298

## Groups all match datafile entries by the directory they are in.
##
##  returns an array of [dir, entries] pairs
##    (in order of first appearance), e.g.
##
##  [["1950s/1956-57",
##     ["1950s/1956-57/1-division1.csv",
##      "1950s/1956-57/2-division2.csv",
##      "1950s/1956-57/3a-division3n.csv",
##      "1950s/1956-57/3b-division3s.csv"]],
##    ...]
##
##  todo/fix:  - add sort entries by name - why? why not?
##  note: assume 1-,2- etc. gets us back sorted leagues
##   - use sort. (will not sort by default?)
def match_by_season_dir( format: 'txt' )
  by_dir = match( format: format ).group_by { |entry| File.dirname( entry.name ) }
  by_dir.to_a    ## return as array (or keep hash) - why? why not?
end