Module: TextUtils::TitleTable
- Included in:
- TextUtils
- Defined in:
- lib/sportdb/title.rb
Instance Method Summary collapse
- #build_title_table_for(records) ⇒ Object
- #find_key_for!(name, line) ⇒ Object
- #find_keys_for!(name, line) ⇒ Object — NB: keys (plural!); returns an array.
- #map_title_worker_for!(name, line, key, values) ⇒ Object
- #map_titles_for!(name, line, title_table) ⇒ Object
Instance Method Details
#build_title_table_for(records) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/sportdb/title.rb', line 16
#
# Builds the table of known titles (with synonyms) used for title matching, e.g.
#
#   [[ 'wolfsbrug', [ 'VfL Wolfsburg' ]],
#    [ 'augsburg',  [ 'FC Augsburg', 'Augi2', 'Augi3' ]],
#    [ 'stuttgart', [ 'VfB Stuttgart' ]] ]
#
# @param records [Enumerable] records responding to +key+, +title+ and
#   +synonyms+ (a '|'-separated string or blank) and, optionally, +code+
# @return [Array<Array>] one +[key, titles]+ pair per record; +titles+ holds
#   the regex-escaped titles sorted longest first, followed by the raw code
#   (when the record has one)
def build_title_table_for( records )
  known_titles = []

  records.each_with_index do |rec, index|
    title_candidates = [rec.title]
    title_candidates += rec.synonyms.split('|') if rec.synonyms.present?

    ## check if title includes subtitle e.g. Grand Prix Japan (Suzuka Circuit)
    #  make subtitle optional by adding title w/o subtitle e.g. Grand Prix Japan
    titles = []
    title_candidates.each do |candidate|
      titles << candidate
      next unless candidate =~ /\(.+\)/

      extra_title = candidate.gsub( /\(.+\)/, '' ).strip   # remove subtitle plus surrounding whitespace
      ## a title that is nothing but a subtitle e.g. (Suzuka Circuit) leaves an
      #  empty string; skip it - an empty pattern would match (almost) any line
      titles << extra_title unless extra_title.empty?
    end

    ## a stripped title may repeat a synonym; duplicates add nothing to matching
    titles = titles.uniq

    ## NB: sort here by length (largest goes first - best match)
    #  exclude code and key (key should always go last)
    titles = titles.sort { |left, right| right.length <=> left.length }

    ## escape for regex plus allow subs for special chars/accents
    titles = titles.map { |title| TextUtils.title_esc_regex( title ) }

    ## NB: only include code field - if defined
    titles << rec.code if rec.respond_to?(:code) && rec.code.present?

    known_titles << [ rec.key, titles ]

    ### fix: use plain logger
    LogUtils::Logger.root.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"
  end

  known_titles
end
#find_key_for!(name, line) ⇒ Object
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/sportdb/title.rb', line 68
#
# Finds the first +@@oo<key>oo@@+ marker in +line+, replaces it in place with
# the tag +[NAME]+ (name upcased) and returns the key found inside the marker.
#
# @param name [String] name used for the tag (upcased) and the debug log (downcased)
# @param line [String] line to search; mutated in place when a marker is found
# @return [String, nil] the key inside the first marker, or nil if there is none
def find_key_for!( name, line )
  # e.g. everything in @@ .... @@
  #  (use non-greedy +? plus all chars but not @, that is [^@])
  regex = /@@oo([^@]+?)oo@@/

  match = regex.match( line )
  return nil unless match

  key = match[1]   # keep the capture in a local; do not rely on $1 surviving later calls

  ### fix: use plain logger
  LogUtils::Logger.root.debug " #{name.downcase}: >#{key}<"

  # block form inserts the tag literally (a backslash sequence such as \1 in
  #  name is NOT expanded as a regex back-reference)
  line.sub!( regex ) { "[#{name.upcase}]" }

  key
end
#find_keys_for!(name, line) ⇒ Object
NB: keys (plural!) - will return array
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
# File 'lib/sportdb/title.rb', line 88
#
# NB: keys (plural!) - will return array
#
# Calls find_key_for! repeatedly with numbered names (name1, name2, ...) and
# collects the keys it returns until one comes back blank.
#
# @param name [String] base name; downcased and suffixed with a running counter
# @param line [String] line handed on to find_key_for! for every lookup
# @return [Array] the keys collected, in the order they were found
def find_keys_for!( name, line )
  prefix = name.downcase
  found  = []
  number = 1

  loop do
    key = find_key_for!( "#{prefix}#{number}", line )
    break unless key.present?

    found << key
    number += 1
  end

  found
end
#map_title_worker_for!(name, line, key, values) ⇒ Object
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
# File 'lib/sportdb/title.rb', line 114
#
# Tries each pattern in +values+ against +line+; on the first hit replaces the
# matched text in place with the marker +@@oo<key>oo@@ + (note the trailing
# space) and stops.
#
# @param name [String] name used (downcased) in the debug log only
# @param line [String] line to search; mutated in place on a match
# @param key [String] key written into the marker
# @param values [Array<String>] regex source fragments, tried in order
# @return [Boolean] true if a pattern matched and was replaced, false otherwise
def map_title_worker_for!( name, line, key, values )
  return false if line.nil?   # nothing to match against

  downcase_name = name.downcase

  values.each do |value|
    ## nb: \b does NOT include space or newline for word boundry (only alphanums e.g. a-z0-9)
    ## (thus add it, allows match for Benfica Lis. for example - note . at the end)
    ## wrap with word boundary (e.g. match only whole words e.g. not wac in wacker)
    regex = /\b#{value}(?:\b| |\t|$)/

    # sub! returns nil when nothing matched, so one scan both tests and replaces;
    #  block form inserts the marker literally (no back-reference expansion in key)
    # make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
    next unless line.sub!( regex ) { "@@oo#{key}oo@@ " }   # NB: add one space char at end

    ### fix: use plain logger
    LogUtils::Logger.root.debug " match for #{downcase_name} >#{key}< >#{value}<"

    return true   # break out after first match (do NOT continue)
  end

  false
end
#map_titles_for!(name, line, title_table) ⇒ Object
105 106 107 108 109 110 111 |
# File 'lib/sportdb/title.rb', line 105
#
# Runs map_title_worker_for! once for every entry of the title table.
#
# @param name [String] passed through to map_title_worker_for!
# @param line [String] passed through to map_title_worker_for!
# @param title_table [Array] entries whose element 0 is the key and whose
#   element 1 is the list of values for that key
# @return [Object] the result of +title_table.each+
def map_titles_for!( name, line, title_table )
  title_table.each { |entry| map_title_worker_for!( name, line, entry[0], entry[1] ) }
end