Module: TextUtils::TitleTable

Included in:
TextUtils
Defined in:
lib/textutils/title.rb

Instance Method Summary

Instance Method Details

#build_title_table_for(records) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/textutils/title.rb', line 22

def build_title_table_for( records )
  ## Build the table of known titles (incl. synonyms) - one entry per record, e.g.
  #
  # [[ 'wolfsburg', [ 'VfL Wolfsburg' ]],
  #  [ 'augsburg',  [ 'FC Augsburg', 'Augi2', 'Augi3' ]],
  #  [ 'stuttgart', [ 'VfB Stuttgart' ]] ]
  #
  # records - enumerable of objects responding to key, title and synonyms
  #           (synonyms is a single string separated by |); code is optional
  #
  # Returns an array of [key, titles] pairs in record order.

  ## NB: greedy match - spans from the first ( to the last ) in a title
  subtitle = /\(.+\)/

  records.each_with_index.map do |rec, index|
    candidates = [ rec.title ]
    candidates.concat( rec.synonyms.split('|') ) if rec.synonyms.present?

    ## A title may carry a subtitle e.g. Grand Prix Japan (Suzuka Circuit);
    #  keep the full title and also add the bare variant (e.g. Grand Prix Japan)
    #  with surrounding whitespace stripped, so the subtitle becomes optional
    expanded = candidates.flat_map do |candidate|
      if candidate =~ subtitle
        [ candidate, candidate.gsub( subtitle, '' ).strip ]
      else
        [ candidate ]
      end
    end

    ## NB: longest title goes first (best match wins when matching later)
    by_length = expanded.sort { |a,b| b.length <=> a.length }

    ## hand every title to TextUtils.title_esc_regex (defined elsewhere;
    #  per the original note: regex escaping plus subs for special chars/accents)
    titles = by_length.map { |title| TextUtils.title_esc_regex( title ) }

    ## NB: code (if the record has one) is appended as is and always goes last;
    #  it is neither sorted by length nor passed through title_esc_regex
    titles << rec.code if rec.respond_to?(:code) && rec.code.present?

    entry = [ rec.key, titles ]

    ### fix: use plain logger
    LogUtils::Logger.root.debug "  #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"

    entry
  end
end

#find_key_for!(name, line) ⇒ Object



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/textutils/title.rb', line 74

def find_key_for!( name, line )
  # Looks for the first @@oo<key>oo@@ marker in line. If found, the marker is
  # replaced in place with the upcased name in square brackets (e.g. [TEAM1])
  # and the key is returned; otherwise line is left untouched and nil is returned.

  # marker = everything between @@oo and oo@@ (non-greedy, any char except @)
  marker = /@@oo([^@]+?)oo@@/

  tag   = name.upcase
  label = name.downcase

  found = marker.match( line )
  return nil if found.nil?

  key = found[1]
  ### fix: use plain logger
  LogUtils::Logger.root.debug "   #{label}: >#{key}<"

  line.sub!( marker, "[#{tag}]" )   # only the first marker gets replaced
  key
end

#find_keys_for!(name, line) ⇒ Object

NB: keys (plural!) - returns an array of all keys found (empty if none)



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/textutils/title.rb', line 94

def find_keys_for!( name, line )  # NB: keys (plural!) - will return array
  # Collects keys by calling find_key_for! with a numbered name
  # (<name>1, <name>2, ...) until a call no longer yields a present key.
  # line is modified in place by every successful find_key_for! call.
  prefix = name.downcase
  found  = []
  number = 1

  loop do
    key = find_key_for!( "#{prefix}#{number}", line )
    break unless key.present?

    found << key
    number += 1
  end

  found
end

#map_title_worker_for!(name, line, key, values) ⇒ Object



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/textutils/title.rb', line 120

def map_title_worker_for!( name, line, key, values )
  # Tries each value (a regex source string) against line in order. The first
  # one that matches is replaced in place by the marker @@oo<key>oo@@ plus one
  # space. Returns true on the first match, false if nothing matched.
  label = name.downcase

  values.each do |value|
    ## whole-word match only (e.g. do not match wac inside wacker).
    ## nb: a trailing \b alone fails when the title ends in a non-word char
    ##  (e.g. Benfica Lis. - note the . at the end), thus space, tab and
    ##  end-of-line are accepted as terminators too
    pattern = /\b#{value}(\b| |\t|$)/
    next unless line =~ pattern

    ### fix: use plain logger
    LogUtils::Logger.root.debug "     match for #{label}  >#{key}< >#{value}<"

    # the @@oo...oo@@ wrapper keeps the inserted key from being matched again
    # by another title (e.g. wacker, wac, etc.)
    line.sub!( pattern, "@@oo#{key}oo@@ " )    # NB: one space char added at end
    return true    # first match wins (do NOT continue)
  end

  false
end

#map_titles_for!(name, line, title_table) ⇒ Object



111
112
113
114
115
116
117
# File 'lib/textutils/title.rb', line 111

def map_titles_for!( name, line, title_table )
  # Runs map_title_worker_for! once per table entry, passing entry[0] as the
  # key and entry[1] as the list of values. line is handed through unchanged
  # to the worker; the result of title_table.each is returned.
  title_table.each do |entry|
    map_title_worker_for!( name, line, entry[0], entry[1] )
  end
end