Class: SDL4R::TZAbbreviationDB

Inherits:
Object
  • Object
show all
Defined in:
lib/sdl4r/tz_abbreviation_db.rb

Overview

Gathers an index database of time zone abbreviations (e.g. “PST”, “JST”).

For each abbreviation, there are 4 basic cases:

  • no ambiguity: the abbreviation is always used with the same offsets (even if in different

countries).
  • time ambiguity: the abbreviation has been used in the same places but with different offsets

at different times. In this case, we base the timezone on one of the geographical time zones.
  • modern time ambiguity: the abbreviation has been used in different places but only before

1970. It has been stable since. As is the case for widely used abbreviations like "CET", a
geographical timezone in modern use should be used, as in the "time ambiguity" case, at least
by default.
  • location ambiguity: the abbreviation has been used in different places. In this case, there

is no way to tell which place is the right one, and therefore an error should be raised.

Note that ‘utc_offset’ and ‘std_offset’ of Record are meaningful only if the abbreviation is not ambiguous.

Defined Under Namespace

Classes: Record

Constant Summary collapse

DB_FILENAME =
File.dirname(__FILE__) + "/tz_abbreviation_db.csv"
@@index =
nil
@@index_mutex =
Mutex.new

Class Method Summary collapse

Class Method Details

.clean_raw_record_index(raw_index) ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/sdl4r/tz_abbreviation_db.rb', line 140

# Turns a raw index (identifier => Array of Records) into the final index
# (identifier => single annotated Record).
#
# An identifier with exactly one Record is annotated :not_ambiguous. Otherwise
# the Records are merged through create_ambiguous_record; when that merge comes
# back as :too_ambiguous, a second merge restricted to :modern uses is tried
# and replaces the first result if it yields a record.
#
# raw_index:: Hash of identifier => Array of Records
#
# Returns a Hash keyed by the identifier of each retained Record.
def self.clean_raw_record_index(raw_index)
  cleaned = {}

  raw_index.each_pair do |_identifier, candidates|
    chosen =
      if candidates.length == 1
        only = candidates[0]
        only.annotation = :not_ambiguous
        only
      else
        # First look at every use; fall back to modern uses only if the
        # locations are too diverse overall.
        merged = create_ambiguous_record(candidates, nil, :time_ambiguous)
        if merged.annotation == :too_ambiguous
          create_ambiguous_record(candidates, :modern, :modern_time_ambiguous) || merged
        else
          merged
        end
      end

    cleaned[chosen.identifier] = chosen
  end

  cleaned
end

.create_ambiguous_record(records, use, default_annotation) ⇒ Object

Checks the locations of the records and creates a corresponding record annotated as ambiguous according to its level: either ‘default_annotation’ or :too_ambiguous if the locations where the abbreviation is used differ over time (for the given ‘use’).

_use_:: indicates the only kind of use considered or nil for all of them
default_annotation

annotation to set if locations do not differ



171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/sdl4r/tz_abbreviation_db.rb', line 171

# Builds one merged Record out of several Records sharing an abbreviation.
#
# The merged Record is a copy of the first considered Record, annotated with
# 'default_annotation' when every considered Record lists the same zones, or
# with :too_ambiguous when at least one of them differs from the first. Its
# 'linked_zone_ids' is the sorted union of the zones of all considered Records.
#
# The given Records are never modified: the result is a copy. This matters
# because callers may invoke this method several times on the same Records
# (e.g. once for all uses, then once for :modern uses only); mutating the
# first Record during the first call would feed a falsified zone list into
# the comparison made by the second call.
#
# records:: Array of Records to merge
# _use_:: indicates the only kind of use considered or nil for all of them
# default_annotation:: annotation to set if locations do not differ
#
# Returns the merged Record, or nil if no Record matches 'use'.
def self.create_ambiguous_record(records, use, default_annotation)
  candidates = use ? records.select { |item| item.use == use } : records
  return nil if candidates.empty?

  reference_zones = candidates.first.linked_zone_ids
  same_locations = candidates.all? { |item| item.linked_zone_ids == reference_zones }

  # Shallow copy: the attributes below are reassigned, not mutated in place,
  # so the original Record keeps its own annotation and zone list.
  record = candidates.first.dup
  record.annotation = same_locations ? default_annotation : :too_ambiguous
  record.linked_zone_ids = candidates.flat_map { |item| item.linked_zone_ids }.uniq.sort

  record
end

.generate_fileObject

Creates a CSV index file of abbreviations and their corresponding offsets and unambiguous corresponding zones.

This method relies on unpublished internals of TZInfo. Therefore, it might easily break in the future.



204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# File 'lib/sdl4r/tz_abbreviation_db.rb', line 204

# Regenerates the CSV database (DB_FILENAME) of time zone abbreviations.
#
# Steps, as performed below:
# 1. For every data zone known to TZInfo, read the zone's offsets and
#    transitions and index each (abbreviation, utc_offset, std_offset) triple
#    with the list of zones using it and a :historical/:modern use flag.
# 2. Turn each indexed triple into a Record and group Records by identifier.
# 3. Reduce each group to one annotated Record (clean_raw_record_index).
# 4. Write the Records, sorted by index key, one CSV row each.
#
# NOTE: reads the @info, @offsets and @transitions instance variables of TZInfo
# objects, which are not public API.
def self.generate_file
  # [abbreviation, utc_offset, std_offset] => OpenStruct(timezones, use)
  abbreviation_index = {}

  TZInfo::Timezone.all_data_zone_identifiers.each { |tz_id|
    tz = TZInfo::Timezone.get(tz_id)
    # Reach into TZInfo's private state to get the raw offset/transition tables.
    info = tz.instance_variable_get(:@info)
    offsets = info.instance_variable_get(:@offsets)
    transitions = info.instance_variable_get(:@transitions)

    if offsets
      offsets.each_value { |offset|
        # Skip offsets whose abbreviation is simply the zone's own identifier.
        if tz.identifier != offset.abbreviation.id2name
          offset_key = [offset.abbreviation.id2name, offset.utc_offset, offset.std_offset]

          # Entries start as :historical and are only ever upgraded to :modern.
          abbreviation_index[offset_key] ||= OpenStruct.new(:timezones => [], :use => :historical)
          offset_record = abbreviation_index[offset_key]

          unless offset_record.timezones.include? tz.identifier
            offset_record.timezones << tz.identifier
          end

          # Find the last use of that offset
          # Scans from the highest index down; 'previous_transition' is the
          # transition at the next higher index (presumably the chronologically
          # following one - TODO confirm ordering of @transitions).
          previous_transition = nil
          (transitions.length - 1).downto(0) { |i|
            transition = transitions[i]
            if offset == transition.offset
              # Modern if nothing follows this transition in the scan, or if
              # the following transition happened in 1970 or later.
              if previous_transition.nil? or previous_transition.at.year >= 1970
                offset_record.use = :modern
              end
              break
            end
            previous_transition = transition
          }
        end
      }
    end
  }

  # Create a raw index of Records keyed by identifiers
  raw_record_index = {}
  abbreviation_index.each_pair { |offset, item|
    # 'offset' is the [abbreviation, utc_offset, std_offset] key; the
    # annotation is left nil here and filled in by clean_raw_record_index.
    record = Record.new(offset[0], offset[1], offset[2], nil, item.use, item.timezones)
    raw_record_index[record.identifier] ||= []
    raw_record_index[record.identifier] << record
  }

  record_index = clean_raw_record_index(raw_record_index)

  # Sort the [key, record] pairs by key so the file has a stable order.
  sorted = record_index.sort { |a, b| a[0] <=> b[0] }
  CSV::open(DB_FILENAME, "w") do |writer|
    # The first block parameter is the index key of the pair; it is unused.
    sorted.each { |offset, record|
      # Row layout: identifier, utc_offset, std_offset, annotation, use,
      # followed by one column per linked zone id.
      writer << [
        record.identifier, record.utc_offset, record.std_offset, record.annotation, record.use] +
        record.linked_zone_ids
    }
  end
end

.get_record(identifier) ⇒ Object

Returns the Record corresponding to the specified identifier or nil if not found. Be sure to check the ‘annotation’ property before using the data of the record.



121
122
123
124
125
126
# File 'lib/sdl4r/tz_abbreviation_db.rb', line 121

# Looks up the Record stored under 'identifier' in the in-memory index.
#
# The index is loaded through load_file on first use; that check-and-load
# step runs while holding @@index_mutex.
#
# Returns whatever the index holds for 'identifier' (nil for a plain Hash miss).
def self.get_record(identifier)
  @@index_mutex.synchronize { @@index || load_file }
  @@index[identifier]
end

.get_timezone(identifier, consider_modern_abbreviations = true) ⇒ Object



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/sdl4r/tz_abbreviation_db.rb', line 83

# Resolves 'identifier' to a timezone object.
#
# The identifier is first handed to TZInfo::Timezone.get. If TZInfo rejects it
# with InvalidTimezoneIdentifier, the abbreviation database is consulted:
#
# * a :not_ambiguous record gives a ConstantTimezone built from the record's
#   offsets;
# * a :time_ambiguous record - or a :modern_time_ambiguous one when
#   'consider_modern_abbreviations' is true - gives a RelativeTimezone based on
#   the first linked zone of the record;
# * anything else (no record, other annotation, base zone rejected by TZInfo)
#   re-raises the original TZInfo error.
#
# identifier:: zone identifier or abbreviation
# consider_modern_abbreviations:: whether :modern_time_ambiguous records may be used
def self.get_timezone(identifier, consider_modern_abbreviations = true)
  TZInfo::Timezone.get(identifier)
rescue TZInfo::InvalidTimezoneIdentifier => lookup_error
  resolved = nil
  record = TZAbbreviationDB.get_record(identifier)

  if record && record.annotation == :not_ambiguous
    resolved = ConstantTimezone.new(record.identifier, record.utc_offset, record.std_offset)
  elsif record && (record.annotation == :time_ambiguous ||
      (record.annotation == :modern_time_ambiguous && consider_modern_abbreviations))
    begin
      base_zone = TZInfo::Timezone.get(record.linked_zone_ids[0])
      resolved = RelativeTimezone.new(record.identifier, "", 0, base_zone)
    rescue TZInfo::InvalidTimezoneIdentifier
      # The linked zone is unusable: fall through to the original error.
      resolved = nil
    end
  end

  raise lookup_error if resolved.nil?
  resolved
end

.get_timezone_proxy(identifier, consider_modern_abbreviations = true) ⇒ Object



114
115
116
# File 'lib/sdl4r/tz_abbreviation_db.rb', line 114

# Wraps 'identifier' in a new AbbreviationTimezoneProxy.
#
# identifier:: zone identifier or abbreviation, passed through unchanged
# consider_modern_abbreviations:: passed through unchanged to the proxy
def self.get_timezone_proxy(identifier, consider_modern_abbreviations = true)
  AbbreviationTimezoneProxy.new(identifier, consider_modern_abbreviations)
end

.load_fileObject

Loads the CSV file (#DB_FILENAME) into memory.



130
131
132
133
134
135
136
137
138
# File 'lib/sdl4r/tz_abbreviation_db.rb', line 130

# Reads the CSV database (DB_FILENAME) and replaces the in-memory index.
#
# Each row is read as: identifier, utc_offset, std_offset, annotation, use,
# followed by any number of linked zone ids. Offsets are converted with to_i,
# annotation and use with to_sym, and the zone ids are sorted. The index is
# only swapped in once the whole file has been read.
def self.load_file
  loaded = {}

  CSV.foreach(DB_FILENAME) do |row|
    identifier, utc_offset, std_offset, annotation, use = row
    entry = Record.new(
      identifier, utc_offset.to_i, std_offset.to_i, annotation.to_sym, use.to_sym,
      row[5..-1].sort)
    loaded[entry.identifier] = entry
  end

  @@index = loaded
end