Class: UnicodedataRb::GenerateIndex

Inherits:
Object
  • Object
show all
Defined in:
lib/unicodedata_rb/generate_index.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(logger: Logger.new(STDOUT)) ⇒ GenerateIndex

Returns a new instance of GenerateIndex.



16
17
18
# File 'lib/unicodedata_rb/generate_index.rb', line 16

# Creates a generator and stores the logger it will use.
#
# @param logger [Logger] kept in @logger; when omitted, a new Logger writing
#   to STDOUT is created
def initialize(logger: Logger.new(STDOUT))
  @logger = logger
end

Instance Attribute Details

#logger ⇒ Object (readonly)

Returns the value of attribute logger.



15
16
17
# File 'lib/unicodedata_rb/generate_index.rb', line 15

# Reader for the logger supplied to the constructor.
#
# @return [Object] the current value of @logger
def logger
  @logger
end

Class Method Details

.call ⇒ Object



11
12
13
# File 'lib/unicodedata_rb/generate_index.rb', line 11

# Convenience entry point: forwards every argument to .new and immediately
# invokes #call on the resulting instance, returning whatever #call returns.
def self.call(...)
  new(...).call
end

Instance Method Details

#call ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/unicodedata_rb/generate_index.rb', line 21

# Downloads UnicodeData.txt and writes a lookup index for it.
#
# Every line of the downloaded file is parsed with
# UnicodedataRb::Codepoint.from_line, and the byte offset at which the line
# starts is recorded twice: once keyed by the parsed codepoint and once keyed
# by the parsed name. When several lines yield the same key, the offset of the
# last such line wins.
#
# The result, { codepoint: {...}, name: {...} }, is serialized with
# Marshal.dump into UnicodedataRb::Constants::UNICODEDATA_INDEX_PATH.
#
# @return [Object] the value returned by Marshal.dump (the index file handle,
#   already closed by the time this method returns)
def call
  download_file("#{unicodedata_url_prefix}UnicodeData.txt", UnicodedataRb::Constants::UNICODEDATA_TXT_PATH)

  codepoint_index = {}
  name_index = {}

  # Format of UnicodeData.txt: https://www.unicode.org/L2/L1999/UnicodeData.html
  File.open(UnicodedataRb::Constants::UNICODEDATA_TXT_PATH) do |data_file|
    data_file.each_line do |line|
      # IO#pos is a byte offset, so the length subtracted from it must be the
      # line's byte length; a character count would be too small for any line
      # containing multibyte characters and would point past the line start.
      line_offset = data_file.pos - line.bytesize
      codepoint = UnicodedataRb::Codepoint.from_line(line)
      codepoint_index[codepoint.codepoint] = line_offset
      name_index[codepoint.name] = line_offset
    end
  end

  index = {
    codepoint: codepoint_index,
    name: name_index
  }
  File.open(UnicodedataRb::Constants::UNICODEDATA_INDEX_PATH, 'wb') do |index_file|
    Marshal.dump(index, index_file)
  end
end

#download_file(url, save_path) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/unicodedata_rb/generate_index.rb', line 44

# Streams the resource at +url+ into the file at +save_path+.
#
# The body is written chunk by chunk as it arrives, so the whole response is
# never held in memory. Nothing is written unless the server answers with a
# 2xx status.
#
# @param url [String] address to fetch; its scheme decides whether TLS is
#   used and its port (explicit or scheme default) is honoured
# @param save_path [String] destination file, created or truncated
# @return [Net::HTTPResponse] the response object returned by the request
# @raise [Net::HTTPExceptions] when the response status is not 2xx
def download_file(url, save_path)
  logger.info("Downloading #{url}")
  uri = URI(url)

  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
    http.request(Net::HTTP::Get.new(uri)) do |response|
      # Raises on anything other than 2xx so an error page is never saved
      # as if it were the requested data. Checked before the file is opened
      # so a failed download leaves no truncated file behind.
      response.value

      # File.open rather than Kernel#open: the latter would treat a path
      # starting with "|" as a command to execute.
      File.open(save_path, "w:UTF-8") do |io|
        response.read_body { |chunk| io.write(chunk) }
      end
    end
  end
end

#unicodedata_url_prefix ⇒ Object



61
62
63
# File 'lib/unicodedata_rb/generate_index.rb', line 61

# Base URL of the Unicode Character Database directory matching the Unicode
# version reported by RbConfig for the running interpreter. The string is
# built on first use and cached in @unicodedata_url_prefix.
#
# @return [String] URL ending in "/ucd/"
def unicodedata_url_prefix
  return @unicodedata_url_prefix if @unicodedata_url_prefix

  unicode_version = RbConfig::CONFIG["UNICODE_VERSION"]
  @unicodedata_url_prefix = "https://unicode.org/Public/#{unicode_version}/ucd/"
end