Class: TanukiEmoji::Db::EmojiTestParser

Inherits:
Object
  • Object
show all
Defined in:
lib/tanuki_emoji/db/emoji_test_parser.rb

Overview

Reads and extracts content from emoji-test.txt

Constant Summary collapse

# Location of emoji-test.txt, built by appending the file name to
# ::TanukiEmoji::Db::UNICODE_DATA_DIR. The class-level data_file method joins
# this onto a directory three levels above the source file, so the value is
# presumably a relative path -- TODO confirm against UNICODE_DATA_DIR.
DATA_FILE =
"#{::TanukiEmoji::Db::UNICODE_DATA_DIR}/emoji-test.txt".freeze
# Frozen String => String lookup table.
# NOTE(review): going by the constant's name, keys look like Emoji versions and
# values like the matching Unicode versions; its consumer is not visible in
# this section -- confirm before relying on that reading.
EMOJI_UNICODE_VERSION =
{
  '0.6' => '6.0',
  '0.7' => '7.0',
  '1.0' => '8.0',
  '2.0' => '8.0',
  '3.0' => '9.0',
  '4.0' => '9.0',
  '5.0' => '10.0'
}.freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(index:, data_file: self.class.data_file) ⇒ EmojiTestParser

Returns a new instance of EmojiTestParser.



36
37
38
39
40
41
42
43
44
# File 'lib/tanuki_emoji/db/emoji_test_parser.rb', line 36

# Builds a parser bound to an emoji index and a data file.
#
# Also makes sure :en is among I18n.available_locales, appending it when it is
# missing (load! transliterates descriptions under the :en locale).
#
# @param index [Object] index the parsed emoji are looked up in and added to
# @param data_file [Object] data file to read; defaults to the class-level
#   data_file
def initialize(index:, data_file: self.class.data_file)
  @index = index
  @data_file = data_file
  @group_category = nil

  locales = I18n.available_locales
  I18n.available_locales = locales + [:en] unless locales.include?(:en)
end

Instance Attribute Details

#data_file ⇒ Object (readonly)

Returns the value of attribute data_file.



33
34
35
# File 'lib/tanuki_emoji/db/emoji_test_parser.rb', line 33

# Reader for the data file this parser was constructed with.
#
# @return [Object] the value stored in @data_file (assigned in the constructor
#   from its data_file: keyword)
def data_file
  @data_file
end

#group_category ⇒ Object

Returns the value of attribute group_category.



34
35
36
# File 'lib/tanuki_emoji/db/emoji_test_parser.rb', line 34

# Reader for the current group category.
#
# @return [Object, nil] the value stored in @group_category; the constructor
#   initialises it to nil
def group_category
  @group_category
end

Class Method Details

.data_file ⇒ Pathname

Returns the path to the default data file (emoji-test.txt)

Returns:

  • (Pathname)

    path to the default data file



29
30
31
# File 'lib/tanuki_emoji/db/emoji_test_parser.rb', line 29

# Resolves the default data file location.
#
# DATA_FILE is joined onto the directory three levels above this source file
# and the result is expanded to an absolute path.
#
# @return [Pathname] absolute path to the default data file
def self.data_file
  unresolved = File.join(__dir__, '../../../', DATA_FILE)
  absolute = File.expand_path(unresolved)

  ::Pathname.new(absolute)
end

Instance Method Details

#data ⇒ Array<EmojiData>

Return the parsed data from the data file

Returns:

  • (Array<EmojiData>)

    collection of EmojiData



49
50
51
52
53
54
55
56
57
58
59
# File 'lib/tanuki_emoji/db/emoji_test_parser.rb', line 49

# Parses the data file line by line.
#
# Every line yielded by +load+ is passed to +parse_line+; results that are nil
# are skipped, everything else is collected in order.
#
# @return [Array<EmojiData>] the non-nil results of parse_line
def data
  [].tap do |entries|
    load do |line|
      entry = parse_line(line)
      entries << entry unless entry.nil?
      # Keep the block's value equal to the parse result, as before.
      entry
    end
  end
end

#load! ⇒ Object



70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/tanuki_emoji/db/emoji_test_parser.rb', line 70

# Registers every entry returned by +data+ in @index.
#
# An alpha code is derived from each entry's description: the text is
# transliterated under the :en locale, each run of characters other than ASCII
# letters, digits, '#' and '*' is collapsed to one underscore, the result is
# lowercased and a single trailing underscore is removed. The two keycap codes
# that would still contain '*' or '#' are renamed to word forms.
#
# When @index already holds an emoji with that alpha code, the entry's
# codepoints are added to it and the index is updated; otherwise a new
# Character is created and added.
#
# @return [Array] the collection returned by +data+ (the value of +each+)
def load!
  data.each do |emoji_data|
    alpha_code = I18n.with_locale(:en) do
      I18n.transliterate(emoji_data.description)
        .gsub(/[^a-zA-Z#*\d]+/, '_')
        .downcase
        .chomp('_')
    end

    alpha_code =
      case alpha_code
      when 'keycap_*' then 'keycap_asterisk'
      when 'keycap_#' then 'keycap_hash'
      else alpha_code
      end

    # The entry may be a differently qualified version of an emoji that is
    # already indexed: the same emoji with a slightly different code point
    # ("smiling face" is one example). Look it up by alpha code and, if it is
    # there, record these codepoints as an alternate.
    existing = @index.find_by_alpha_code(alpha_code)

    if existing
      existing.add_codepoints(emoji_data.codepoints)
      @index.update(existing)
      next
    end

    # Nothing indexed under this alpha code yet: create a new emoji.
    character = Character.new(alpha_code,
      codepoints: emoji_data.codepoints,
      alpha_code: alpha_code,
      description: emoji_data.description,
      category: emoji_data.group_category)
    character.unicode_version = emoji_data.version

    @index.add(character)
  end
end

#raw_data ⇒ Object



61
62
63
64
65
66
67
68
# File 'lib/tanuki_emoji/db/emoji_test_parser.rb', line 61

# Collects the data file's lines without parsing them.
#
# @return [Array] every line yielded by +load+, in the order it was yielded
def raw_data
  [].tap do |collected|
    load { |line| collected << line }
  end
end