Class: NHKore::Word

Inherits:
Object
  • Object
show all
Defined in:
lib/nhkore/word.rb

Overview

Author:

  • Jonathan Bradley Whited

Since:

  • 0.1.0

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(defn: nil, eng: nil, freq: 1, kana: nil, kanji: nil, unknown: nil, word: nil, **kargs) ⇒ Word

Returns a new instance of Word.

Raises:

  • (ArgumentError)

Since:

  • 0.1.0



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/nhkore/word.rb', line 31

# Creates a Word from explicit fields, from another +word+ used as a
# fallback for nil fields, and/or from a single +unknown+ string that is
# filed under either kanji or kana.
#
# @param defn [Object,nil] stored as-is; taken from +word+ when nil
# @param eng [Object,nil] stored as-is; taken from +word+ when nil
# @param freq [Integer,nil] must not be < 1; since the default is 1, it is
#   only taken from +word+ when nil is passed explicitly
# @param kana [String,nil] taken from +word+ when nil; stored as nil when
#   Util.empty_web_str?() reports it as empty
# @param kanji [String,nil] taken from +word+ when nil; stored as nil when
#   Util.empty_web_str?() reports it as empty
# @param unknown [String,nil] becomes the kanji if Util.kanji?() is true
#   for it, else becomes the kana
# @param word [Word,nil] source of fallback values for nil fields
# @param kargs [Hash] extra keywords; accepted, but not read in this method
#
# @raise [ArgumentError] if freq is < 1, if +unknown+ would overwrite a
#   non-empty kanji/kana, or if both kanji and kana end up empty
def initialize(defn: nil,eng: nil,freq: 1,kana: nil,kanji: nil,unknown: nil,word: nil,**kargs)
  super()

  unless word.nil?
    # Only fields that are nil fall back to the other word's values.
    defn = defn.nil? ? word.defn : defn
    eng = eng.nil? ? word.eng : eng
    freq = freq.nil? ? word.freq : freq
    kana = kana.nil? ? word.kana : kana
    kanji = kanji.nil? ? word.kanji : kanji
  end

  raise ArgumentError,"freq[#{freq}] cannot be < 1" if freq < 1

  unless unknown.nil?
    # kanji?() only tests if it contains kanji, so don't use kana?().
    if Util.kanji?(unknown)
      unless Util.empty_web_str?(kanji)
        raise ArgumentError,"unknown[#{unknown}] will overwrite kanji[#{kanji}]"
      end

      kanji = unknown
    else
      unless Util.empty_web_str?(kana)
        raise ArgumentError,"unknown[#{unknown}] will overwrite kana[#{kana}]"
      end

      kana = unknown
    end
  end

  # Normalize empty values to nil, so that the key below is consistent.
  kana = Util.empty_web_str?(kana) ? nil : kana
  kanji = Util.empty_web_str?(kanji) ? nil : kanji

  raise ArgumentError,'kanji and kana cannot both be empty' if kanji.nil? && kana.nil?

  @defn,@eng,@freq = defn,eng,freq
  @kana,@kanji = kana,kanji
  @key = "#{kanji}=#{kana}" # nil.to_s() is ''
end

Instance Attribute Details

#defn ⇒ Object

Since:

  • 0.1.0



24
25
26
# File 'lib/nhkore/word.rb', line 24

# Reader for the +defn+ attribute.
#
# @return [Object] the current value of @defn
def defn
  return @defn
end

#eng ⇒ Object

Since:

  • 0.1.0



25
26
27
# File 'lib/nhkore/word.rb', line 25

# Reader for the +eng+ attribute.
#
# @return [Object] the current value of @eng
def eng
  return @eng
end

#freq ⇒ Object

Since:

  • 0.1.0



26
27
28
# File 'lib/nhkore/word.rb', line 26

# Reader for the +freq+ attribute (the constructor rejects values < 1).
#
# @return [Object] the current value of @freq
def freq
  return @freq
end

#kana ⇒ Object (readonly)

Since:

  • 0.1.0



27
28
29
# File 'lib/nhkore/word.rb', line 27

# Reader for the +kana+ attribute (nil if the constructor found it empty).
#
# @return [Object] the current value of @kana
def kana
  return @kana
end

#kanji ⇒ Object (readonly)

Since:

  • 0.1.0



28
29
30
# File 'lib/nhkore/word.rb', line 28

# Reader for the +kanji+ attribute (nil if the constructor found it empty).
#
# @return [Object] the current value of @kanji
def kanji
  return @kanji
end

#key ⇒ Object (readonly)

Since:

  • 0.1.0



29
30
31
# File 'lib/nhkore/word.rb', line 29

# Reader for the +key+ attribute, which the constructor builds as
# "<kanji>=<kana>".
#
# @return [Object] the current value of @key
def key
  return @key
end

Class Method Details

.load_data(key, hash) ⇒ Object

Since:

  • 0.1.0



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/nhkore/word.rb', line 85

# Rebuilds a Word from a stored key and its Hash of attributes.
#
# @param key [#to_s] the key the data was stored under (may be a Symbol)
# @param hash [Hash] attributes, read via :defn, :eng, :kana, :kanji, :freq
#
# @return [Word] the new word; its freq is only overridden when the stored
#   :freq converts to an Integer > 0
#
# @raise [ArgumentError] if +key+ differs from the key generated by the Word
def self.load_data(key,hash)
  stored_key = key.to_s # Change from a symbol

  word = Word.new(defn: hash[:defn],eng: hash[:eng],kana: hash[:kana],kanji: hash[:kanji])

  unless stored_key == word.key
    raise ArgumentError,"the key from the hash[#{stored_key}] does not match the generated key[#{word.key}]"
  end

  # nil.to_i() is 0, so a missing :freq keeps the constructor's default.
  stored_freq = hash[:freq].to_i
  word.freq = stored_freq if stored_freq.positive?

  return word
end

.scrape_ruby_tag(tag, missingno: nil, url: nil) ⇒ Array<Word>

Do not clean and/or strip spaces, as the raw text is important for Defn and ArticleScraper.

This originally only scraped 1 word, but multiple words were added after seeing this link for 産業能率大学, which is valid HTML:

https://www3.nhk.or.jp/news/easy/k10012759201000/k10012759201000.html

Returns:

  • (Array<Word>)

Raises:

  • (ScrapeError)

Since:

  • 0.1.0



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/nhkore/word.rb', line 113

# Scrapes every kanji/kana pair out of a <ruby> tag.
#
# The raw text is used as-is (no cleaning/stripping of spaces).
#
# @param tag [Object] node that responds to css(), search(), and to_s()
# @param missingno [Object,nil] if given, used to look up a kana from its
#   kanji (or a kanji from its kana) when one side of a pair is empty
# @param url [String,nil] only used in error messages
#
# @return [Array<Word>] one Word per kanji/kana pair, in document order
#
# @raise [ScrapeError] if no kanji or no kana nodes are found, if their
#   counts differ, or if a pair still has an empty side after +missingno+
def self.scrape_ruby_tag(tag,missingno: nil,url: nil)
  # First, try <rb> tags.
  kanjis = tag.css('rb')

  # Then fall back, in order, to text nodes and finally to non-<rt> tags
  # (in case of being surrounded by <span>, <b>, etc.).
  ['./text()',"./*[not(name()='rt')]"].each do |xpath|
    break unless kanjis.length < 1

    kanjis = tag.search(xpath)
  end

  kanas = tag.css('rt')

  raise ScrapeError,"no kanji at URL[#{url}] in tag[#{tag}]" if kanjis.length < 1
  raise ScrapeError,"no kana at URL[#{url}] in tag[#{tag}]" if kanas.length < 1

  unless kanjis.length == kanas.length
    raise ScrapeError,"number of kanji & kana mismatch at URL[#{url}] in tag[#{tag}]"
  end

  words = Array.new(kanjis.length) do |i|
    kanji = kanjis[i].text
    kana = kanas[i].text

    # Uncomment for debugging; really need a logger.
    #puts "Word[#{i}]: #{kanji} => #{kana}"

    unless missingno.nil?
      # Check kana first, since this is the typical scenario.
      # - https://www3.nhk.or.jp/news/easy/k10012331311000/k10012331311000.html
      # - '窓' in '(8)窓を開けて外の空気を入れましょう'
      if Util.empty_web_str?(kana)
        kana = missingno.kana_from_kanji(kanji)

        Util.warn("using missingno for kana[#{kana}] from kanji[#{kanji}]") unless Util.empty_web_str?(kana)
      elsif Util.empty_web_str?(kanji)
        kanji = missingno.kanji_from_kana(kana)

        Util.warn("using missingno for kanji[#{kanji}] from kana[#{kana}]") unless Util.empty_web_str?(kanji)
      end
    end

    raise ScrapeError,"empty kanji at URL[#{url}] in tag[#{tag}]" if Util.empty_web_str?(kanji)
    raise ScrapeError,"empty kana at URL[#{url}] in tag[#{tag}]" if Util.empty_web_str?(kana)

    Word.new(kanji: kanji,kana: kana)
  end

  return words
end

.scrape_text_node(tag, url: nil) ⇒ Object

Do not clean and/or strip spaces, as the raw text is important for Defn and ArticleScraper.

Since:

  • 0.1.0



169
170
171
172
173
174
175
176
177
178
# File 'lib/nhkore/word.rb', line 169

# Scrapes a single Word out of a text node.
#
# The raw text is used as-is (no cleaning/stripping of spaces) and is
# passed to the constructor as +unknown+, which decides kanji vs. kana.
#
# @param tag [Object] node that responds to text()
# @param url [String,nil] accepted, but not read in this method
#
# @return [Word,nil] nil if Util.empty_web_str?() reports the text as empty
def self.scrape_text_node(tag,url: nil)
  raw_text = tag.text

  # No error; empty text is fine (not strictly kanji/kana only).
  return Util.empty_web_str?(raw_text) ? nil : Word.new(unknown: raw_text)
end

Instance Method Details

#encode_with(coder) ⇒ Object

Since:

  • 0.1.0



74
75
76
77
78
79
80
81
82
83
# File 'lib/nhkore/word.rb', line 74

# Writes this word's fields into +coder+ for serialization.
#
# @key is left out because it will be the key in the YAML/Hash.
#
# @param coder [#[]=] receives :kanji, :kana, :freq, :defn, :eng (in this
#   order; the order matters)
#
# @return [Object] the value of @eng (the last value written)
def encode_with(coder)
  ordered_fields = {kanji: @kanji,kana: @kana,freq: @freq,defn: @defn,eng: @eng}

  ordered_fields.each do |field,value|
    coder[field] = value
  end

  return @eng
end

#kanji? ⇒ Boolean

Returns:

  • (Boolean)

Since:

  • 0.1.0



180
181
182
# File 'lib/nhkore/word.rb', line 180

# Tells whether this word has a kanji form.
#
# @return [Boolean] the negation of Util.empty_web_str?() for @kanji
def kanji?
  kanji_empty = Util.empty_web_str?(@kanji)

  return !kanji_empty
end

#to_s ⇒ Object

Since:

  • 0.1.0



188
189
190
191
192
193
194
195
196
197
198
199
200
# File 'lib/nhkore/word.rb', line 188

# Builds a one-line, human-readable dump of this word.
#
# Newlines inside @defn are shown as a literal backslash + 'n', so that the
# result stays on one line.
#
# @return [String] a new (unfrozen) String: "'<key>': { kanji=>..., ... }"
def to_s
  escaped_defn = @defn.to_s.gsub("\n",'\\n')

  fields = [
    "kanji=>'#{@kanji}'",
    "kana=>'#{@kana}'",
    "freq=>#{@freq}",
    "defn=>'#{escaped_defn}'",
    "eng=>'#{@eng}'"
  ]

  return "'#{@key}': { " + fields.join(', ') + ' }'
end

#word ⇒ Object

Since:

  • 0.1.0



184
185
186
# File 'lib/nhkore/word.rb', line 184

# Picks the text that represents this word.
#
# @return [Object] @kanji if kanji?() is true, else @kana
def word
  return @kanji if kanji?

  return @kana
end