Class: JLDrill::Tanaka::Reference

Inherits:
DataFile
  • Object
show all
Defined in:
lib/jldrill/model/Tanaka.rb

Overview

Represents the Tanaka reference library

Constant Summary collapse

A_RE =
/^A:/
B_RE =
/^B: (.*)/
WORD_RE =
/([^{(\[~]*(\([^)]*\))?)/u

Instance Attribute Summary collapse

Attributes inherited from DataFile

#encoding, #file, #lines, #parsed, #publisher, #stepSize

Instance Method Summary collapse

Methods inherited from DataFile

#createLines, #eof?, #findEncoding, #fraction, #load, #loaded?, #parse, #parseChunk, #parser, #readLines, #reset, #setLoaded, #shortFilename

Constructor Details

#initialize ⇒ Reference

Returns a new instance of Reference.



115
116
117
118
119
120
# File 'lib/jldrill/model/Tanaka.rb', line 115

# Builds an empty reference: runs the superclass initializer first, then
# resets the word index and sentence counter and sets @stepSize to 1000.
def initialize
    super
    @words = {}
    @sentences = 0
    # Assigned after super so this value is the one that remains in effect.
    @stepSize = 1000
end

Instance Attribute Details

#words ⇒ Object

Returns the value of attribute words.



108
109
110
# File 'lib/jldrill/model/Tanaka.rb', line 108

# Reader for the @words instance variable.
def words
    return @words
end

Instance Method Details

#addWord(word, pos) ⇒ Object



130
131
132
133
134
# File 'lib/jldrill/model/Tanaka.rb', line 130

# Records that +word+ occurs at position +pos+.
#
# The key stored in @words is the first capture group of WORD_RE applied to
# +word+. Each key maps to an array of positions, created on first use.
#
# Returns the array of positions for the key, or nil when WORD_RE does not
# match.
def addWord(word, pos)
    found = WORD_RE.match(word)
    return nil if found.nil?

    positions = (@words[found[1]] ||= [])
    positions.push(pos)
end

#dataSize ⇒ Object



151
152
153
# File 'lib/jldrill/model/Tanaka.rb', line 151

# Returns the current value of the @sentences counter.
def dataSize
    return @sentences
end

#finishParsing ⇒ Object

Don’t erase @lines because we need them later



187
188
189
# File 'lib/jldrill/model/Tanaka.rb', line 187

# Marks the data as loaded by calling setLoaded(true).
# Don't erase @lines here because we need them later.
def finishParsing
    return setLoaded(true)
end

#numSentences ⇒ Object



122
123
124
# File 'lib/jldrill/model/Tanaka.rb', line 122

# Returns whatever dataSize reports.
def numSentences
    return dataSize()
end

#numWords ⇒ Object



126
127
128
# File 'lib/jldrill/model/Tanaka.rb', line 126

# Returns the number of distinct keys stored in @words.
#
# Uses Hash#size directly rather than building a temporary array of every
# key just to count them.
def numWords
    @words.size
end

#parseEntry ⇒ Object



155
156
157
158
159
160
161
162
# File 'lib/jldrill/model/Tanaka.rb', line 155

# Tries to parse the pair of lines starting at @parsed and advances @parsed.
#
# When parseLines accepts the pair, both lines are consumed (advance by 2);
# otherwise only the current line is skipped (advance by 1) so the next call
# retries from the following line.
#
# Returns the updated value of @parsed.
def parseEntry
    accepted = parseLines(@lines[@parsed], @lines[@parsed + 1], @parsed)
    @parsed += (accepted ? 2 : 1)
end

#parseLines(aLine, bLine, pos) ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/jldrill/model/Tanaka.rb', line 136

# Parses one A-line/B-line pair.
#
# The pair is accepted only when +aLine+ matches A_RE and +bLine+ matches
# B_RE. On acceptance the sentence counter is incremented and every
# space-separated token captured from the B line is passed to addWord
# together with +pos+.
#
# Returns true when the pair was accepted, false otherwise.
def parseLines(aLine, bLine, pos)
    return false unless A_RE.match(aLine)

    bMatch = B_RE.match(bLine)
    return false if bMatch.nil?

    @sentences += 1
    bMatch[1].split(' ').each { |token| addWord(token, pos) }
    return true
end

#search(kanji, reading) ⇒ Object



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'lib/jldrill/model/Tanaka.rb', line 164

# Looks up the positions recorded for a word and returns the result of
# SearchResults#getSentences for them.
#
# The lookup key is the string form of Word.create(...). With a kanji, the
# key is first built from kanji and reading; if that key is absent the lookup
# is repeated with the kanji alone. Without a kanji, the reading is used in
# the kanji slot.
def search(kanji, reading)
    if kanji.nil?
        # When there is no kanji, use the reading as the kanji
        word = Word.create(reading, nil).to_s
        connections = @words[word]
    else
        word = Word.create(kanji, reading).to_s
        connections = @words[word]
        if connections.nil?
            # The corpus only uses readings to disambiguate kanji, and most
            # words don't have readings, so on a miss retry without the
            # reading.
            word = Word.create(kanji, nil).to_s
            connections = @words[word]
        end
    end

    SearchResults.new(word, connections, @lines).getSentences
end