Class: PROIEL::DictionaryBuilder
- Inherits:
-
Object
- Object
- PROIEL::DictionaryBuilder
- Defined in:
- lib/proiel/dictionary/builder.rb
Constant Summary collapse
- CURRENT_SCHEMA_VERSION =
'3.0'.freeze
Instance Attribute Summary collapse
-
#language ⇒ Object
readonly
Returns the value of attribute language.
-
#lemmata ⇒ Object
readonly
Returns the value of attribute lemmata.
-
#license ⇒ Object
readonly
Returns the value of attribute license.
-
#sources ⇒ Object
readonly
Returns the value of attribute sources.
Instance Method Summary collapse
- #add_external_glosses!(filename, languages = %i(eng)) ⇒ Object
- #add_source!(source) ⇒ Object
-
#initialize ⇒ DictionaryBuilder
constructor
A new instance of DictionaryBuilder.
- #to_xml(io) ⇒ Object
Constructor Details
#initialize ⇒ DictionaryBuilder
Returns a new instance of DictionaryBuilder.
15 16 17 18 19 20 21 |
# File 'lib/proiel/dictionary/builder.rb', line 15

# Sets up an empty builder: language and license start out as nil, the
# source list and the lemma table start out empty, and a fresh
# PROIEL::Valency::Lexicon is created for valency data.
#
# @return [DictionaryBuilder] a new instance of DictionaryBuilder
def initialize
  # Nothing is known about the dictionary until a source has been added.
  @language = nil
  @license = nil

  # Containers that are filled in as sources are added.
  @sources = []
  @lemmata = {}
  @valency = PROIEL::Valency::Lexicon.new
end
Instance Attribute Details
#language ⇒ Object (readonly)
Returns the value of attribute language.
11 12 13 |
# File 'lib/proiel/dictionary/builder.rb', line 11

# Read-only accessor for the +@language+ instance variable.
#
# @return [Object] the current value of +@language+
def language
  @language
end
#lemmata ⇒ Object (readonly)
Returns the value of attribute lemmata.
13 14 15 |
# File 'lib/proiel/dictionary/builder.rb', line 13

# Read-only accessor for the +@lemmata+ instance variable.
#
# @return [Object] the current value of +@lemmata+ (the same object, not a copy)
def lemmata
  @lemmata
end
#license ⇒ Object (readonly)
Returns the value of attribute license.
10 11 12 |
# File 'lib/proiel/dictionary/builder.rb', line 10

# Read-only accessor for the +@license+ instance variable.
#
# @return [Object] the current value of +@license+
def license
  @license
end
#sources ⇒ Object (readonly)
Returns the value of attribute sources.
12 13 14 |
# File 'lib/proiel/dictionary/builder.rb', line 12

# Read-only accessor for the +@sources+ instance variable.
#
# @return [Object] the current value of +@sources+ (the same object, not a copy)
def sources
  @sources
end
Instance Method Details
#add_external_glosses!(filename, languages = %i(eng)) ⇒ Object
60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'lib/proiel/dictionary/builder.rb', line 60

# Merges glosses read from a tab-separated file into the lemma entries.
#
# The file is read with a header row whose names are converted to symbols.
# For every data row, the +lemma+ and +part_of_speech+ columns are handed to
# +initialize_lemma!+; the object it returns is treated as the lemma entry,
# and for each requested language the value of the column of the same name
# is merged into that entry's +:glosses+ hash (created on first use).
#
# @param filename [String] path of the tab-separated gloss file
# @param languages [Array<Symbol>] header names of the columns to copy
# @return [void]
# @raise [ArgumentError] if +filename+ is not a String, or if no file exists
#   at that path
def add_external_glosses!(filename, languages = %i(eng))
  raise ArgumentError, 'filename expected' unless filename.is_a?(String)
  # File.exists? was deprecated and removed in Ruby 3.2; on current Rubies it
  # raised NoMethodError here instead of the intended ArgumentError.
  raise ArgumentError, 'file not found' unless File.exist?(filename)

  # NOTE(review): quote_char is a backspace character, presumably so that
  # ordinary quote characters in the data are read literally -- confirm.
  CSV.foreach(filename, headers: true, encoding: 'utf-8', col_sep: "\t",
                        header_converters: :symbol, quote_char: "\b") do |row|
    h = row.to_h
    # A requested language with no matching column yields a nil gloss.
    data = languages.map { |l| [l, h[l]] }.to_h

    lemma = initialize_lemma!(row[:lemma], row[:part_of_speech])
    lemma[:glosses] ||= {}
    lemma[:glosses].merge!(data)
  end
end
#add_source!(source) ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/proiel/dictionary/builder.rb', line 23

# Adds a source to the dictionary and indexes its tokens.
#
# The first source added fixes the builder's language and license; any
# later source must report the same language and license. After the source
# is appended to the source list, each of its tokens is passed to
# +index_token!+ and then +index_homographs!+ is run.
#
# @param source [PROIEL::Source] the source to add
# @return [Object] the return value of +index_homographs!+
# @raise [ArgumentError] if +source+ is not a PROIEL::Source, or if its
#   language or license differs from the one already recorded
def add_source!(source)
  raise ArgumentError, 'source expected' unless source.is_a?(PROIEL::Source)

  # A builder with no language/license yet accepts anything.
  language_ok = @language.nil? || @language == source.language
  raise ArgumentError, 'incompatible language' unless language_ok

  license_ok = @license.nil? || @license == source.license
  raise ArgumentError, 'incompatible license' unless license_ok

  @language ||= source.language
  @license ||= source.license
  @sources << source

  source.tokens.each do |tok|
    index_token!(tok)
  end

  index_homographs!
end
#to_xml(io) ⇒ Object
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/proiel/dictionary/builder.rb', line 39

# Writes the dictionary as an XML document to +io+.
#
# The output is an XML declaration followed by a +proiel+ root element
# (carrying +export-time+ and +schema-version+ attributes) that wraps one
# +dictionary+ element with a +language+ attribute. Inside it, +sources+
# holds one +source+ element per added source (+idref+ and +license+
# attributes), and +lemmata+ holds the entries emitted by +lemma_to_xml+,
# ordered by the downcased lemma key.
#
# @param io [Object] passed to Builder::XmlMarkup as its +target+
# @return [Object] whatever the outermost Builder call returns
def to_xml(io)
  xml = ::Builder::XmlMarkup.new(target: io, indent: 2)
  xml.instruct! :xml, version: '1.0', encoding: 'UTF-8'

  xml.proiel('export-time': DateTime.now.xmlschema,
             'schema-version': CURRENT_SCHEMA_VERSION) do
    xml.dictionary(language: @language) do
      xml.sources do
        @sources.each { |src| xml.source(idref: src.id, license: src.license) }
      end

      xml.lemmata do
        ordered = @lemmata.sort_by { |key, _| key.downcase }

        ordered.each do |key, data|
          # Only the part of the key before the first comma is used as the form.
          # NOTE(review): keys look like "form,part_of_speech" -- confirm.
          lemma_to_xml(xml, key.split(',').first, data)
        end
      end
    end
  end
end