Class: PROIEL::Valency::Lexicon
- Inherits:
-
Object
- Object
- PROIEL::Valency::Lexicon
- Defined in:
- lib/proiel/valency/lexicon.rb
Instance Attribute Summary collapse
-
#frames ⇒ Object
readonly
Returns the value of attribute frames.
Instance Method Summary collapse
-
#add_source!(source) ⇒ Object
Adds the provided source to the valency lexicon.
-
#initialize ⇒ Lexicon
constructor
A new instance of Lexicon.
- #lookup(lemma, part_of_speech) ⇒ Object
Constructor Details
#initialize ⇒ Lexicon
Returns a new instance of Lexicon.
6 7 8 9 10 |
# File 'lib/proiel/valency/lexicon.rb', line 6

# Creates an empty lexicon: no source ids or source languages are registered
# and no frames have been recorded.
#
# @return [Lexicon] a new instance of Lexicon
def initialize
  # Sets, so adding the same id or language twice leaves a single entry.
  @source_languages = Set.new
  @source_ids = Set.new

  @frames = {}
end
Instance Attribute Details
#frames ⇒ Object (readonly)
Returns the value of attribute frames.
4 5 6 |
# File 'lib/proiel/valency/lexicon.rb', line 4

# Returns the frames recorded so far.
#
# As populated by #add_source!, this is a nested hash:
# lemma => part of speech => argument frame => { a: [token ids], r: [token ids] }.
# The internal hash itself is returned, not a copy.
#
# @return [Hash] the value of attribute frames
def frames
  @frames
end
Instance Method Details
#add_source!(source) ⇒ Object
Adds the provided source to the valency lexicon. In practice, all sources added to a lexicon should be in the same language, but this is not enforced. This makes it possible to generate a lexicon from sources in closely related languages or dialects.
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/proiel/valency/lexicon.rb', line 16

# Registers +source+ with the lexicon and records one argument frame per
# verbal node found in its sentences.
#
# In practice all sources added to one lexicon should be in the same language,
# but this is not enforced, which makes it possible to build a lexicon from
# sources in closely related languages or dialects.
#
# Frames are stored as
# lemma => part of speech => argument frame => { a: [token ids], r: [token ids] }.
#
# @param source [#id, #language, #sentences] the source to add
# @return [Object] whatever +source.sentences.each+ returns
def add_source!(source)
  @source_ids << source.id
  @source_languages << source.language

  source.sentences.each do |sentence|
    find_verbal_nodes(sentence).each do |verb|
      frame = PROIEL::Valency::Arguments.get_argument_frame(verb)

      # A dependent with relation 'aux' and part of speech 'Pk' files the
      # token under :r; every other token goes under :a.
      # NOTE(review): presumably :r/:a distinguish reflexive from
      # non-reflexive usage -- confirm against the PROIEL tag set.
      marked = verb.dependents.any? do |dependent|
        dependent.relation == 'aux' && dependent.part_of_speech == 'Pk'
      end
      partition = marked ? :r : :a

      # Create each level of the nested hash on first use.
      by_part_of_speech = (@frames[verb.lemma] ||= {})
      by_frame = (by_part_of_speech[verb.part_of_speech] ||= {})
      token_ids = (by_frame[frame] ||= { a: [], r: [] })

      token_ids[partition] << verb.id
    end
  end
end
#lookup(lemma, part_of_speech) ⇒ Object
40 41 42 43 44 45 46 |
# File 'lib/proiel/valency/lexicon.rb', line 40

# Looks up the frames recorded for a lemma and part of speech.
#
# Each recorded frame becomes a hash of the form
# +{ arguments: <frame>, tokens: <token ids by partition> }+, and the list
# is passed to PROIEL::Valency::Obliqueness.sort_frames.
#
# A lemma or part of speech that has never been recorded gives an empty
# frame list (still passed through sort_frames) rather than raising
# NoMethodError on nil.
#
# @param lemma [Object] first-level key into the recorded frames
# @param part_of_speech [Object] second-level key into the recorded frames
# @return [Object] the result of PROIEL::Valency::Obliqueness.sort_frames
def lookup(lemma, part_of_speech)
  # fetch with an empty-hash default keeps unknown keys from hitting nil.
  recorded = @frames.fetch(lemma, {}).fetch(part_of_speech, {})

  entries = recorded.map do |arguments, token_ids|
    { arguments: arguments, tokens: token_ids }
  end

  PROIEL::Valency::Obliqueness.sort_frames(entries)
end