Class: PROIEL::Converter::CoNLLU::Token
- Inherits:
-
Object
- Object
- PROIEL::Converter::CoNLLU::Token
- Defined in:
- lib/proiel/cli/converters/conll-u.rb
Constant Summary collapse
- MORPHOLOGY_POSITIONAL_TAG_SEQUENCE =
[ :person, :number, :tense, :mood, :voice, :gender, :case, :degree, :strength, :inflection ]
Instance Attribute Summary collapse
-
#citation_part ⇒ Object
readonly
Returns the value of attribute citation_part.
-
#empty_token_sort ⇒ Object
readonly
Returns the value of attribute empty_token_sort.
-
#form ⇒ Object
readonly
Returns the value of attribute form.
-
#head_id ⇒ Object
Returns the value of attribute head_id.
-
#id ⇒ Object
readonly
Returns the value of attribute id.
-
#language ⇒ Object
readonly
Returns the value of attribute language.
-
#lemma ⇒ Object
readonly
Returns the value of attribute lemma.
-
#part_of_speech ⇒ Object
readonly
Returns the value of attribute part_of_speech.
-
#relation ⇒ Object
Returns the value of attribute relation.
-
#upos ⇒ Object
Returns the value of attribute upos.
Instance Method Summary collapse
- #add_slash!(slash) ⇒ Object
-
#adjectival? ⇒ Boolean
Returns true if the node is an adjective or an ordinal.
- #adverb? ⇒ Boolean
- #auxiliary? ⇒ Boolean
- #cardinal? ⇒ Boolean
-
#change_coordinations! ⇒ Object
Changes coordinations recursively from the bottom of the graph.
-
#clausal? ⇒ Boolean
A node is clausal if it is a verb and not nominalized; or it has a copula dependent; or it has a subject (e.g. in an absolute construction without a verb); or if it is the root (e.g. in a nominal clause).
- #conj_head ⇒ Object
- #conjunction? ⇒ Boolean
- #coordinated? ⇒ Boolean
-
#copula? ⇒ Boolean
Returns true if the node has an xobj dependent and either 1) the lemma is copular or 2) the node is empty and has no pid slash or a pid slash to a node with a copular lemma.
- #count_subgraph ⇒ Object
- #dependents ⇒ Object
- #deponent? ⇒ Boolean
- #determiner? ⇒ Boolean
- #distribute_shared_modifiers! ⇒ Object
- #ellipsis? ⇒ Boolean
- #find_appositive_head ⇒ Object
- #find_highest_daughter ⇒ Object
- #find_postag(possible_postags) ⇒ Object
- #find_relation(possible_relations) ⇒ Object
- #find_remnant ⇒ Object
- #foreign? ⇒ Boolean
- #format_features(features) ⇒ Object
- #genitive? ⇒ Boolean
- #has_content? ⇒ Boolean
- #has_preposition? ⇒ Boolean
- #head ⇒ Object
-
#initialize(id, head_id, form, lemma, part_of_speech, language, morphology, relation, empty_token_sort, slashes, citation_part, sentence) ⇒ Token
constructor
A new instance of Token.
- #interjection? ⇒ Boolean
-
#invert!(new_dependent_relation = nil, new_head_relation = nil) ⇒ Object
Inverts the direction of a dependency relation.
- #is_empty? ⇒ Boolean
- #left_corner ⇒ Object
- #map_morphology(morph) ⇒ Object
- #map_part_of_speech! ⇒ Object
- #map_relation ⇒ Object
- #mediopassive? ⇒ Boolean
- #negation? ⇒ Boolean
- #nominal? ⇒ Boolean
- #nominalized? ⇒ Boolean
- #particle? ⇒ Boolean
- #passive? ⇒ Boolean
- #pid ⇒ Object
- #preposition? ⇒ Boolean
- #process_coordination! ⇒ Object
- #process_copula! ⇒ Object
- #process_ellipsis! ⇒ Object
- #process_preposition! ⇒ Object
-
#process_subjunction! ⇒ Object
attach subjunctions with ‘mark’ under their verbs and promote the verb to take over the subjunction’s relation.
-
#promote!(new_sibling_relation = nil, new_dependent_relation = 'aux') ⇒ Object
promotes a node to its head’s place.
- #pronominal? ⇒ Boolean
- #proper_noun? ⇒ Boolean
- #relabel_graph! ⇒ Object
- #remove_empties! ⇒ Object
- #root? ⇒ Boolean
- #siblings ⇒ Object
- #subgraph_set ⇒ Object
- #subjunction? ⇒ Boolean
- #TAM_particle? ⇒ Boolean
- #to_conll ⇒ Object
- #to_graph(indents = 0) ⇒ Object
- #to_n ⇒ Object
- #to_s ⇒ Object
Constructor Details
#initialize(id, head_id, form, lemma, part_of_speech, language, morphology, relation, empty_token_sort, slashes, citation_part, sentence) ⇒ Token
Returns a new instance of Token.
218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 218

# Builds a token from its raw annotation fields.
#
# A truthy morphology tag is translated with #map_morphology and kept as the
# feature string; otherwise the feature string is empty. The citation part is
# stored as "ref=" followed by the citation (or nothing when it is missing)
# with every whitespace character replaced by an underscore. The universal
# part-of-speech tag starts out as nil.
def initialize(id, head_id, form, lemma, part_of_speech, language, morphology, relation, empty_token_sort, slashes, citation_part, sentence)
  @id, @head_id = id, head_id
  @form, @lemma = form, lemma
  @part_of_speech = part_of_speech
  @language = language
  @morphology = morphology
  @relation = relation
  @empty_token_sort = empty_token_sort
  @slashes = slashes
  @sentence = sentence
  @upos = nil
  @features = morphology ? map_morphology(morphology) : ''
  @citation_part = "ref=#{(citation_part || '').gsub(/\s/, '_')}"
end
Instance Attribute Details
#citation_part ⇒ Object (readonly)
Returns the value of attribute citation_part.
216 217 218 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 216

# Reader: returns the current value of @citation_part.
def citation_part
  @citation_part
end
#empty_token_sort ⇒ Object (readonly)
Returns the value of attribute empty_token_sort.
214 215 216 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 214

# Reader: returns the current value of @empty_token_sort.
def empty_token_sort
  @empty_token_sort
end
#form ⇒ Object (readonly)
Returns the value of attribute form.
215 216 217 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 215

# Reader: returns the current value of @form.
def form
  @form
end
#head_id ⇒ Object
Returns the value of attribute head_id.
207 208 209 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 207

# Reader: returns the current value of @head_id.
def head_id
  @head_id
end
#id ⇒ Object (readonly)
Returns the value of attribute id.
211 212 213 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 211

# Reader: returns the current value of @id.
def id
  @id
end
#language ⇒ Object (readonly)
Returns the value of attribute language.
213 214 215 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 213

# Reader: returns the current value of @language.
def language
  @language
end
#lemma ⇒ Object (readonly)
Returns the value of attribute lemma.
212 213 214 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 212

# Reader: returns the current value of @lemma.
def lemma
  @lemma
end
#part_of_speech ⇒ Object (readonly)
Returns the value of attribute part_of_speech.
210 211 212 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 210

# Reader: returns the current value of @part_of_speech.
def part_of_speech
  @part_of_speech
end
#relation ⇒ Object
Returns the value of attribute relation.
209 210 211 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 209

# Reader: returns the current value of @relation.
def relation
  @relation
end
#upos ⇒ Object
Returns the value of attribute upos.
208 209 210 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 208

# Reader: returns the current value of @upos.
def upos
  @upos
end
Instance Method Details
#add_slash!(slash) ⇒ Object
626 627 628 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 626

# Appends +slash+ to this token's slash collection and returns the
# result of the append.
def add_slash!(slash)
  @slashes << slash
end
#adjectival? ⇒ Boolean
Returns true if the node is an adjective or an ordinal.
253 254 255 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 253

# True when the part-of-speech tag is 'A-' or 'Mo'
# (adjective or ordinal, per the method's description).
def adjectival?
  %w[A- Mo].include?(@part_of_speech)
end
#adverb? ⇒ Boolean
261 262 263 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 261

# True when the part-of-speech tag starts with 'D'.
#
# Always returns true or false (never the raw match offset or nil),
# including when the tag is nil.
def adverb?
  !(@part_of_speech =~ /\AD/).nil?
end
#auxiliary? ⇒ Boolean
295 296 297 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 295

# True when the comma-joined "lemma,part_of_speech,language" key of this
# token is listed in AUXILIARIES.
def auxiliary?
  key = [lemma, part_of_speech, language].join(',')
  AUXILIARIES.include?(key)
end
#cardinal? ⇒ Boolean
265 266 267 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 265

# True when the part-of-speech tag is exactly 'Ma'.
def cardinal?
  @part_of_speech == 'Ma'
end
#change_coordinations! ⇒ Object
Changes coordinations recursively from the bottom of the graph
602 603 604 605 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 602

# Rewrites coordinations bottom-up: every dependent is handled first, then
# this token itself is processed if it is a conjunction.
def change_coordinations!
  dependents.each { |child| child.change_coordinations! }
  return unless conjunction?

  process_coordination!
end
#clausal? ⇒ Boolean
A node is clausal if it is a verb and not nominalized; or it has a copula dependent; or it has a subject (e.g. in an absolute construction without a verb); or if it is the root (e.g. in a nominal clause).
270 271 272 273 274 275 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 270

# A token counts as clausal when any of the following holds, checked in order:
# * its tag is 'V-' and it is not nominalized;
# * one of its dependents is a copula;
# * one of its dependents carries a subject relation
#   (sub, nsubj, nsubjpass, csubj, csubjpass);
# * it is the root.
def clausal?
  return true if @part_of_speech == 'V-' && !nominalized?

  children = dependents
  return true if children.any?(&:copula?)
  return true if children.any? { |d| %w[sub nsubj nsubjpass csubj csubjpass].include?(d.relation) }

  root?
end
#conj_head ⇒ Object
394 395 396 397 398 399 400 401 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 394

# Climbs a chain of 'conj' relations and returns the first ancestor whose own
# relation is not 'conj'.
#
# Raises a RuntimeError ("Not a conjunct") when this token's relation is
# not 'conj'.
def conj_head
  raise "Not a conjunct" unless @relation == 'conj'

  parent = head
  parent.relation == 'conj' ? parent.conj_head : parent
end
#conjunction? ⇒ Boolean
277 278 279 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 277

# True when the part-of-speech tag is 'C-' or the empty-token sort is 'C'.
def conjunction?
  part_of_speech == 'C-' || @empty_token_sort == 'C'
end
#coordinated? ⇒ Boolean
281 282 283 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 281

# Truthy when this token has a head, that head is a conjunction, and the
# head bears the same relation as this token. Returns nil when there is
# no head.
def coordinated?
  parent = head
  parent && parent.conjunction? && parent.relation == @relation
end
#copula? ⇒ Boolean
Returns true if the node has an xobj dependent and either 1) the lemma is copular or 2) the node is empty and has no pid slash or a pid slash to a node with a copular lemma.
288 289 290 291 292 293 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 288

# True when the relation is already 'cop', or when the token is copular and
# has an 'xobj' dependent.
#
# "Copular" here means either that the "lemma,part_of_speech,language" key is
# in COPULAR_LEMMATA, or that the empty-token sort is 'V' and the pid target
# is missing, is itself empty, or has a key in COPULAR_LEMMATA.
def copula?
  return true if @relation == 'cop'

  copular = COPULAR_LEMMATA.include?([lemma, part_of_speech, language].join(','))
  if !copular && @empty_token_sort == 'V'
    antecedent = pid
    copular = antecedent.nil? ||
              antecedent.is_empty? ||
              COPULAR_LEMMATA.include?([antecedent.lemma, antecedent.part_of_speech, antecedent.language].join(','))
  end

  copular && dependents.any? { |d| d.relation == 'xobj' }
end
#count_subgraph ⇒ Object
382 383 384 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 382

# Counts the non-empty tokens in the subgraph rooted at this token: the sum
# of the dependents' counts, plus one unless this token is empty.
def count_subgraph
  below = dependents.map(&:count_subgraph).inject(0, :+)
  is_empty? ? below : below + 1
end
#dependents ⇒ Object
452 453 454 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 452

# Returns the sentence's tokens whose head_id equals this token's id,
# sorted by id.
def dependents
  children = @sentence.tokens.select { |token| token.head_id == @id }
  children.sort_by(&:id)
end
#deponent? ⇒ Boolean
323 324 325 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 323

# Looks up the DEPONENTS entry for this token's language and matches the
# lemma against it. Returns the match result (truthy on a hit), or nil when
# the language has no entry or the lemma does not match.
def deponent?
  pattern = DEPONENTS[@language]
  pattern && pattern.match(@lemma)
end
#determiner? ⇒ Boolean
299 300 301 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 299

# True when the part-of-speech tag is listed in DETERMINERS.
def determiner?
  DETERMINERS.include?(@part_of_speech)
end
#distribute_shared_modifiers! ⇒ Object
614 615 616 617 618 619 620 621 622 623 624 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 614

# Moves modifiers shared by a coordination under its first conjunct.
#
# Ignoring 'aux' dependents, a dependent is a conjunct when it has the same
# relation as this conjunction (or is 'adv' under an 'xadv' conjunction);
# everything else is a modifier. Each modifier is re-attached to the first
# conjunct and recorded as a slash [id, relation] on every other conjunct.
#
# Raises a RuntimeError when called on a non-conjunction, when there is no
# conjunct, or when the first conjunct is a conjunction without dependents.
def distribute_shared_modifiers!
  raise "Can only distribute over a conjunction!" unless conjunction?

  candidates = dependents.reject { |d| d.relation == 'aux' }
  conjuncts, modifiers = candidates.partition do |d|
    d.relation == @relation || (d.relation == 'adv' && @relation == 'xadv')
  end

  first_conjunct = conjuncts.shift
  raise "No first conjunct under #{to_n}\n#{to_graph}" unless first_conjunct
  if first_conjunct.conjunction? && first_conjunct.dependents.empty?
    raise "The first conjunct is a misannotated conjunction in #{to_n}\n#{to_graph}"
  end

  modifiers.each do |modifier|
    modifier.head_id = first_conjunct.id
    conjuncts.each { |conjunct| conjunct.add_slash!([modifier.id, modifier.relation]) }
  end
end
#ellipsis? ⇒ Boolean
303 304 305 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 303

# True when the empty-token sort is exactly 'V'.
def ellipsis?
  @empty_token_sort == 'V'
end
#find_appositive_head ⇒ Object
456 457 458 459 460 461 462 463 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 456

# Returns the head of an apposition, climbing past any head that is itself a
# conjunction bearing the 'apos' relation.
#
# Raises a RuntimeError ("Not an apposition") when this token's relation is
# not 'apos'.
def find_appositive_head
  raise "Not an apposition" unless @relation == 'apos'

  parent = head
  return parent unless parent.conjunction? && parent.relation == 'apos'

  parent.find_appositive_head
end
#find_highest_daughter ⇒ Object
571 572 573 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 571

# Returns the dependent whose mapped relation (the part before any ':')
# comes earliest in OBLIQUENESS_HIERARCHY. Relations missing from the
# hierarchy rank as 1000. Returns nil when there are no dependents.
def find_highest_daughter
  dependents.min_by do |daughter|
    label = daughter.map_relation[/[^:]*/]
    OBLIQUENESS_HIERARCHY.find_index(label) || 1000
  end
end
#find_postag(possible_postags) ⇒ Object
465 466 467 468 469 470 471 472 473 474 475 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 465

# Walks +possible_postags+, a list of [tag, criterion, features] entries, and
# applies the first entry whose criterion accepts this token: the tag
# becomes @upos and any features are appended to @features, separated by
# '|' when @features is not empty.
#
# Entries are consumed with #shift, so the list is mutated; pass a copy if
# the original must survive. Nothing happens when the list runs out.
#
# NOTE(review): the parameter name was missing from the listing and has been
# restored from the documented signature find_postag(possible_postags).
def find_postag(possible_postags)
  tag, crit, feats = possible_postags.shift
  if tag.nil?
    # raise "Found no postag"
  elsif crit.call(self)
    @upos = tag
    @features += ((@features.empty? ? '' : '|') + feats) if feats
  else
    find_postag(possible_postags)
  end
end
#find_relation(possible_relations) ⇒ Object
477 478 479 480 481 482 483 484 485 486 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 477

# Returns the relation of the first [relation, criterion] entry in
# +possible_relations+ whose criterion accepts this token, or nil when none
# does. Entries are consumed with #shift, so the list is mutated.
def find_relation(possible_relations)
  loop do
    rel, crit = possible_relations.shift
    # raise "Found no relation"
    return nil if rel.nil?
    return rel if crit.call(self)
  end
end
#find_remnant ⇒ Object
563 564 565 566 567 568 569 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 563

# Follows the chain of 'remnant' dependents downwards and returns the last
# token in it; returns this token itself when it has no 'remnant' dependent.
def find_remnant
  nested = dependents.find { |d| d.relation == 'remnant' }
  nested ? nested.find_remnant : self
end
#foreign? ⇒ Boolean
307 308 309 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 307

# True when the part-of-speech tag is exactly 'F-'.
def foreign?
  @part_of_speech == 'F-'
end
#format_features(features) ⇒ Object
411 412 413 414 415 416 417 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 411

# Renders a '|'-separated feature string for output: an empty string becomes
# '_', anything else has its features sorted alphabetically.
def format_features(features)
  return '_' if features == ''

  features.split("|").sort.join("|")
end
#genitive? ⇒ Boolean
248 249 250 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 248

# True when the morphology tag has 'g' in position 6, the :case slot of
# MORPHOLOGY_POSITIONAL_TAG_SEQUENCE.
#
# The pattern is anchored at the start of the tag so that a 'g' in a later
# slot cannot be mistaken for the case value. Always returns true or false,
# including when the morphology tag is nil.
def genitive?
  !(@morphology =~ /\A.{6}g/).nil?
end
#has_content? ⇒ Boolean
311 312 313 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 311

# True when the token is not marked as an empty token, i.e. its empty-token
# sort is nil or the empty string.
def has_content?
  @empty_token_sort.nil? || @empty_token_sort == ''
end
#has_preposition? ⇒ Boolean
581 582 583 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 581

# True when some dependent is a preposition attached with the "case" relation.
def has_preposition?
  dependents.any? { |child| child.preposition? && child.relation == "case" }
end
#head ⇒ Object
448 449 450 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 448

# Returns the first token of the sentence whose id equals this token's
# head_id, or nil when there is none.
def head
  @sentence.tokens.find { |token| token.id == @head_id }
end
#interjection? ⇒ Boolean
315 316 317 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 315

# True when the part-of-speech tag is exactly 'I-'.
def interjection?
  @part_of_speech == 'I-'
end
#invert!(new_dependent_relation = nil, new_head_relation = nil) ⇒ Object
Inverts the direction of a dependency relation. By default the labels are also swapped, but new relations can be specified for both the new dependent and the new head.
633 634 635 636 637 638 639 640 641 642 643 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 633

# Swaps this token with its head: the former head becomes a dependent of this
# token, and this token attaches where the former head was attached.
#
# +new_dependent_relation+ is the relation given to the former head
# (default: this token's current relation). +new_head_relation+ is the
# relation given to this token (default: the former head's relation), and is
# assigned through the relation= writer.
#
# Raises a RuntimeError when this token's head_id is 0.
def invert!(new_dependent_relation = nil, new_head_relation = nil)
  raise "Cannot promote a token under root!" if @head_id == 0

  former_head = head
  new_dependent_relation ||= @relation
  new_head_relation ||= former_head.relation
  grandparent_id = former_head.head_id

  former_head.head_id = @id
  former_head.relation = new_dependent_relation

  @head_id = grandparent_id
  self.relation = new_head_relation
end
#is_empty? ⇒ Boolean
319 320 321 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 319

# True when the token is an empty token; the exact negation of #has_content?.
def is_empty?
  !has_content?
end
#left_corner ⇒ Object
390 391 392 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 390

# Returns whichever of this token and its direct dependents has the
# lowest id.
def left_corner
  candidates = [self, *dependents]
  candidates.sort_by { |token| token.id }.first
end
#map_morphology(morph) ⇒ Object
240 241 242 243 244 245 246 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 240

# Translates a positional morphology tag into a '|'-joined feature string.
#
# The element at position i of +morph+ is looked up in the MORPHOLOGY_MAP
# entry for the i-th field of MORPHOLOGY_POSITIONAL_TAG_SEQUENCE; values
# without a mapping are dropped.
def map_morphology(morph)
  mapped = (0...morph.length).map do |position|
    field = MORPHOLOGY_POSITIONAL_TAG_SEQUENCE[position]
    MORPHOLOGY_MAP[field][morph[position]]
  end
  mapped.compact.join('|')
end
#map_part_of_speech! ⇒ Object
488 489 490 491 492 493 494 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 488

# Assigns universal part-of-speech tags to the whole subgraph: dependents are
# mapped first, then this token is tagged from the POS_MAP candidates for its
# part-of-speech tag.
#
# A copy of the candidate list is handed to #find_postag so that the list
# stored in POS_MAP is not consumed.
#
# NOTE(review): the local variable name was missing from the listing and has
# been restored to match the find_postag(possible_postags) signature.
def map_part_of_speech!
  dependents.each(&:map_part_of_speech!)
  possible_postags = POS_MAP[@part_of_speech]
  find_postag(possible_postags.dup)
  # ugly, but the ugliness comes from UDEP
  @upos = 'ADJ' if @upos == 'DET' && @relation != 'det'
end
#map_relation ⇒ Object
503 504 505 506 507 508 509 510 511 512 513 514 515 516 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 503

# Returns the mapped relation for this token according to RELATION_MAPPING:
# * a String entry is returned as is;
# * an Array entry is resolved with #find_relation (on a copy, so the
#   mapping itself is not consumed);
# * no entry means the current relation is returned unchanged.
#
# Raises a RuntimeError for any other kind of entry.
def map_relation
  possible_relations = RELATION_MAPPING[@relation]
  case possible_relations
  when String
    possible_relations
  when Array
    find_relation(possible_relations.dup)
  when nil
    # do nothing: the token has already changed its relation
    @relation
  else
    raise "Unknown value #{possible_relations.inspect} for #{@relation}"
  end
end
#mediopassive? ⇒ Boolean
327 328 329 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 327

# True when the token is not deponent and position 4 of its morphology tag
# (the :voice slot of MORPHOLOGY_POSITIONAL_TAG_SEQUENCE) is 'm', 'p' or 'e'.
#
# Always returns true or false, like #passive?; tokens without a morphology
# tag are never mediopassive.
def mediopassive?
  return false if deponent? || !@morphology

  !(@morphology[4] =~ /[mpe]/).nil?
end
#negation? ⇒ Boolean
335 336 337 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 335

# True when the comma-joined "lemma,part_of_speech,language" key of this
# token is listed in NEGATION_LEMMATA.
def negation?
  key = [lemma, part_of_speech, language].join(',')
  NEGATION_LEMMATA.include?(key)
end
#nominal? ⇒ Boolean
339 340 341 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 339

# True when the part-of-speech tag starts with 'N', 'P' or 'M', or when the
# token is nominalized.
#
# A tag match yields true rather than the raw match offset.
def nominal?
  !(@part_of_speech =~ /\A[NPM]/).nil? || nominalized?
end
#nominalized? ⇒ Boolean
343 344 345 346 347 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 343

# True when some dependent is a determiner attached with one of the
# relations 'atr', 'aux' or 'det'.
def nominalized?
  dependents.any? do |child|
    child.determiner? && %w[atr aux det].include?(child.relation)
  end
end
#particle? ⇒ Boolean
353 354 355 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 353

# True when the relation is 'aux' and the comma-joined
# "lemma,part_of_speech,language" key is listed in PARTICLE_LEMMATA.
def particle?
  return false unless @relation == 'aux'

  PARTICLE_LEMMATA.include?([lemma, part_of_speech, language].join(','))
end
#passive? ⇒ Boolean
331 332 333 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 331

# True when the token is not deponent and position 4 of its morphology tag
# (the :voice slot of MORPHOLOGY_POSITIONAL_TAG_SEQUENCE) is 'p'. Tokens
# without a morphology tag are never passive.
def passive?
  return false if deponent?
  return false unless @morphology

  @morphology[4] == 'p'
end
#pid ⇒ Object
403 404 405 406 407 408 409 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 403

# Returns the token targeted by this token's first 'pid' slash, or nil when
# it has no such slash. Slashes are [target_id, relation] pairs.
def pid
  slash = @slashes.find { |_target, label| label == 'pid' }
  return nil unless slash

  @sentence.tokens.find { |token| slash.first == token.id }
end
#preposition? ⇒ Boolean
361 362 363 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 361

# True when the part-of-speech tag is exactly 'R-'.
def preposition?
  @part_of_speech == 'R-'
end
#process_coordination! ⇒ Object
607 608 609 610 611 612 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 607

# Restructures a coordination headed by this conjunction.
#
# Does nothing when every dependent is 'aux'. Otherwise shared modifiers are
# distributed first, and then the non-'aux' dependent whose left corner has
# the lowest id is promoted into this conjunction's place: its siblings are
# relabelled "conj" and the conjunction itself becomes "cc".
#
# Raises a RuntimeError when called on a non-conjunction.
def process_coordination!
  raise "Only coordinations can be processed this way!" unless conjunction?
  return if dependents.all? { |d| d.relation == 'aux' }

  distribute_shared_modifiers!

  # Recompute the dependents: distributing modifiers has re-attached some.
  conjuncts = dependents.reject { |d| d.relation == 'aux' }
  leftmost = conjuncts.sort_by { |d| d.left_corner.id }.first
  leftmost.promote!("conj", "cc")
end
#process_copula! ⇒ Object
575 576 577 578 579 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 575

# Promotes the single 'xobj' dependent into this token's place, leaving this
# token attached to it with the 'cop' relation.
#
# Raises a RuntimeError unless there is exactly one 'xobj' dependent.
def process_copula!
  predicates = dependents.select { |d| d.relation == 'xobj' }
  unless predicates.size == 1
    raise "#{predicates.size} predicates under #{to_n}\n#{to_graph}"
  end

  predicates.first.promote!(nil, 'cop')
end
#process_ellipsis! ⇒ Object
538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 538

# Resolves an elided token.
#
# If one of the dependents is an auxiliary, it is promoted with the default
# relations and nothing else happens. Otherwise the highest daughter (see
# #find_highest_daughter) is promoted, its siblings are relabelled 'orphan',
# and this token is removed from the sentence.
def process_ellipsis!
  auxiliary = dependents.find(&:auxiliary?)
  if auxiliary
    auxiliary.promote!
    return
  end

  find_highest_daughter.promote!('orphan')

  # dependents.each do |d|
  #   # check if there's a partner with the same relation under the overt node.
  #   # TODO: this isn't really very convincing when it comes to ADVs
  #   if partner = overt.dependents.select { |p| p != self and p.relation == d.relation }.first #inserted p != self
  #     partner = partner.find_remnant
  #     d.head_id = partner.id
  #     d.relation = 'remnant'
  #   # if there's no partner, just attach under the overt node, preserving the relation
  #   else
  #     d.head_id = overt.id
  #   end
  # end
  @sentence.remove_token!(self)
end
#process_preposition! ⇒ Object
585 586 587 588 589 590 591 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 585

# Inverts a preposition with its single 'obl' dependent, so that the
# preposition ends up below it with the "case" relation.
#
# Raises a RuntimeError when called on a token whose tag is not 'R-', or when
# there is more than one 'obl' dependent.
def process_preposition!
  raise "Only prepositions can be processed this way!" unless part_of_speech == 'R-'

  obliques = dependents.select { |d| d.relation == 'obl' }
  if obliques.size > 1
    raise "#{obliques.size} oblique dependents under #{to_n}\n#{to_graph}"
  end
  # shouldn't really happen, but in practice
  return if obliques.empty?

  obliques.first.invert!("case") # , "adv")
end
#process_subjunction! ⇒ Object
attach subjunctions with ‘mark’ under their verbs and promote the verb to take over the subjunction’s relation. If the verb is empty, the subjunction stays as head.
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 521

# Restructures a subjunction and its predicate.
#
# With an overt 'pred' dependent, the two are inverted so that the
# subjunction hangs under the predicate with the 'mark' relation. With an
# empty predicate, the predicate's dependents are re-attached to the
# subjunction and the predicate is removed from the sentence.
#
# Raises a RuntimeError unless there is exactly one 'pred' dependent.
def process_subjunction!
  children = dependents
  # ignore if the subjunction has no dependents or only conj dependents.
  # NB: this requires that the function is called *after* processing conjunctions
  return if children.all? { |d| ['conj', 'cc'].include?(d.relation) }

  predicates = children.select { |d| d.relation == 'pred' }
  unless predicates.one?
    raise "#{predicates.size} PREDs under the subjunction #{to_n}:\n#{@sentence.to_graph}"
  end

  predicate = predicates.first
  if predicate.is_empty?
    # promote the subjunction if the verb is empty
    predicate.dependents.each { |d| d.head_id = id }
    @sentence.remove_token!(predicate)
  else
    # else demote the subjunction
    predicate.invert!('mark')
  end
end
#promote!(new_sibling_relation = nil, new_dependent_relation = 'aux') ⇒ Object
promotes a node to its head’s place. The node takes over its former head’s relation and all dependents. The new relation for these dependents can be specified; if it is not, they will keep their former relation. The former head is made a dependent of the node (with a specified relation) or, if it is an empty node, destroyed.
652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 652

# Moves this token into its head's place.
#
# All siblings are re-attached to this token; when +new_sibling_relation+ is
# given, it replaces the relation of every sibling that is not 'aux'. The
# former head is removed from the sentence if it is empty; otherwise it
# becomes a dependent of this token with +new_dependent_relation+. Finally
# this token takes over the former head's head and relation.
#
# Raises a RuntimeError when this token's head_id is 0.
def promote!(new_sibling_relation = nil, new_dependent_relation = 'aux')
  raise "Cannot promote a token under root!" if @head_id == 0

  former_head = head
  inherited_relation = former_head.relation
  inherited_head_id = former_head.head_id

  # move all dependents of the former head to the new one
  siblings.each do |sibling|
    sibling.head_id = @id
    # ugly hack to avoid overwriting the aux relation here (aux siblings aren't really siblings)
    next unless new_sibling_relation && sibling.relation != 'aux'

    sibling.relation = new_sibling_relation
  end

  if former_head.is_empty?
    # remove the former head if it was empty
    @sentence.remove_token!(former_head)
  else
    # else make it a dependent of the new head
    former_head.head_id = @id
    former_head.relation = new_dependent_relation
  end

  @head_id = inherited_head_id
  # don't use relation=, as we don't want this relation to be
  # copied down a tree of conjunctions
  @relation = inherited_relation
end
#pronominal? ⇒ Boolean
357 358 359 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 357

# True when the part-of-speech tag starts with 'P' followed by any character
# other than 's' or 't'.
#
# Always returns true or false (never the raw match offset or nil).
def pronominal?
  # no evidence that possessives are pronoun/determiner-like
  !(@part_of_speech =~ /\AP[^st]/).nil?
end
#proper_noun? ⇒ Boolean
365 366 367 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 365

# True when the part-of-speech tag is exactly 'Ne'.
def proper_noun?
  @part_of_speech == 'Ne'
end
#relabel_graph! ⇒ Object
496 497 498 499 500 501 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 496

# Relabels the whole subgraph bottom-up: dependents first, then this token's
# relation is replaced by the result of #map_relation.
#
# Raises a RuntimeError when the mapping produced no relation.
def relabel_graph!
  dependents.each { |child| child.relabel_graph! }
  # TODO: if there are iobjs without an obj among the dependents, one of them should be promoted to obj
  @relation = map_relation
  raise "No relation for #{form}" unless @relation
end
#remove_empties! ⇒ Object
593 594 595 596 597 598 599 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 593

# Removes empty tokens from the subgraph, bottom-up. When this token is
# empty, its remaining dependents are re-attached to this token's head with
# the 'remnant' relation and the token is removed from the sentence.
def remove_empties!
  dependents.each(&:remove_empties!)
  return unless is_empty?

  dependents.each do |orphan|
    orphan.head_id = head_id
    orphan.relation = 'remnant'
  end
  @sentence.remove_token!(self)
end
#root? ⇒ Boolean
369 370 371 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 369

# True when this token's head_id is 0.
def root?
  @head_id == 0
end
#siblings ⇒ Object
444 445 446 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 444

# Returns the other tokens of the sentence that share this token's head_id.
def siblings
  same_head = @sentence.tokens.select { |token| token.head_id == @head_id }
  same_head - [self]
end
#subgraph_set ⇒ Object
386 387 388 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 386

# Returns a flat array holding this token followed by every token in the
# subgraphs of its dependents.
def subgraph_set
  below = dependents.map { |child| child.subgraph_set }.flatten
  [self] + below
end
#subjunction? ⇒ Boolean
257 258 259 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 257

# True when the part-of-speech tag is exactly 'G-'.
def subjunction?
  @part_of_speech == 'G-'
end
#TAM_particle? ⇒ Boolean
349 350 351 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 349

# True when the relation is 'aux' and the comma-joined
# "lemma,part_of_speech,language" key is listed in TAM_PARTICLE_LEMMATA.
def TAM_particle?
  return false unless @relation == 'aux'

  TAM_PARTICLE_LEMMATA.include?([lemma, part_of_speech, language].join(','))
end
#to_conll ⇒ Object
419 420 421 422 423 424 425 426 427 428 429 430 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 419

# Renders the token as one tab-separated line with ten columns: id, form,
# lemma, universal POS, original POS, formatted features, head id, relation,
# a '_' placeholder, and the citation part.
def to_conll
  # override non-root relations on root until we've found out how to handle unembedded reports etc
  deprel = @head_id == 0 ? 'root' : @relation
  columns = [
    @id, @form, @lemma, @upos, @part_of_speech,
    format_features(@features),
    @head_id, deprel,
    '_', # slashes here
    @citation_part
  ]
  columns.join("\t")
end
#to_graph(indents = 0) ⇒ Object
440 441 442 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 440

# Renders the subgraph as an indented outline: this token's #to_n label
# prefixed by +indents+ tabs, followed by each dependent's outline one level
# deeper, one entry per line.
def to_graph(indents = 0)
  own_line = ("\t" * indents) + to_n
  child_lines = dependents.map { |child| child.to_graph(indents + 1) }
  [own_line, *child_lines].join("\n")
end
#to_n ⇒ Object
436 437 438 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 436

# Returns a short '-'-joined label: relation, id, form (or the empty-token
# sort when there is no form) and universal POS (or the original POS when no
# universal one is set).
def to_n
  surface = @form || @empty_token_sort
  tag = @upos || @part_of_speech
  [@relation, @id, surface, tag].join('-')
end
#to_s ⇒ Object
432 433 434 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 432

# Returns the id, form, head id and relation as one tab-separated string.
def to_s
  fields = [@id, @form, @head_id, @relation]
  fields.join("\t")
end