Class: PROIEL::Token

Inherits:

TreebankObject

Object
TreebankObject
PROIEL::Token

show all

Extended by:: Memoist

Defined in:: lib/proiel/token.rb

Overview

A token object in a treebank.

Instance Attribute Summary collapse

#alignment_id ⇒ nil, Integer readonly

ID of the token that this token is aligned to.
#antecedent_id ⇒ nil, Fixnum readonly

ID of antecedent token.
#citation_part ⇒ nil, String readonly

Citation part.
#contrast_group ⇒ nil, String readonly

Contrast group tag.
#empty_token_sort ⇒ nil, String readonly

Token empty token sort tag.
#foreign_ids ⇒ nil, String readonly

Free-form foreign IDs.
#form ⇒ nil, String readonly

Token form.
#head_id ⇒ nil, Fixnum readonly

ID of head token.
#id ⇒ Fixnum readonly

ID of the token.
#information_status ⇒ nil, String readonly

Information status tag.
#lemma ⇒ nil, String readonly

Token lemma.
#morphology ⇒ nil, String readonly

Token morphological tag.
#part_of_speech ⇒ nil, String (also: #pos) readonly

Token part of speech tag.
#presentation_after ⇒ nil, String readonly

Presentation material after form.
#presentation_before ⇒ nil, String readonly

Presentation material before form.
#relation ⇒ nil, String readonly

Token relation tag.
#sentence ⇒ Sentence

Parent sentence object.
#slashes ⇒ Array<Array<String,Fixnum>> readonly

Secondary edges as an array of pairs of relation tag and target token ID.

Instance Method Summary collapse

#alignment(aligned_source) ⇒ Token, NilClass

Returns the aligned token if any.
#ancestors ⇒ Array<Token>

Finds ancestors of this token in the dependency graph.
#citation ⇒ nil, String

A complete citation for the token.
#common_ancestors(other_token, inclusive: false) ⇒ Array<Token>

Finds the common ancestors that this token and another token share in the dependency graph.
#dependents ⇒ Array<Token> (also: #children)

Finds dependents of this token in the dependency graph.
#descendents ⇒ Array<Token> (also: #descendants)

Finds descendents of this token in the dependency graph.
#div ⇒ Div

Parent div object.
#first_common_ancestor(other_token, inclusive: false) ⇒ nil, Token

Finds the first common ancestor that this token and another token share in the dependency graph.
#has_citation? ⇒ true, false

Tests if the token has a citation.
#has_content? ⇒ true, false

Tests if the token has content.
#head ⇒ Token (also: #parent)

Finds the head of this token.
#initialize(parent, id, head_id, form, lemma, part_of_speech, morphology, relation, empty_token_sort, citation_part, presentation_before, presentation_after, antecedent_id, information_status, contrast_group, foreign_ids, slashes, alignment_id) ⇒ Token constructor

Creates a new token object.
#is_empty? ⇒ true, false

Tests if the token is empty.
#is_root? ⇒ true, false

Checks if the token is the root of its dependency graph.
#language ⇒ String

Language of the token as an ISO 639-3 language tag.
#morphology_hash ⇒ Hash<Symbol,String>

Token morphology tag as a hash.
#part_of_speech_hash ⇒ Hash<Symbol,String> (also: #pos_hash)

Token part of speech tag as a hash.
#part_of_speech_with_nulls ⇒ String (also: #pos_with_nulls)

Returns the part of speech tag if set, but also provides a suitable part of speech tag for empty elements.
#printable_form(custom_token_formatter: nil) ⇒ String

Returns the printable form of the token with any presentation data.
#pro? ⇒ true, false

Checks if the token is a PRO token.
#source ⇒ Source

Parent source object.
#treebank ⇒ Treebank

Parent treebank object.

Methods inherited from TreebankObject

#inspect

Constructor Details

#initialize(parent, id, head_id, form, lemma, part_of_speech, morphology, relation, empty_token_sort, citation_part, presentation_before, presentation_after, antecedent_id, information_status, contrast_group, foreign_ids, slashes, alignment_id) ⇒ `Token`

Creates a new token object.

Raises:

(ArgumentError)

# File 'lib/proiel/token.rb', line 70

# Creates a new token object.
#
# Every argument except `parent` is type-checked in declaration order, so the
# first invalid argument determines which error message is raised. String
# arguments are frozen in place (the caller's object is frozen, not a copy).
#
# @param parent [Sentence] parent sentence object (stored without validation)
# @param id [Integer] ID of the token
# @param head_id [nil, Integer] ID of the head token
# @param form [nil, String] token form
# @param lemma [nil, String] token lemma
# @param part_of_speech [nil, String] part of speech tag
# @param morphology [nil, String] morphological tag
# @param relation [nil, String] relation tag
# @param empty_token_sort [nil, String] empty token sort tag
# @param citation_part [nil, String] citation part
# @param presentation_before [nil, String] presentation material before form
# @param presentation_after [nil, String] presentation material after form
# @param antecedent_id [nil, Integer] ID of the antecedent token
# @param information_status [nil, String] information status tag
# @param contrast_group [nil, String] contrast group tag
# @param foreign_ids [nil, String] free-form foreign IDs
# @param slashes [Array] secondary edges; each element must respond to
#   `relation` and `target_id`
# @param alignment_id [nil, Integer] ID of the aligned token
# @raise [ArgumentError] if any checked argument has an unexpected type
def initialize(parent, id, head_id, form, lemma, part_of_speech,
               morphology, relation, empty_token_sort, citation_part,
               presentation_before, presentation_after, antecedent_id,
               information_status, contrast_group, foreign_ids, slashes,
               alignment_id)
  @sentence = parent

  raise ArgumentError, 'integer expected' unless id.is_a?(Integer)
  @id = id

  raise ArgumentError, 'integer or nil expected' unless head_id.nil? || head_id.is_a?(Integer)
  @head_id = head_id

  raise ArgumentError, 'string or nil expected' unless form.nil? || form.is_a?(String)
  @form = form.freeze

  raise ArgumentError, 'string or nil expected' unless lemma.nil? || lemma.is_a?(String)
  @lemma = lemma.freeze

  raise ArgumentError, 'string or nil expected' unless part_of_speech.nil? || part_of_speech.is_a?(String)
  @part_of_speech = part_of_speech.freeze

  raise ArgumentError, 'string or nil expected' unless morphology.nil? || morphology.is_a?(String)
  @morphology = morphology.freeze

  raise ArgumentError, 'string or nil expected' unless relation.nil? || relation.is_a?(String)
  @relation = relation.freeze

  raise ArgumentError, 'string or nil expected' unless empty_token_sort.nil? || empty_token_sort.is_a?(String)
  @empty_token_sort = empty_token_sort.freeze

  raise ArgumentError, 'string or nil expected' unless citation_part.nil? || citation_part.is_a?(String)
  @citation_part = citation_part.freeze

  raise ArgumentError, 'string or nil expected' unless presentation_before.nil? || presentation_before.is_a?(String)
  @presentation_before = presentation_before.freeze

  raise ArgumentError, 'string or nil expected' unless presentation_after.nil? || presentation_after.is_a?(String)
  @presentation_after = presentation_after.freeze

  raise ArgumentError, 'integer or nil expected' unless antecedent_id.nil? || antecedent_id.is_a?(Integer)
  @antecedent_id = antecedent_id

  raise ArgumentError, 'string or nil expected' unless information_status.nil? || information_status.is_a?(String)
  @information_status = information_status.freeze

  raise ArgumentError, 'string or nil expected' unless contrast_group.nil? || contrast_group.is_a?(String)
  @contrast_group = contrast_group.freeze

  raise ArgumentError, 'string or nil expected' unless foreign_ids.nil? || foreign_ids.is_a?(String)
  @foreign_ids = foreign_ids.freeze

  raise ArgumentError, 'array expected' unless slashes.is_a?(Array)
  # Reduce each slash object to a plain [relation, target_id] pair.
  @slashes = slashes.map { |s| [s.relation.freeze, s.target_id] }

  raise ArgumentError, 'integer or nil expected' unless alignment_id.nil? || alignment_id.is_a?(Integer)
  @alignment_id = alignment_id
end

Instance Attribute Details

#alignment_id ⇒ `nil`, `Integer` (readonly)

Returns ID of the token that this token is aligned to.

Returns:

(nil, Integer) —

ID of the token that this token is aligned to



67
68
69

# File 'lib/proiel/token.rb', line 67

# Reader for the alignment ID set by the constructor; #alignment resolves it
# with `find_token`.
#
# @return [nil, Integer] ID of the aligned token, or nil if none was given
def alignment_id
  @alignment_id
end

#antecedent_id ⇒ `nil`, `Fixnum` (readonly)

Returns ID of antecedent token.

Returns:

(nil, Fixnum) —

ID of antecedent token



52
53
54

# File 'lib/proiel/token.rb', line 52

# Reader for the antecedent ID set by the constructor.
#
# @return [nil, Integer] ID of the antecedent token, or nil if none was given
def antecedent_id
  @antecedent_id
end

#citation_part ⇒ `nil`, `String` (readonly)

Returns citation part.

Returns:

(nil, String) —

citation part



43
44
45

# File 'lib/proiel/token.rb', line 43

# Reader for the citation part set (and frozen) by the constructor.
#
# @return [nil, String] citation part
def citation_part
  @citation_part
end

#contrast_group ⇒ `nil`, `String` (readonly)

Returns contrast group tag.

Returns:

(nil, String) —

contrast group tag



58
59
60

# File 'lib/proiel/token.rb', line 58

# Reader for the contrast group tag set (and frozen) by the constructor.
#
# @return [nil, String] contrast group tag
def contrast_group
  @contrast_group
end

#empty_token_sort ⇒ `nil`, `String` (readonly)

Returns token empty token sort tag.

Returns:

(nil, String) —

token empty token sort tag



40
41
42

# File 'lib/proiel/token.rb', line 40

# Reader for the empty token sort tag set (and frozen) by the constructor.
# A non-nil value is what #is_empty? treats as an empty token.
#
# @return [nil, String] empty token sort tag
def empty_token_sort
  @empty_token_sort
end

#foreign_ids ⇒ `nil`, `String` (readonly)

Returns free-form foreign IDs.

Returns:

(nil, String) —

free-form foreign IDs



61
62
63

# File 'lib/proiel/token.rb', line 61

# Reader for the free-form foreign IDs set (and frozen) by the constructor.
#
# @return [nil, String] free-form foreign IDs
def foreign_ids
  @foreign_ids
end

#form ⇒ `nil`, `String` (readonly)

Returns token form.

Returns:

(nil, String) —

token form



22
23
24

# File 'lib/proiel/token.rb', line 22

# Reader for the token form set (and frozen) by the constructor.
#
# @return [nil, String] token form
def form
  @form
end

#head_id ⇒ `nil`, `Fixnum` (readonly)

Returns ID of head token.

Returns:

(nil, Fixnum) —

ID of head token



19
20
21

# File 'lib/proiel/token.rb', line 19

# Reader for the head token ID set by the constructor. A nil value marks the
# token as a root (see #is_root?).
#
# @return [nil, Integer] ID of the head token
def head_id
  @head_id
end

#id ⇒ `Fixnum` (readonly)

Returns ID of the token.

Returns:

(Fixnum) —

ID of the token



13
14
15

# File 'lib/proiel/token.rb', line 13

# Reader for the token ID set by the constructor, which requires an Integer.
#
# @return [Integer] ID of this token
def id
  @id
end

#information_status ⇒ `nil`, `String` (readonly)

Returns information status tag.

Returns:

(nil, String) —

information status tag



55
56
57

# File 'lib/proiel/token.rb', line 55

# Reader for the information status tag set (and frozen) by the constructor.
#
# @return [nil, String] information status tag
def information_status
  @information_status
end

#lemma ⇒ `nil`, `String` (readonly)

Returns token lemma.

Returns:

(nil, String) —

token lemma



25
26
27

# File 'lib/proiel/token.rb', line 25

# Reader for the token lemma set (and frozen) by the constructor.
#
# @return [nil, String] token lemma
def lemma
  @lemma
end

#morphology ⇒ `nil`, `String` (readonly)

Returns token morphological tag.

Returns:

(nil, String) —

token morphological tag



34
35
36

# File 'lib/proiel/token.rb', line 34

# Reader for the morphological tag set (and frozen) by the constructor.
# #morphology_hash decodes it character by character.
#
# @return [nil, String] token morphological tag
def morphology
  @morphology
end

#part_of_speech ⇒ `nil`, `String` (readonly) Also known as: pos

Returns token part of speech tag.

Returns:

(nil, String) —

token part of speech tag



28
29
30

# File 'lib/proiel/token.rb', line 28

# Reader for the part of speech tag set (and frozen) by the constructor.
# #part_of_speech_hash decodes it character by character.
#
# @return [nil, String] token part of speech tag
def part_of_speech
  @part_of_speech
end

#presentation_after ⇒ `nil`, `String` (readonly)

Returns presentation material after form.

Returns:

(nil, String) —

presentation material after form



49
50
51

# File 'lib/proiel/token.rb', line 49

# Reader for the presentation material that #printable_form appends after
# the form.
#
# @return [nil, String] presentation material after form
def presentation_after
  @presentation_after
end

#presentation_before ⇒ `nil`, `String` (readonly)

Returns presentation material before form.

Returns:

(nil, String) —

presentation material before form



46
47
48

# File 'lib/proiel/token.rb', line 46

# Reader for the presentation material that #printable_form prepends before
# the form.
#
# @return [nil, String] presentation material before form
def presentation_before
  @presentation_before
end

#relation ⇒ `nil`, `String` (readonly)

Returns token relation tag.

Returns:

(nil, String) —

token relation tag



37
38
39

# File 'lib/proiel/token.rb', line 37

# Reader for the relation tag set (and frozen) by the constructor.
#
# @return [nil, String] token relation tag
def relation
  @relation
end

#sentence ⇒ `Sentence`

Returns parent sentence object.

Returns:

(Sentence) —

parent sentence object



16
17
18

# File 'lib/proiel/token.rb', line 16

# Reader for the parent object passed as the constructor's first argument.
#
# @return [Sentence] parent sentence object
def sentence
  @sentence
end

#slashes ⇒ `Array<Array<String,Fixnum>>` (readonly)

Returns secondary edges as an array of pairs of relation tag and target token ID.

Returns:

(Array<Array<String,Fixnum>>) —

secondary edges as an array of pairs of relation tag and target token ID



64
65
66

# File 'lib/proiel/token.rb', line 64

# Reader for the secondary edges, stored by the constructor as
# [relation, target_id] pairs.
#
# @return [Array<Array>] pairs of relation tag and target token ID
def slashes
  @slashes
end

Instance Method Details

#alignment(aligned_source) ⇒ `Token`, `NilClass`

Returns the aligned token if any.

Returns:

(Token, NilClass) —

aligned token



400
401
402

# File 'lib/proiel/token.rb', line 400

# Looks up the token this token is aligned to.
#
# @param aligned_source [#treebank] object whose treebank is searched with
#   `find_token`
# @return [Token, NilClass] the aligned token, or nil when no alignment ID is set
def alignment(aligned_source)
  return nil unless alignment_id

  aligned_source.treebank.find_token(alignment_id)
end

#ancestors ⇒ `Array<Token>`

Finds ancestors of this token in the dependency graph.

The ancestors are the ancestors of this token in the tree that has tokens as nodes and primary relations as edges.

The order of the returned ancestors is as follows: The first ancestor is the head of this token, the next ancestor is the head of the previous token, and so on.

Returns:

(Array<Token>) —

ancestors

# File 'lib/proiel/token.rb', line 266

# Collects the chain of heads above this token, nearest first.
#
# @return [Array<Token>] the head, then the head's own ancestors; empty for a
#   root token
def ancestors
  return [] if is_root?

  nearest = head
  [nearest] + nearest.ancestors
end

#citation ⇒ `nil`, `String`

Returns a complete citation for the token.

Returns:

(nil, String) —

a complete citation for the token

# File 'lib/proiel/token.rb', line 152

# Builds the complete citation from the source's citation part and the
# token's own citation part, separated by a space.
#
# @return [nil, String] the citation, or nil when the token has no citation part
def citation
  return nil unless citation_part

  pieces = [source.citation_part, citation_part]
  pieces.compact.join(' ')
end

#common_ancestors(other_token, inclusive: false) ⇒ `Array<Token>`

Finds the common ancestors that this token and another token share in the dependency graph.

If `inclusive` is `false`, a common ancestor is defined strictly as a common ancestor of both tokens. If `inclusive` is `true`, one of the tokens can be a common ancestor of the other.

Ancestors are returned in the same order as #ancestors.

Examples:

x.head # => w
w.head # => z
y.head # => z
z.head # => u

x.common_ancestors(y, inclusive: false) # => [z, u]
x.common_ancestors(w, inclusive: false) # => [z, u]
x.common_ancestors(x, inclusive: false) # => [w, z, u]

x.common_ancestors(y, inclusive: true)  # => [z, u]
x.common_ancestors(w, inclusive: true)  # => [w, z, u]
x.common_ancestors(x, inclusive: true)  # => [x, w, z, u]

Returns:

(Array<Token>) —

common ancestors

See Also:

#first_common_ancestor
Token#first_common_ancestor_path

# File 'lib/proiel/token.rb', line 358

# Intersects this token's ancestor chain with another token's chain.
#
# @param other_token [Token] token to compare against
# @param inclusive [Boolean] when true, each token is prepended to its own
#   chain so that one token may count as a common ancestor of the other
# @return [Array<Token>] shared ancestors, in the order of this token's chain
def common_ancestors(other_token, inclusive: false)
  mine = ancestors
  theirs = other_token.ancestors

  if inclusive
    mine = [self] + mine
    theirs = [other_token] + theirs
  end

  mine & theirs
end

#dependents ⇒ `Array<Token>` Also known as: children

Finds dependents of this token in the dependency graph.

The dependents are the children of this token in the tree that has tokens as nodes and primary relations as edges.

The order of the returned dependents is indeterminate.

Returns:

(Array<Token>) —

dependents



248
249
250

# File 'lib/proiel/token.rb', line 248

# Selects the tokens of the parent sentence whose head ID equals this
# token's ID.
#
# @return [Array<Token>] direct dependents of this token
def dependents
  candidates = @sentence.tokens
  candidates.select { |candidate| candidate.head_id == @id }
end

#descendents ⇒ `Array<Token>` Also known as: descendants

Finds descendents of this token in the dependency graph.

The descendents are the dependents of this token, their dependents, and so on, in the tree that has tokens as nodes and primary relations as edges.

The order of the returned descendents is indeterminate.

Returns:

(Array<Token>) —

descendents



284
285
286

# File 'lib/proiel/token.rb', line 284

# Gathers every token below this one: each dependent followed by that
# dependent's own descendents.
#
# @return [Array<Token>] all descendents of this token
def descendents
  dependents.flat_map { |child| [child] + child.descendents }
end

#div ⇒ `Div`

Returns parent div object.

Returns:

(Div) —

parent div object



130
131
132

# File 'lib/proiel/token.rb', line 130

# Delegates to the parent sentence's `div`.
#
# @return [Div] parent div object
def div
  @sentence.div
end

#first_common_ancestor(other_token, inclusive: false) ⇒ `nil`, `Token`

Finds the first common ancestor that this token and another token share in the dependency graph.

If `inclusive` is `false`, a common ancestor is defined strictly as a common ancestor of both tokens. If `inclusive` is `true`, one of the tokens can be a common ancestor of the other.

Examples:

x.head # => w
w.head # => z
y.head # => z
z.head # => u

x.first_common_ancestor(y, inclusive: false) # => z
x.first_common_ancestor(w, inclusive: false) # => z
x.first_common_ancestor(x, inclusive: false) # => w

x.first_common_ancestor(y, inclusive: true)  # => z
x.first_common_ancestor(w, inclusive: true)  # => w
x.first_common_ancestor(x, inclusive: true)  # => x

Returns:

(nil, Token) —

first common ancestor

See Also:

#common_ancestors
Token#first_common_ancestor_path



393
394
395

# File 'lib/proiel/token.rb', line 393

# Returns the first element of #common_ancestors for the same arguments.
#
# @param other_token [Token] token to compare against
# @param inclusive [Boolean] forwarded unchanged to #common_ancestors
# @return [nil, Token] first common ancestor, or nil when the list is empty
def first_common_ancestor(other_token, inclusive: false)
  common_ancestors(other_token, inclusive: inclusive).first
end

#has_citation? ⇒ `true`, `false`

Tests if the token has a citation.

A token has a citation if `citation_part` is not `nil`.

Returns:

(true, false)



320
321
322

# File 'lib/proiel/token.rb', line 320

# Tests whether `citation_part` is set (non-nil).
#
# @return [true, false]
def has_citation?
  !citation_part.nil?
end

#has_content? ⇒ `true`, `false`

Tests if the token has content.

A token has content if it has a form.

Returns:

(true, false)

See Also:

#is_empty?



311
312
313

# File 'lib/proiel/token.rb', line 311

# Tests whether `empty_token_sort` is nil; this is the exact negation of
# #is_empty?.
#
# @return [true, false]
def has_content?
  empty_token_sort.nil?
end

#head ⇒ `Token` Also known as: parent

Finds the head of this token.

The head is the parent of this token in the tree that has tokens as nodes and primary relations as edges.

Returns:

(nil, Token) —

head, or `nil` if the token is a root

# File 'lib/proiel/token.rb', line 228

# Resolves the head token by looking up `head_id` in the treebank.
#
# @return [nil, Token] the head token, or nil when this token is a root
def head
  return nil if is_root?

  treebank.find_token(head_id)
end

#is_empty? ⇒ `true`, `false`

Tests if the token is empty.

A token is empty if it does not have a form. If the token is empty, #empty_token_sort explains its function.

Returns:

(true, false)

See Also:

#has_content?



300
301
302

# File 'lib/proiel/token.rb', line 300

# Tests whether `empty_token_sort` is set (non-nil); this is the exact
# negation of #has_content?.
#
# @return [true, false]
def is_empty?
  !empty_token_sort.nil?
end

#is_root? ⇒ `true`, `false`

Checks if the token is the root of its dependency graph.

If the token belongs to a sentence that lacks dependency annotation, all tokens are treated as roots. If a sentence has partial or complete dependency annotation there may still be multiple root tokens.

Returns:

(true, false)



218
219
220

# File 'lib/proiel/token.rb', line 218

# Tests whether the token has no head, i.e. `head_id` is nil.
#
# @return [true, false]
def is_root?
  head_id.nil?
end

#language ⇒ `String`

Returns language of the token as an ISO 639-3 language tag.

Returns:

(String) —

language of the token as an ISO 639-3 language tag



145
146
147

# File 'lib/proiel/token.rb', line 145

# Delegates to the `language` of the parent source.
#
# @return [String] language of the token as an ISO 639-3 language tag
def language
  source.language
end

#morphology_hash ⇒ `Hash<Symbol,String>`

Returns token morphology tag as a hash.

Returns:

(Hash<Symbol,String>) —

token morphology tag as a hash

# File 'lib/proiel/token.rb', line 201

# Decodes the positional morphology tag: pairs each field of
# MORPHOLOGY_POSITIONAL_TAG_SEQUENCE with the character at the same position
# and drops fields whose character is '-'.
#
# @return [Hash<Symbol,String>] field/value pairs; empty when no tag is set
def morphology_hash
  return {} unless morphology

  values = morphology.split('')
  MORPHOLOGY_POSITIONAL_TAG_SEQUENCE.zip(values).reject { |_, value| value == '-' }.to_h
end

#part_of_speech_hash ⇒ `Hash<Symbol,String>` Also known as: pos_hash

Returns token part of speech tag as a hash.

Returns:

(Hash<Symbol,String>) —

token part of speech tag as a hash

# File 'lib/proiel/token.rb', line 178

# Decodes the positional part of speech tag: pairs each field of
# POS_POSITIONAL_TAG_SEQUENCE with the character at the same position and
# drops fields whose character is '-'.
#
# @return [Hash<Symbol,String>] field/value pairs; empty when no tag is set
def part_of_speech_hash
  return {} unless part_of_speech

  values = part_of_speech.split('')
  POS_POSITIONAL_TAG_SEQUENCE.zip(values).reject { |_, value| value == '-' }.to_h
end

#part_of_speech_with_nulls ⇒ `String` Also known as: pos_with_nulls

Returns the part of speech tag if set, but also provides a suitable part of speech tag for empty elements.

Returns:

(String) —

part of speech tag



194
195
196

# File 'lib/proiel/token.rb', line 194

# Returns the part of speech tag when one is set; otherwise looks up a
# substitute in NULL_PARTS_OF_SPEECH keyed by `empty_token_sort`.
#
# @return [String] part of speech tag
def part_of_speech_with_nulls
  tag = part_of_speech
  return tag if tag

  NULL_PARTS_OF_SPEECH[empty_token_sort]
end

#printable_form(custom_token_formatter: nil) ⇒ `String`

Returns the printable form of the token with any presentation data.

If a custom formatter is given, it is passed the token as its sole argument.

Parameters:

custom_token_formatter (Lambda) (defaults to: nil) —

formatting function for tokens

Returns:

(String) —

the printable form of the token

# File 'lib/proiel/token.rb', line 166

# Concatenates the presentation material before the form, the (optionally
# custom-formatted) form, and the presentation material after the form,
# skipping any part that is nil.
#
# @param custom_token_formatter [#call, nil] when given, called with the token
#   itself and its result is used in place of `form`
# @return [String] the printable form of the token
def printable_form(custom_token_formatter: nil)
  rendered = custom_token_formatter ? custom_token_formatter.call(self) : form
  [presentation_before, rendered, presentation_after].compact.join
end

#pro? ⇒ `true`, `false`

Checks if the token is a PRO token.

Returns:

(true, false)



327
328
329

# File 'lib/proiel/token.rb', line 327

# Tests whether `empty_token_sort` equals 'P', the tag this method treats as
# marking a PRO token.
#
# @return [true, false]
def pro?
  empty_token_sort == 'P'
end

#source ⇒ `Source`

Returns parent source object.

Returns:

(Source) —

parent source object



135
136
137

# File 'lib/proiel/token.rb', line 135

# Reaches the source through the parent sentence's div.
#
# @return [Source] parent source object
def source
  @sentence.div.source
end

#treebank ⇒ `Treebank`

Returns parent treebank object.

Returns:

(Treebank) —

parent treebank object



140
141
142

# File 'lib/proiel/token.rb', line 140

# Reaches the treebank through the parent sentence's div and that div's
# source.
#
# @return [Treebank] parent treebank object
def treebank
  @sentence.div.source.treebank
end

Class: PROIEL::Token

Overview

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from TreebankObject

Constructor Details

#initialize(parent, id, head_id, form, lemma, part_of_speech, morphology, relation, empty_token_sort, citation_part, presentation_before, presentation_after, antecedent_id, information_status, contrast_group, foreign_ids, slashes, alignment_id) ⇒ Token

Instance Attribute Details

#alignment_id ⇒ nil, Integer (readonly)

#antecedent_id ⇒ nil, Fixnum (readonly)

#citation_part ⇒ nil, String (readonly)

#contrast_group ⇒ nil, String (readonly)

#empty_token_sort ⇒ nil, String (readonly)

#foreign_ids ⇒ nil, String (readonly)

#form ⇒ nil, String (readonly)

#head_id ⇒ nil, Fixnum (readonly)

#id ⇒ Fixnum (readonly)

#information_status ⇒ nil, String (readonly)

#lemma ⇒ nil, String (readonly)

#morphology ⇒ nil, String (readonly)

#part_of_speech ⇒ nil, String (readonly) Also known as: pos

#presentation_after ⇒ nil, String (readonly)

#presentation_before ⇒ nil, String (readonly)

#relation ⇒ nil, String (readonly)

#sentence ⇒ Sentence

#slashes ⇒ Array<Array<String,Fixnum>> (readonly)

Instance Method Details

#alignment(aligned_source) ⇒ Token, NilClass

#ancestors ⇒ Array<Token>

#citation ⇒ nil, String

#common_ancestors(other_token, inclusive: false) ⇒ Array<Token>

Examples:

#dependents ⇒ Array<Token> Also known as: children

#descendents ⇒ Array<Token> Also known as: descendants

#div ⇒ Div

#first_common_ancestor(other_token, inclusive: false) ⇒ nil, Token

Examples:

#has_citation? ⇒ true, false

#has_content? ⇒ true, false

#head ⇒ Token Also known as: parent

#is_empty? ⇒ true, false

#is_root? ⇒ true, false

#language ⇒ String

#morphology_hash ⇒ Hash<Symbol,String>

#part_of_speech_hash ⇒ Hash<Symbol,String> Also known as: pos_hash

#part_of_speech_with_nulls ⇒ String Also known as: pos_with_nulls

#printable_form(custom_token_formatter: nil) ⇒ String

#pro? ⇒ true, false

#source ⇒ Source

#treebank ⇒ Treebank

#initialize(parent, id, head_id, form, lemma, part_of_speech, morphology, relation, empty_token_sort, citation_part, presentation_before, presentation_after, antecedent_id, information_status, contrast_group, foreign_ids, slashes, alignment_id) ⇒ `Token`

#alignment_id ⇒ `nil`, `Integer` (readonly)

#antecedent_id ⇒ `nil`, `Fixnum` (readonly)

#citation_part ⇒ `nil`, `String` (readonly)

#contrast_group ⇒ `nil`, `String` (readonly)

#empty_token_sort ⇒ `nil`, `String` (readonly)

#foreign_ids ⇒ `nil`, `String` (readonly)

#form ⇒ `nil`, `String` (readonly)

#head_id ⇒ `nil`, `Fixnum` (readonly)

#id ⇒ `Fixnum` (readonly)

#information_status ⇒ `nil`, `String` (readonly)

#lemma ⇒ `nil`, `String` (readonly)

#morphology ⇒ `nil`, `String` (readonly)

#part_of_speech ⇒ `nil`, `String` (readonly) Also known as: pos

#presentation_after ⇒ `nil`, `String` (readonly)

#presentation_before ⇒ `nil`, `String` (readonly)

#relation ⇒ `nil`, `String` (readonly)

#sentence ⇒ `Sentence`

#slashes ⇒ `Array<Array<String,Fixnum>>` (readonly)

#alignment(aligned_source) ⇒ `Token`, `NilClass`

#ancestors ⇒ `Array<Token>`

#citation ⇒ `nil`, `String`

#common_ancestors(other_token, inclusive: false) ⇒ `Array<Token>`

#dependents ⇒ `Array<Token>` Also known as: children

#descendents ⇒ `Array<Token>` Also known as: descendants

#div ⇒ `Div`

#first_common_ancestor(other_token, inclusive: false) ⇒ `nil`, `Token`

#has_citation? ⇒ `true`, `false`

#has_content? ⇒ `true`, `false`

#head ⇒ `Token` Also known as: parent

#is_empty? ⇒ `true`, `false`

#is_root? ⇒ `true`, `false`

#language ⇒ `String`

#morphology_hash ⇒ `Hash<Symbol,String>`

#part_of_speech_hash ⇒ `Hash<Symbol,String>` Also known as: pos_hash

#part_of_speech_with_nulls ⇒ `String` Also known as: pos_with_nulls

#printable_form(custom_token_formatter: nil) ⇒ `String`

#pro? ⇒ `true`, `false`

#source ⇒ `Source`

#treebank ⇒ `Treebank`