Module: Myasorubka::MSD::Russian

Defined in:
lib/myasorubka/msd/russian.rb

Overview

The Russian MULTEXT-East specifications were developed in the scope of an effort to produce a publicly available tagged corpus of Russian; this corpus and accompanying resources are available from corpus.leeds.ac.uk/mocky/. The morphosyntactic specifications and corpus are documented in: Serge Sharoff, Mikhail Kopotev, Tomaž Erjavec, Anna Feldman, Dagmar Divjak. Designing and evaluating Russian tagsets. In Proc. LREC 2008, Marrakech, May, 2008.

nl.ijs.si/ME/V4/msd/html/msd-ru.html

This specification was translated into the Ruby language by [Dmitry Ustalov](eveel.ru).

Constant Summary

NOUN =

Russian Noun.

{
  # One-letter category code.
  code: 'N',
  # Ordered [attribute, { value name => one-letter code }] pairs.
  attrs: [
    [:type, { common: 'c', proper: 'p' }],
    [:gender, { masculine: 'm', feminine: 'f', neuter: 'n', common: 'c' }],
    [:number, { singular: 's', plural: 'p' }],
    [:case, {
      nominative: 'n', genitive: 'g', dative: 'd', accusative: 'a',
      vocative: 'v', locative: 'l', instrumental: 'i'
    }],
    [:animate, { no: 'n', yes: 'y' }],
    [:case2, { partitive: 'p', locative: 'l' }]
  ]
}
VERB =

Russian Verb.

{
  # One-letter category code.
  code: 'V',
  # Ordered [attribute, { value name => one-character code }] pairs.
  attrs: [
    [:type, { main: 'm', auxiliary: 'a' }],
    [:vform, {
      indicative: 'i', imperative: 'm', conditional: 'c',
      infinitive: 'n', participle: 'p', gerund: 'g'
    }],
    [:tense, { present: 'p', future: 'f', past: 's' }],
    [:person, { first: '1', second: '2', third: '3' }],
    [:number, { singular: 's', plural: 'p' }],
    [:gender, { masculine: 'm', feminine: 'f', neuter: 'n' }],
    [:voice, { active: 'a', passive: 'p', medial: 'm' }],
    [:definiteness, { short_art: 's', full_art: 'f' }],
    [:aspect, { progressive: 'p', perfective: 'e', biaspectual: 'b' }],
    [:case, {
      nominative: 'n', genitive: 'g', dative: 'd',
      accusative: 'a', locative: 'l', instrumental: 'i'
    }]
  ]
}
ADJECTIVE =

Russian Adjective.

{
  # One-letter category code.
  code: 'A',
  # Ordered [attribute, { value name => one-letter code }] pairs.
  attrs: [
    [:type, { qualificative: 'f', possessive: 's' }],
    [:degree, { positive: 'p', comparative: 'c', superlative: 's' }],
    [:gender, { masculine: 'm', feminine: 'f', neuter: 'n' }],
    [:number, { singular: 's', plural: 'p' }],
    [:case, {
      nominative: 'n', genitive: 'g', dative: 'd',
      accusative: 'a', locative: 'l', instrumental: 'i'
    }],
    [:definiteness, { short_art: 's', full_art: 'f' }]
  ]
}
PRONOUN =

Russian Pronoun.

{
  # One-letter category code.
  code: 'P',
  # Ordered [attribute, { value name => one-character code }] pairs.
  attrs: [
    [:type, {
      personal: 'p', demonstrative: 'd', indefinite: 'i',
      possessive: 's', interrogative: 'q', relative: 'r',
      reflexive: 'x', negative: 'z', nonspecific: 'n'
    }],
    [:person, { first: '1', second: '2', third: '3' }],
    [:gender, { masculine: 'm', feminine: 'f', neuter: 'n' }],
    [:number, { singular: 's', plural: 'p' }],
    [:case, {
      nominative: 'n', genitive: 'g', dative: 'd', accusative: 'a',
      vocative: 'v', locative: 'l', instrumental: 'i'
    }],
    [:syntactic_type, { nominal: 'n', adjectival: 'a', adverbial: 'r' }],
    [:animate, { no: 'n', yes: 'y' }]
  ]
}
ADVERB =

Russian Adverb.

{
  # One-letter category code.
  code: 'R',
  # Single [attribute, { value name => one-letter code }] pair.
  attrs: [
    [:degree, { positive: 'p', comparative: 'c', superlative: 's' }]
  ]
}
ADPOSITION =

Russian Adposition.

{
  # One-letter category code.
  code: 'S',
  # Ordered [attribute, { value name => one-letter code }] pairs.
  attrs: [
    [:type, { preposition: 'p' }],
    [:formation, { simple: 's', compound: 'c' }],
    [:case, {
      nominative: 'n', genitive: 'g', dative: 'd',
      accusative: 'a', locative: 'l', instrumental: 'i'
    }]
  ]
}
CONJUNCTION =

Russian Conjunction.

{
  # One-letter category code.
  code: 'C',
  # Ordered [attribute, { value name => one-letter code }] pairs.
  attrs: [
    [:type, { coordinating: 'c', subordinating: 's' }],
    [:formation, { simple: 's', compound: 'c' }],
    [:coord_type, { sentence: 'p', words: 'w' }],
    [:sub_type, { negative: 'z', positive: 'p' }]
  ]
}
NUMERAL =

Russian Numeral.

{
  # One-letter category code.
  code: 'M',
  # Ordered [attribute, { value name => one-letter code }] pairs.
  attrs: [
    [:type, { cardinal: 'c', ordinal: 'o', multiple: 'm', collect: 'l' }],
    [:gender, { masculine: 'm', feminine: 'f', neuter: 'n' }],
    [:number, { singular: 's', plural: 'p' }],
    [:case, {
      nominative: 'n', genitive: 'g', dative: 'd',
      accusative: 'a', locative: 'l', instrumental: 'i'
    }],
    [:form, { digit: 'd', roman: 'r', letter: 'l' }],
    # Three value-less placeholder entries precede :animate.
    # NOTE(review): presumably they reserve unused slots so :animate keeps
    # its position in the tag; confirm against the specification.
    [:blank, {}],
    [:blank, {}],
    [:blank, {}],
    [:animate, { no: 'n', yes: 'y' }]
  ]
}
PARTICLE =

Russian Particle.

{
  # One-letter category code.
  code: 'Q',
  # Single [attribute, { value name => one-letter code }] pair.
  attrs: [
    [:formation, { simple: 's', compound: 'c' }]
  ]
}
INTERJECTION =

Russian Interjection.

{
  # One-letter category code.
  code: 'I',
  # Single [attribute, { value name => one-letter code }] pair.
  attrs: [
    [:formation, { simple: 's', compound: 'c' }]
  ]
}
ABBREVIATION =

Russian Abbreviation.

{
  # One-letter category code.
  code: 'Y',
  # Ordered [attribute, { value name => one-letter code }] pairs.
  attrs: [
    [:syntactic_type, { nominal: 'n', adverbial: 'r' }],
    [:gender, { masculine: 'm', feminine: 'f', neuter: 'n' }],
    [:number, { singular: 's', plural: 'p' }],
    [:case, {
      nominative: 'n', genitive: 'g', dative: 'd',
      accusative: 'a', locative: 'l', instrumental: 'i'
    }]
  ]
}
RESIDUAL =

Russian Residual.

# Category code 'X' with an empty attribute list.
{ code: 'X', attrs: [] }
CRUTCH =

Russian Crutch.

Some AOT definitions are written for the meta part of speech `*`, so we have to implement it.

{
  # Single-character code of the meta part of speech.
  code: '*',
  # Ordered [attribute, { value name => one-character code }] pairs.
  attrs: [
    [:gender, { masculine: 'm', feminine: 'f', neuter: 'n', common: 'c' }],
    [:animate, { no: 'n', yes: 'y' }],
    [:number, { singular: 's', plural: 'p' }],
    [:case, {
      nominative: 'n', genitive: 'g', dative: 'd', accusative: 'a',
      vocative: 'v', locative: 'l', instrumental: 'i'
    }],
    [:case2, { partitive: 'p', locative: 'l' }],
    [:aspect, { progressive: 'p', perfective: 'e', biaspectual: 'b' }],
    [:voice, { active: 'a', passive: 'p', medial: 'm' }],
    [:tense, { present: 'p', future: 'f', past: 's' }],
    [:person, { first: '1', second: '2', third: '3' }],
    [:definiteness, { short_art: 's', full_art: 'f' }],
    [:degree, { positive: 'p', comparative: 'c', superlative: 's' }]
  ]
}
CATEGORIES =

Actual part-of-speech mapping.

{
  # Symbolic part-of-speech name => the descriptor constant of that category.
  noun: NOUN, verb: VERB, adjective: ADJECTIVE,
  pronoun: PRONOUN, adverb: ADVERB, adposition: ADPOSITION,
  conjunction: CONJUNCTION, numeral: NUMERAL, particle: PARTICLE,
  interjection: INTERJECTION, abbreviation: ABBREVIATION, residual: RESIDUAL,
  crutch: CRUTCH
}