Class: Opener::KAF::Document

Inherits:
Object
  • Object
show all
Defined in:
lib/opener/kaf/document.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(xml) ⇒ Document

Returns a new instance of Document.



12
13
14
# File 'lib/opener/kaf/document.rb', line 12

# Wraps an already-parsed XML document.
#
# @param xml [Object] stored as-is in @document; presumably a
#   Nokogiri::XML::Document, which is what .from_xml passes — verify for
#   other callers
def initialize xml
  @document = xml
end

Instance Attribute Details

#document ⇒ Object (readonly)

Returns the value of attribute document.



7
8
9
# File 'lib/opener/kaf/document.rb', line 7

# Reader for the wrapped XML document.
#
# @return [Object] the value given to the constructor, unchanged
def document
  @document
end

#lexicons ⇒ Object (readonly)

Returns the value of attribute lexicons.



8
9
10
# File 'lib/opener/kaf/document.rb', line 8

# Reader for @lexicons.
#
# NOTE(review): none of the methods shown for this class assign @lexicons,
# so this may return nil unless it is set elsewhere — confirm.
#
# @return [Object, nil] the current value of @lexicons
def lexicons
  @lexicons
end

#map ⇒ Object

Returns the value of attribute map.



10
11
12
# File 'lib/opener/kaf/document.rb', line 10

# Reader for @map.
#
# @return [Object, nil] the current value of @map
def map
  @map
end

Class Method Details

.from_xml(xml) ⇒ Object



16
17
18
# File 'lib/opener/kaf/document.rb', line 16

# Builds a Document from a raw XML string.
#
# @param xml [String] XML source handed to Nokogiri::XML for parsing
# @return [Document] a new instance wrapping the parsed document
def self.from_xml xml
  parsed = Nokogiri::XML(xml)
  new(parsed)
end

Instance Method Details

#add_linguistic_processor(name, version, layer, timestamp: false) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/opener/kaf/document.rb', line 36

# Records a linguistic processor entry (<lp>) in the KAF header.
#
# Looks up the <kafHeader> element and the <linguisticProcessors> group whose
# "layer" attribute equals +layer+, creating either one when it is not found,
# then appends an <lp> element carrying timestamp, version and name.
#
# @param name [Object] value for the "name" attribute
# @param version [Object] value for the "version" attribute
# @param layer [Object] layer whose processor group receives the entry
# @param timestamp [Boolean] when truthy the current time in ISO 8601 form is
#   written; otherwise the placeholder '*' is used
# @return [Object] whatever add_child returned for the new <lp> element
def add_linguistic_processor name, version, layer, timestamp: false
  header = @document.at('kafHeader')
  header ||= @document.root.add_child('<kafHeader/>').first

  group = header.css('linguisticProcessors').find { |node| node.attr(:layer) == layer }
  group = header.add_child("<linguisticProcessors layer='#{layer}'/>").first unless group

  entry = group.add_child('<lp/>')
  stamp = timestamp ? Time.now.iso8601 : '*'
  entry.attr(timestamp: stamp, version: version, name: name)
  entry
end

#add_term(params) ⇒ Object



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/opener/kaf/document.rb', line 64

# Appends a <term> element to the <terms> layer, creating the layer under the
# document root when no <terms> element is found.
#
# The term gets tid, type, lemma, text, pos, morphofeat, head and xpos
# attributes; lemma falls back to the surface text when params.lemma is
# nil/false. A <span><target/></span> child then links the term to the word
# form with id "w<wid>".
#
# @param params [#tid, #type, #lemma, #text, #pos, #morphofeat, #head, #xpos, #wid]
# @return [Object] whatever add_child returned for the <span> child
def add_term params
  layer = @document.at('terms')
  layer ||= @document.root.add_child('<terms/>').first

  added = layer.add_child("<term/>")
  added.attr(
    tid:        "t#{params.tid}",
    type:       params.type,
    lemma:      params.lemma || params.text,
    text:       params.text,
    pos:        params.pos,
    morphofeat: params.morphofeat,
    head:       params.head,
    xpos:       params.xpos.to_s
  )

  term_node = added.first
  term_node.add_child("<span><target id='w#{params.wid}'/></span>")
end

#add_word_form(params) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/opener/kaf/document.rb', line 49

# Appends a <wf> (word form) element to the <text> layer, creating the layer
# under the document root when no <text> element is found.
#
# The token text is assigned through the node's content= writer instead of
# being interpolated into the markup string, so tokens containing "&", "<"
# or ">" are stored as text rather than being parsed as XML.
#
# @param params [#text, #wid, #sid, #para, #offset, #length, #head, #xpos]
#   source of the token text and of the wid, sent, para, offset, length,
#   head and xpos attributes
# @return [Object] the result of calling attr on what add_child returned
#   (with Nokogiri this is the node set holding the new <wf> — confirm)
def add_word_form params
  text = @document.at('text') || @document.root.add_child('<text/>').first
  wf   = text.add_child('<wf/>')
  # Set the token as node content so special characters are escaped.
  wf.first.content = params.text
  attrs = {
    wid:    "w#{params.wid}",
    sent:   params.sid,
    para:   params.para,
    offset: params.offset,
    length: params.length,
    head:   params.head,
    xpos:   params.xpos.to_s
  }
  wf.attr attrs
end

#language ⇒ Object



20
21
22
# File 'lib/opener/kaf/document.rb', line 20

# Reads the xml:lang attribute of the KAF root element.
#
# The value is cached in @language once it is truthy, so the document is
# queried again only while the cached value is nil/false.
#
# @return [Object] the value of the 'xml:lang' attribute
def language
  return @language if @language

  root = @document.at_xpath('KAF')
  @language = root.attr('xml:lang')
end

#raw ⇒ Object



32
33
34
# File 'lib/opener/kaf/document.rb', line 32

# Returns the text of the first element matched by 'raw'.
#
# Raises NoMethodError when the lookup returns nil, since the lookup result
# is not checked before calling text on it.
#
# @return [Object] the result of calling text on the matched element
def raw
  raw_node = @document.at('raw')
  raw_node.text
end

#terms ⇒ Object



24
25
26
# File 'lib/opener/kaf/document.rb', line 24

# Returns the collection built from the 'KAF/terms/term' path and the Term
# class, caching it in @terms once it is truthy.
#
# @return [Object] whatever the collection helper returns for the term nodes
def terms
  return @terms if @terms

  @terms = collection('KAF/terms/term', Term)
end

#texts ⇒ Object



28
29
30
# File 'lib/opener/kaf/document.rb', line 28

# Returns the collection of word forms (<wf>) wrapped in Text, caching it in
# @texts once it is truthy.
#
# The path targets the <text> layer, which is the element #add_word_form
# appends <wf> nodes to. The earlier 'KAF/texts/wf' path named a <texts>
# layer that no method shown for this class creates.
# NOTE(review): assumes the KAF word-form layer is named <text> — confirm
# against the KAF schema used by callers.
#
# @return [Object] whatever the collection helper returns for the wf nodes
def texts
  @texts ||= collection 'KAF/text/wf', Text
end

#to_xml ⇒ Object



81
82
83
# File 'lib/opener/kaf/document.rb', line 81

# Serializes the wrapped document by delegating to its to_xml with an
# indent of 2.
#
# @return [Object] the serializer's result (an XML string with Nokogiri)
def to_xml
  @document.to_xml(indent: 2)
end