5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
# File 'lib/proiel/cli/converters/proielxml.rb', line 5
# Serialises a treebank as PROIEL XML (schema version 2.1), writing to STDOUT.
#
# The root +proiel+ element contains one +annotation+ element describing the
# tag sets of the annotation schema, followed by one +source+ element per
# source with nested +div+, +sentence+ and +token+ elements.
#
# @param tb the treebank; must respond to +annotation_schema+ and +sources+
# @param options [Hash] string-keyed switches. Keys read here are
#   'remove-alignments', 'remove-status', 'remove-annotator',
#   'remove-reviewer', 'remove-morphology', 'remove-syntax' and
#   'remove-information-structure'. The same hash is handed on to
#   +include_div?+ and +include_sentence?+.
# @return whatever the builder returns for the root +proiel+ element
def process(tb, options)
  builder = Builder::XmlMarkup.new(target: STDOUT, indent: 2)
  builder.instruct! :xml, version: '1.0', encoding: 'UTF-8'
  builder.proiel('export-time' => DateTime.now.xmlschema, 'schema-version' => '2.1') do
    emit_annotation_schema(builder, tb.annotation_schema)
    tb.sources.each { |source| emit_source(builder, source, options) }
  end
end

# Writes the +annotation+ element: relations, parts of speech, morphology
# fields and information statuses, in that order.
def emit_annotation_schema(builder, schema)
  builder.annotation do
    builder.relations do
      emit_tag_values(builder, schema.relation_tags, %i(summary primary secondary))
    end
    builder.tag! 'parts-of-speech' do
      emit_tag_values(builder, schema.part_of_speech_tags, %i(summary))
    end
    builder.morphology do
      schema.morphology_tags.each do |field_tag, field_values|
        builder.field(tag: field_tag) do
          emit_tag_values(builder, field_values, %i(summary))
        end
      end
    end
    builder.tag! 'information-statuses' do
      emit_tag_values(builder, schema.information_status_tags, %i(summary))
    end
  end
end

# Writes one +value+ element per (tag, definition) pair, carrying the tag
# itself plus the requested features of its definition.
def emit_tag_values(builder, tags, features)
  tags.each do |tag, value|
    builder.value({ tag: tag }.merge(grab_features(value, features)))
  end
end

# Writes a +source+ element with its metadata children and its included divs.
def emit_source(builder, source, options)
  optional = options['remove-alignments'] ? [] : %i(alignment_id)
  builder.source(grab_features(source, %i(id language), optional)) do
    PROIEL::Treebank::METADATA_ELEMENTS.each do |field|
      value = source.send(field)
      # Element names use hyphens where the accessor names use underscores.
      builder.tag!(field.to_s.tr('_', '-'), value) if value
    end
    source.divs.each do |div|
      emit_div(builder, div, options) if include_div?(div, options)
    end
  end
end

# Writes a +div+ element with its optional title and its included sentences.
def emit_div(builder, div, options)
  optional = %i(presentation_before presentation_after)
  optional += %i(alignment_id) unless options['remove-alignments']
  builder.div(grab_features(div, [], optional)) do
    title = div.title
    builder.title title if title
    div.sentences.each do |sentence|
      emit_sentence(builder, sentence, options) if include_sentence?(sentence, options)
    end
  end
end

# Writes a +sentence+ element and its tokens.
def emit_sentence(builder, sentence, options)
  # The feature order below is the order in which grab_features receives them;
  # keep it stable so the generated attributes do not move around.
  optional = []
  optional += %i(status) unless options['remove-status']
  optional += %i(presentation_before presentation_after)
  optional += %i(alignment_id) unless options['remove-alignments']
  optional += %i(annotated_at) unless options['remove-annotator']
  optional += %i(reviewed_at) unless options['remove-reviewer']
  optional += %i(annotated_by) unless options['remove-annotator']
  optional += %i(reviewed_by) unless options['remove-reviewer']
  builder.sentence(grab_features(sentence, %i(id), optional)) do
    sentence.tokens.each { |token| emit_token(builder, token, options) }
  end
end

# Writes a +token+ element (with +slash+ children when applicable), or nothing
# at all when the options filter the token out.
def emit_token(builder, token, options)
  remove_syntax = options['remove-syntax']
  remove_info_structure = options['remove-information-structure']

  # Empty tokens of sort 'P' go away with information structure, those of
  # sort 'C' and 'V' go away with syntax.
  sort = token.empty_token_sort
  return if sort == 'P' && remove_info_structure
  return if (sort == 'C' || sort == 'V') && remove_syntax

  empty = token.is_empty?

  optional = %i(citation_part)
  optional += %i(lemma part_of_speech morphology) unless options['remove-morphology']
  optional += %i(head_id relation) unless remove_syntax
  optional += %i(antecedent_id information_status contrast_group) unless remove_info_structure
  if empty
    mandatory = %i(id empty_token_sort)
  else
    mandatory = %i(id form)
    optional += %i(presentation_before presentation_after foreign_ids)
  end
  optional += %i(alignment_id) unless options['remove-alignments']

  attrs = grab_features(token, mandatory, optional)

  if remove_syntax
    # No slashes are written without syntax, and any remaining empty token is dropped.
    builder.token(attrs) unless empty
  elsif token.slashes.empty?
    # Block-less call so that no open/close tag pair with empty content is produced.
    builder.token(attrs)
  else
    builder.token(attrs) do
      token.slashes.each do |relation, target_id|
        builder.slash(:"target-id" => target_id, relation: relation)
      end
    end
  end
end

private :emit_annotation_schema, :emit_tag_values, :emit_source,
        :emit_div, :emit_sentence, :emit_token
|