Class: TextAlignment::TextAlignment

Inherits:
Object
  • Object
show all
Defined in:
lib/text_alignment/text_alignment.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(reference_text, options = {}) ⇒ TextAlignment

Initialize with a reference text, against which texts will be aligned

Raises:

  • (ArgumentError)


15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/text_alignment/text_alignment.rb', line 15

# Builds an aligner around a fixed reference text.
#
# reference_text - the text every later input is aligned against; must not be nil.
# options        - Hash (or nil) that may carry :duplicate_texts,
#                  :to_ignore_whitespaces and :to_ignore_text_order.
#                  A missing or falsy entry is stored as false.
#
# Raises ArgumentError when reference_text is nil.
def initialize(reference_text, options = {})
	raise ArgumentError, "nil text" if reference_text.nil?

	# An explicit nil passed for options is treated like an empty hash.
	opts = options || {}
	@duplicate_texts       = opts[:duplicate_texts] || false
	@to_ignore_whitespaces = opts[:to_ignore_whitespaces] || false
	@to_ignore_text_order  = opts[:to_ignore_text_order] || false

	# No input text has been aligned yet.
	@original_text = @blocks = nil

	# Keep the raw reference text together with its character-mapped form.
	@original_reference_text = reference_text
	@rtext_mapping = TextAlignment::CharMapping.new(reference_text, nil, @to_ignore_whitespaces)
	@mapped_reference_text = @rtext_mapping.mapped_text

	@cultivation_map = TextAlignment::CultivationMap.new
end

Instance Attribute Details

#block_alignment ⇒ Object (readonly)

Returns the value of attribute block_alignment.



10
11
12
# File 'lib/text_alignment/text_alignment.rb', line 10

# Reader for @block_alignment.
#
# In the code shown, #align assigns this to a Hash with the keys :text,
# :reference_text, :denotations and :blocks. The constructor does not
# assign it, so the value is nil until an alignment has been stored.
def block_alignment
  @block_alignment
end

#lost_annotations ⇒ Object (readonly)

Returns the value of attribute lost_annotations.



12
13
14
# File 'lib/text_alignment/text_alignment.rb', line 12

# Reader for @lost_annotations.
#
# #transform_denotations! and #transform_hdenotations each reset this to an
# empty Array and then append one {source:, target:} Hash for every
# annotation whose span could not be transformed.
def lost_annotations
  @lost_annotations
end

#similarity ⇒ Object (readonly)

Returns the value of attribute similarity.



11
12
13
# File 'lib/text_alignment/text_alignment.rb', line 11

# Reader for @similarity.
#
# NOTE(review): no assignment to @similarity is visible in the methods
# documented here — confirm where (or whether) it is set.
def similarity
  @similarity
end

Instance Method Details

#align(text, denotations = nil) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/text_alignment/text_alignment.rb', line 32

# Aligns +text+ against the reference text given to the constructor.
#
# text        - the input text to align.
# denotations - optional annotations; they are passed through the text
#               mapping and handed to the block finder.
#
# Stores the resulting blocks in @blocks and returns (and stores in
# @block_alignment) a Hash with :text, :reference_text, :denotations and
# :blocks.
def align(text, denotations = nil)
	# The cultivation map is updated first, unless duplicate texts are allowed.
	update_cultivation_map unless @duplicate_texts

	# A new character mapping is built only when the input differs from the
	# text of the previous call; otherwise the previous mapping is reused.
	if !@original_text || @original_text != text
		@original_text = text
		@text_mapping = TextAlignment::CharMapping.new(text, nil, @to_ignore_whitespaces)
	end

	@mapped_text = @text_mapping.mapped_text
	denotations_mapped = @text_mapping.enmap_denotations(denotations)

	# Try a whole-text alignment first; fall back to the block finder when it
	# yields nothing.
	@blocks =
		whole_block_alignment(@mapped_text, @mapped_reference_text, @cultivation_map) ||
		find_block_alignment(@mapped_text, @mapped_reference_text, denotations_mapped, @cultivation_map)

	@block_alignment = {
		text: @original_text,
		reference_text: @original_reference_text,
		denotations: denotations,
		blocks: demap_blocks(@blocks)
	}
end

#alignment_show ⇒ Object



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# File 'lib/text_alignment/text_alignment.rb', line 141

# Renders the stored block alignment (@block_alignment) as a human-readable
# String, one section per block:
#
#   * :block / :term - the common text, with source and target ranges
#   * :empty         - the unmatched source text and, when the block has a
#                      target, the corresponding reference text ("[-]" otherwise)
#   * anything else  - treated as a mixed alignment object responding to
#                      #sdiff; shown as two rows where "_" marks a gap
#
# Newlines are replaced by spaces in BOTH rows of a local mismatch so the two
# rows stay on one line each and remain visually comparable.
#
# Returns the rendered String ("" when there are no blocks).
def alignment_show
	stext = @block_alignment[:text]
	ttext = @block_alignment[:reference_text]

	@block_alignment[:blocks].each_with_object(+'') do |a, show|
		source = a[:source]
		target = a[:target]
		source_span = "[#{source[:begin]} - #{source[:end]}]"
		# Only :empty blocks are expected to lack a target.
		target_span = target ? "[#{target[:begin]} - #{target[:end]}]" : "[-]"

		section = case a[:alignment]
		when :block, :term
			"===== common (#{a[:alignment]}) ===== #{source_span} #{target_span}\n" \
			"#{stext[source[:begin]...source[:end]]}\n\n"
		when :empty
			target_text = target ? "#{ttext[target[:begin]...target[:end]]}\n\n" : "\n"

			"xxxxx disparate texts (similarity: #{a[:similarity]})\n" \
			"<<<<< string 1 #{source_span}\n" \
			"#{stext[source[:begin]...source[:end]]}\n\n" \
			">>>>> string 2 #{target_span}\n" \
			"#{target_text}"
		else
			# The diff positions are relative to the block, hence the offsets.
			changes = a[:alignment].sdiff
			sbase = source[:begin]
			tbase = target[:begin]

			astr1 = changes.map do |c|
				case c.action
				when '=', '-'
					stext[c.old_position + sbase]
				when '+'
					'_'
				when '!'
					stext[c.old_position + sbase] + '_'
				end
			end.join

			astr2 = changes.map do |c|
				case c.action
				when '=', '+'
					ttext[c.new_position + tbase]
				when '-'
					'_'
				when '!'
					'_' + ttext[c.new_position + tbase]
				end
			end.join

			"***** local mismatch #{source_span} #{target_span} (similarity: #{a[:similarity]})\n" \
			"[#{astr1.tr("\n", " ")}]\n" \
			"[#{astr2.tr("\n", " ")}]\n\n"
		end

		show << section
	end
end

#transform_a_span(span) ⇒ Object



99
100
101
# File 'lib/text_alignment/text_alignment.rb', line 99

# Transforms one span ({begin:, end:}) by sending its begin offset through
# #transform_begin_position and its end offset through
# #transform_end_position.
#
# Returns a new Hash with the keys :begin and :end.
def transform_a_span(span)
	new_begin = transform_begin_position(span[:begin])
	new_end = transform_end_position(span[:end])

	{begin: new_begin, end: new_end}
end

#transform_begin_position(_begin_position) ⇒ Object



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/text_alignment/text_alignment.rb', line 55

# Transforms a begin offset of the aligned text into an offset of the
# reference text, using the blocks produced by the last alignment.
#
# _begin_position - begin offset in the original (unmapped) input text.
#
# The offset is first mapped through @text_mapping, located in the first
# block whose source span ends after it, transformed according to that
# block's kind, and finally passed through @rtext_mapping.demap_position.
#
# nil is handed to demap_position when the offset cannot be transformed:
#   * no block contains the offset (e.g. it lies at or beyond the end of the
#     last block),
#   * it falls inside an :empty block at a place other than the block's begin,
#     or the :empty block has no target,
#   * the block's own alignment object reports nil for it.
def transform_begin_position(_begin_position)
	begin_position = @text_mapping.enmap_position(_begin_position)

	block = @blocks.find { |b| b[:source][:end] > begin_position }

	b = if block.nil?
		# Out of range: previously this indexed @blocks with nil and raised TypeError.
		nil
	else
		case block[:alignment]
		when :block, :term
			begin_position + block[:delta]
		when :empty
			# An :empty block may lack a target; only its very beginning is mappable.
			target = block[:target]
			target[:begin] if target && begin_position == block[:source][:begin]
		else
			# Mixed block: delegate to its alignment object, which works with
			# offsets relative to the block.
			r = block[:alignment].transform_begin_position(begin_position - block[:source][:begin])
			r.nil? ? nil : r + block[:target][:begin]
		end
	end

	@rtext_mapping.demap_position(b)
end

#transform_denotations!(denotations) ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/text_alignment/text_alignment.rb', line 107

# Transforms the spans of +denotations+ in place.
#
# Each element must respond to #begin, #end, #begin= and #end=. Its offsets
# are replaced by the values from #transform_begin_position and
# #transform_end_position. When the transformation raises, or the result is
# not a non-empty span inside the reference text, the element is recorded in
# @lost_annotations as {source:, target:} and both of its offsets are set to
# nil (the element itself stays in the collection).
#
# Returns nil for nil input, otherwise the Array of lost annotations.
def transform_denotations!(denotations)
	return nil if denotations.nil?
	@lost_annotations = []

	denotations.each do |d|
		begin
			source = {begin: d.begin, end: d.end}
			d.begin = transform_begin_position(d.begin)
			d.end = transform_end_position(d.end)

			out_of_bounds = d.begin.nil? || d.end.nil? ||
				d.begin < 0 || d.end <= d.begin ||
				d.end > @original_reference_text.length
			raise "invalid transform" if out_of_bounds
		rescue
			# target holds whatever offsets the element carried when the failure
			# happened, which may still be (partly) the untransformed ones.
			@lost_annotations << {source: source, target: {begin: d.begin, end: d.end}}
			d.begin = nil
			d.end = nil
		end
	end

	@lost_annotations
end

#transform_end_position(_end_position) ⇒ Object



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/text_alignment/text_alignment.rb', line 77

# Transforms an end offset of the aligned text into an offset of the
# reference text, using the blocks produced by the last alignment.
#
# _end_position - end offset in the original (unmapped) input text.
#
# The offset is first mapped through @text_mapping, located in the first
# block whose source span ends at or after it, transformed according to that
# block's kind, and finally passed through @rtext_mapping.demap_position.
#
# nil is handed to demap_position when the offset cannot be transformed:
#   * no block contains the offset (it lies beyond the end of the last block),
#   * it falls inside an :empty block at a place other than the block's end,
#     or the :empty block has no target,
#   * the block's own alignment object reports nil for it.
def transform_end_position(_end_position)
	end_position = @text_mapping.enmap_position(_end_position)

	block = @blocks.find { |b| b[:source][:end] >= end_position }

	e = if block.nil?
		# Out of range: previously this indexed @blocks with nil and raised TypeError.
		nil
	else
		case block[:alignment]
		when :block, :term
			end_position + block[:delta]
		when :empty
			# An :empty block may lack a target; only its very end is mappable.
			target = block[:target]
			target[:end] if target && end_position == block[:source][:end]
		else
			# Mixed block: delegate to its alignment object, which works with
			# offsets relative to the block.
			r = block[:alignment].transform_end_position(end_position - block[:source][:begin])
			r.nil? ? nil : r + block[:target][:begin]
		end
	end

	@rtext_mapping.demap_position(e)
end

#transform_hdenotations(hdenotations) ⇒ Object



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/text_alignment/text_alignment.rb', line 125

# Non-destructive counterpart of #transform_denotations! for Hash-style
# denotations.
#
# hdenotations - collection of Hashes, each with a :span entry of the form
#                {begin:, end:}.
#
# Every span goes through #transform_a_span. A denotation whose
# transformation raises, or whose result is not a non-empty span inside the
# reference text, is left out of the result and recorded in
# @lost_annotations as {source:, target:} (target is nil when no transformed
# span was obtained).
#
# Returns nil for nil input, otherwise an Array with copies of the
# successfully transformed denotations carrying the new :span.
def transform_hdenotations(hdenotations)
	return nil if hdenotations.nil?
	@lost_annotations = []

	hdenotations.each_with_object([]) do |d, transformed|
		t = nil
		begin
			t = transform_a_span(d[:span])

			invalid = t[:begin].nil? || t[:end].nil? ||
				t[:begin] < 0 || t[:end] <= t[:begin] ||
				t[:end] > @original_reference_text.length
			raise "invalid transform" if invalid

			transformed << d.merge(span: t)
		rescue
			@lost_annotations << {source: d[:span], target: t}
		end
	end
end

#transform_spans(spans) ⇒ Object



103
104
105
# File 'lib/text_alignment/text_alignment.rb', line 103

# Transforms every span of +spans+ with #transform_a_span.
#
# Returns an Array with one transformed span per input span, in input order.
def transform_spans(spans)
	spans.map(&method(:transform_a_span))
end