Class: TxtTmImporter::Tm

Inherits:
Object
  • Object
show all
Defined in:
lib/txt_tm_importer.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file_path:, **args) ⇒ Tm

Returns a new instance of Tm.



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/txt_tm_importer.rb', line 9

# Reads the file at +file_path+, settles on a text encoding and keeps a
# UTF-8 copy of the content in @text for the import/stats methods.
#
# @param file_path [String] path of the file to read
# @param args [Hash] optional settings; only +:encoding+ is read here
# @option args [String, Symbol] :encoding encoding name (case-insensitive);
#   when omitted the encoding is detected from the file content
# @raise [RuntimeError] if the supplied encoding name is blank
# @raise [SystemCallError] if the file cannot be read
def initialize(file_path:, **args)
  @file_path = file_path
  # File.read opens and closes the file itself. Wrapping the path in
  # Kernel#open left an extra handle open and let a path starting with "|"
  # be interpreted as a command to run.
  @content = File.read(@file_path)
  if args[:encoding].nil?
    # Detection only samples the leading part of the content.
    detection = CharlockHolmes::EncodingDetector.detect(@content[0..100_000])
    # NOTE(review): guards against the detector returning nil instead of a
    # hash -- confirm against charlock_holmes. Either way an undetected
    # encoding falls back to UTF-16LE.
    @encoding = (detection && detection[:encoding]) || 'UTF-16LE'
  else
    # to_s lets callers pass a Symbol as well as a String.
    @encoding = args[:encoding].to_s.upcase
  end
  raise "Encoding type could not be determined. Please set an encoding of UTF-8, UTF-16LE, or UTF-16BE" if @encoding.empty?
  # Accumulator filled in by the import/stats methods.
  @doc = {
    source_language: "",
    tu: { id: "", counter: 0, vals: [] },
    seg: { counter: 0, vals: [] },
    language_pairs: []
  }
  # Content declared or detected as UTF-8 is used as-is; anything else is
  # converted to UTF-8.
  @text =
    if @encoding.eql?('UTF-8')
      @content
    else
      CharlockHolmes::Converter.convert(@content, @encoding, 'UTF-8')
    end
end

Instance Attribute Details

#encoding ⇒ Object (readonly)

Returns the value of attribute encoding.



8
9
10
# File 'lib/txt_tm_importer.rb', line 8

# Returns the encoding name settled on by the constructor: the
# caller-supplied :encoding upper-cased, or the detected encoding, with
# 'UTF-16LE' as the fallback when detection yields nothing.
def encoding
  @encoding
end

#file_path ⇒ Object (readonly)

Returns the value of attribute file_path.



8
9
10
# File 'lib/txt_tm_importer.rb', line 8

# Returns the file_path value that was passed to the constructor.
def file_path
  @file_path
end

Instance Method Details

#import ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/txt_tm_importer.rb', line 49

# Runs the importer that matches the file's format and returns the
# collected values as a two-element array: [tu values, seg values].
#
# Wordfast files are handled first; otherwise the text must contain the
# '<RTF Preamble>' marker to be imported as a TWB file.
#
# @return [Array] the :vals entries of @doc[:tu] and @doc[:seg], in that order
# @raise [RuntimeError] when neither format matches
def import
  if wordfast?
    import_wordfast_file
  elsif @text.include?('<RTF Preamble>')
    import_twb_file
  else
    raise "File type not recognized"
  end
  @doc.values_at(:tu, :seg).map { |section| section[:vals] }
end

#stats ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/txt_tm_importer.rb', line 32

# Runs the stats routine that matches the file's format and reports the
# totals gathered in @doc.
#
# Wordfast files are handled first; otherwise the text must contain the
# '<RTF Preamble>' marker to be treated as a TWB export.
#
# @return [Hash] :tu_count, :seg_count and the de-duplicated :language_pairs
# @raise [RuntimeError] when neither format matches, or when both counters
#   are 0 and no language pairs were collected
def stats
  if wordfast?
    wordfast_stats
  elsif @text.include?('<RTF Preamble>')
    twb_export_file_stats
  else
    raise "File type not recognized"
  end

  tu_total = @doc[:tu][:counter]
  seg_total = @doc[:seg][:counter]
  pairs = @doc[:language_pairs].uniq

  # Nothing counted at all is reported the same way as an unknown format.
  nothing_found = tu_total.eql?(0) && seg_total.eql?(0) && pairs.empty?
  raise "File type not recognized" if nothing_found

  { tu_count: tu_total, seg_count: seg_total, language_pairs: pairs }
end