Class: TxmlImporter::Txml
- Inherits:
-
Object
- Object
- TxmlImporter::Txml
- Defined in:
- lib/txml_importer.rb
Instance Attribute Summary collapse
-
#encoding ⇒ Object
readonly
Returns the value of attribute encoding.
-
#file_path ⇒ Object
readonly
Returns the value of attribute file_path.
Instance Method Summary collapse
- #import ⇒ Object
-
#initialize(file_path:, **args) ⇒ Txml
constructor
A new instance of Txml.
- #stats ⇒ Object
Constructor Details
#initialize(file_path:, **args) ⇒ Txml
Returns a new instance of Txml.
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/txml_importer.rb', line 10
#
# Prepares an importer for the TXML file at +file_path+.
#
# The encoding is taken from the +:encoding+ option when one is given
# (upcased). Otherwise it is detected with CharlockHolmes from the first
# ~100 KB of the file and, if that yields nothing, from the
# +encoding="..."+ declaration found in the file itself.
#
# When the resulting encoding is not UTF-8 the file content is converted
# to UTF-8 and kept in +@text+.
#
# @param file_path [String] path of the file to import
# @param args [Hash] optional settings; only +:encoding+ is read here
# @raise [RuntimeError] if no encoding could be determined, or if it is not
#   one of UTF-8, UTF-16LE or UTF-16BE
def initialize(file_path:, **args)
  @file_path = file_path
  requested = args[:encoding]

  # Read by path so that no extra file handle is left open. The content is
  # not needed when the caller already declared the file to be UTF-8.
  @content = File.read(@file_path) unless requested.eql?('UTF-8')

  @encoding =
    if requested.nil?
      detection = CharlockHolmes::EncodingDetector.detect(@content[0..100_000])
      (detection && detection[:encoding]) || declared_xml_encoding(@content)
    else
      requested.upcase
    end

  @doc = {
    source_language: "",
    tu: { id: "", counter: 0, vals: [] },
    seg: { counter: 0, vals: [] },
    language_pairs: []
  }

  raise "Encoding type could not be determined. Please set an encoding of UTF-8, UTF-16LE, or UTF-16BE" if @encoding.nil?
  raise "Encoding type not supported. Please choose an encoding of UTF-8, UTF-16LE, or UTF-16BE" unless %w[UTF-8 UTF-16LE UTF-16BE].include?(@encoding)

  @text = CharlockHolmes::Converter.convert(@content, @encoding, 'UTF-8') unless @encoding.eql?('UTF-8')
end

# Reads the encoding named in the file's own +encoding="..."+ declaration.
#
# Returns 'UTF-8' or 'UTF-16LE' (a declared UTF-16 is mapped to
# little-endian), or nil when there is no declaration or it names anything else.
private def declared_xml_encoding(content)
  # Non-bang scrub/delete always return a string; gsub! would return nil
  # for content without NUL bytes and break the chain.
  cleaned = content.dup.force_encoding('utf-8').scrub('*').delete("\0")
  # [^"]* stops at the closing quote of the attribute itself, so further
  # attributes on the same line do not leak into the captured value.
  declared = cleaned[/(?<=encoding=")[^"]*(?=")/]
  case declared && declared.upcase
  when 'UTF-8' then 'UTF-8'
  when 'UTF-16' then 'UTF-16LE'
  end
end
Instance Attribute Details
#encoding ⇒ Object (readonly)
Returns the value of attribute encoding.
9 10 11 |
# File 'lib/txml_importer.rb', line 9
#
# Read-only accessor for the encoding stored in +@encoding+.
#
# @return [Object] the current value of +@encoding+
def encoding
  @encoding
end
#file_path ⇒ Object (readonly)
Returns the value of attribute file_path.
9 10 11 |
# File 'lib/txml_importer.rb', line 9
#
# Read-only accessor for the path stored in +@file_path+.
#
# @return [Object] the current value of +@file_path+
def file_path
  @file_path
end
Instance Method Details
#import ⇒ Object
46 47 48 49 50 |
# File 'lib/txml_importer.rb', line 46
#
# Reads the file, parses it, and returns what was collected in +@doc+.
#
# @return [Array] a two-element array: the +:vals+ of +@doc[:tu]+ followed
#   by the +:vals+ of +@doc[:seg]+
def import
  parse_file(read_file)
  @doc.values_at(:tu, :seg).map { |section| section[:vals] }
end
#stats ⇒ Object
37 38 39 40 41 42 43 44 |
# File 'lib/txml_importer.rb', line 37
#
# Runs the analysis matching the file's encoding and reports the counters
# held in +@doc+.
#
# @return [Hash] +:tu_count+, +:seg_count+ and the de-duplicated
#   +:language_pairs+
def stats
  # Anything that is not exactly 'UTF-8' goes through the UTF-16 analysis.
  encoding.eql?('UTF-8') ? analyze_stats_utf_8 : analyze_stats_utf_16

  {
    tu_count: @doc[:tu][:counter],
    seg_count: @doc[:seg][:counter],
    language_pairs: @doc[:language_pairs].uniq
  }
end