Class: File

Inherits:
Object
  • Object
show all
Defined in:
lib/langa/file.rb

Overview

Extends class File to convert characters from different codepages into a Unicode stream.

You can specify any codepage that is listed in the directory
#{LANGA}/unicode/mappings (omit the '.txt' extension).
Grab the latest versions from http://www.unicode.org/Public/MAPPINGS

Instance Method Summary collapse

Constructor Details

#initialize(*par) ⇒ File

Specify a codepage with the new or open method. If no codepage is specified, UTF-8 will be assumed.

file = File.open([filename, codepage], ...)


42
43
44
45
46
47
48
49
# File 'lib/langa/file.rb', line 42

# Creates the file object, optionally tagged with a source codepage.
#
# The first argument is either a plain filename or a two-element
# [filename, codepage] array; without a codepage, 'utf-8' is assumed.
# All arguments (with the array replaced by the bare filename) are then
# handed to the original File#initialize, kept as old_initialize.
#
# Raises RuntimeError when a non-UTF-8 codepage is combined with a string
# mode containing 'w' or 'a'.
def initialize(*par)
  self.codepage = 'utf-8'
  # [filename, codepage]: the codepage goes to the setter, the filename
  # stays in par[0] for the original constructor.
  self.codepage, par[0] = par[0].reverse if par[0].is_a?(Array)
  mode = par[1]
  # The restriction only concerns codepage conversion, so plain UTF-8 files
  # (for which no code map is loaded) may still be opened for writing.
  # Non-string modes (integer flags, option hashes) are passed through
  # untouched instead of being pattern-matched.
  if !utf8? && mode.is_a?(String) && mode =~ /[wa]/
    raise "mode not supported with codepage conversion"
  end
  old_initialize(*par)
end

Instance Method Details

#codepage=(cp) ⇒ Object

Specify a codepage after open and before processing the file.

file.codepage = '8859-4'


53
54
55
56
57
58
59
60
61
62
63
# File 'lib/langa/file.rb', line 53

# Sets the source codepage and, for anything other than UTF-8, loads the
# code map from unicode/mappings/<cp>.txt, looked up two directories above
# this source file.
#
# cp - codepage name, i.e. the mapping file name without the '.txt'
#      extension.
#
# Every line that starts with two hex fields separated by one whitespace
# character contributes one entry: @code_map[first] = second. Entries not
# mentioned in the file keep the initial value 0.
def codepage=(cp)
  @codepage = cp
  unless utf8?
    @code_map = Array.new(256, 0)
    maps = File.join(File.dirname(__FILE__), '..', '..', 'unicode', 'mappings')
    # Block form of File.open closes the mapping file after reading it.
    File.open(File.join(maps, "#{cp}.txt")) do |mapping|
      mapping.each_line do |line|
        # String#hex accepts the optional "0x" prefix the pattern lets through.
        if line.downcase =~ %r|^([0-9a-fx]+)\s([0-9a-fx]+)|
          @code_map[$1.hex] = $2.hex
        end
      end
    end
  end
end

#each_unicode ⇒ Object

Walk through a file step by step for each Unicode character.

file.each_unicode { |unicode| ... }


67
68
69
70
71
72
73
# File 'lib/langa/file.rb', line 67

# Reads all lines of the file, transcodes each one and yields every
# resulting element to the given block, in order.
def each_unicode
  lines = readlines
  lines.each do |text|
    converted = transcode(text)
    converted.each { |unicode| yield unicode }
  end
end

#old_initialize ⇒ Object



37
# File 'lib/langa/file.rb', line 37

# Keep the original constructor reachable as old_initialize so that the
# codepage-aware initialize can delegate to it.
alias old_initialize initialize

#utf8? ⇒ Boolean

True, if codepage of file is UTF-8

file.utf8? -> true

Returns:

  • (Boolean)


77
78
79
# File 'lib/langa/file.rb', line 77

# True when the codepage set for this file names UTF-8.
#
# Both spellings 'utf-8' and 'utf8' are accepted in any letter case, so the
# canonical name 'UTF-8' is not mistaken for a mapping-file codepage.
# An unset (nil) codepage is not UTF-8.
def utf8?
  %w[utf-8 utf8].include?(@codepage.to_s.downcase)
end