Class: PDF::Reader::Encoding
- Inherits:
-
Object
- Object
- PDF::Reader::Encoding
- Defined in:
- lib/pdf/reader/encoding.rb
Overview
:nodoc:
Constant Summary collapse
- CONTROL_CHARS =
[0,1,2,3,4,5,6,7,8,11,12,14,15,16,17,18,19,20,21,22,23, 24,25,26,27,28,29,30,31]
- UNKNOWN_CHAR =
▯
0x25AF
Instance Attribute Summary collapse
-
#unpack ⇒ Object
readonly
Returns the value of attribute unpack.
Instance Method Summary collapse
- #differences ⇒ Object
-
#differences=(diff) ⇒ Object
Set the differences table for this encoding.
-
#initialize(enc) ⇒ Encoding
constructor
A new instance of Encoding.
- #to_unicode_required? ⇒ Boolean
-
#to_utf8(str, tounicode = nil) ⇒ Object
Convert the specified string to UTF-8.
Constructor Details
#initialize(enc) ⇒ Encoding
Returns a new instance of Encoding.
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# File 'lib/pdf/reader/encoding.rb', line 34
#
# Builds an encoding from either an encoding name or an encoding dictionary.
#
# @param enc [Hash, #to_sym, nil] a Hash is read for its :Differences entry
#   (applied through #differences= when present) and for its :Encoding or,
#   failing that, :BaseEncoding entry; any other non-nil value is converted
#   with #to_sym; nil is passed through unchanged.
def initialize(enc)
  if enc.kind_of?(Hash)
    diff_table = enc[:Differences]
    self.differences = diff_table if diff_table
    base = enc[:Encoding] || enc[:BaseEncoding]
  else
    base = enc.nil? ? nil : enc.to_sym
  end

  # Everything below is keyed on the resolved encoding name.
  @to_unicode_required = unicode_required?(base)
  @unpack = get_unpack(base)
  @map_file = get_mapping_file(base)
  load_mapping(@map_file) if @map_file
end
Instance Attribute Details
#unpack ⇒ Object (readonly)
Returns the value of attribute unpack.
32 33 34 |
# File 'lib/pdf/reader/encoding.rb', line 32
#
# Read-only accessor for the value held in @unpack.
#
# @return [Object] the current value of @unpack (nil when it was never set)
def unpack
  @unpack
end
Instance Method Details
#differences ⇒ Object
79 80 81 |
# File 'lib/pdf/reader/encoding.rb', line 79
#
# Returns the differences table, creating and remembering an empty Hash the
# first time it is requested if none has been assigned.
#
# @return [Hash] the table stored in @differences
def differences
  return @differences if @differences

  @differences = {}
end
#differences=(diff) ⇒ Object
Set the differences table for this encoding. The value should be an array in the following format:
[25, :A, 26, :B]
The array alternates between a decimal byte number and a glyph name to map to that byte.
To save space, consecutive glyph names are assigned to consecutive byte numbers, so the following array is also valid and equivalent to the previous one:
[25, :A, :B]
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/pdf/reader/encoding.rb', line 63
#
# Replaces the differences table with one built from +diff+.
#
# Numeric entries move the current byte position; every other entry is
# stored at the current position, which then advances by one. Entries that
# appear before any number are stored starting at byte 0.
#
# @param diff [Array] e.g. [25, :A, 26, :B] or the equivalent [25, :A, :B]
# @return [Hash] the rebuilt table, mapping byte numbers to entries
# @raise [ArgumentError] if +diff+ is not an Array
def differences=(diff)
  raise ArgumentError, "diff must be an array" unless diff.kind_of?(Array)

  # Reset first so the table is rebuilt from scratch on every assignment.
  @differences = {}
  position = 0
  diff.each do |entry|
    case entry
    when Numeric
      position = entry.to_i
    else
      @differences[position] = entry
      position += 1
    end
  end
  @differences
end
#to_unicode_required? ⇒ Boolean
50 51 52 |
# File 'lib/pdf/reader/encoding.rb', line 50
#
# Reports the flag held in @to_unicode_required.
#
# @return [Boolean, nil] the stored flag (nil when it was never set)
def to_unicode_required?
  @to_unicode_required
end
#to_utf8(str, tounicode = nil) ⇒ Object
Convert the specified string to UTF-8 by performing the following steps:
-
unpack raw bytes into codepoints
-
replace any that have entries in the differences table with a glyph name
-
convert codepoints from source encoding to Unicode codepoints
-
convert any glyph names to Unicode codepoints
-
replace characters that didn’t convert to Unicode nicely with something valid
-
pack the final array of Unicode codepoints into a utf-8 string
-
mark the string as UTF-8 if we’re running on an M17N-aware VM
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/pdf/reader/encoding.rb', line 94
#
# Convert the specified string to UTF-8.
#
# Steps, in order:
# * unpack the raw bytes into codepoints using the #unpack format
# * replace any that have entries in the differences table
# * pass each value through original_codepoint_to_unicode
# * replace any value found in glyphnames with its mapped value
# * replace anything that is still not an Integer with UNKNOWN_CHAR
# * pack the final array of codepoints into a UTF-8 string
# * tag the string as UTF-8 when String#force_encoding is available
#
# @param str [String] the raw string to convert
# @param tounicode [Object, nil] handed unchanged to
#   original_codepoint_to_unicode
# @return [String] the converted string
def to_utf8(str, tounicode = nil)
  codepoints = str.unpack(unpack).map { |c|
    differences[c] || c
  }.map { |num|
    original_codepoint_to_unicode(num, tounicode)
  }.map { |c|
    glyphnames[c] || c
  }.map { |c|
    # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and
    # removed in Ruby 3.2, where referencing it raises NameError. nil is
    # not an Integer, so no separate nil check is needed.
    c.is_a?(Integer) ? c : PDF::Reader::Encoding::UNKNOWN_CHAR
  }

  ret = codepoints.pack("U*")
  ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
  ret
end