Class: PDF::Reader::Encoding
- Inherits:
-
Object
- Object
- PDF::Reader::Encoding
- Defined in:
- lib/pdf/reader/encoding.rb
Overview
:nodoc:
Constant Summary collapse
- CONTROL_CHARS =
[0,1,2,3,4,5,6,7,8,11,12,14,15,16,17,18,19,20,21,22,23, 24,25,26,27,28,29,30,31]
- UNKNOWN_CHAR =
▯
0x25AF
Instance Attribute Summary collapse
-
#unpack ⇒ Object
readonly
Returns the value of attribute unpack.
Instance Method Summary collapse
- #differences ⇒ Object
-
#differences=(diff) ⇒ Object
set the differences table for this encoding.
-
#initialize(enc) ⇒ Encoding
constructor
A new instance of Encoding.
-
#to_utf8(str) ⇒ Object
convert the specified string to utf8.
Constructor Details
#initialize(enc) ⇒ Encoding
Returns a new instance of Encoding.
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# File 'lib/pdf/reader/encoding.rb', line 34

# Build an encoding from +enc+.
#
# +enc+ may be:
# * a Hash - an optional :Differences entry is handed to #differences=, and
#   the encoding name is read from :Encoding, falling back to :BaseEncoding
# * nil - the encoding name stays nil
# * anything else - converted to a symbol with #to_sym
#
# The resolved name is stored in @enc_name and passed to get_unpack and
# get_mapping_file. If a mapping file is returned it is loaded with
# load_mapping.
def initialize(enc)
  encoding_name =
    if enc.is_a?(Hash)
      if enc[:Differences]
        self.differences = enc[:Differences]
      end
      enc[:Encoding] || enc[:BaseEncoding]
    elsif enc.nil?
      nil
    else
      enc.to_sym
    end

  @enc_name = encoding_name
  @unpack   = get_unpack(encoding_name)
  @map_file = get_mapping_file(encoding_name)

  # Nothing to load when no mapping file was resolved for this encoding.
  return unless @map_file

  load_mapping(@map_file)
end
Instance Attribute Details
#unpack ⇒ Object (readonly)
Returns the value of attribute unpack.
32 33 34 |
# File 'lib/pdf/reader/encoding.rb', line 32

# Read-only accessor for @unpack, the value the constructor obtains from
# get_unpack. #to_utf8 passes it to String#unpack as the format argument.
def unpack
  @unpack
end
Instance Method Details
#differences ⇒ Object
75 76 77 |
# File 'lib/pdf/reader/encoding.rb', line 75

# Return the differences table, creating and remembering an empty Hash the
# first time it is requested (or whenever the stored value is nil or false).
def differences
  @differences = {} unless @differences
  @differences
end
#differences=(diff) ⇒ Object
Set the differences table for this encoding. The argument should be an array in the following format:
[25, :A, 26, :B]
The array alternates between a decimal byte number and the glyph name to map to that byte.
To save space, the following array is also valid and equivalent to the previous one, because consecutive glyph names are assigned to consecutive byte values:
[25, :A, :B]
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'lib/pdf/reader/encoding.rb', line 59

# Replace the differences table with one built from +diff+.
#
# +diff+ is walked left to right. A Numeric entry sets the current byte
# (truncated with #to_i); any other entry is stored under the current byte,
# after which the byte advances by one. Counting starts at byte 0 if the array
# does not begin with a number.
#
# Raises ArgumentError when +diff+ is not an Array. Returns the new table.
def differences=(diff)
  unless diff.is_a?(Array)
    raise ArgumentError, "diff must be an array"
  end

  # Reset before iterating so the previous table is always discarded.
  @differences = {}

  # The fold's accumulator is the byte the next glyph name will be stored at.
  diff.inject(0) do |next_byte, entry|
    if entry.is_a?(Numeric)
      entry.to_i
    else
      @differences[next_byte] = entry
      next_byte + 1
    end
  end

  @differences
end
#to_utf8(str) ⇒ Object
convert the specified string to utf8
-
unpack raw bytes into codepoints
-
replace any that have entries in the differences table with a glyph name
-
convert codepoints from source encoding to Unicode codepoints
-
convert any glyph names to Unicode codepoints
-
replace characters that didn’t convert to Unicode nicely with something valid
-
pack the final array of Unicode codepoints into a utf-8 string
-
mark the string as utf-8 if we’re running on an M17N-aware VM
90 91 92 93 94 95 96 |
# File 'lib/pdf/reader/encoding.rb', line 90

# Convert +str+ to UTF-8.
#
# When utf8_conversion_impossible? is false the work is delegated to
# convert_to_utf8. Otherwise +str+ is unpacked with the #unpack format and
# only the number of resulting values is passed to little_boxes.
# NOTE(review): little_boxes presumably returns that many placeholder
# characters - confirm against its definition.
def to_utf8(str)
  return convert_to_utf8(str) unless utf8_conversion_impossible?

  little_boxes(str.unpack(unpack).size)
end