Class: HexaPDF::Font::CMap

Inherits:
Object
  • Object
show all
Defined in:
lib/hexapdf/font/cmap.rb,
lib/hexapdf/font/cmap/parser.rb,
lib/hexapdf/font/cmap/writer.rb

Overview

Represents a CMap, a mapping from character codes to CIDs (character IDs) or to their Unicode value.

See: PDF2.0 s9.7.5, s9.10.3; Adobe Technical Notes #5014 and #5411

Defined Under Namespace

Classes: Parser, Writer

Constant Summary collapse

CMAP_DIR =

:nodoc:

File.join(HexaPDF.data_dir, 'cmap')

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ CMap

Creates a new CMap object.



109
110
111
112
113
114
115
# File 'lib/hexapdf/font/cmap.rb', line 109

# Creates a new, empty CMap: no codespace ranges and no CID or Unicode mappings are defined yet.
def initialize
  # Direct lookup tables keyed by character code.
  @cid_mapping = {}
  @unicode_mapping = {}

  # Ordered lists; entries are appended by the respective add_* methods.
  @codespace_ranges = []
  @cid_range_mappings = []
  @unicode_range_mappings = []
end

Instance Attribute Details

#name ⇒ Object

The name of the CMap.



98
99
100
# File 'lib/hexapdf/font/cmap.rb', line 98

# Reader for the name of the CMap.
#
# Returns the current value of the @name instance variable.
def name
  @name
end

#ordering ⇒ Object

The ordering part of the CMap version.



92
93
94
# File 'lib/hexapdf/font/cmap.rb', line 92

# Reader for the ordering part of the CMap version.
#
# Returns the current value of the @ordering instance variable.
def ordering
  @ordering
end

#registry ⇒ Object

The registry part of the CMap version.



89
90
91
# File 'lib/hexapdf/font/cmap.rb', line 89

# Reader for the registry part of the CMap version.
#
# Returns the current value of the @registry instance variable.
def registry
  @registry
end

#supplement ⇒ Object

The supplement part of the CMap version.



95
96
97
# File 'lib/hexapdf/font/cmap.rb', line 95

# Reader for the supplement part of the CMap version.
#
# Returns the current value of the @supplement instance variable.
def supplement
  @supplement
end

#wmodeObject

The writing mode of the CMap: 0 for horizontal, 1 for vertical writing.



101
102
103
# File 'lib/hexapdf/font/cmap.rb', line 101

# Reader for the writing mode of the CMap (documented as 0 for horizontal and 1 for vertical
# writing).
#
# Returns the current value of the @wmode instance variable.
def wmode
  @wmode
end

Class Method Details

.create_to_unicode_cmap(mapping) ⇒ Object

Returns a string containing a ToUnicode CMap that represents the given code to Unicode codepoint mapping.

See: Writer#create_to_unicode_cmap



84
85
86
# File 'lib/hexapdf/font/cmap.rb', line 84

# Returns the ToUnicode CMap for the given code to Unicode codepoint +mapping+.
#
# The work is delegated to a fresh Writer instance; its result is returned unchanged.
def self.create_to_unicode_cmap(mapping)
  writer = Writer.new
  writer.create_to_unicode_cmap(mapping)
end

.for_name(name) ⇒ Object

Creates a new CMap object by parsing a predefined CMap with the given name.

Raises an error if the given CMap is not found.



64
65
66
67
68
69
70
71
72
73
# File 'lib/hexapdf/font/cmap.rb', line 64

# Creates a new CMap object by parsing the predefined CMap with the given +name+.
#
# Parsed CMaps are stored in @cmap_cache, so each CMap file is read and parsed at most once;
# later calls with the same name return the cached object.
#
# Raises HexaPDF::Error if no regular file called +name+ exists inside CMAP_DIR.
def self.for_name(name)
  return @cmap_cache[name] if @cmap_cache.key?(name)

  file = File.join(CMAP_DIR, name)
  # File.file? instead of File.exist?: a name that resolves to a directory (for example the
  # empty string, which resolves to CMAP_DIR itself) must be reported as "not found" instead
  # of failing later inside File.read with a low-level Errno error.
  unless File.file?(file)
    raise HexaPDF::Error, "No CMap named '#{name}' found"
  end

  @cmap_cache[name] = parse(File.read(file, encoding: ::Encoding::UTF_8))
end

.parse(string) ⇒ Object

Creates a new CMap object from the given string which needs to contain a valid CMap file.



76
77
78
# File 'lib/hexapdf/font/cmap.rb', line 76

# Creates a new CMap object from +string+, which needs to contain a valid CMap file.
#
# The work is delegated to a fresh Parser instance; its result is returned unchanged.
def self.parse(string)
  parser = Parser.new
  parser.parse(string)
end

.predefined?(name) ⇒ Boolean

Returns true if the given name specifies a predefined CMap.

Returns:

  • (Boolean)


57
58
59
# File 'lib/hexapdf/font/cmap.rb', line 57

# Returns +true+ if the given +name+ specifies a predefined CMap, i.e. if a regular file with
# that name exists inside CMAP_DIR.
#
# File.file? is used instead of File.exist? so that names resolving to a directory (such as
# the empty string, which resolves to CMAP_DIR itself) are not reported as CMaps.
def self.predefined?(name)
  File.file?(File.join(CMAP_DIR, name))
end

Instance Method Details

#add_cid_mapping(code, cid) ⇒ Object

Adds an individual mapping from character code to CID.



171
172
173
# File 'lib/hexapdf/font/cmap.rb', line 171

# Registers an individual mapping from the character code +code+ to the CID +cid+.
#
# An existing mapping for the same code is overwritten.
def add_cid_mapping(code, cid)
  @cid_mapping.store(code, cid)
end

#add_cid_range(start_code, end_code, start_cid) ⇒ Object

Adds a CID range, mapping character codes from start_code to end_code to CIDs starting with start_cid.



177
178
179
# File 'lib/hexapdf/font/cmap.rb', line 177

# Registers a CID range: the character codes from +start_code+ to +end_code+ (inclusive) are
# mapped to consecutive CIDs starting with +start_cid+.
def add_cid_range(start_code, end_code, start_cid)
  code_range = Range.new(start_code, end_code)
  @cid_range_mappings.push([code_range, start_cid])
end

#add_codespace_range(first, *rest) ⇒ Object

Add a codespace range using an array of ranges for the individual bytes.

This means that the first range is checked against the first byte, the second range against the second byte and so on.



130
131
132
# File 'lib/hexapdf/font/cmap.rb', line 130

# Registers a codespace range that is given as one range per byte of a character code.
#
# +first+ is checked against the first byte of a code, the ranges in +rest+ against the
# second, third, ... byte. The entry is stored as the pair [first, rest].
def add_codespace_range(first, *rest)
  entry = [first, rest]
  @codespace_ranges.push(entry)
end

#add_unicode_mapping(code, string) ⇒ Object

Adds a mapping from character code to Unicode string in UTF-8 encoding.



196
197
198
# File 'lib/hexapdf/font/cmap.rb', line 196

# Registers a mapping from the character code +code+ to the Unicode string +string+ (expected
# to be in UTF-8 encoding).
#
# An existing mapping for the same code is overwritten.
def add_unicode_mapping(code, string)
  @unicode_mapping.store(code, string)
end

#add_unicode_range_mapping(start_code, end_code, start_values) ⇒ Object

Adds a mapping from a range of character codes to strings starting with the given 16-bit integer values (representing the raw UTF-16BE characters).



202
203
204
# File 'lib/hexapdf/font/cmap.rb', line 202

# Registers a mapping from the character codes +start_code+ to +end_code+ (inclusive) to
# strings starting with +start_values+, an array of 16-bit integers representing the raw
# UTF-16BE characters of the string for +start_code+.
def add_unicode_range_mapping(start_code, end_code, start_values)
  code_range = Range.new(start_code, end_code)
  @unicode_range_mappings.push([code_range, start_values])
end

#read_codes(string) ⇒ Object

Parses the string and returns all character codes.

An error is raised if the string contains invalid bytes.



137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/hexapdf/font/cmap.rb', line 137

# Parses the string and returns all character codes as an array of integers.
#
# The bytes of +string+ are matched against the codespace ranges in the order in which they
# were added. A codespace range matches if its first-byte range covers the current byte and
# each of its remaining byte ranges covers the respective following byte. The bytes of a
# matching range are combined big-endian into one code.
#
# Every codespace range is tried against the same starting position: a range that accepts
# the first byte but rejects one of the following bytes neither consumes input nor influences
# the code calculated for a range that is tried afterwards.
#
# Raises HexaPDF::Error if the string ends in the middle of a code or if the bytes at the
# current position do not match any codespace range.
def read_codes(string)
  codes = []
  pos = 0
  length = string.bytesize

  while pos < length
    first_byte = string.getbyte(pos)
    # The byte reported in the error message: the most recently rejected one.
    rejected = first_byte

    matched = @codespace_ranges.any? do |first_byte_range, rest_ranges|
      next false unless first_byte_range.cover?(first_byte)

      code = first_byte
      offset = 0
      valid = rest_ranges.all? do |range|
        offset += 1
        # Look ahead without consuming so that a failed range leaves the position untouched.
        byte = string.getbyte(pos + offset)
        raise HexaPDF::Error, "Missing bytes while reading codes via CMap" unless byte

        code = (code << 8) + byte
        accepted = range.cover?(byte)
        rejected = byte unless accepted
        accepted
      end
      next false unless valid

      codes << code
      pos += rest_ranges.length + 1
      true
    end

    unless matched
      raise HexaPDF::Error, "Invalid byte while reading codes via CMap: #{rejected}"
    end
  end

  codes
end

#to_cid(code) ⇒ Object

Returns the CID for the given character code, or 0 if no mapping was found.



182
183
184
185
186
187
188
189
190
191
192
193
# File 'lib/hexapdf/font/cmap.rb', line 182

# Returns the CID for the given character code, or 0 if no mapping was found.
#
# Individual mappings take precedence over range mappings. Among the range mappings the one
# that was added last wins.
def to_cid(code)
  cid = @cid_mapping.fetch(code, -1)
  if cid == -1
    # Search from the back so that later definitions override earlier ones.
    code_range, first_cid = @cid_range_mappings.reverse_each.find { |range, _| range.cover?(code) }
    cid = first_cid + code - code_range.first if code_range
  end
  cid == -1 ? 0 : cid
end

#to_unicode(code) ⇒ Object

Returns the Unicode string in UTF-8 encoding for the given character code, or nil if no mapping was found.



208
209
210
211
212
213
214
215
216
217
218
219
# File 'lib/hexapdf/font/cmap.rb', line 208

# Returns the Unicode string in UTF-8 encoding for the given character code, or +nil+ if no
# mapping was found.
#
# Individual mappings are consulted first. Otherwise the range mappings are searched, the one
# added last first, and the string calculated from a range mapping is stored as individual
# mapping so that it is only calculated once.
def to_unicode(code)
  return @unicode_mapping[code] if @unicode_mapping.key?(code)

  code_range, base_values = @unicode_range_mappings.reverse_each.find { |range, _| range.cover?(code) }
  return unless code_range

  # Only the last 16-bit value is incremented by the offset of the code into the range.
  utf16_units = base_values[0..-2] << (base_values[-1] + code - code_range.first)
  utf16_string = utf16_units.pack('n*')
  @unicode_mapping[code] = utf16_string.encode(::Encoding::UTF_8, ::Encoding::UTF_16BE)
end

#use_cmap(cmap) ⇒ Object

Add all mappings from the given CMap to this CMap.



118
119
120
121
122
123
124
# File 'lib/hexapdf/font/cmap.rb', line 118

# Adds all mappings from the given CMap to this CMap.
#
# Codespace ranges and range mappings of +cmap+ are appended to the ones of this CMap.
# Individual mappings of +cmap+ overwrite individual mappings defined here for the same code.
def use_cmap(cmap)
  @codespace_ranges.concat(cmap.codespace_ranges)

  # CID data: individual mappings first, then the ranges.
  @cid_mapping.update(cmap.cid_mapping)
  @cid_range_mappings.concat(cmap.cid_range_mappings)

  # Unicode data: individual mappings first, then the ranges.
  @unicode_mapping.update(cmap.unicode_mapping)
  @unicode_range_mappings.concat(cmap.unicode_range_mappings)
end