Class: PDF::Reader::Font

Inherits:

Object

Object → PDF::Reader::Font

Defined in: lib/pdf/reader/font.rb

Overview

Represents a single font PDF object and provides some useful methods for extracting info. Mainly used for converting text to UTF-8.

Instance Attribute Summary collapse

#basefont ⇒ Object

Returns the value of attribute basefont.
#cid_default_height ⇒ Object readonly

Returns the value of attribute cid_default_height.
#cid_default_width ⇒ Object readonly

Returns the value of attribute cid_default_width.
#cid_heights ⇒ Object readonly

Returns the value of attribute cid_heights.
#cid_widths ⇒ Object readonly

Returns the value of attribute cid_widths.
#descendantfonts ⇒ Object

Returns the value of attribute descendantfonts.
#encoding ⇒ Object

Returns the value of attribute encoding.
#first_char ⇒ Object readonly

Returns the value of attribute first_char.
#font_descriptor ⇒ Object readonly

Returns the value of attribute font_descriptor.
#last_char ⇒ Object readonly

Returns the value of attribute last_char.
#subtype ⇒ Object

Returns the value of attribute subtype.
#tounicode ⇒ Object

Returns the value of attribute tounicode.
#widths ⇒ Object readonly

Returns the value of attribute widths.

Instance Method Summary collapse

#glyph_height(code_point) ⇒ Object
#glyph_width(code_point) ⇒ Object

Looks up the specified code point and returns its width in (PDF) glyph space, where 1000 glyph units = 1 text space unit.
#initialize(ohash = nil, obj = nil) ⇒ Font constructor

A new instance of Font.
#is_space?(glyph_code) ⇒ Boolean

TODO: I don’t think this works in all cases.
#to_utf8(params) ⇒ Object
#unpack(data) ⇒ Object

Constructor Details

#initialize(ohash = nil, obj = nil) ⇒ `Font`

Returns a new instance of Font.

# File 'lib/pdf/reader/font.rb', line 39

# Builds a Font from a PDF font object.
#
# ohash - stored as @ohash (presumably the object hash the font was read
#         from; confirm against callers).
# obj   - the font object; handed to extract_base_info, extract_descriptor
#         and extract_descendants in that order.
#
# Calling with either argument missing is deprecated: a warning is written
# to $stderr and the method returns early, so no encoding or width
# calculator is set up on the instance.
def initialize(ohash = nil, obj = nil)
  if ohash.nil? || obj.nil?
    $stderr.puts "DEPRECATION WARNING - PDF::Reader::Font.new should be called with 2 args"
    return
  end
  @ohash = ohash
  @tounicode = nil

  extract_base_info(obj)
  extract_descriptor(obj)
  extract_descendants(obj)
  @width_calc = build_width_calculator

  # Fall back to StandardEncoding when the extract_* steps left @encoding unset.
  @encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
end

Instance Attribute Details

#basefont ⇒ `Object`

Returns the value of attribute basefont.



36
37
38

# File 'lib/pdf/reader/font.rb', line 36

# Attribute reader: hands back whatever is stored in @basefont
# (nil when it has never been assigned).
def basefont; @basefont; end

#cid_default_height ⇒ `Object` (readonly)

Returns the value of attribute cid_default_height.



36
37
38

# File 'lib/pdf/reader/font.rb', line 36

# Read-only attribute: hands back whatever is stored in @cid_default_height
# (nil when it has never been assigned).
def cid_default_height; @cid_default_height; end

#cid_default_width ⇒ `Object` (readonly)

Returns the value of attribute cid_default_width.



36
37
38

# File 'lib/pdf/reader/font.rb', line 36

# Read-only attribute: hands back whatever is stored in @cid_default_width
# (nil when it has never been assigned).
def cid_default_width; @cid_default_width; end

#cid_heights ⇒ `Object` (readonly)

Returns the value of attribute cid_heights.



36
37
38

# File 'lib/pdf/reader/font.rb', line 36

# Read-only attribute: hands back whatever is stored in @cid_heights
# (nil when it has never been assigned).
def cid_heights; @cid_heights; end

#cid_widths ⇒ `Object` (readonly)

Returns the value of attribute cid_widths.



36
37
38

# File 'lib/pdf/reader/font.rb', line 36

# Read-only attribute: hands back whatever is stored in @cid_widths
# (nil when it has never been assigned).
def cid_widths; @cid_widths; end

#descendantfonts ⇒ `Object`

Returns the value of attribute descendantfonts.



35
36
37

# File 'lib/pdf/reader/font.rb', line 35

# Attribute reader: hands back whatever is stored in @descendantfonts
# (nil when it has never been assigned).
def descendantfonts; @descendantfonts; end

#encoding ⇒ `Object`

Returns the value of attribute encoding.



35
36
37

# File 'lib/pdf/reader/font.rb', line 35

# Attribute reader: hands back whatever is stored in @encoding
# (nil when it has never been assigned).
def encoding; @encoding; end

#first_char ⇒ `Object` (readonly)

Returns the value of attribute first_char.



36
37
38

# File 'lib/pdf/reader/font.rb', line 36

# Read-only attribute: hands back whatever is stored in @first_char
# (nil when it has never been assigned).
def first_char; @first_char; end

#font_descriptor ⇒ `Object` (readonly)

Returns the value of attribute font_descriptor.



36
37
38

# File 'lib/pdf/reader/font.rb', line 36

# Read-only attribute: hands back whatever is stored in @font_descriptor
# (nil when it has never been assigned).
def font_descriptor; @font_descriptor; end

#last_char ⇒ `Object` (readonly)

Returns the value of attribute last_char.



36
37
38

# File 'lib/pdf/reader/font.rb', line 36

# Read-only attribute: hands back whatever is stored in @last_char
# (nil when it has never been assigned).
def last_char; @last_char; end

#subtype ⇒ `Object`

Returns the value of attribute subtype.



35
36
37

# File 'lib/pdf/reader/font.rb', line 35

# Attribute reader: hands back whatever is stored in @subtype
# (nil when it has never been assigned).
def subtype; @subtype; end

#tounicode ⇒ `Object`

Returns the value of attribute tounicode.



35
36
37

# File 'lib/pdf/reader/font.rb', line 35

# Attribute reader: hands back whatever is stored in @tounicode
# (nil when it has never been assigned).
def tounicode; @tounicode; end

#widths ⇒ `Object` (readonly)

Returns the value of attribute widths.



36
37
38

# File 'lib/pdf/reader/font.rb', line 36

# Read-only attribute: hands back whatever is stored in @widths
# (nil when it has never been assigned).
def widths; @widths; end

Instance Method Details

#glyph_height(code_point) ⇒ `Object`

# File 'lib/pdf/reader/font.rb', line 98

# Returns the height reported by @width_calc for the given code point,
# memoised per code point in @cached_heights.
#
# code_point - either a numeric code, or a String; a String is unpacked
#              using the directive from encoding.unpack and only its first
#              element is used.
#
# Because the cache is filled with ||=, a nil/false result is not retained
# and will be looked up again on the next call.
def glyph_height(code_point)
  code_point = code_point.unpack(encoding.unpack).first if code_point.is_a?(String)

  height_cache = (@cached_heights ||= {})
  height_cache[code_point] ||= @width_calc.glyph_height(code_point)
end

#glyph_width(code_point) ⇒ `Object`

Looks up the specified code point and returns its width in (PDF) glyph space, where 1000 glyph units = 1 text space unit.

# File 'lib/pdf/reader/font.rb', line 89

# Looks up the width of the given code point via @width_calc, memoised per
# code point in @cached_widths. The value is in (pdf) glyph space, which is
# 1000 glyph units = 1 text space unit.
#
# code_point - either a numeric code, or a String; a String is unpacked
#              using the directive from encoding.unpack and only its first
#              element is used.
#
# Because the cache is filled with ||=, a nil/false result is not retained
# and will be looked up again on the next call.
def glyph_width(code_point)
  code = code_point.is_a?(String) ? code_point.unpack(encoding.unpack).first : code_point

  width_cache = (@cached_widths ||= {})
  width_cache[code] ||= @width_calc.glyph_width(code)
end

#is_space?(glyph_code) ⇒ `Boolean`

TODO: I don’t think this works in all cases. See page 245

Returns:

(Boolean)

# File 'lib/pdf/reader/font.rb', line 78

# Reports whether the given glyph code converts (via to_utf8) to a single
# space character.
#
# TODO: I don't think this works in all cases. See page 245
def is_space?(glyph_code)
  # maybe this: glyph_code == 32
  decoded = to_utf8(glyph_code)
  decoded == " "
end

#to_utf8(params) ⇒ `Object`

# File 'lib/pdf/reader/font.rb', line 69

# Converts params to UTF-8, choosing the conversion route by whether a
# @tounicode value is present: to_utf8_via_cmap when it is set, otherwise
# to_utf8_via_encoding.
def to_utf8(params)
  return to_utf8_via_encoding(params) unless @tounicode

  to_utf8_via_cmap(params)
end

#unpack(data) ⇒ `Object`



83
84
85

# File 'lib/pdf/reader/font.rb', line 83

# Unpacks data with the directive supplied by the current encoding
# (encoding.unpack) and returns the resulting array.
def unpack(data)
  directive = encoding.unpack
  data.unpack(directive)
end

Class: PDF::Reader::Font

Overview

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ohash = nil, obj = nil) ⇒ Font

Instance Attribute Details

#basefont ⇒ Object

#cid_default_height ⇒ Object (readonly)

#cid_default_width ⇒ Object (readonly)

#cid_heights ⇒ Object (readonly)

#cid_widths ⇒ Object (readonly)

#descendantfonts ⇒ Object

#encoding ⇒ Object

#first_char ⇒ Object (readonly)

#font_descriptor ⇒ Object (readonly)

#last_char ⇒ Object (readonly)

#subtype ⇒ Object

#tounicode ⇒ Object

#widths ⇒ Object (readonly)