Module: Origami::Object

Included in:
Array, Boolean, Dictionary, Name, Null, Number, Reference, Stream, String
Defined in:
lib/origami/object.rb,
lib/origami/obfuscation.rb

Overview

Parent module representing a PDF Object. The PDF specification declares a set of primitive object types:

  • Null

  • Boolean

  • Integer

  • Real

  • Name

  • String

  • Array

  • Dictionary

  • Stream

Constant Summary collapse

TOKENS =

:nodoc:

%w{ obj endobj }
@@regexp_obj =
Regexp.new(WHITESPACES + "(\\d+)" + WHITESPACES + "(\\d+)" + WHITESPACES + TOKENS.first + WHITESPACES)
@@regexp_endobj =
Regexp.new(WHITESPACES + TOKENS.last + WHITESPACES)

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#file_offsetObject

Returns the value of attribute file_offset.



266
267
268
# File 'lib/origami/object.rb', line 266

# Reader for the file_offset attribute: hands back whatever is stored in
# @file_offset (nil when it has never been assigned).
def file_offset; @file_offset end

#generationObject

Returns the value of attribute generation.



266
267
268
# File 'lib/origami/object.rb', line 266

# Reader for the generation attribute: hands back whatever is stored in
# @generation.
def generation; @generation end

#noObject

Returns the value of attribute no.



266
267
268
# File 'lib/origami/object.rb', line 266

# Reader for the no attribute (the object number kept in @no).
def no; @no end

#objstm_offsetObject

Returns the value of attribute objstm_offset.



266
267
268
# File 'lib/origami/object.rb', line 266

# Reader for the objstm_offset attribute: hands back whatever is stored in
# @objstm_offset.
def objstm_offset; @objstm_offset end

#parentObject

Returns the value of attribute parent.



267
268
269
# File 'lib/origami/object.rb', line 267

# Reader for the parent attribute: hands back whatever is stored in @parent.
def parent; @parent end

Class Method Details

.native_typeObject

:nodoc:



611
# File 'lib/origami/object.rb', line 611

# Native type reported at this level: the Origami::Object module itself.
def self.native_type #:nodoc:
  Origami::Object
end

.parse(stream, parser = nil) ⇒ Object

:nodoc:



547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
# File 'lib/origami/object.rb', line 547

#
# Parses one indirect object ("<no> <gen> obj ... endobj") out of +stream+.
#
# _stream_:: scanner positioned where the object is expected to start; it is
#            driven through pos, match?, scan, skip and [] (StringScanner-like
#            interface -- assumed, confirm against the callers).
# _parser_:: optional parser, handed through untouched to the concrete type's
#            own parse method.
#
# Returns the parsed object, flagged as indirect and carrying its number,
# generation and the stream offset it was read from. Returns nil when the
# stream is positioned on an xref, trailer or startxref keyword.
#
# Raises InvalidObjectError when the object header is missing, when the type
# cannot be determined or when the type-specific parser fails, and
# UnterminatedObjectError when the closing 'endobj' is missing.
#
def parse(stream, parser = nil) #:nodoc:
  offset = stream.pos

  #
  # End of body ?
  #
  return nil if stream.match?(/xref/) or stream.match?(/trailer/) or stream.match?(/startxref/)

  if stream.scan(@@regexp_obj).nil?
    raise InvalidObjectError,
      "Object shall begin with '%d %d obj' statement"
  end

  # Groups 2 and 4 hold the two numbers; presumably each WHITESPACES fragment
  # contributes a capture group of its own -- confirm against its definition.
  no = stream[2].to_i
  gen = stream[4].to_i

  type = typeof(stream)
  if type.nil?
    raise InvalidObjectError,
      "Cannot determine object (no:#{no},gen:#{gen}) type"
  end

  begin
    newObj = type.parse(stream, parser)
  rescue SystemExit, SignalException, NoMemoryError
    # Process-level conditions (exit, Ctrl-C/signals, out of memory) are not
    # parse failures: let them propagate instead of disguising them.
    raise
  rescue Exception => e
    # Anything else raised by the type-specific parser is reported as an
    # invalid object, keeping the original class and message in the text.
    raise InvalidObjectError,
      "Failed to parse object (no:#{no},gen:#{gen})\n\t -> [#{e.class}] #{e.message}"
  end

  newObj.set_indirect(true)
  newObj.no = no
  newObj.generation = gen
  newObj.file_offset = offset

  if stream.skip(@@regexp_endobj).nil?
    raise UnterminatedObjectError.new("Object shall end with 'endobj' statement", newObj)
  end

  newObj
end

.skip_until_next_obj(stream) ⇒ Object

:nodoc:



588
589
590
591
592
593
594
595
596
597
# File 'lib/origami/object.rb', line 588

#
# Moves +stream+ forward to the next recovery point. The patterns are tried
# one after the other, in this order: an object header, then the xref,
# trailer and startxref keywords. On the first hit the scan position is put
# back at the start of the matched text.
#
# Returns true when a pattern matched, false otherwise.
#
def skip_until_next_obj(stream) #:nodoc:
  markers = [ @@regexp_obj, /xref/, /trailer/, /startxref/ ]

  # any? stops at the first successful scan, so matched_size below still
  # describes that very match.
  located = markers.any? { |marker| stream.scan_until(marker) }
  stream.pos -= stream.matched_size if located

  located
end

.typeof(stream, noref = false) ⇒ Object

:nodoc:



520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
# File 'lib/origami/object.rb', line 520

#
# Guesses which object class starts at the current position of +stream+,
# after skipping leading whitespace. Only peeks/checks: nothing past the
# whitespace is consumed.
#
# _stream_:: scanner to inspect.
# _noref_::  when true, the Reference token is not tried, so input shaped
#            like a reference falls through to the numeric checks.
#
# Returns the matching class, or nil when nothing is recognised.
#
def typeof(stream, noref = false) #:nodoc:
  stream.skip(REGEXP_WHITESPACES)

  case stream.peek(1)
  when '/' then Name
  when '<' then (stream.peek(2) == '<<' ? Stream : HexaString)
  when '(' then ByteString
  when '[' then Origami::Array
  # Keyword-looking input that is not exactly the keyword yields nil; it is
  # never retried as a number or a reference.
  when 'n' then (Null if stream.peek(4) == 'null')
  when 't' then (Boolean if stream.peek(4) == 'true')
  when 'f' then (Boolean if stream.peek(5) == 'false')
  else
    if !noref && stream.check(Reference::REGEXP_TOKEN)
      Reference
    elsif stream.check(Real::REGEXP_TOKEN)
      Real
    elsif stream.check(Integer::REGEXP_TOKEN)
      Integer
    end
  end
end

Instance Method Details

#<=>(obj) ⇒ Object

Compare two objects from their respective numbers.



316
317
318
# File 'lib/origami/object.rb', line 316

# Orders two objects by object number first, then by generation.
# +obj+ must respond to no and generation.
def <=>(obj)
  own_id = [@no, @generation]
  other_id = [obj.no, obj.generation]

  own_id <=> other_id
end

#cast_to(type) ⇒ Object

:nodoc:



620
621
622
623
624
625
626
# File 'lib/origami/object.rb', line 620

#
# Checks that this object can be viewed as +type+: both must report the same
# native_type. Returns self on success (no conversion is performed here),
# raises TypeError otherwise.
#
def cast_to(type) #:nodoc:
  wanted = type.native_type
  actual = self.native_type
  raise TypeError, "Incompatible cast from #{self.class} to #{type}" if wanted != actual

  self
end

#copyObject

Deep copy of an object.



330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
# File 'lib/origami/object.rb', line 330

#
# Deep copy of an object, obtained through a Marshal dump/load round trip.
#
# The owning document and the parent are detached from the receiver while it
# is serialised so they are not dragged into the copy; they are always put
# back, even when the round trip raises.
#
# Returns the copy. It gets the receiver's document (only if the copy is
# indirect) and the receiver's parent.
#
def copy
  saved_pdf = @pdf
  saved_parent = @parent

  begin
    @pdf = @parent = nil # do not process parent object and document in the copy

    # Perform the recursive copy (quite dirty).
    copyobj = Marshal.load(Marshal.dump(self))
  ensure
    # Restore the receiver whatever happened above, so a failed dump cannot
    # leave it without document and parent.
    @pdf = saved_pdf
    @parent = saved_parent
  end

  copyobj.set_pdf(saved_pdf) if copyobj.is_indirect?
  copyobj.parent = parent

  copyobj
end

#exportObject

Creates an exportable version of current object. The exportable version is a copy of self with solved references, no owning PDF and no parent. References to Catalog or PageTreeNode objects have been destroyed.

When exported, an object can be moved into another document without hassle.



405
406
407
408
409
410
411
412
413
# File 'lib/origami/object.rb', line 405

#
# Builds an exportable version of this object: takes the result of
# logicalize, resets its number and generation to 0, drops its document
# (indirect objects only) and its parent, and empties its xref cache.
#
# Returns that detached object.
#
def export
  detached = self.logicalize

  detached.generation = 0
  detached.no = 0
  detached.set_pdf(nil) if detached.is_indirect?
  detached.parent = nil
  detached.xref_cache.clear

  detached
end

#indirect_parentObject

Returns the indirect object which contains this object. If the current object is already indirect, returns self.



473
474
475
476
477
478
# File 'lib/origami/object.rb', line 473

#
# Walks up the parent chain, starting at self, and returns the first object
# that is indirect (self when it already is).
# NOTE(review): nothing guards against a direct object without a parent; the
# walk then calls is_indirect? on nil.
#
def indirect_parent
  node = self

  loop do
    return node if node.is_indirect?
    node = node.parent
  end
end

#initialize(*cons) ⇒ Object

Creates a new PDF Object.



272
273
274
275
276
277
# File 'lib/origami/object.rb', line 272

#
# Creates a new PDF Object: it starts out direct, with number and generation
# both 0. Constructor arguments, when any are given, are forwarded to the
# superclass initializer; without arguments super is not called at all.
#
def initialize(*cons)
  @indirect = false
  @no = 0
  @generation = 0

  return if cons.empty?
  super(*cons)
end

#is_indirect?Boolean

Returns whether the object is indirect, which means that it is not embedded into another object.

Returns:



323
324
325
# File 'lib/origami/object.rb', line 323

# Tells whether the object is indirect: returns the @indirect flag as stored.
def is_indirect?; @indirect end

#logicalizeObject

Returns a logicalized copy of self. See logicalize!



419
420
421
# File 'lib/origami/object.rb', line 419

# Non-destructive variant of logicalize!: works on a copy of self and returns
# whatever logicalize! returns for that copy.
def logicalize #:nodoc:
  duplicate = self.copy
  duplicate.logicalize!
end

#logicalize!Object

Recursively transforms every reference into a copy of the object it points to. Catalog and PageTreeNode objects are excluded to limit the recursion.



427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
# File 'lib/origami/object.rb', line 427

#
# Replaces, in place and recursively, every Reference reachable from self by
# a copy of the object it points to. References that solve to a Catalog or a
# PageTreeNode are replaced by nil instead, which bounds the recursion.
#
# Returns self, so the result can be used directly by logicalize and export.
#
def logicalize! #:nodoc:

  # Defined here as in the original layout: running logicalize! (re)defines
  # this helper as a regular method.
  #
  # _obj_::       object to process.
  # _browsed_::   objects already visited (looked up with include?, i.e. ==).
  # _ref_cache_:: Reference => copy, so one reference is copied only once.
  def resolve_all_references(obj, browsed = [], ref_cache = {})
    # Already visited: stop, which also terminates cyclic structures.
    return if browsed.include?(obj)
    browsed.push(obj)

    if obj.is_a?(ObjectStream)
      # Recurse into each contained object, not into the stream itself
      # (which has just been marked as browsed and would be a no-op).
      obj.each do |subobj|
        resolve_all_references(subobj, browsed, ref_cache)
      end
    end

    if obj.is_a?(Dictionary) or obj.is_a?(Array)
      obj.map! do |subobj|
        if subobj.is_a?(Reference)
          new_obj =
            if ref_cache.has_key?(subobj)
              ref_cache[subobj]
            else
              ref_cache[subobj] = subobj.solve.copy
            end
          new_obj.no = new_obj.generation = 0
          new_obj.parent = obj

          # Evaluates to nil for Catalog/PageTreeNode: the slot is cleared.
          new_obj unless new_obj.is_a?(Catalog) or new_obj.is_a?(PageTreeNode)
        else
          subobj
        end
      end

      obj.each do |subobj|
        resolve_all_references(subobj, browsed, ref_cache)
      end

    elsif obj.is_a?(Stream)
      resolve_all_references(obj.dictionary, browsed, ref_cache)
    end
  end

  resolve_all_references(self)

  # The traversal's own value is not meaningful (nil, or a stream's
  # dictionary); callers expect the transformed object.
  self
end

#native_typeObject

Returns the native PDF type of this Object.



616
617
618
# File 'lib/origami/object.rb', line 616

# Native PDF type of this object, as declared by its class (delegates to the
# class-level native_type).
def native_type
  klass = self.class
  klass.native_type
end

#pdfObject

Returns the PDF which the object belongs to.



504
505
506
507
508
509
# File 'lib/origami/object.rb', line 504

#
# Returns the PDF which the object belongs to: its own @pdf when the object
# is indirect, otherwise whatever its parent reports. A direct object
# without a parent yields nil.
#
def pdf
  return @pdf if self.is_indirect?

  @parent ? @parent.pdf : nil
end

#pdf_version_requiredObject

:nodoc:



600
601
602
# File 'lib/origami/object.rb', line 600

# Returns the fixed pair [ 1.0, 0 ].
# Presumably [ minimum PDF version, extension level ] -- confirm with callers.
def pdf_version_required #:nodoc:
  [1.0, 0]
end

#post_buildObject

Generic method called just after the object is finalized. At this time, any indirect object has its own number and generation identifier.



309
310
311
# File 'lib/origami/object.rb', line 309

# Hook invoked once the object has been finalized. The generic version does
# nothing and returns self.
def post_build; self end

#pre_buildObject

Generic method called just before the object is finalized. At this time, no number nor generation allocation has yet been done.



301
302
303
# File 'lib/origami/object.rb', line 301

# Hook invoked just before the object is finalized. The generic version does
# nothing and returns self.
def pre_build; self end

#referenceObject

Returns an indirect reference to this object. Raises InvalidObjectError if this object is not indirect.



353
354
355
356
357
358
359
360
361
362
# File 'lib/origami/object.rb', line 353

#
# Builds a Reference carrying this object's number and generation, with its
# parent set to self.
#
# Raises InvalidObjectError when the object is direct.
#
def reference
  raise InvalidObjectError, "Cannot reference a direct object" unless self.is_indirect?

  Reference.new(@no, @generation).tap { |pointer| pointer.parent = self }
end

#resolve_all_references(obj, browsed = [], ref_cache = {}) ⇒ Object

:nodoc:



429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
# File 'lib/origami/object.rb', line 429

#
# Recursively replaces, inside +obj+, every Reference by a copy of the object
# it solves to. A reference solving to a Catalog or a PageTreeNode is
# replaced by nil instead, which bounds the recursion.
#
# _obj_::       object to process (ObjectStream, Dictionary, Array, Stream;
#               anything else is only recorded as visited).
# _browsed_::   objects already visited (looked up with include?, i.e. ==).
# _ref_cache_:: Reference => copy, so one reference is copied only once.
#
def resolve_all_references(obj, browsed = [], ref_cache = {})
  # Already visited: stop, which also terminates cyclic structures.
  return if browsed.include?(obj)
  browsed.push(obj)

  if obj.is_a?(ObjectStream)
    # Recurse into each contained object, not into the stream itself (which
    # has just been marked as browsed and would be a no-op).
    obj.each do |subobj|
      resolve_all_references(subobj, browsed, ref_cache)
    end
  end

  if obj.is_a?(Dictionary) or obj.is_a?(Array)
    obj.map! do |subobj|
      if subobj.is_a?(Reference)
        new_obj =
          if ref_cache.has_key?(subobj)
            ref_cache[subobj]
          else
            ref_cache[subobj] = subobj.solve.copy
          end
        new_obj.no = new_obj.generation = 0
        new_obj.parent = obj

        # Evaluates to nil for Catalog/PageTreeNode: the slot is cleared.
        new_obj unless new_obj.is_a?(Catalog) or new_obj.is_a?(PageTreeNode)
      else
        subobj
      end
    end

    obj.each do |subobj|
      resolve_all_references(subobj, browsed, ref_cache)
    end

  elsif obj.is_a?(Stream)
    resolve_all_references(obj.dictionary, browsed, ref_cache)
  end
end

#set_indirect(bool) ⇒ Object

Sets whether the object is indirect or not. Indirect objects are allocated numbers at build time.



283
284
285
286
287
288
289
290
291
292
293
294
295
# File 'lib/origami/object.rb', line 283

#
# Sets whether the object is indirect or not.
#
# _bool_:: must be exactly true or false, otherwise TypeError is raised.
#
# Turning an object direct also resets its number and generation to 0 and
# forgets its document. Returns self.
#
def set_indirect(bool)
  is_boolean = (bool == true) || (bool == false)
  raise TypeError, "The argument must be boolean" unless is_boolean

  unless bool
    @generation = 0
    @no = 0
    @pdf = nil
  end

  @indirect = bool
  self
end

#set_pdf(pdf) ⇒ Object



511
512
513
514
515
516
# File 'lib/origami/object.rb', line 511

#
# Attaches the object to +pdf+ by storing it in @pdf. Only indirect objects
# may be attached; a direct object raises InvalidObjectError.
#
# Returns the value that was assigned.
#
def set_pdf(pdf)
  unless self.is_indirect?
    raise InvalidObjectError, "You cannot set the PDF parent of a direct object"
  end

  @pdf = pdf
end

#sizeObject

Returns the size of this object once converted to PDF code.



497
498
499
# File 'lib/origami/object.rb', line 497

# Size of this object once converted to PDF code, i.e. the size of what to_s
# returns.
def size
  rendered = to_s
  rendered.size
end

#solveObject

Returns self.



490
491
492
# File 'lib/origami/object.rb', line 490

# Solving a generic object is a no-op: returns self.
def solve; self end

#to_oObject

Returns self.



483
484
485
# File 'lib/origami/object.rb', line 483

# Conversion to an Origami object is a no-op here: returns self.
def to_o; self end

#to_s(data) ⇒ Object Also known as: output, to_obfuscated_str

Outputs this object into PDF code.

data

The object data.



632
633
634
635
636
637
638
639
640
# File 'lib/origami/object.rb', line 632

#
# Outputs this object into PDF code.
#
# _data_:: The object data, appended as is.
#
# For an indirect object the data is wrapped between a "<no> <generation> obj"
# header line and an "endobj" trailer line (each followed by EOL); a direct
# object yields the data alone, in a new string.
#
def to_s(data)
  output = ""

  if self.is_indirect?
    output << "#{no} #{generation} obj"
    output << EOL
  end

  output << data

  if self.is_indirect?
    output << EOL
    output << "endobj"
    output << EOL
  end

  output
end

#typeObject

Returns the symbol type of this Object.



607
608
609
# File 'lib/origami/object.rb', line 607

# Symbol type of this object: the last "::"-separated component of its class
# name, as a Symbol.
def type
  qualified_name = self.class.to_s
  components = qualified_name.split("::")

  components[-1].to_sym
end

#xrefsObject

Returns an array of references pointing to the current object.



367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
# File 'lib/origami/object.rb', line 367

#
# Returns an array of the objects recorded, in the xref caches of the
# document's root objects, as pointing to the current object.
#
# Dictionary and Array root objects contribute their own xref_cache entries.
# For a Stream root object the entries of its dictionary are used, each one
# reported as the stream itself.
#
# The result is built from fresh arrays: the caches owned by the root
# objects are only read, so repeated calls give the same answer.
#
# Raises InvalidObjectError when the object is direct or not attached to any
# PDF. Returns an empty array when nothing points to the object.
#
def xrefs
  unless self.is_indirect?
    raise InvalidObjectError, "Cannot find xrefs to a direct object"
  end

  if self.pdf.nil?
    raise InvalidObjectError, "Not attached to any PDF"
  end

  merged = {}
  @pdf.root_objects.each do |obj|
    case obj
      when Dictionary, Array then
        obj.xref_cache.each do |ref, users|
          (merged[ref] ||= []).concat(users)
        end

      when Stream then
        obj.dictionary.xref_cache.each do |ref, users|
          # One entry per recorded user, each replaced by the owning stream.
          (merged[ref] ||= []).concat(users.map { obj })
        end
    end
  end

  merged.fetch(self.reference, [])
end