Class: PDFBeads::PDFBuilder

Inherits:

Object

Object
PDFBeads::PDFBuilder

show all

Defined in: lib/pdfbeads/pdfbuilder.rb

Overview

The key class where the actual generation of a PDF file is performed.

Defined Under Namespace

Classes: Dict, Doc, FontDataProvider, PDFLabels, PDFTOC, XObj

Constant Summary collapse

# Lookup table from an image type name to the PDF color space name written
# for it. The matte (alpha) variants map to the same color space as their
# plain counterparts.
# NOTE(review): the keys look like ImageMagick image type names -- confirm
# against the code that reads this table.
@@cmodes = {
  'BilevelType'              => '/DeviceGray',
  'GrayscaleType'            => '/DeviceGray',
  'PaletteType'              => '/Indexed',
  'PaletteMatteType'         => '/Indexed',
  'TrueColorType'            => '/DeviceRGB',
  'TrueColorMatteType'       => '/DeviceRGB',
  'ColorSeparationType'      => '/DeviceCMYK',
  'ColorSeparationMatteType' => '/DeviceCMYK',
  'PaletteBilevelMatteType'  => '/DeviceGray'
}

Instance Method Summary collapse

#initialize(pdfargs) ⇒ PDFBuilder constructor

A new instance of PDFBuilder.
#output(outpath) ⇒ Object

Output the created PDF file to the disk.
#process(pagefiles, st_format) ⇒ Object

Constructor Details

#initialize(pdfargs) ⇒ `PDFBuilder`

Returns a new instance of PDFBuilder.

# File 'lib/pdfbeads/pdfbuilder.rb', line 57

# Create a builder for one PDF document.
#
# pdfargs:: option collection, stored unchanged in @pdfargs for later use.
#
# Also records the construction time in @now and creates the (initially
# empty) document container and the font data provider.
def initialize( pdfargs )
  @pdfargs = pdfargs
  @now     = Time.now
  @doc     = Doc.new
  @fdata   = FontDataProvider.new

  # Start with an empty dictionary path and no dictionary object.
  # NOTE(review): presumably a cache of the last loaded JBIG2 dictionary --
  # confirm against the code that assigns these.
  @dictpath, @dictobj = '', nil
end

Instance Method Details

#output(outpath) ⇒ `Object`

Output the created PDF file to the disk.

# File 'lib/pdfbeads/pdfbuilder.rb', line 361

# Output the created PDF file to the disk.
#
# outpath:: destination file path, or the literal string 'STDOUT' to send
#           the document to standard output instead of a file.
#
# The document is serialized with @doc.to_s and always written in binary
# mode, so no newline translation can alter the PDF bytes on any platform.
# Failures are not raised: they are reported on $stderr together with the
# reason. Always returns nil.
def output( outpath )
  data = @doc.to_s

  if outpath.eql? 'STDOUT'
    # $stdout is deliberately left open: it does not belong to this method.
    $stdout.binmode
    $stdout.write( data )
  else
    # The block form closes the handle even when the write itself fails.
    File.open( outpath,'wb' ) { |out| out.write( data ) }
  end
  nil
rescue StandardError => e
  $stderr.puts( "Error: could not write to #{outpath}: #{e.message}" )
  nil
end

#process(pagefiles, st_format) ⇒ `Object`

# File 'lib/pdfbeads/pdfbuilder.rb', line 67

# Populate @doc with the catalog, info dictionary, optional-content groups,
# page tree, per-page objects, fonts and (when a TOC is given) outlines.
#
# pagefiles:: enumerable of page descriptors. Each one must respond to
#             width, height, x_res, y_res, stencils, fg_layer, bg_layer,
#             hocr_path, name, fg_created and bg_created.
# st_format:: 'JBIG2' loads every stencil with loadJBIG2Page; any other
#             value loads it with loadCCITTPage.
#
# Reads the @pdfargs keys :labels, :toc, :meta, :textpdf, :pagelayout, :rtl,
# :debug and :delfiles. Progress messages are printed to $stderr. When
# @pdfargs[:delfiles] is set, layer and stencil files flagged as created
# are removed with safe_delete once all pages have been processed.
def process( pagefiles,st_format )
  labels = toc = nil
  labels = PDFLabels.new( @pdfargs[:labels] ) unless @pdfargs[:labels].nil?
  toc    = PDFTOC.new( @pdfargs[:toc] ) unless @pdfargs[:toc].nil?
  meta   = parseMeta( @pdfargs[:meta] )
  reader = getPDFReader( @pdfargs[:textpdf] )

  cat = XObj.new(Hash[
    'Type'       => '/Catalog',
    'PageLayout' => "/#{@pdfargs[:pagelayout]}"
    ])
  @doc.addObject(cat)

  # PDF date string: D:YYYYMMDDHHmmSS followed by the relation of local time
  # to UT -- 'Z' when they coincide, otherwise a sign and HH'mm.
  # The magnitude is computed from the absolute offset so that the sign is
  # written exactly once and floor division cannot skew negative offsets.
  gmt_offset = @now.gmt_offset
  creationDate = sprintf( "D:%04d%02d%02d%02d%02d%02d",
    @now.year, @now.month, @now.day, @now.hour, @now.min, @now.sec )
  if gmt_offset == 0
    creationDate << 'Z'
  else
    off_hours, off_mins = ( gmt_offset.abs/60 ).divmod( 60 )
    creationDate << sprintf( "%s%02d'%02d", gmt_offset < 0 ? '-' : '+', off_hours, off_mins )
  end
  info = XObj.new(Hash[
    'Creator'      => "(PDFBeads)",
    'Producer'     => "(PDFBeads)",
    'CreationDate' => "(#{creationDate})"
  ])
  @doc.addObject(info)
  # Every metadata value is written with a leading UTF-16 byte order mark.
  meta.each_key do |key|
    info.addToDict(key, "(\xFE\xFF#{meta[key].to_text})")
  end

  # A viewer preferences object is needed for RTL documents and is also
  # referenced further below when a non-empty TOC is present.
  if ( toc != nil and toc.length > 0 ) or @pdfargs[:rtl]
    vpref = XObj.new(Hash.new())
    vpref.addToDict('Direction', "/R2L") if @pdfargs[:rtl]
    @doc.addObject(vpref)
    cat.addToDict('ViewerPreferences', ref(vpref.getID))
  end

  pages = XObj.new(Hash[
    'Type' => '/Pages'
  ])
  @doc.addObject(pages)
  cat.addToDict('Pages', ref(pages.getID))

  creator = XObj.new(Hash[
    'Subtype' => '/Artwork',
    'Creator' => "(PDFBeads)",
    'Feature' => '(Layers)'
  ])
  @doc.addObject(creator)

  # Two optional content groups: one for the foreground, one for the background.
  ocFore = XObj.new(Hash[
    'Type'   => '/OCG',
    'Name'   => '(Foreground)',
    'Usage'  => "<</CreatorInfo #{ref(creator.getID)}>>",
    'Intent' => '[/View/Design]'
  ])
  @doc.addObject(ocFore)
  ocBack = XObj.new({
    'Type'   => '/OCG',
    'Name'   => '(Background)',
    'Usage'  => "<</CreatorInfo #{ref(creator.getID)}>>",
    'Intent' => '[/View/Design]'
  })
  @doc.addObject(ocBack)
  cat.addToDict('OCProperties',
    sprintf("<< /OCGs[%s %s] /D<< /Intent /View /BaseState (ON) /Order[%s %s] >>>>",
      ref(ocFore.getID), ref(ocBack.getID), ref(ocFore.getID), ref(ocBack.getID)))

  page_objs = Array.new()
  pages_by_num = Hash.new()
  symd = nil
  font = nil
  pidx = 0

  # Build the /PageLabels number tree, one entry per label range.
  if labels != nil and labels.length > 0
    nTree = "<</Nums[\n"
    labels.each do |rng|
      nTree << "#{rng[:first]} << "
      if rng.has_key? :prefix
        begin
          # If possible, use iso8859-1 (aka PDFDocEncoding) for page labels:
          # it is at least guaranteed to be safe
          if rng[:prefix].respond_to? :encode
            ltitl = rng[:prefix].encode( "iso8859-1", "utf-8" )
          else
            ltitl = Iconv.iconv( "iso8859-1", "utf-8", rng[:prefix] ).first
          end
          nTree << "/P (#{ltitl.to_text}) "
        # Iconv::InvalidCharacter, Iconv::IllegalSequence, Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError
        rescue
          ltitl = Iconv.iconv( "utf-16be", "utf-8", rng[:prefix] ).first
          # If there is no number (just prefix) then put a zero character after the prefix:
          # this makes acroread happy, but prevents displaying the number in evince
          unless rng.has_key? :style
            nTree << "/P (\xFE\xFF#{ltitl.to_text}\x00\x00) "
          # Otherwise put a formally correct Unicode string, which, however, may stumble acroread
          else
            nTree << "/P (\xFE\xFF#{ltitl.to_text}) "
          end
        end
      end
      nTree << "/S /#{rng[:style]} " if rng.has_key? :style
      nTree << "/St #{rng[:start]}" if rng.has_key? :start
      nTree << ">>\n"
    end

    nTree << "]\n>>"
    cat.addToDict('PageLabels', nTree)
    cur_range_id = 0
  end

  # Fonts either come from the text PDF (when a reader is available) or are
  # created here, but only if at least one page has an hOCR file.
  needs_font = false
  fonts = encodings = nil
  unless reader.nil?
    fdict = importPDFFonts( reader,@pdfargs[:textpdf] )
  else
    pagefiles.each do |p|
      unless p.hocr_path.nil?
        needs_font = true
        break
      end
    end

    if needs_font
      fonts = Array.new()
      encodings = [ [' '] ]
      fdict = XObj.new( Hash[] )
      @doc.addObject( fdict )

      descr = XObj.new( Hash[
        'Type'     => '/FontDescriptor',
        'BaseFont' => '/Times-Roman',
        ] )
      @fdata.header.each_key do |key|
        descr.addToDict( key,@fdata.header[key] )
      end
      @doc.addObject( descr )
    end
  end

  pagefiles.each do |p|
    procSet = ['/PDF', '/ImageB']
    c_str = ''
    doc_objs = Array.new()
    lastimg = 0

    # Page size in points (1/72 inch), derived from pixel size and resolution.
    width = p.width; height = p.height
    xres  = p.x_res; yres   = p.y_res
    pwidth  = width.to_f  / xres * 72
    pheight = height.to_f / yres * 72

    p.stencils.each do |s|
      if st_format.eql? 'JBIG2'
        xobj,width,height,xres,yres = loadJBIG2Page( s[:jbig2path],s[:jbig2dict],ref(ocFore.getID) )
      else
        xobj,width,height,xres,yres = loadCCITTPage( s[:path],ref(ocFore.getID) )
      end
      # Stop handling stencils as soon as a loader returns no object.
      break if xobj.nil?

      color = s[:rgb].join(' ') << ' rg'
      doc_objs << xobj

      c_str << "#{color} /Im#{lastimg} Do "
      lastimg += 1
    end

    fg_image = bg_image = nil
    fg_image = loadImage( p.fg_layer,ocFore.getID,procSet ) unless p.fg_layer.nil?
    bg_image = loadImage( p.bg_layer,ocBack.getID,procSet ) unless p.bg_layer.nil?

    contents = XObj.new(Hash[
      'Filter' => '/FlateDecode'
    ])
    resobj = XObj.new(Hash.new())
    resources = XObj.new(Hash[
      'XObject' => ref(resobj.getID)
    ])

    unless fg_image.nil?
      # With a foreground image the first stencil stops being a painted image
      # mask and becomes the soft mask (SMask) of the foreground image.
      xobj = doc_objs[0]
      fg_image.addToDict('SMask', ref(xobj.getID))
      xobj.removeFromDict('ImageMask')
      xobj.addToDict('Decode', '[1 0]')
      resobj.addToDict('Im0', ref(fg_image.getID))
      doc_objs << fg_image
      c_str = '/Im0 Do '
    else
      doc_objs.each_index do |i|
        resobj.addToDict( "Im#{i}", ref(doc_objs[i].getID) )
      end
    end

    unless bg_image.nil?
      # The background is painted first, so its operator is prepended.
      c_str = "/Im#{resobj.dictLength} Do " << c_str
      resobj.addToDict( "Im#{resobj.dictLength}", ref(bg_image.getID) )
      doc_objs << bg_image
    end
    c_str = sprintf( "q %.2f 0 0 %.2f 0 0 cm %sQ",pwidth,pheight,c_str )

    doc_objs.concat( [contents, resobj, resources] )

    # Text layer: taken from the text PDF if there is one, else from hOCR.
    hocr = nil
    if not reader.nil?
      procSet << '/Text'
      c_str   << getPDFText( reader,pidx,@pdfargs[:debug] )
    elsif not p.hocr_path.nil?
      hocr = open( p.hocr_path ) { |f| Nokogiri::HTML( f ) }
      procSet << '/Text'
      c_str   << getHOCRText( hocr,pheight,72.0/xres,72.0/yres,encodings )
    end

    # In debug mode the content stream is stored uncompressed.
    unless @pdfargs[:debug]
      contents.reinit( Hash[
        'Filter' => '/FlateDecode'
      ], Zlib::Deflate.deflate( c_str,9 ) )
    else
      contents.reinit( Hash[], c_str )
    end
    resources.addToDict( 'ProcSet', "[ #{procSet.join(' ')} ]" )
    resources.addToDict( 'Font', ref( fdict.getID ) ) unless hocr.nil? and reader.nil?

    page = XObj.new(Hash[
      'Type'      => '/Page',
      'Parent'    => "#{pages.getID} 0 R",
      'MediaBox'  => sprintf( "[ 0 0 %.02f %.02f ]",pwidth,pheight ),
      'Contents'  => ref( contents.getID ),
      'Resources' => ref( resources.getID )
    ])
    # By default acroread uses /DeviceCMYK as a transparency blending space,
    # so adding an SMask image to a page would result in colors being shifted,
    # unless we take special care of this. For more details see
    # http://comments.gmane.org/gmane.comp.tex.pdftex/3747
    unless fg_image.nil?
      cspace = '/DeviceRGB'
      cspace = fg_image.getFromDict( 'ColorSpace' ) if fg_image.hasInDict( 'ColorSpace' )
      page.addToDict( 'Group', "<< /S /Transparency /CS #{cspace} >>" )
    end
    doc_objs  << page
    doc_objs.each{ |x| @doc.addObject(x) }
    page_objs << page

    pages.addToDict( 'Count', page_objs.length )
    pages.addToDict( 'Kids', '[' << page_objs.map{|x| ref(x.getID).to_s}.join(' ') << ']' )

    # Remember the page object under its label (or its 1-based number when no
    # labels are in use); this map is handed to getOutlineObjs below.
    pkey = (pidx + 1).to_s
    pkey = labels.getPageLabel( cur_range_id,pidx ) if labels != nil and labels.length > 0
    pages_by_num[pkey] = page.getID
    pidx += 1
    if labels != nil and labels.length > 0
      # Move on to the next label range once its first page index is reached.
      if cur_range_id < labels.length - 1 and labels[cur_range_id + 1][:first] == pidx
        cur_range_id += 1
      end
    end

    $stderr.puts("Processed #{p.name}\n")
    $stderr.puts("  Added background image from #{p.bg_layer}\n") unless bg_image.nil?
    $stderr.puts("  Added foreground image from #{p.fg_layer}\n") unless fg_image.nil?
  end

  # One font object per encoding collected while processing the pages.
  if needs_font
    fidx = 1
    encodings.each do |enc|
      font = addFont( descr,enc,"Fnt#{fidx}" )
      fdict.addToDict( "Fnt#{fidx}",ref(font.getID) )
      fonts << font
      fidx += 1
    end
  end

  if toc != nil and toc.length > 0
    getOutlineObjs( toc,pages_by_num,page_objs[0].getID )
    cat.addToDict('Outlines', ref(toc[0][:pdfobj].getID))
    cat.addToDict('PageMode', "/UseOutlines")
    vpref.addToDict('NonFullScreenPageMode', "/UseOutlines")
  end

  if @pdfargs[:delfiles]
    pagefiles.each do |p|
      $stderr.puts( "Cleaning up temporary files for #{p.name}" )
      safe_delete( p.fg_layer ) if p.fg_created
      safe_delete( p.bg_layer ) if p.bg_created
      p.stencils.each do |s|
        safe_delete( s[:path] ) if s[:created]
      end
    end
  end
end

Class: PDFBeads::PDFBuilder

Overview

Defined Under Namespace

Constant Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(pdfargs) ⇒ PDFBuilder

Instance Method Details

#output(outpath) ⇒ Object

#process(pagefiles, st_format) ⇒ Object

#initialize(pdfargs) ⇒ `PDFBuilder`

#output(outpath) ⇒ `Object`

#process(pagefiles, st_format) ⇒ `Object`