Class: CBETA::HTMLToPDF

Inherits:
Object
  • Object
show all
Defined in:
lib/cbeta/html_to_pdf.rb

Instance Method Summary collapse

Constructor Details

#initialize(input, output, converter) ⇒ HTMLToPDF

Returns a new instance of HTMLToPDF.

Examples:

c = CBETA::HTMLToPDF.new('/temp/cbeta-html', '/temp/cbeta-pdf', "prince %{in} -o %{out}")

Parameters:

  • input (String)

    folder of source HTML; the HTML can be produced by CBETA::P5aToHTMLForPDF.

  • output (String)

    output folder

  • converter (String)

    shell command to convert HTML to PDF

    • suggestion: www.princexml.com/

    • wkhtmltopdf has font problems when displaying Unicode Ext-B (CJK Extension B) characters



10
11
12
13
14
# File 'lib/cbeta/html_to_pdf.rb', line 10

# Stores the conversion settings; nothing is read from or written to disk here.
#
# @param input [String] folder of source HTML
# @param output [String] folder the PDF files are written under
# @param converter [String] shell command template; convert_file fills in
#   the %{in} and %{out} placeholders
def initialize(input, output, converter)
  @output = output
  @input = input
  @converter = converter
end

Instance Method Details

#convert(target = nil) ⇒ Object

Convert CBETA HTML to PDF

T 是大正藏的 ID, CBETA 的藏經 ID 系統請參考: www.cbeta.org/format/id.php

Examples:

To convert Taisho (大正藏) Volume 1:


c = CBETA::HTMLToPDF.new('/PATH/TO/CBETA/HTML', '/OUTPUT/FOLDER', "prince %{in} -o %{out}")
c.convert('T01')

To convert all of Taisho (大正藏):


c = CBETA::HTMLToPDF.new('/PATH/TO/CBETA/HTML', '/OUTPUT/FOLDER', "prince %{in} -o %{out}")
c.convert('T')

To convert Taisho Vol. 5 through 7:


c = CBETA::HTMLToPDF.new('/PATH/TO/CBETA/HTML', '/OUTPUT/FOLDER', "prince %{in} -o %{out}")
c.convert('T05..T07')


34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/cbeta/html_to_pdf.rb', line 34

# Dispatches a conversion request according to the shape of +target+.
#
# * nil            -> convert_all (NOTE(review): convert_all is not shown in
#                     this section; confirm it is defined elsewhere)
# * one character  -> convert_collection, e.g. 'T'
# * 'A..B'         -> convert_vols(A, B), e.g. 'T05..T07'
# * anything else  -> convert_vol, e.g. 'T01'
#
# The target is upcased before dispatch. A target that contains '..' but
# does not match the range pattern is ignored and nil is returned.
#
# @param target [String, nil] collection ID, volume ID, or volume range
# @return [Object] whatever the selected helper returns
def convert(target=nil)
  return convert_all if target.nil?

  id = target.upcase
  return convert_collection(id) if id.size == 1
  return convert_vol(id) unless id.include?('..')

  # Exactly two dot-free parts separated by '..'
  range = /^([^\.]+?)\.\.([^\.]+)$/.match(id)
  convert_vols(range[1], range[2]) if range
end

#convert_collection(c) ⇒ Object



51
52
53
54
55
56
57
58
59
# File 'lib/cbeta/html_to_pdf.rb', line 51

# Converts every volume folder found under <input>/<c>.
#
# Entries are visited in sorted order so runs are reproducible. Hidden
# entries (names starting with '.') and anything that is not a directory
# are skipped, because convert_vol lists its argument as a folder and
# would fail on a stray file.
#
# @param c [String] collection ID, e.g. 'T'; also stored in @series
# @return [nil]
def convert_collection(c)
  @series = c
  puts 'convert_collection ' + c
  folder = File.join(@input, @series)
  Dir.entries(folder).sort.each do |vol|
    next if vol.start_with?('.')
    next unless File.directory?(File.join(folder, vol))

    convert_vol(vol)
  end
  nil
end

#convert_file(html_fn, pdf_fn) ⇒ Object



61
62
63
64
65
# File 'lib/cbeta/html_to_pdf.rb', line 61

# Runs the configured converter command for a single HTML file.
#
# The command line is built by substituting %{in} and %{out} in @converter
# and is executed via backticks. A non-zero exit status is reported on
# stderr instead of being dropped silently.
#
# NOTE(review): the file names are interpolated into the command unescaped,
# so paths containing spaces or shell metacharacters rely on the quoting in
# the converter template.
#
# @param html_fn [String] path of the source HTML file
# @param pdf_fn [String] path of the PDF file to produce
# @return [String] standard output captured from the converter command
def convert_file(html_fn, pdf_fn)
  puts "convert file: #{html_fn} to #{pdf_fn}"
  cmd = @converter % { in: html_fn, out: pdf_fn }
  output = `#{cmd}`
  status = $?
  warn "convert file failed (#{status}): #{cmd}" unless status.nil? || status.success?
  output
end

#convert_vol(arg) ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/cbeta/html_to_pdf.rb', line 67

# Converts every work folder of one volume.
#
# The first character of the upcased volume ID selects the collection
# folder. Sources are read from <input>/<collection>/<volume>/<entry>/main.htm
# and written to <output>/<collection>/<volume>/<entry>.pdf. The output
# folder is created if it does not exist yet.
#
# @param arg [String] volume ID, e.g. 'T01' (case-insensitive)
# @return [Array<String>] the sorted directory listing of the volume folder
def convert_vol(arg)
  vol_id = arg.upcase
  canon_id = vol_id[0]
  source_dir = File.join(@input, canon_id, vol_id)
  target_dir = File.join(@output, canon_id, vol_id)

  FileUtils.mkdir_p(target_dir) unless Dir.exist?(target_dir)

  listing = Dir.entries(source_dir).sort
  # Dot-entries ('.', '..', hidden files) are not works.
  listing.reject { |name| name.start_with?('.') }.each do |work|
    html_path = File.join(source_dir, work, 'main.htm')
    pdf_path = File.join(target_dir, "#{work}.pdf")
    convert_file(html_path, pdf_path)
  end
  listing
end

#convert_vols(v1, v2) ⇒ Object



83
84
85
86
87
88
89
90
91
92
# File 'lib/cbeta/html_to_pdf.rb', line 83

# Converts every volume folder whose name lies between v1 and v2
# (inclusive, plain string comparison).
#
# The collection folder is taken from the first character of v1. Entries
# are visited in sorted order so the volumes are converted in sequence.
# Hidden entries and anything that is not a directory are skipped, because
# convert_vol lists its argument as a folder and would fail on a stray
# file whose name happens to fall inside the range.
#
# @param v1 [String] first volume ID of the range, e.g. 'T05'
# @param v2 [String] last volume ID of the range, e.g. 'T07'
# @return [nil]
def convert_vols(v1, v2)
  puts "convert volumns: #{v1}..#{v2}"
  @series = v1[0]
  folder = File.join(@input, @series)
  Dir.entries(folder).sort.each do |vol|
    next if vol.start_with?('.')
    next unless vol.between?(v1, v2)
    next unless File.directory?(File.join(folder, vol))

    convert_vol(vol)
  end
  nil
end