Class: Aozora4Reader

Inherits:
Object
  • Object
show all
Defined in:
lib/aozora4reader.rb

Constant Summary collapse

PreambleLineNumber =
13
KANJIPAT =
"[々〇〻\u3400-\u9FFF\uF900-\uFAFF※ヶ〆]"
MAX_SAGE =
15

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file) ⇒ Aozora4Reader

Returns a new instance of Aozora4Reader.



22
23
24
25
26
27
28
29
30
31
# File 'lib/aozora4reader.rb', line 22

def initialize(file)
  @inputfile_name = file

  @jisage = false
  @log_text = []
  @line_num=0
  @gaiji = {}
  @gaiji2 = {}
  @sayuuchuuou = false
end

Class Method Details

.a4r(file) ⇒ Object



18
19
20
# File 'lib/aozora4reader.rb', line 18

def self.a4r(file)
  self.new(file).main
end

Instance Method Details

#load_gaijiObject

外字用ハッシュを作成



333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
# File 'lib/aozora4reader.rb', line 333

def load_gaiji
  datadir = File.dirname(__FILE__)+"/../data"
  File.open(datadir+"/gaiji.txt", "r:UTF-8") do |f|
    while gaiji_line = f.gets
      gaiji_line.chomp!
      key, data = gaiji_line.split
      @gaiji[key] = data
    end
  end

  File.open(datadir+"/gaiji2.txt", "r:UTF-8") do |f|
    while gaiji_line = f.gets
      gaiji_line.chomp!
      key, data = gaiji_line.split
      data.gsub(/#.*$/,'')
      @gaiji2[key] = data
    end
  end

end

#mainObject

メインパート



357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
# File 'lib/aozora4reader.rb', line 357

def main
  load_gaiji()

  # 入出力ファイルの定義
  outputfile_name = @inputfile_name.sub(/\.txt$/, ".tex")
  inputfile = File.open(@inputfile_name, "r:SJIS")
  outputfile = File.open(outputfile_name, "w:UTF-8")

  # プリアンブルの処理
  empty_line = 0
  in_note = false
   = []
  while empty_line < 2
    line = inputfile.gets.chomp
    line = NKF::nkf('-wS', line)
    if in_note
      if line =~ /^-+$/
        in_note = false
        break
      end
    else
      if line =~ /^-+$/
        in_note = true
      else
        if line =~ /^$/
          empty_line += 1
        else
          if line =~ /《.*?》/
            translate_ruby(line)
          end
           << line
        end
      end
    end
  end

  @line_num +=  .size
  @title = normalize(.shift)
  case .size
  when 1
    @author = normalize(.shift)
  when 2
    @subtitle = normalize(.shift)
    @author = normalize(.shift)
  when 3
    @subtitle = normalize(.shift)
    @author = normalize(.shift)
    @subauthor = normalize(.shift)
  else
    @subtitle = normalize(.shift)
    @meta_data = []
    until .empty?
      @meta_data << normalize(.shift)
    end
    @subauthor = @meta_data.pop
    @author = @meta_data.pop
  end

  outputfile.write(preamble())

  outputfile.print "\\title{"+@title+"}\n"
  outputfile.print "\\subtitle{"+@subtitle+"}\n" if @subtitle
  outputfile.print "\\author{"+@author+"}\n"
  outputfile.print "\\subauthor{"+@subauthor+"}\n" if @subauthor

  if @meta_data
    @meta_data.each do |data|
      outputfile.print "\\metadata{"+data+"}\n"
    end
  end
  outputfile.print "\\date{}\n"

  # 本文の処理
  outputfile.print "\\begin{document}\n\\maketitle\n"

  @line_num += PreambleLineNumber
  while line = inputfile.gets
    @line_num += 1
    line.chomp!
    line = NKF::nkf('-wS', line)

    break if line =~ /^底本/
    if line =~ /^ 「/
      line.sub!(/^ 「/, "\\mbox{ }\\kern0mm\\inhibitglue「")
    end
    if line =~ /[ワヰヱヲ]゛/
      line.gsub!(/ワ゛/, "\\ajLig{ワ゛}")
      line.gsub!(/ヰ゛/, "\\ajLig{ヰ゛}")
      line.gsub!(/ヱ゛/, "\\ajLig{ヱ゛}")
      line.gsub!(/ヲ゛/, "\\ajLig{ヲ゛}")
    end
    if line =~ /[?!] /
      line.gsub!(/([?!]) /, '\1{}')
    end
    if line =~ /——/
      line.gsub!(/——/, "\\——{}")
    end
    if line =~ //\/
      line.gsub!(//\/, "\\ajKunoji{}")
    end
    if line =~ //″\/
      line.gsub!(//″\/, "\\ajDKunoji{}")
    end

=begin
	if line =~ /^  +.+/
line.gsub!(/^  +([一二三四五六七八九〇十].*)/, '\\section*{\1}')
	end
=end

    while line =~ /(.+?)[#(「\1」は横?[1|一]文字[^]]*?)]/
      line = line.sub(/(.+?)[#(「\1」は横?[1|一]文字[^]]*?)]/){"\\ajLig{"+to_single_byte($1)+"}"}
    end
    if line =~ /[#改丁.*?]/
      if @sayuuchuuou
        line = "\\vspace*{\\stretch{1}}" + line
        @sayuuchuuou = false
      end
      line.sub!(/[#改丁.*?]/, "\\cleardoublepage")
    end
    if line =~ /[#改[頁|ページ].*?]/
      if @sayuuchuuou
        line = "\\vspace*{\\stretch{1}}" + line
        @sayuuchuuou = false
      end
      line.sub!(/[#改[頁|ページ].*?]/, "\\clearpage")
    end

    if line =~ /〔.*?〕/
      translate_accent(line)
    end

    if line =~ //
      translate_gaiji(line)
    end
    if line =~ /《.*?》/
      translate_ruby(line)
    end
    if line =~ /[#(.+?)傍点]/
      translate_bouten(line)
    end
    if line =~ /[#傍点].+?[#傍点終わり]/
      translate_bouten(line)
    end
    if line =~ /[#「(.+?)」に(?:二重)?[傍鎖破波]線]/
      translate_bousen(line)
    end
    if line =~ /[#この行.*?([1234567890一二三四五六七八九〇十]*)字下げ]/
      outputfile.print "\\begin{jisage}{"+to_single_byte($1)+"}\n"
      line = line.sub(/[#この行.*?字下げ]/, "")+"\n\\end{jisage}"
      @line_num += 2
    end

    if line =~ /[#ここから地から.+字上げ]/
      line.sub!(/[#ここから地から([1234567890一二三四五六七八九〇十]*)字上げ]/){"\\begin{flushright}\\advance\\rightskip"+to_single_byte($1)+"zw"}
      @jisage = true
    end
    if line =~ /[#ここで字上げ終わり]/
      line.sub!(/[#ここで字上げ終わり]/){"\\end{flushright}"}
      @jisage = false
    end

    if line =~ /[#ここから改行天付き、折り返して.*?字下げ]/
      if @jisage
        outputfile.print "\\end{jisage}\n"
        @line_num += 1
      end
      line.sub!(/[#ここから改行天付き、折り返して([1234567890一二三四五六七八九〇十]*)字下げ]/){"\\begin{jisage}{#{to_single_byte($1)}}\\setlength\\parindent{-"+to_single_byte($1)+"zw}"}
      @jisage = true
    end

    if line =~ /[#.*?字下げ[^]]*?(?:終わり|まで)[^]]*?]/ 
      line = line.sub(/[#.*?字下げ.*?(?:終わり|まで).*?]/, "")+"\\end{jisage}"
      @jisage = false
    end
    if line =~ /[#(ここから|これより|ここより|以下).+字下げ.*?]/
      if @jisage
        outputfile.print "\\end{jisage}\n"
        @line_num += 1
      end
      line.sub!(/[#(ここから|これより|ここより|以下).*?([1234567890一二三四五六七八九〇十]*)字下げ.*?]/){"\\begin{jisage}{"+to_single_byte($2)+"}"}
      @jisage = true
    end
    if line =~ /^[#ここから地付き]$/
      @jisage = true
      line = "\\begin{flushright}"
    end
    if line =~ /^[#ここで地付き終わり]$/
      line = "\\end{flushright}"
      @jisage = false
    end

    if line =~ /[#.*?地付き.*?]$/
      line = "\\begin{flushright}\n"+line.sub(/[#.*?地付き.*?]$/, "\\end{flushright}")
      @line_num += 1
    elsif line =~ /[#.*?地付き.*?]/
      line = line.sub(/[#.*?地付き.*?]/, "\\begin{flushright}\n")+"\\end{flushright}"
      @line_num += 1
    end
    if line =~ /[#.*?(?:行末|地)(?:から|より).*?([1234567890一二三四五六七八九〇十]*)字上.*?]$$/
      line = "\\begin{flushright}\\advance\\rightskip"+to_single_byte($1)+"zw\n"+line.sub(/[#.*?(?:行末|地)(?:から|より).*?字上.*?]$/, "\\end{flushright}")
      @line_num += 1
    elsif line =~ /^(.*?)[#.*?(?:行末|地)(?:から|より).*?([1234567890一二三四五六七八九〇十]*)字上.*?](.*)$/
      line = $1+"\\begin{flushright}\\advance\\rightskip"+to_single_byte($2)+"zw\n"+$3+"\\end{flushright}"
      @line_num += 1
    end
    if line =~ /[#「.+?」は返り点]/
      line.gsub!(/(.+)[#「\1」は返り点]/, '\\kaeriten{\ajKunten{\1}}')
    end
    if line =~ /[#[一二三上中下甲乙丙丁レ]*]/
      line.gsub!(/[#([一二三上中下甲乙丙丁レ]*)]/, '\\kaeriten{\ajKunten{\1}}')
    end
    if line =~ /[#(.*?)]/
      line.gsub!(/[#((.*?))]/, '\\okurigana{\1}')
    end
    if line =~ /[#「.+?」.*?ママ.*?注記]/
      line.gsub!(/(.+)[#「\1」.*?ママ.*?注記]/, '\\ruby{\1}{ママ}')
    end

    if line =~ /[#[^]]+(([^)]+.png).*?)[^]]+]/
      line.gsub!(/[#[^]]+(([^)]+.png).*?)[^]]+]/, '\\sashie{\1}')
    end

    if line =~ /[#([1234567890一二三四五六七八九〇十]*)字下げ]/
      num = to_single_byte($1).to_i
      if num > MAX_SAGE
        num = MAX_SAGE
      end
      outputfile.print "\\begin{jisage}{#{num}}\n"
      line = line.sub(/[#.*?字下げ]/, "")+"\n\\end{jisage}"
    end

    ## ちょっと汚いけど二重指定の対策
    if line =~ /[#「(.*?)」は縦中横][#「(.*?)」は中見出し]/
      line.gsub!(/(.*?)[#「(\1)」は縦中横][#「(\1)」は中見出し]/){"{\\large \\rensuji{#{$1}}}"}
    end

    if line =~ /[#「(.*?)」は大見出し]/
      line.gsub!(/(.*?)[#「(.*?)」は大見出し]/){"{\\Large #{$1}}"}
    end
    if line =~ /[#「(.*?)」は中見出し]/
      line.gsub!(/(.*?)[#「(.*?)」は中見出し]/){"{\\large #{$1}}"}
    end
    if line =~ /[#「(.*?)」は小見出し]/
      line.gsub!(/(.*?)[#「(.*?)」は小見出し]/){"{\\gtfamily #{$1}}"}
    end
    if line =~ /[#小見出し](.*?)[#小見出し終わり]/
      line.gsub!(/[#小見出し](.*?)[#小見出し終わり]/){"{\\gtfamily #{$1}}"}
    end
    if line =~ /[#中見出し](.*?)[#中見出し終わり]/
      line.gsub!(/[#中見出し](.*?)[#中見出し終わり]/){"{\\large #{$1}}"}
    end
    if line =~ /[#大見出し](.*?)[#大見出し終わり]/
      line.gsub!(/[#大見出し](.*?)[#大見出し終わり]/){"{\\Large #{$1}}"}
    end



    if line =~ /[#ここから中見出し]/
      line.gsub!(/[#ここから中見出し]/){"{\\large"}
    end
    if line =~ /[#ここで中見出し終わり]/
      line.gsub!(/[#ここで中見出し終わり]/){"}"}
    end

    if line =~ /[#ページの左右中央]/
      line.gsub!(/[#ページの左右中央]/, "\\vspace*{\\stretch{1}}")
      @sayuuchuuou = true
    end

    ## XXX 字詰めは1行の文字数が少ないので無視
    if line =~ /[#ここから([1234567890一二三四五六七八九〇十]*)字詰め]/
      line.gsub!(/[#ここから([1234567890一二三四五六七八九〇十]*)字詰め]/, "")
    end
    if line =~ /[#ここで字詰め終わり]/
      line.gsub!(/[#ここで字詰め終わり]/, "")
    end

    # XXX 割り注も無視
    if line =~ /[#ここから割り注]/
      line.gsub!(/[#ここから割り注]/, "")
    end
    if line =~ /[#ここで割り注終わり]/
      line.gsub!(/[#ここで割り注終わり]/, "")
    end

    if line =~ /[#「(.*?)」は太字]/
      line.gsub!(/(.+)[#「\1」は太字]/,'{\\textbf{\1}}')
    end
    if line =~ /[#「.+?」は縦中横]/
      line.gsub!(/(.+)[#「\1」は縦中横]/, '\\rensuji{\1}')
    end
    if line =~ /[#「(1)(/)(\d+)」は分数]/
      bunshi = to_single_byte($1)
      bunbo = $3
      line.gsub!(/(.+)[#「.+?」は分数]/, "\\rensuji{#{bunshi}/#{bunbo}}")
    end
    if line =~ /[#「.+?」は罫囲み]/
      line.gsub!(/(.+)[#「\1」は罫囲み]/, '\\fbox{\1}')
    end
    if line =~ /[#「(.+?)」は(本文より)?([123456])段階大きな文字]/
      line.gsub!(/([^[]+?)[#「\1」は(本文より)?([123456])段階大きな文字]/) {
        num = to_single_byte($3).to_i
        case num
        when 1
          "{\\large #{$1}}"
        when 2
          "{\\Large #{$1}}"
        when 3
          "{\\LARGE #{$1}}"
        when 4
          "{\\huge #{$1}}"
        when 5
          "{\\Huge #{$1}}"
        when 6
          "{\\Huge #{$1}}"
        end
      }
    end

    if line =~ /[#「.+?」は斜体]/
      line.gsub!(/(.+)[#「\1」は斜体]/){
        shatai = to_single_byte($1).tr("abcdefghijklmnopqrstuvwxyz","abcdefghijklmnopqrstuvwxyz")
        "\\rensuji{\\textsl{"+shatai+"}}"
      }
    end
    if line =~ /[#「[0-90-9]」は下付き小文字]/
      line.gsub!(/([A-Za-za-zA-Zαβδγ])([0-90-9])[#「\2」は下付き小文字]/){
        "$"+$1+"_{"+to_single_byte($2)+"}$"
      }
    end
    if line =~ /([^ ]*)[#ゴシック体]$/
      line.gsub!(/([^ ]*)[#ゴシック体]/){"{\\gtfamily #{$1}}"}
    end
    if line =~ /[#「.+?」はゴシック体]/
      line.gsub!(/(.+?)[#「\1」はゴシック体]/){"{\\gtfamily #{$1}}"}
    end

    if line =~ /[#ここから横組み](.*?)[#ここで横組み終わり]/
      line.gsub!(/[#ここから横組み](.*?)[#ここで横組み終わり]/){
        yoko_str = $1
        yoko_str.gsub!(/π/,"\\pi ")
        yoko_str.gsub!(//,"=")
        yoko_str.gsub!(/(\d+)[#「\1」は指数]/){"^{#{$1}}"}
        "$"+yoko_str+"$"
      }
    end
    line.tr!("┌┐┘└│─┏┓┛┗┃━→","┐┘└┌─│┓┛┗┏━┃↓")
    if line =~ /[#改段]/
      line.sub!(/[#改段]/, "\\clearpage")
    end
    if line =~ /[aioeu]\^/i
      line.gsub!(/([aioeu])\^/i){ "\\\^{#{$1}}"}
    end
    if line =~ /[aioeu]\'/i
      line.gsub!(/([aioeu])\'/i){ "\\\'{#{$1}}"}
    end
    if line =~ /[#天から.*?([1234567890一二三四五六七八九〇十]*)字下げ]/
      num = to_single_byte($1).to_i
      if num > MAX_SAGE
        num = MAX_SAGE
      end
      outputfile.print "\\begin{jisage}{#{num}}\n"
      line = line.sub(/[#天から.*?字下げ]/, "")+"\n\\end{jisage}"
    end

    line.gsub!(/[#図形 □(四角)に内接する◆]/, '{\setlength{\fboxsep}{0pt}\fbox{◆}}')

    if line =~ /[#[^]]+?]/
      line.gsub!(/[#([^]]+?)]/, '\\endnote{\1}')
    end
    if line =~ /\\[a-z]*?bou/
      tuning_bou(line)
    end
    if line =~ /\\ajD?Kunoji\{\}\}/
      line.gsub!(/(\\ajD?Kunoji)\{\}\}/, '\1}')
    end
    if line =~ /\\ruby/
      tuning_ruby(line)
    end
    if line =~ /^$/
      line = " "
    end
    outputfile.print normalize(line)+"\n"
  end

  # 底本の処理
  outputfile.write(postamble())
  outputfile.print normalize(line)+"\n"
  while line = inputfile.gets
    line.chomp!
    line = NKF::nkf('-wS', line)
    outputfile.print normalize(line)+"\n"
  end
  outputfile.print "\n\\end{minipage}\n\\end{teihon}\n\\end{document}\n"
  if @log_text.size > 0
    until @log_text.empty?
      outputfile.print @log_text.shift
    end
  end
end

#normalize(l) ⇒ Object

UTF-8で出力



34
35
36
37
38
39
# File 'lib/aozora4reader.rb', line 34

def normalize(l)
  ##l.gsub!(/&/, '\\\\&')
  l.gsub!(/#/,'\\#')
  l.gsub!(/_/,'\\textunderscore{}')
  l.to_s
end

#postambleObject

底本の表示用



105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/aozora4reader.rb', line 105

def postamble
  str = <<"END_OF_POST"
\\theendnotes
\\begin{teihon}
\\clearpage\\null\\newpage\\thispagestyle{empty}
\\begin{minipage}<y>{\\textheight}
\\vspace{1\\baselineskip}
\\scriptsize
END_OF_POST

  str
end

#preambleObject

プリアンブルの出力



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/aozora4reader.rb', line 72

def preamble
  title = remove_ruby(@title)
  author = remove_ruby(@author)
  str = <<"END_OF_PRE"
\\documentclass[a5paper]{tbook}
%\\documentclass[a5paper, twocolumn]{tbook}
%\\usepackage[deluxe]{otf}
\\usepackage[expert, deluxe]{otf}
%\\usepackage{utf}
% Bookmarkの文字化け対策(日本語向け)
\\ifnum 46273=\\euc"B4C1 % 46273 == 0xB4C1 == 漢(EUC-JP)
\\usepackage{atbegshi}%
\\AtBeginShipoutFirst{\\special{pdf:tounicode EUC-UCS2}}%
\\else
\\usepackage{atbegshi}%
\\AtBeginShipoutFirst{\\special{pdf:tounicode 90ms-RKSJ-UCS2}}%
\\fi
\\usepackage[dvipdfm,bookmarks=false,bookmarksnumbered=false,hyperfootnotes=false,%
          pdftitle={#{title}},%
          pdfauthor={#{author}}]{hyperref}
\\usepackage{furikana}
\\usepackage{type1cm}
\\usepackage[size=large]{aozora4reader}
\\def\\rubykatuji{\\rubyfamily\\tiny}
%\\def\\rubykatuji{\\tiny}%for UTF package
%\\renewenvironment{teihon}{\\comment}{\\endcomment}

END_OF_PRE

  str
end

#remove_ruby(str) ⇒ Object

ルビの削除(表題等)



67
68
69
# File 'lib/aozora4reader.rb', line 67

def remove_ruby(str)
  str.gsub(/\\ruby{([^}]+)}{[^}]*}/i){$1}
end

#to_single_byte(str) ⇒ Object

全角→半角



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/aozora4reader.rb', line 42

def to_single_byte(str)
  s = str.dup
  if s =~ /[0-9]/
    s.tr!("1234567890", "1234567890")
  elsif s =~ /[一二三四五六七八九〇]/
    s.tr!("一二三四五六七八九〇", "1234567890")
  end
  case s
  when /\d十\d/
    s.sub!(/(\d)十(\d)/, '\1\2')
  when /\d十/
    s.sub!(/(\d)十/, '\{1}0')
  when /十\d/
    s.sub!(/十(\d)/, '1\1')
  when //
    s.sub!(//, "10")
  end
  if s =~/[!?]/
    s.tr!("!?", "!?")
  end

  return s
end

#translate_accent(l) ⇒ Object



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/aozora4reader.rb', line 121

def translate_accent(l)
  l.gsub!(/([ij]):/){"\\\"{\\#{$1}}"}
  l.gsub!(/([AIOEUaioeu])(['`~^])/){"\\#$2{#$1}"}
  l.gsub!(/([AIOEUaioeu]):/){"\\\"{#$1}"}
  l.gsub!(/([AIOEUaioeu])_/){"\\={#$1}"}
  l.gsub!(/([!?])@/){"#$1'"}
  l.gsub!(/([Aa])&/){"\\r{#$1}"}
  l.gsub!(/AE&/){"\\AE{}"}
  l.gsub!(/ae&/){"\\ae{}"}
  l.gsub!(/OE&/){"\\OE{}"}
  l.gsub!(/oe&/){"\\oe{}"}
  l.gsub!(/s&/){"\\ss{}"}
  l.gsub!(/([cC]),/){"\\c{#$1}"}
  l.gsub!(//,'')
  l.gsub!(//,'')
  return l
end

#translate_bousen(l) ⇒ Object

傍線の処理用



282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
# File 'lib/aozora4reader.rb', line 282

def translate_bousen(l)
  if l =~ /[#「.+?」に傍線]/
    l.gsub!(/(.+?)[#「\1」に傍線]/, '\\bousen{\1}')
  end
  if l =~ /[#「.+?」に二重傍線]/
    l.gsub!(/(.+?)[#「\1」に二重傍線]/, '\\bousen{\1}')
  end
  if l =~ /[#「.+?」に鎖線]/
    l.gsub!(/(.+?)[#「\1」に鎖線]/, '\\bousen{\1}')
  end
  if l =~ /[#「.+?」に破線]/
    l.gsub!(/(.+?)[#「\1」に破線]/, '\\bousen{\1}')
  end
  if l =~ /[#「.+?」に波線]/
    l.gsub!(/(.+?)[#「\1」に波線]/, '\\bousen{\1}')
  end
  return l
end

#translate_bouten(l) ⇒ Object

傍点の処理用



248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
# File 'lib/aozora4reader.rb', line 248

def translate_bouten(l)
  bouten_list = [
                 ["傍点", "bou"],
                 ["白ゴマ傍点","sirogomabou"],
                 ["丸傍点","marubou"],
                 ["白丸傍点","siromarubou"],
                 ["黒三角傍点","kurosankakubou"],
                 ["白三角傍点","sirosankakubou"],
                 ["二重丸傍点","nijyuumarubou"],
                 ["蛇の目傍点","jyanomebou"]]

  bouten_list.each{ |name, fun|
    if l =~ /[#「.+?」に#{name}/
      l.gsub!(/(.+?)[#.*?「\1」に#{name}/){
        str = $1
        str.gsub!(/(\\UTF{.+?})/){ "{"+$1+"}"}
        str.gsub!(/(\\ruby{.+?}{.+?})/i){ "{"+$1+"}"}
        "\\#{fun}{"+str+"}"
      }
    end
  }

  if l =~ /[#傍点].+?[#傍点終わり]/
    l.gsub!(/[#傍点](.+?)[#傍点終わり]/){
      str = $1
      str.gsub!(/(\\UTF{.+?})/){ "{"+$1+"}"}
      str.gsub!(/(\\ruby{.+?}{.+?})/i){ "{"+$1+"}"}
      "\\bou{"+str+"}"
    }
  end
  return l
end

#translate_gaiji(l) ⇒ Object

外字の処理用



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/aozora4reader.rb', line 141

def translate_gaiji(l)
  if l =~/※[#([^]]*)、([^、]]*)]/
    if @gaiji2[$1]
      l.gsub!(/※[#([^]]*)、([^、]]*)]/){@gaiji2[$1]}
    end
  end
  ## ※[#「姉」の正字、「女+※[#第3水準1-85-57]のつくり」、256-下-16]
  if l =~/※[#([^]]*※[#[^]]*][^]]*)、([^、]]*)]/
    if @gaiji2[$1]
      l.gsub!(/※[#([^]]*※[#[^]]*][^]]*)、([^、]]*)]/){@gaiji2[$1]}
    end
  end
  ## ※[#「さんずい+闊」]
  if l =~ /※[#「([^]]+?)」]/
    if @gaiji2[$1]
      l.gsub!(/※[#「([^]]+?)」]/){@gaiji2[$1]}
    end
  end

  if l =~ /※[#[^]]*?※[#[^]]*?[12]\-\d{1,2}\-\d{1,2}[^]]*?][^]]*?]/
    l.gsub!(/※[#([^]]*?)※[#([^]]*?([12]\-\d{1,2}\-\d{1,2})[^]]*?)]([^]]*?)]/){"※\\footnote{#$1"+@gaiji[$3]+"#$4}"}
  end
  if l =~ /※[#[^]]*?([12]\-\d{1,2}\-\d{1,2})[^]]*?]/
    if @gaiji[$1]
      l.gsub!(/※[#([^]]*?([12]\-\d{1,2}\-\d{1,2})[^]]*?)]/){@gaiji[$2]}
    end
  end
  if l =~ /※[#濁点付き片仮名([ワヰヱヲ])、.*?]/
    l.gsub!(/※[#濁点付き片仮名([ワヰヱヲ])、.*?]/){ "\\ajLig{#{$1}゛}"}
  end
  if l =~ /※[#感嘆符三つ.*]/
    l.gsub!(/※[#感嘆符三つ.*?]/){ "\\rensuji{!!!}"}
  end

  if l =~ /※[#.*?([A-Za-z0-9_]+\.png).*?]/
    l.gsub!(/※[#([^]]+?)]/, "\\includegraphics{#{$1}}")
  end

  if l =~ /※[#[^]]+?]/
    l.gsub!(/※[#([^]]+?)]/, '※\\footnote{\1}')
  end

  if l =~ //
    STDERR.puts("Remaining Unprocessed Gaiji Character in Line #@line_num.")
    @log_text << normalize("未処理の外字が#{@line_num}行目にあります.\n")
  end
  return l
end

#translate_ruby(l) ⇒ Object

ルビの処理用



191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# File 'lib/aozora4reader.rb', line 191

def translate_ruby(l)

  # 被ルビ文字列内に外字の注記があるばあい,ルビ文字列の後ろに移動する
  # ただし,順番が入れ替わってしまう
  while l =~ /※\\footnote\{[^(?:\\footnote)]+\}(?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))*?《.+?》/
    l.sub!(/(※)(\\footnote\{[^(?:\\footnote)]+\})((?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))*?《.+?》)/, '\1\3\2')
  end

  # 被ルビ文字列内に誤記などの注記が存在する場合は、ルビの後ろに移動する
  while l =~ /(?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?[#[^]]*?](?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))*?《.+?》/
    l.sub!(/((?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?)([#[^]]*?])((?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))*?《.+?》)/, '\1\3\2')
  end

  # ルビ文字列内に誤記などの注記が存在する場合は、ルビの後ろに移動する
  if l =~ /《[^》]*?[#[^]]*?][^》]*?》/
    l.gsub!(/(《[^》]*?)([#[^]]*?])([^》]*?》)/, '\1\3\2')
  end

  # 一連のルビの処理
  # 1 縦棒ありの場合
  if l =~ //
    l.gsub!(/|(.+?)《(.+?)》/, '\\ruby{\1}{\2}')
  end

  # 2 漢字および外字
  if l =~ /(?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?《.+?》/
    l.gsub!(/((?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?)《(.+?)》/, '\\ruby{\1}{\2}')
  end

  # 3 ひらがな
  if l =~ /[あ-ん](?:[ぁ-んーヽヾ]|\\CID\{12107\})*?《.+?》/
    l.gsub!(/([あ-ん](?:[ぁ-んーヽヾ]|\\CID\{12107\})*?)《(.+?)》/, '\\ruby{\1}{\2}')
  end

  # 4 カタカナ
  if l =~ /[ア-ヴ](?:[ァ-ヴーゝゞ]|\\CID\{12107\})*?《.+?》/
    l.gsub!(/([ア-ヴ](?:[ァ-ヴーゝゞ]|\\CID\{12107\})*?)《(.+?)》/, '\\ruby{\1}{\2}')
  end

  # 5 全角アルファベットなど
  if l =~ /[A-Za-zΑ-Ωα-ωА-Яа-я・]+?《.+?》/
    l.gsub!(/([A-Za-zΑ-Ωα-ωА-Яа-я・]+?)《(.+?)》/, '\\ruby{\1}{\2}')
  end

  # 6 半角英数字
  if l =~ /[A-Za-z0-9#_\-\;\&.\'\^\`\\\{\} ]+?《.+?》/
    l.gsub!(/([A-Za-z0-9#_\-\;\&.\'\^\`\\\{\} ]+?)《(.+?)》/, '\\ruby{\1}{\2}')
  end
  if l =~ /《.*?》/
    STDERR.puts("Unknown ruby pattern found in #@line_num.")
    @log_text << normalize("未処理のルビが#{@line_num}行目にあります.\n")
  end

  return l
end

#tuning_bou(l) ⇒ Object

傍点の調整



321
322
323
324
325
326
327
328
329
330
# File 'lib/aozora4reader.rb', line 321

def tuning_bou(l)
  # 傍点の中の「くの字点」を変換
  while l =~ /(\\[a-z]*?bou\{(?:\w|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?)(\\ajD?Kunoji)\{\}((?:\w|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))*?)\}/
    l.gsub!(/((\\([a-z]*?)bou)\{(?:\w|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?)(\\ajD?Kunoji)\{\}((?:\w|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))*?)\}/, '\1}\4with\3Bou\2{\5}')
  end
  if l =~ /\\[a-z]*?bou\{\}/
    l.gsub!(/\\([a-z]*?)bou\{\}/, '{}')
  end
  return l
end

#tuning_ruby(l) ⇒ Object

ルビの調整



302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
# File 'lib/aozora4reader.rb', line 302

def tuning_ruby(l)

  # 1 直前が漢字の場合
  if l =~ /(?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))\\ruby/
    l.gsub!(/((?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\})))\\ruby/, '\1\\Ruby')
  end

  # 2 直後が漢字の場合
  if l =~ /\\ruby\{(?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?\}\{(?:[^\\\{\}]|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?\}(?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))/
    l.gsub!(/\\ruby(\{(?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?\}\{(?:[^\\\{\}]|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?\}(?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\})))/, '\\Ruby\1')
  end

  # 3 ルビが連続する場合
  while l =~ /\\(?:ruby|RUBY|Ruby)\{(?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?\}\{(?:[^\\{}]|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?\}\\ruby/
    l.sub!(/\\(?:ruby|RUBY|Ruby)(\{(?:#{KANJIPAT}|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?\}\{(?:[^\\{}]|(?:\\UTF\{[0-9a-fA-F]+\}|\\CID\{[0-9]+\}))+?\})\\ruby/, '\\RUBY\1\\RUBY')
  end
end