Class: ConverterBase

Inherits:

Object

Object
ConverterBase

Defined in: lib/converterbase.rb

Constant Summary collapse

KANJI_NUM =

"〇一二三四五六七八九"

ENGLISH_SENTENCES_CHARACTERS =

/[\w.,!?'" &:;_-]+/

ENGLISH_SENTENCES_MIN_LENGTH = この文字数以上アルファベットが続くと半角のまま

KANJI_NUM_UNITS =

%w(万 億 兆 京).unshift("")

KANJI_KURAI =

%w(十 百 千).unshift("")

KANJI_NUM_UNITS_DIGIT =

{
  "十" => 1, "百" => 2, "千" => 3, "万" => 4, "億" => 8, "兆" => 12, "京" => 16
}

RECONVERT_KANJI_TO_NUM_PATTERN_UNIT =

"％㎜㎝㎞㎎㎏㏄㎡㎥"

ROME_NUM_ALPHABET =

%w(II III IV VI VII VIII IX ii iii iv vi vii viii ix)

ROME_NUM =

%w(Ⅱ Ⅲ Ⅳ Ⅵ Ⅶ Ⅷ Ⅸ ⅱ ⅲ ⅳ ⅵ ⅶ ⅷ ⅸ)

SINGLE_MINUTE_FAMILY = ミュート（ノノカギ）化する記号定義

%!‘’'!

DOUBLE_MINUTE_FAMILY =

%!“”〝〟"!

HALF_INDENT_TARGET =

/^[ 　\t]*((?:[〔「『(（【〈《≪〝])|(?:※［＃始め二重山括弧］))/

FULL_INDENT_TARGET =

/^[ 　\t]*(――)/

AUTO_INDENT_IGNORE_INDENT_CHAR =

Inspector::IGNORE_INDENT_CHAR.sub("・", "")

AUTHOR_INTRODUCTION_SPLITTER = 前書き・後書きの検出及び処理 ==============================

/^　*[\*＊]{44}$/

AUTHOR_POSTSCRIPT_SPLITTER =

/^　*[\*＊]{48}$/

AUTHOR_COMMENT_CHUKI =

{
  introduction: {
    open: "［＃ここから前書き］", close: "［＃ここで前書き終わり］"
  },
  postscript: {
    open: "［＃ここから後書き］", close: "［＃ここで後書き終わり］"
  }
}

BRACKETS =

[%w(「 」), %w(『 』)]

OPENCLOSE_REGEXPS = ネストに対応したかぎ括弧の正規表現

BRACKETS.map { |bracket|
  bo, bc = bracket
  /(?<oc>#{bo}[^#{bo+bc}]*(?:\g<oc>[^#{bo+bc}]*)*#{bc})/m
}

CHARACTER_OF_RUBY =

"一-龠Ａ-Ｚａ-ｚA-Za-z"

Instance Attribute Summary collapse

#use_dakuten_font ⇒ Object readonly

Returns the value of attribute use_dakuten_font.

Class Method Summary collapse

.rebuild_brackets(data, stack) ⇒ Object

Instance Method Summary collapse

#__calc_kanji_num_with_unit(string) ⇒ Object
#__calc_sum_unit(units) ⇒ Object
#after(io, text_type) ⇒ Object
#after_convert(io) ⇒ Object
#alphabet_to_zenkaku(data, force = false) ⇒ Object

半角アルファベットを全角に変換する.
#author_comment_force_close ⇒ Object
#auto_indent(data) ⇒ Object

行頭字下げ.
#auto_join_in_brackets(data) ⇒ Object

かぎ括弧内自動連結.
#auto_join_line(data) ⇒ Object

手動折り返しの自動連結.
#before(io, text_type) ⇒ Object
#before_convert(io) ⇒ Object
#blank_line?(line) ⇒ Boolean
#border_symbol?(line) ⇒ Boolean
#calc_cr_count(str) ⇒ Object
#comments_block?(line) ⇒ Boolean

コメントブロックを検出する.
#convert(text, text_type) ⇒ Object
#convert_arrow(data) ⇒ Object

おかしくなりやすい矢印文字の変換.
#convert_dakuten_char_to_font(data) ⇒ Object

濁点のついてない文字に濁点をつける表現を対応.
#convert_double_angle_quotation_to_gaiji(data) ⇒ Object

ギュメを二重山括弧（の外字）に変換.
#convert_for_all_data(data) ⇒ Object

小説データ全体に対して施す変換.
#convert_fraction_and_date(data) ⇒ Object

分数表記を○分の○表記に変更、及び日付表記を検出.
#convert_horizontal_ellipsis(data) ⇒ Object

中黒(・)や句読点を並べて三点リーダーもどきにしているのを三点リーダーに変換.
#convert_kanji_num_with_unit(data, lower_digit_zero = 0) ⇒ Object

漢数字を単位を使った表現に変換.
#convert_main(io) ⇒ Object

変換処理本体.
#convert_novel_rule(data) ⇒ Object

小説のルールに沿うように変換.
#convert_numbers(data) ⇒ Object

数字の変換.
#convert_page_break(data) ⇒ Object

一定以上の連続する空行を改ページに変換.
#convert_rome_numeric(data) ⇒ Object

ローマ数字っぽいアルファベットをローマ数字に変換.
#convert_special_characters(data) ⇒ Object

特定の表現・記号を変換していく.
#convert_tatechuyoko(data) ⇒ Object

縦中横にすべき表現を変換.
#delay_outputs(data = "") ⇒ Object
#delete_dust_char(data) ⇒ Object

表示上化けてしまうゴミ削除.
#enchant_midashi(data) ⇒ Object

［＃改ページ］直後の行を見出しに設定する.
#erase_comments_block(data) ⇒ Object

コメントブロックを削除する.
#erase_introduction(data) ⇒ Object

前書きを削除する.
#erase_postscript(data) ⇒ Object

後書きを削除する.
#exception_reconvert_kanji_to_num(data) ⇒ Object

アラビア数字を使うべきところはアラビア数字に戻す.
#find_introduction? ⇒ Boolean

前書きの検出.
#force_indent_special_chapter(data) ⇒ Object

章見出しっぽい文字列を字下げする.
#half_indent_bracket(data) ⇒ Object

行頭かぎ括弧(等)に二分アキを追加する.
#hankaku_num_to_zenkaku_num(data) ⇒ Object

半角アラビア数字の全角化.
#hankakukana_to_zenkakukana(data) ⇒ Object

半角カナと｢｣｡､･等を全角に変換.
#inclusion_author_comment_block?(line) ⇒ Boolean
#initialize(setting, inspector, illustration) ⇒ ConverterBase constructor

A new instance of ConverterBase.
#initialize_member_values ⇒ Object
#insert_blank_before_line_and_behind_to_special_chapter(line) ⇒ Object
#insert_blank_line_to_border_symbol(line) ⇒ Object

■などの区切りの前後には空行が必ず存在するようにする.
#insert_separate_space(data) ⇒ Object

特定の記号の直後は全角アキを挿入する.
#is_sesame?(str, ten, last_char) ⇒ Boolean
#jisage(line, num) ⇒ Object

行頭空白を考慮した字下げ.
#join_inner_bracket(str) ⇒ Object

改行を連結した文章を作る.
#kanji_num_to_integer(string) ⇒ Object
#leave_author_comment_block?(line) ⇒ Boolean
#midashi(str) ⇒ Object
#modify_kana_ni_to_kanji_ni(data) ⇒ Object

漢字の二じゃなくて間違えてカタカナのニを使ってるのを校正する.
#narou_ruby(data) ⇒ Object

小説家になろうのルビ対策.
#num_to_kanji(data) ⇒ Object

アラビア数字を漢数字に.
#object_of_ruby?(char) ⇒ Boolean
#outputs(data = "", force = false) ⇒ Object
#page_break?(line) ⇒ Boolean

改ページある？.
#process_author_comment(line) ⇒ Object
#rebuild_english_sentences(data) ⇒ Object

英文を再構成する.
#rebuild_force_indent_special_chapter(data) ⇒ Object
#rebuild_hankaku_num_and_comma(data) ⇒ Object
#rebuild_illust(data) ⇒ Object
#rebuild_kanji_num(data) ⇒ Object
#rebuild_kome_to_gaiji(data) ⇒ Object

※の外字注記化.
#rebuild_url(data) ⇒ Object
#replace_by_replace_txt(text) ⇒ Object

replace.txt により単純置換.
#replace_illust_tag(data) ⇒ Object

挿絵タグやimgタグ等を挿絵注釈に変換する。挿絵画像が存在しなければダウンロードして保存する.
#replace_narou_tag(data) ⇒ Object

小説家になろう専用タグを置換.
#replace_tatesen(str) ⇒ Object
#replace_url(data) ⇒ Object

URL っぽい文字列を一旦別のIDに置き換えてあとで復元することで、変換処理の影響を受けさせない.
#rstrip_all_lines(data) ⇒ Object

すべての行の行末空白を削除.
#ruby_youon_to_big(ruby) ⇒ Object

ルビの拗音(ぁ、ぃ等)を商業書籍のように大きくする.
#sesame(str) ⇒ Object
#stash_hankaku_num_and_comma(num) ⇒ Object
#stash_kanji_num(data) ⇒ Object
#stash_kome(data) ⇒ Object

先に外字注記にしてしまうと border_symbol? 等で困るので、あとで外字注記化出来るようにする.
#symbols_to_zenkaku(data) ⇒ Object

半角記号を全角に変換.
#tcy(str) ⇒ Object

縦中横注記取得.
#to_ruby(match, m1, m2, openclose_symbols) ⇒ Object
#zenkaku_num_to_hankaku_num(num) ⇒ Object

全角数字(漢数字含む)を半角アラビア数字に.
#zenkaku_num_to_kanji(str) ⇒ Object

全角アラビア数字を漢数字に.
#zenkaku_rstrip(line) ⇒ Object

全角版 String#rstrip!.

Constructor Details

#initialize(setting, inspector, illustration) ⇒ `ConverterBase`

Returns a new instance of ConverterBase.

# File 'lib/converterbase.rb', line 34

# Builds a converter bound to the given collaborators.
#
# @param setting stored as-is in @setting
# @param inspector stored as-is in @inspector
# @param illustration stored as-is in @illustration
def initialize(setting, inspector, illustration)
  @setting, @inspector, @illustration = setting, inspector, illustration
  # Starts out false; convert_dakuten_char_to_font switches it to true.
  @use_dakuten_font = false
  initialize_member_values
end

Instance Attribute Details

#use_dakuten_font ⇒ `Object` (readonly)

Returns the value of attribute use_dakuten_font.



18
19
20

# File 'lib/converterbase.rb', line 18

# Reader for @use_dakuten_font (read-only attribute).
def use_dakuten_font
  @use_dakuten_font
end

Class Method Details

.rebuild_brackets(data, stack) ⇒ `Object`

# File 'lib/converterbase.rb', line 819

# Returns a copy of +data+ in which every "［＃かぎ括弧＝N］" placeholder is
# replaced by stack[N] (N is the decimal number inside the placeholder).
#
# @param data [String] text containing placeholders; not modified
# @param stack indexable by Integer; supplies the replacement text
# @return [String] the rebuilt text
def self.rebuild_brackets(data, stack)
  placeholder = /［＃かぎ括弧＝(\d+)］/
  data.gsub(placeholder) { stack[Regexp.last_match(1).to_i] }
end

Instance Method Details

#__calc_kanji_num_with_unit(string) ⇒ `Object`

# File 'lib/converterbase.rb', line 161

# Converts a kanji number below 万 (digits from KANJI_NUM combined with
# 十/百/千) into an Integer by summing each "digits + small units" segment.
#
# @param string [String] kanji number text
# @return [Integer] the summed value
def __calc_kanji_num_with_unit(string)
  sum = 0
  string.scan(/([#{KANJI_NUM}]*)([十百千]*)/) do |digits, kurai|
    # An empty segment means there is nothing left to read.
    break if (digits + kurai).empty?
    # A bare unit such as "十" counts as one of that unit.
    digits = "1" if digits.empty?
    arabic = digits.tr(KANJI_NUM, "0-9")
    sum += if kurai.empty?
             arabic.to_i
           else
             # Append the zeros of the unit value (its leading digit dropped).
             (arabic + __calc_sum_unit(kurai).to_s[1, 99]).to_i
           end
  end
  sum
end

#__calc_sum_unit(units) ⇒ `Object`

# File 'lib/converterbase.rb', line 155

# Sums the numeric values of the unit characters in +units+; each character
# is worth 10 to the power given by KANJI_NUM_UNITS_DIGIT.
#
# @param units [String] unit characters
# @return [Integer] 0 for an empty string
def __calc_sum_unit(units)
  values = units.each_char.map { |unit| 10**KANJI_NUM_UNITS_DIGIT[unit] }
  values.inject(0, :+)
end

#after(io, text_type) ⇒ `Object`



30
31
32

# File 'lib/converterbase.rb', line 30

# Post-conversion hook. This implementation returns +io+ unchanged and does
# not use +text_type+.
def after(io, text_type)
  io
end

#after_convert(io) ⇒ `Object`



1103
1104
1105

# File 'lib/converterbase.rb', line 1103

# Calls #after with +io+ and the current @text_type and returns its result.
def after_convert(io)
  after(io, @text_type)
end

#alphabet_to_zenkaku(data, force = false) ⇒ `Object`

半角アルファベットを全角に変換する

force : 強制的に全アルファベットを全角にするか？

false の場合、英文章（半角スペースで区切られた2単語以上）を半角のままにする
英文の定義： 1. 半角スペースで区切られた２単語以上の文章、
             2. 一定以上の長さの一文字以上アルファベットを含む文章

# File 'lib/converterbase.rb', line 478

# Converts half-width ASCII letters in +data+ to full-width, in place.
#
# With +force+ every run of letters is converted. Otherwise each run of
# ENGLISH_SENTENCES_CHARACTERS is inspected: if it holds two or more
# space-separated words, or is at least ENGLISH_SENTENCES_MIN_LENGTH long and
# contains a letter, it is stashed in @english_sentences and replaced by a
# "［＃英文＝N］" placeholder; any other run has its letters converted.
#
# @param data [String] text to modify in place
# @param force [Boolean] convert every letter unconditionally
# @return [String, nil] result of String#gsub! (nil when nothing matched)
def alphabet_to_zenkaku(data, force = false)
  widen = ->(text) { text.tr("a-zA-Z", "ａ-ｚＡ-Ｚ") }
  return data.gsub!(/[a-zA-Z]+/) { |word| widen.call(word) } if force

  data.gsub!(ENGLISH_SENTENCES_CHARACTERS) do |chunk|
    keep_hankaku = chunk.split(" ").size >= 2 ||
                   (chunk.length >= ENGLISH_SENTENCES_MIN_LENGTH && chunk.match(/[a-z]/i))
    next widen.call(chunk) unless keep_hankaku
    @english_sentences << chunk
    "［＃英文＝#{@english_sentences.size - 1}］"
  end
end

#author_comment_force_close ⇒ `Object`

# File 'lib/converterbase.rb', line 761

# If an author-comment block is still open (@in_author_comment_block is set),
# writes the matching closing annotation via #outputs; otherwise does nothing.
def author_comment_force_close
  return unless @in_author_comment_block
  outputs(AUTHOR_COMMENT_CHUKI[@in_author_comment_block][:close])
end

#auto_indent(data) ⇒ `Object`

行頭字下げ

必ず下げなければいけないところは強制的に字下げし、他の部分は全体的に判断して字下げする

# File 'lib/converterbase.rb', line 576

# Indents line heads in +data+, in place.
#
# Lines matching FULL_INDENT_TARGET are always given a leading full-width
# space. The remaining lines are indented only when auto indent is enabled in
# the settings and @inspector.inspect_indent(data) is truthy.
#
# @param data [String] text to modify in place
def auto_indent(data)
  data.gsub!(FULL_INDENT_TARGET, "　\\1")
  return unless @setting.enable_auto_indent && @inspector.inspect_indent(data)

  data.gsub!(/^([^#{AUTO_INDENT_IGNORE_INDENT_CHAR}])/) do
    head = $1
    following = $'
    # Handles lines that start with repeated middle dots (・・・) used in place
    # of an ellipsis: https://github.com/whiteleaf7/narou/issues/35
    # A single leading middle dot is a common expression, so it is not indented.
    if head == "・" && following[0] != "・"
      "・"
    elsif head == " " || head == "　"
      "　"
    else
      "　#{head}"
    end
  end
end

#auto_join_in_brackets(data) ⇒ `Object`

かぎ括弧内自動連結

# File 'lib/converterbase.rb', line 795

# Joins line breaks inside quotation brackets and/or inspects unbalanced
# brackets, depending on the settings. Modifies +data+ in place.
#
# For each pattern in OPENCLOSE_REGEXPS every matched bracket pair is swapped
# for a "［＃かぎ括弧＝N］" placeholder and its (possibly joined) text is kept
# in +stack+; the placeholders are restored at the end of each pass by
# ConverterBase.rebuild_brackets.
def auto_join_in_brackets(data)
  # Nothing to do when both features are disabled.
  if !@setting.enable_auto_join_in_brackets && !@setting.enable_inspect_invalid_openclose_brackets
    return
  end
  OPENCLOSE_REGEXPS.each_with_index do |openclose, i|
    stack = {}
    data.gsub!(openclose).with_index do |match, j|
      # join_inner_bracket returns nil when the match contains no line break.
      joined_str = join_inner_bracket(match)
      if @setting.enable_auto_join_in_brackets && joined_str
        # Keep the original text when the inspector reports an error for the joined text.
        error = @inspector.validate_joined_inner_brackets(match, joined_str, BRACKETS[i])
        stack[j] = error ? match : joined_str
      else
        stack[j] = match
      end
      "［＃かぎ括弧＝#{j}］"
    end
    if @setting.enable_inspect_invalid_openclose_brackets
      # Only brackets that are not closed properly remain in data at this point
      @inspector.inspect_invalid_openclose_brackets(data, BRACKETS[i], stack)
    end
    data.replace(ConverterBase.rebuild_brackets(data, stack))
  end
end

#auto_join_line(data) ⇒ `Object`

手動折り返しの自動連結

# File 'lib/converterbase.rb', line 828

# Joins manually wrapped lines, in place: a line ending in "、" followed by a
# line starting with one full-width space is merged into a single line.
#
# @param data [String] text to modify in place
# @return [String, nil] result of String#gsub!
def auto_join_line(data)
  # If the next line starts with an opening symbol (or a leader/dash), the
  # break is treated as intentional and the lines are not joined.
  wrapped = /([^、])、\n　([^「『(（【<＜〈《≪…‥―])/
  data.gsub!(wrapped) { "#{$1}、#{$2}" }
end

#before(io, text_type) ⇒ `Object`

# File 'lib/converterbase.rb', line 20

# Pre-conversion hook operating on io.string in place.
#
# For "body"/"textfile" text it runs convert_page_break; for anything other
# than "story", when blank-line packing is enabled, it collapses blank lines.
# The decisions read @text_type; the +text_type+ argument itself is not used.
#
# @param io [StringIO] source text holder
# @param text_type [String] kind of text
# @return [StringIO] the same +io+
def before(io, text_type)
  text = io.string
  convert_page_break(text) if %w(body textfile).include?(@text_type)
  unless @text_type == "story" || !@setting.enable_pack_blank_line
    text.gsub!("\n\n", "\n")
    # Reduce three consecutive newline-only lines to two.
    text.gsub!(/(^\n){3}/m, "\n\n")
  end
  io
end

#before_convert(io) ⇒ `Object`



1099
1100
1101

# File 'lib/converterbase.rb', line 1099

# Calls #before with +io+ and the current @text_type and returns its result.
def before_convert(io)
  before(io, @text_type)
end

#blank_line?(line) ⇒ `Boolean`

Returns:

(Boolean)



646
647
648

# File 'lib/converterbase.rb', line 646

# Tests whether the first line of +line+ holds only half-width spaces,
# full-width spaces or tabs (or nothing).
#
# @param line [String]
# @return [Integer, nil] 0 when blank, nil otherwise
def blank_line?(line)
  /\A[ 　\t]*$/ =~ line
end

#border_symbol?(line) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 639

# Tests whether +line+ consists solely of border symbols (optionally preceded
# by spaces/tabs).
#
# The symbol set is read once from "bordersymbols.txt" in the preset
# directory and cached in @@symbols.
#
# @param line [String]
# @return [Integer, nil] match position, or nil when the line is not a border
def border_symbol?(line)
  # File.open instead of Kernel#open: Kernel#open treats a leading "|" in its
  # argument as a command to execute, which is never wanted for a plain read.
  @@symbols ||= File.open(File.join(Narou.get_preset_dir, "bordersymbols.txt"), "r:BOM|UTF-8") { |fp|
    fp.read.strip
  }
  # NOTE(review): the symbols are interpolated unescaped into a character
  # class — confirm the preset file holds no regexp metacharacters.
  line =~ /^[ 　\t]*[#{@@symbols}]+$/
end

#calc_cr_count(str) ⇒ `Object`

# File 'lib/converterbase.rb', line 997

# Counts the newlines at the head of +str+, capped at 2.
#
# When +str+ is empty or consists only of newlines there is no non-newline
# character to locate, so the whole length is the leading count (previously
# this case raised NoMethodError on nil).
#
# @param str [String]
# @return [Integer] 0, 1 or 2
def calc_cr_count(str)
  head_cr_count = str.index(/[^\n]/) || str.length
  [head_cr_count, 2].min
end

#comments_block?(line) ⇒ `Boolean`

コメントブロックを検出する

コメントブロックの定義は - のみが50回以上連続された行に囲まれている間

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 510

# Detects comment blocks: the region between lines made only of 50 or more
# hyphens. Each border line toggles @in_comment_block.
#
# @param line [String]
# @return [Boolean] true for a border line; otherwise the current
#   @in_comment_block state
def comments_block?(line)
  return @in_comment_block unless line =~ /^-{50,}$/
  @in_comment_block = !@in_comment_block
  true
end

#convert(text, text_type) ⇒ `Object`

# File 'lib/converterbase.rb', line 1107

# Converts +text+ and returns the resulting string.
#
# Records +text_type+ in @text_type, then feeds a StringIO through
# before_convert, convert_main and after_convert in that order, rewinding the
# io each stage returns.
#
# @param text [String] source text
# @param text_type [String] kind of text
# @return [String] converted text ("" for empty input)
def convert(text, text_type)
  return "" if text == ""
  @text_type = text_type
  io = StringIO.new(rstrip_all_lines(text))
  [:before_convert, :convert_main, :after_convert].each do |stage|
    io = send(stage, io)
    io.rewind
  end
  io.read
end

#convert_arrow(data) ⇒ `Object`

おかしくなりやすい矢印文字の変換

# File 'lib/converterbase.rb', line 401

# Replaces the double arrows ⇒ / ⇐ with → / ← in place, but only when the
# current device (cached in @@device from Narou.get_device) reports kindle?.
#
# @param data [String] text to modify in place
def convert_arrow(data)
  @@device ||= Narou.get_device
  # Only verified on Kindle PW, so for now the conversion is limited to device=kindle.
  return unless @@device && @@device.kindle?
  data.tr!("⇒⇐", "→←")
end

#convert_dakuten_char_to_font(data) ⇒ `Object`

濁点のついてない文字に濁点をつける表現を対応

濁点つきフォントに部分的に切り替える

# File 'lib/converterbase.rb', line 438

# Handles a character followed by a standalone dakuten mark (゛ or ﾞ), in place.
#
# Kana (and ι) are wrapped in ［＃濁点］…［＃濁点終わり］ when the dakuten
# font is enabled, and @use_dakuten_font is set to true; every other case is
# emitted through #tcy as the character plus "゛".
#
# @param data [String] text to modify in place
def convert_dakuten_char_to_font(data)
  data.gsub!(/(.)[゛ﾞ]/) do
    base = $1
    unless base =~ /[ぁ-んァ-ヶι]/ && @setting.enable_dakuten_font
      next tcy(base + "゛")
    end
    @use_dakuten_font = true
    "［＃濁点］#{base}［＃濁点終わり］"
  end
end

#convert_double_angle_quotation_to_gaiji(data) ⇒ `Object`

ギュメを二重山括弧（の外字）に変換

# File 'lib/converterbase.rb', line 419

# Replaces ≪ and ≫ in +data+ with the external-character annotations for
# double angle brackets, in place.
#
# @param data [String] text to modify in place
# @return [String, nil] result of the second String#gsub!
def convert_double_angle_quotation_to_gaiji(data)
  opening = "※［＃始め二重山括弧］"
  closing = "※［＃終わり二重山括弧］"
  data.gsub!("≪") { opening }
  data.gsub!("≫") { closing }
end

#convert_for_all_data(data) ⇒ `Object`

小説データ全体に対して施す変換

# File 'lib/converterbase.rb', line 1074

# Applies the whole-text conversions to +data+ in place, in a fixed order.
# The steps mutate the same string, so later steps see the output of earlier
# ones; do not reorder them.
def convert_for_all_data(data)
  hankakukana_to_zenkakukana(data)
  auto_join_in_brackets(data)
  auto_join_line(data) if @setting.enable_auto_join_line
  erase_comments_block(data)
  replace_illust_tag(data)
  replace_url(data)
  replace_narou_tag(data)
  # Runs before alphabet_to_zenkaku, which full-widens or stashes the ASCII
  # letters this step looks for.
  convert_rome_numeric(data)
  alphabet_to_zenkaku(data, @setting.enable_alphabet_force_zenkaku)
  force_indent_special_chapter(data)
  convert_numbers(data)
  exception_reconvert_kanji_to_num(data)
  # Unit-style kanji numbers are used only when kanji conversion and the
  # unit option are both on, and never for subtitle/chapter text.
  if @setting.enable_convert_num_to_kanji && @text_type != "subtitle" && @text_type != "chapter" \
     && @setting.enable_kanji_num_with_units
    convert_kanji_num_with_unit(data, @setting.kanji_num_with_units_lower_digit_zero)
  end
  rebuild_kanji_num(data)
  insert_separate_space(data)
  convert_special_characters(data)
  convert_fraction_and_date(data)
  modify_kana_ni_to_kanji_ni(data)
  convert_dakuten_char_to_font(data)
end

#convert_fraction_and_date(data) ⇒ `Object`

分数表記を○分の○表記に変更、及び日付表記を検出

スラッシュで区切られた数字が２個なら分数、３個なら日付と定義

# File 'lib/converterbase.rb', line 243

# Rewrites slash-separated numbers in +data+, in place.
#
# Two slash-separated numbers are treated as a fraction and become
# "<denominator>分の<numerator>"; three are treated as a date and are
# formatted with @setting.date_format. Each transform runs only when its
# setting is enabled; otherwise the matched text is left untouched.
#
# @param data [String] text to modify in place
# @return [String, nil] result of String#gsub!, or nil when both transforms
#   are disabled
def convert_fraction_and_date(data)
  if !@setting.enable_transform_fraction && !@setting.enable_transform_date
    return
  end
  # "\\d" keeps the backslash so the character class really contains the
  # digit shorthand; a bare "\d" in a double-quoted string is just the letter d.
  target_num = "\\d０-９#{KANJI_NUM}十百千万億兆京垓"
  data.gsub!(/[#{target_num}\/／]+/) do |match|
    numerics = match.split(/[\/／]/)
    case numerics.size
    when 2
      # Fraction
      if @setting.enable_transform_fraction
        "#{zenkaku_num_to_kanji(numerics[1])}分の#{zenkaku_num_to_kanji(numerics[0])}"
      else
        match
      end
    when 3
      # Date. When the date transform is disabled the text must be returned
      # as-is; falling through with nil would delete it from the output.
      next match unless @setting.enable_transform_date
      begin
        date = Date.new(*numerics.map { |s|
          s.tr("0-9０-９#{KANJI_NUM}", "0-90-90-9").to_i
        })
      rescue ArgumentError
        # Not a valid calendar date: leave the text alone.
        match
      else
        convert_numbers(date.strftime(@setting.date_format))
      end
    else
      match
    end
  end
end

#convert_horizontal_ellipsis(data) ⇒ `Object`

中黒(・)や句読点を並べて三点リーダーもどきにしているのを三点リーダーに変換

# File 'lib/converterbase.rb', line 963

# Converts pseudo-ellipses made of repeated ・ 。 、 ． (three or more) into
# real ellipsis characters, in place, then collapses "。。" and "、、".
#
# Skipped when the setting is off or the text is a subtitle/chapter. A run
# directly next to "―" is left alone. The number of "…" emitted is always even.
#
# @param data [String] text to modify in place
def convert_horizontal_ellipsis(data)
  return unless @setting.enable_convert_horizontal_ellipsis
  return if %w(subtitle chapter).include?(@text_type)
  ["・", "。", "、", "．"].each do |dot|
    data.gsub!(/#{dot}{3,}/) do |run|
      neighbors = [$`[-1], $'[0]]
      next run if neighbors.include?("―")
      "…" * ((run.length / 3.0 / 2).ceil * 2)
    end
  end
  data.gsub!("。。", "。")
  data.gsub!("、、", "、")
end

#convert_kanji_num_with_unit(data, lower_digit_zero = 0) ⇒ `Object`

漢数字を単位を使った表現に変換

８００万１０００といったような表現は、内部で一度 8001000 に変換する。 lower_digit_zero はこの最後の 000 に適用される

# File 'lib/converterbase.rb', line 190

# Rewrites kanji numbers in +data+ into unit-based notation (十/百/千 and
# 万/億/兆/京), in place.
#
# Each run of kanji digits/units is first turned into an integer with
# kanji_num_to_integer. It is rewritten only when its positional kanji form
# ends with at least +lower_digit_zero+ zeros (〇); otherwise the original
# text is kept.
#
# @param data [String] text to modify in place
# @param lower_digit_zero [Integer] minimum count of trailing zeros required
def convert_kanji_num_with_unit(data, lower_digit_zero = 0)
  data.gsub!(/([#{KANJI_NUM}十百千万億兆京]+)/) do |match|
    total = kanji_num_to_integer($1)
    # More digits than 京 plus one four-digit group can express: leave as-is.
    next match if total.to_s.length > KANJI_NUM_UNITS_DIGIT["京"] + 4
    m1 = total.to_s.tr("0-9", KANJI_NUM)
    if m1 =~ /〇{#{lower_digit_zero},}$/
      digits = m1.reverse.scan(/.{1,4}/).map(&:reverse).reverse   # array of 4-digit groups, split from the lowest digit
      keta = digits.size - 1
      digits.map.with_index { |nums, keta_i|
        four_digit_num = nums.scan(/./).map.with_index { |d, di|
          # A zero digit contributes nothing.
          next "" if d == "〇"
          kurai = KANJI_KURAI[nums.length - di - 1]
          if d == "一"
            # For a number of four digits or fewer, 一 before 千 is not
            # required; for five digits or more, 一 is written before 千.
            # 1100 → 千百, 11100 → 一万一千百
            if kurai != "" && !(keta > 0 && kurai == "千")
              d = ""
            end
          end
          d + kurai
        }.join
        # A group that is all zeros emits no large unit either.
        if four_digit_num.length > 0
          four_digit_num + KANJI_NUM_UNITS[keta - keta_i]
        else
          ""
        end
      }.join
    else
      match
    end
  end
end

#convert_main(io) ⇒ `Object`

変換処理本体

@text_type: 渡されるテキストの種類。

subtitle, introduction, body, postscript, textfile, chapter, story

# File 'lib/converterbase.rb', line 1123

# Main conversion routine: reads the text from +io+, converts it and returns
# the StringIO (@write_fp) holding the result.
#
# @text_type selects the handling; the documented values are subtitle,
# introduction, body, postscript, textfile, chapter and story.
#
# @param io [StringIO] source text, positioned at the start
# @return [StringIO] @write_fp
def convert_main(io)
  @write_fp = StringIO.new
  # Introductions/postscripts that are configured to be erased produce empty output.
  case @text_type
  when "introduction"
    return @write_fp if @setting.enable_erase_introduction
  when "postscript"
    return @write_fp if @setting.enable_erase_postscript
  end
  title_and_author = nil
  if @text_type == "textfile"
    # Skip the title and author lines (first two lines)
    # NOTE(review): io.gets returns nil at EOF, so input shorter than two
    # lines would raise here — confirm callers guarantee two lines.
    title_and_author = io.gets + io.gets
    data = io.read
  else
    data = io.read
  end
  initialize_member_values
  convert_for_all_data(data)
  if @text_type == "textfile"
    # The exact number of lines is only known after
    # convert_for_all_data -> replace_narou_tag has expanded line breaks
    progressbar = ProgressBar.new(data.count("\n") + 1)
    progressbar.output(0)
  end
  @read_fp = StringIO.new(data)
  if @text_type == "subtitle"
    # Subtitles skip the per-line processing.
    @write_fp.write(data)
  else
    @read_fp.each_with_index do |line, i|
      progressbar.output(i) if @text_type == "textfile"
      @request_skip_output_line = false
      zenkaku_rstrip(line)
      # A previous line asked for a blank line after it.
      if @request_insert_blank_next_line
        outputs unless blank_line?(line)
        @request_insert_blank_next_line = false
        @before_line = ""
      end
      process_author_comment(line) if @text_type == "textfile"
      insert_blank_before_line_and_behind_to_special_chapter(line)
      insert_blank_line_to_border_symbol(line)

      outputs(line)
      # Text queued by delay_outputs is written after the current line.
      unless @delay_outputs_buffer.empty?
        @write_fp.write(@delay_outputs_buffer)
        @before_line = @delay_outputs_buffer
        @delay_outputs_buffer = ""
      else
        @before_line = line
      end
    end
    author_comment_force_close if @text_type == "textfile"
  end

  @write_fp.rewind
  # The steps below edit @write_fp's underlying string in place.
  data = @write_fp.string
  if @text_type == "textfile"
    if @setting.enable_author_comments
      erase_introduction(data) if @setting.enable_erase_introduction
      erase_postscript(data) if @setting.enable_erase_postscript
    end
    if @setting.enable_enchant_midashi
      enchant_midashi(data)
    end
  end
  rebuild_illust(data)
  rebuild_url(data)
  rebuild_english_sentences(data)
  rebuild_hankaku_num_and_comma(data)
  rebuild_kome_to_gaiji(data)
  rebuild_force_indent_special_chapter(data)
  if @text_type == "body" || @text_type == "textfile"
    half_indent_bracket(data)
    auto_indent(data)
  end
  # Ruby is processed at this point because the rebuilt text may itself
  # receive ruby annotations
  narou_ruby(data) if @setting.enable_ruby
  # Ellipsis conversion runs after ruby processing, because ruby may use
  # ・・・ as emphasis dots
  convert_horizontal_ellipsis(data)
  # Convert guillemets that were not turned into ruby into double angle
  # brackets (external characters)
  convert_double_angle_quotation_to_gaiji(data)
  delete_dust_char(data)
  if title_and_author
    # NOTE(review): this prints the title/author lines to stdout on every
    # textfile conversion — looks like leftover debug output; confirm.
    puts title_and_author
    data.replace(title_and_author + data)
  end
  data.rstrip!
  progressbar.clear if @text_type == "textfile"
  @write_fp
end

#convert_novel_rule(data) ⇒ `Object`

小説のルールに沿うように変換

# File 'lib/converterbase.rb', line 453

# Normalises +data+ to common novel typesetting rules, in place.
#
# @param data [String] text to modify in place
# @return [String, nil] result of the last String#gsub!
def convert_novel_rule(data)
  # Drop a full stop placed right before a closing bracket.
  data.gsub!(/。([」』）])/) { $1 }
  # Leaders that should come in pairs are padded to an even count.
  # MEMO: ― should also be paired, but it is sometimes used as a plain
  # symbol, so it is ignored.
  ["…", "‥"].each do |leader|
    data.gsub!(/#{leader}+/) do |run|
      count = run.length
      leader * (count.even? ? count : count + 1)
    end
  end
  # Typos seen from time to time.
  data.gsub!(/。　/, "。")
  data.gsub!(/([？！])。/) { $1 }
end

#convert_numbers(data) ⇒ `Object`

数字の変換

# File 'lib/converterbase.rb', line 79

# Converts numbers in +data+ in place and returns +data+.
#
# A decimal point between digits becomes "・". Digits are then converted to
# kanji via num_to_kanji when that setting is on and the text is not a
# subtitle, chapter or story; otherwise hankaku_num_to_zenkaku_num is applied.
#
# @param data [String] text to modify in place
# @return [String] +data+
def convert_numbers(data)
  digits = "[\\d０-９#{KANJI_NUM}]+?"
  # Decimal point to middle dot.
  data.gsub!(/(#{digits})[\.．](#{digits})/, "\\1・\\2")
  kanji_excluded = %w(subtitle chapter story).include?(@text_type)
  if @setting.enable_convert_num_to_kanji && !kanji_excluded
    num_to_kanji(data)
  else
    hankaku_num_to_zenkaku_num(data)
  end
  data
end

#convert_page_break(data) ⇒ `Object`

一定以上の連続する空行を改ページに変換

# File 'lib/converterbase.rb', line 1055

# Replaces runs of at least @setting.to_page_break_threshold empty lines with
# a "［＃改頁］" line, in place, when the setting is enabled.
#
# @param data [String] text to modify in place
# @return [String, nil] result of String#gsub!, or nil when disabled
def convert_page_break(data)
  return unless @setting.enable_convert_page_break
  min_lines = @setting.to_page_break_threshold
  # Using `改ページ' would get mixed up with heading assignment and the like,
  # so automatically generated breaks use a distinct annotation.
  data.gsub!(/(^\n){#{min_lines},}/, "［＃改頁］\n")
end

#convert_rome_numeric(data) ⇒ `Object`

ローマ数字っぽいアルファベットをローマ数字に変換

※alphabet_to_zenkaku の前に実行する必要あり

# File 'lib/converterbase.rb', line 309

# Replaces ASCII sequences that look like Roman numerals (ROME_NUM_ALPHABET)
# with the matching glyph from ROME_NUM, in place. A sequence is replaced only
# when a non-letter character stands on both sides of it.
#
# Must run before alphabet_to_zenkaku.
#
# @param data [String] text to modify in place
def convert_rome_numeric(data)
  ROME_NUM_ALPHABET.each_index do |idx|
    ascii = ROME_NUM_ALPHABET[idx]
    glyph = ROME_NUM[idx]
    data.gsub!(/([^a-zA-Z])#{ascii}([^a-zA-Z])/) { "#{$1}#{glyph}#{$2}" }
  end
end

#convert_special_characters(data) ⇒ `Object`

特定の表現・記号を変換していく

# File 'lib/converterbase.rb', line 318

# Runs the symbol/expression conversions on +data+ in their fixed order.
#
# @param data [String] text to modify in place
# @return the result of convert_arrow
def convert_special_characters(data)
  # convert_double_angle_quotation_to_gaiji: guillemets present from the
  # start are not ruby targets, so they become external-character annotations.
  steps = [
    :stash_kome,
    :convert_double_angle_quotation_to_gaiji,
    :symbols_to_zenkaku,
    :convert_tatechuyoko,
    :convert_novel_rule
  ]
  steps.each { |step| send(step, data) }
  convert_arrow(data)
end

#convert_tatechuyoko(data) ⇒ `Object`

縦中横にすべき表現を変換

# File 'lib/converterbase.rb', line 361

# Wraps runs of exclamation/question marks in tate-chu-yoko (horizontal in
# vertical text) annotations via #tcy, in place.
#
# First pass: runs of ！ only. Second pass: mixed runs of ！ and ？.
def convert_tatechuyoko(data)
  # Tate-chu-yoko for exclamation and question marks.
  # AozoraEPUB3's own tate-chu-yoko setting would make explicit annotations
  # unnecessary, but it is not applied automatically inside headings, so the
  # annotation is written explicitly.
  # !? are expected to have been converted to full-width beforehand.
  data.gsub!(/！+/) do |match|
    # Leave runs that touch a ？ for the second pass.
    if "#{$`[-1]}#{$'[0]}".include?("？")
      next match
    end
    len = match.length
    if len == 3
      tcy("!!!")
    elsif len >= 4
      # Four or more: round up to an even count (odd counts grow by one) and
      # emit them two at a time.
      len += 1 if len.odd?
      tcy("!!") * (len / 2)
    else
      match
    end
  end
  data.gsub!(/[！？]+/) do |match|
    case match.length
    when 2
      tcy(match.tr("！？", "!?"))
    when 3
      # Only these patterns are allowed as tate-chu-yoko, for appearance reasons.
      if %w(！！？ ？！！).find { |v| v == match }
        tcy(match.tr("！？", "!?"))
      else
        match
      end
    else
      match
    end
  end
end

#delay_outputs(data = "") ⇒ `Object`

# File 'lib/converterbase.rb', line 63

# Queues +data+ plus a newline in @delay_outputs_buffer, unless output of the
# current line has been suppressed (@request_skip_output_line).
#
# @param data [String] text to queue
def delay_outputs(data = "")
  return if @request_skip_output_line
  @delay_outputs_buffer << data + "\n"
end

#delete_dust_char(data) ⇒ `Object`

表示上化けてしまうゴミ削除

# File 'lib/converterbase.rb', line 1066

# Removes junk characters that render as garbage, in place.
# NOTE(review): both literals hold invisible characters and look identical
# in this listing — confirm against the real file which code points they are.
def delete_dust_char(data)
  data.gsub!("︎", "")
  data.gsub!("︎", "")
end

#enchant_midashi(data) ⇒ `Object`

［＃改ページ］直後の行を見出しに設定する

# File 'lib/converterbase.rb', line 990

# Builds the mid-level heading annotation for +str+ and records the heading
# text in @inspector.subtitle. The half-indent annotation and leading/trailing
# whitespace (including full-width spaces) are stripped from the title.
#
# @param str [String] raw heading line
# @return [String] heading wrapped in the heading annotations
def midashi(str)
  midashi_title = str.gsub("［＃半字下げ］", "").gsub(/^[　\s]+/, "").gsub(/[　\s]+$/, "")
  @inspector.subtitle = midashi_title
  "［＃３字下げ］［＃ここから中見出し］#{midashi_title}［＃ここで中見出し終わり］"
end

# Counts the newlines at the head of +str+, capped at 2. An empty or
# newline-only string has no non-newline character, so its whole length is
# the leading count.
#
# @param str [String]
# @return [Integer] 0, 1 or 2
def calc_cr_count(str)
  head_cr_count = str.index(/[^\n]/) || str.length
  head_cr_count > 2 ? 2 : head_cr_count
end

# Turns the line right after each ［＃改ページ］ into a heading, in place.
#
# The helpers above are defined alongside this method instead of being
# re-defined by nested `def` on every call, and the newline count no longer
# raises when the heading is the last line of +data+.
#
# @param data [String] text to modify in place
def enchant_midashi(data)
  intro = AUTHOR_COMMENT_CHUKI[:introduction]
  # Actually attach the headings.
  data.gsub!(/［＃改ページ］\n(.+?)\n/) do |match|
    m1 = $1
    rest = $'
    # When an introduction follows the page break, hold off here and attach
    # the heading in the next pass.
    if m1 =~ /#{intro[:open]}/
      match
    else
      # Pad so that two blank lines sit between the heading and the body;
      # newlines already present after the heading count towards them.
      add_tail = "\n" * (2 - calc_cr_count(rest))
      "［＃改ページ］\n#{midashi(m1)}\n#{add_tail}"
    end
  end
  # With an introduction, swap the order "introduction → heading" into
  # "heading → introduction".
  data.gsub!(/(［＃改ページ］\n)(#{intro[:open]}.+?#{intro[:close]}\n)(.+?\n)/m) do
    m1, m2, m3 = $1, $2, $3
    add_tail = $' =~ /\A$/ ? "" : "\n"
    "#{m1 + midashi(m3) + m2}#{add_tail}"
  end
end

#erase_comments_block(data) ⇒ `Object`

コメントブロックを削除する



521
522
523

# File 'lib/converterbase.rb', line 521

# Deletes comment blocks from +data+, in place. A comment block runs from a
# line of 50 or more hyphens to the next such line, borders included.
#
# The body is matched non-greedily so each block ends at its own closing
# border; a greedy match would also delete all the text lying between
# separate comment blocks.
#
# @param data [String] text to modify in place
# @return [String, nil] result of String#gsub!
def erase_comments_block(data)
  data.gsub!(/^-{50,}\n.*?^-{50,}\n/m, "")
end

#erase_introduction(data) ⇒ `Object`

前書きを削除する

# File 'lib/converterbase.rb', line 1027

# Deletes every introduction block (from its opening to its closing
# annotation) that directly follows a ［＃改ページ］ line, in place; the page
# break itself is kept. Reports the number deleted through @inspector.info.
#
# @param data [String] text to modify in place
def erase_introduction(data)
  chuki = AUTHOR_COMMENT_CHUKI[:introduction]
  erased = 0
  data.gsub!(/(［＃改ページ］)\n#{chuki[:open]}.+?#{chuki[:close]}/m) do
    erased += 1
    $1
  end
  return unless erased > 0
  @inspector.info("前書きをすべて削除しました。削除した数は#{erased}個です。")
end

#erase_postscript(data) ⇒ `Object`

後書きを削除する

# File 'lib/converterbase.rb', line 1041

# Deletes every postscript block (from its opening to its closing annotation)
# that is followed by a ［＃改ページ］ or the end of the text, in place; the
# trailing page break is kept. Reports the number deleted through
# @inspector.info.
#
# @param data [String] text to modify in place
def erase_postscript(data)
  chuki = AUTHOR_COMMENT_CHUKI[:postscript]
  erased = 0
  data.gsub!(/#{chuki[:open]}.+?#{chuki[:close]}\n(［＃改ページ］|\z)/m) do
    erased += 1
    $1
  end
  return unless erased > 0
  @inspector.info("後書きをすべて削除しました。削除した数は#{erased}個です。")
end

#exception_reconvert_kanji_to_num(data) ⇒ `Object`

アラビア数字を使うべきところはアラビア数字に戻す

# File 'lib/converterbase.rb', line 228

# Turns kanji digits back into full-width Arabic digits where Arabic digits
# read better, in place: right after a full-width letter, and right before a
# full-width letter or a unit in RECONVERT_KANJI_TO_NUM_PATTERN_UNIT.
# Does nothing unless kanji conversion is enabled.
#
# @param data [String] text to modify in place
def exception_reconvert_kanji_to_num(data)
  return unless @setting.enable_convert_num_to_kanji
  to_arabic = ->(kanji) { kanji.tr(KANJI_NUM, "０-９") }
  # Patterns such as ｖｅｒ１・０１ are accepted as well.
  data.gsub!(/([Ａ-Ｚａ-ｚ])([#{KANJI_NUM}・～]+)/) do
    letter, nums = $1, $2
    letter + to_arabic.call(nums)
  end
  data.gsub!(/([#{KANJI_NUM}・～]+)([Ａ-Ｚａ-ｚ#{RECONVERT_KANJI_TO_NUM_PATTERN_UNIT}])/) do
    nums, suffix = $1, $2
    to_arabic.call(nums) + suffix
  end
end

#find_introduction? ⇒ `Boolean`

前書きの検出

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 718

# Looks ahead in @read_fp for an introduction splitter line
# (AUTHOR_INTRODUCTION_SPLITTER) before the next page break. The read position
# is restored afterwards.
#
# @return [Boolean] true when a splitter was found first
def find_introduction?
  saved_pos = @read_fp.pos
  found = false
  @read_fp.each do |line|
    break if page_break?(line)
    next unless line =~ AUTHOR_INTRODUCTION_SPLITTER
    found = true
    break
  end
  @read_fp.pos = saved_pos
  found
end

#force_indent_special_chapter(data) ⇒ `Object`

章見出しっぽい文字列を字下げする

# File 'lib/converterbase.rb', line 595

# Indents lines that look like chapter headings: a line holding only a 1-3
# digit number, optionally wrapped in dash/angle-bracket decorations.
#
# Each such line is replaced in +data+ by a "［＃章見出しっぽい文＝N］"
# placeholder, and the formatted heading is stored in
# @force_indent_special_chapter_list. Applies to "body" and "textfile" only.
def force_indent_special_chapter(data)
  return unless @text_type == "body" || @text_type == "textfile"
  # Counter shared through a class variable; it is never reset in this method.
  @@count_of_rebuild_container ||= 0
  data.gsub!(/^[ 　\t]*([－―<＜〈-]*)([0-9０-９#{KANJI_NUM}]{1,3})([－―>＞〉-]*)$/) do
    top, chapter, bottom = $1, $2, $3
    if top != "" && "―－-".include?(top)   # include? is true for the empty string (""), so this check is required
      top = "― "
      bottom = " ―"
    end
    str = "　　　［＃ゴシック体］#{top}"
    str += hankaku_num_to_zenkaku_num(chapter.tr("０-９", "0-9"))
    str += "#{bottom}［＃ゴシック体終わり］"
    # Blank lines are wanted before and after, but those are added inside the
    # per-line processing loop
    symbols_to_zenkaku(str)
    index = @@count_of_rebuild_container += 1
    # The key is passed through convert_numbers, the same conversion the
    # placeholder text in data receives later.
    @force_indent_special_chapter_list[convert_numbers(index.to_s)] = str
    "［＃章見出しっぽい文＝#{index}］"
  end
end

#half_indent_bracket(data) ⇒ `Object`

行頭かぎ括弧(等)に二分アキを追加する

「や（などの前にカスタム注記（［＃二分アキ］）を追加し、半文字分字下げする(二分アキ)。 kindle paperwhite で鍵括弧のインデントがおかしいことへの対応

# File 'lib/converterbase.rb', line 560

# Handles opening brackets at the head of a line (HALF_INDENT_TARGET), in
# place: leading whitespace before the bracket is removed, and when the
# setting is enabled a ［＃二分アキ］ annotation is put in front of it.
#
# @param data [String] text to modify in place
# @return [String, nil] result of String#gsub!
def half_indent_bracket(data)
  data.gsub!(HALF_INDENT_TARGET) do
    opener = $1
    @setting.enable_half_indent_bracket ? "［＃二分アキ］#{opener}" : opener
  end
end

#hankaku_num_to_zenkaku_num(data) ⇒ `Object`

半角アラビア数字の全角化

1桁、3桁以上：全角化 2桁：縦中横化

# File 'lib/converterbase.rb', line 538

# Converts half-width digit runs in +data+ in place and returns +data+.
#
# Two-digit runs are wrapped via #tcy, as is a three-digit run at the very
# start of a subtitle; every other run is converted to full-width digits.
#
# @param data [String] text to modify in place
# @return [String] +data+
def hankaku_num_to_zenkaku_num(data)
  data.gsub!(/\d+/) do |digits|
    preceding = $`
    use_tcy = digits.length == 2 ||
              (digits.length == 3 && @text_type == "subtitle" && preceding.empty?)
    use_tcy ? tcy(digits) : digits.tr("0-9", "０-９")
  end
  data
end

#hankakukana_to_zenkakukana(data) ⇒ `Object`

半角カナと｢｣｡､･等を全角に変換



330
331
332

# File 'lib/converterbase.rb', line 330

# Converts half-width kana (and ｢｣｡､･ etc.) in +data+ to full-width using
# NKF, then maps U+2014 to "―". Replaces the content of +data+ in place.
#
# @param data [String] text to modify in place
# @return [String] +data+
def hankakukana_to_zenkakukana(data)
  widened = NKF.nkf("-wWX", data)
  data.replace(widened.tr("\u2014", "―"))
end

#inclusion_author_comment_block?(line) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 732

# Decides whether +line+ starts an author-comment block and records which one
# in @in_author_comment_block.
#
# A page-break line starts an introduction when find_introduction? finds a
# splitter ahead; a line matching AUTHOR_POSTSCRIPT_SPLITTER starts a
# postscript.
#
# @param line [String]
# @return [Boolean] true when a block starts on this line
def inclusion_author_comment_block?(line)
  if page_break?(line)
    return false unless find_introduction?
    @in_author_comment_block = :introduction
    true
  elsif line =~ AUTHOR_POSTSCRIPT_SPLITTER
    @in_author_comment_block = :postscript
    true
  else
    false
  end
end

#initialize_member_values ⇒ `Object`

# File 'lib/converterbase.rb', line 42

# Resets the per-conversion working state to its initial values.
def initialize_member_values
  # Flags
  @request_insert_blank_next_line = false
  @request_skip_output_line = false
  @in_comment_block = false
  # Line buffers
  @before_line = ""
  @delay_outputs_buffer = ""
  # Stashes filled during conversion and restored later
  @english_sentences, @url_list, @illust_chuki_list = [], [], []
  @kanji_num_list, @num_and_comma_list, @force_indent_special_chapter_list = {}, {}, {}
  @in_author_comment_block = nil
end

#insert_blank_before_line_and_behind_to_special_chapter(line) ⇒ `Object`

# File 'lib/converterbase.rb', line 621

# For a line holding a chapter-heading placeholder: prepends a newline to
# +line+ when the previous line (@before_line) is not blank, and requests a
# blank line after it (@request_insert_blank_next_line).
#
# @param line [String] current line, modified in place
def insert_blank_before_line_and_behind_to_special_chapter(line)
  prefix = ""
  if line.include?("［＃章見出しっぽい文＝")
    prefix = "\n" unless blank_line?(@before_line)
    @request_insert_blank_next_line = true
  end
  line.sub!(/\A/, prefix)
end

#insert_blank_line_to_border_symbol(line) ⇒ `Object`

■などの区切りの前後には空行が必ず存在するようにする

# File 'lib/converterbase.rb', line 653

# Makes sure a border line (■ and the like) is surrounded by blank lines:
# prepends a newline when the previous line is not blank, requests a blank
# line after it, and indents the border by four full-width spaces.
#
# @param line [String] current line, modified in place
def insert_blank_line_to_border_symbol(line)
  prefix = ""
  if border_symbol?(line)
    prefix = "\n" unless blank_line?(@before_line)
    @request_insert_blank_next_line = true
    jisage(line, 4)
  end
  line.sub!(/\A/, prefix)
end

#insert_separate_space(data) ⇒ `Object`

特定の記号の直後は全角アキを挿入する

# File 'lib/converterbase.rb', line 281

# Inserts a full-width space after runs of ! ? ！ ？, in place, unless the
# next character is a closing bracket/quote or one of a few listed symbols.
# A half-width space following the marks is turned into a full-width one.
#
# @param data [String] text to modify in place
# @return [String, nil] result of String#gsub!
def insert_separate_space(data)
  needs_space = /[^」］\]』】〉》〕＞>≫)）"”’〟　☆★♪［―]/
  data.gsub!(/([!?！？]+)([^!?！？])/) do
    marks = $1
    follower = $2 == " " ? "　" : $2
    follower =~ needs_space ? "#{marks}　#{follower}" : "#{marks}#{follower}"
  end
end

#is_sesame?(str, ten, last_char) ⇒ `Boolean`

Returns:

(Boolean)



857
858
859

# File 'lib/converterbase.rb', line 857

# Truthy when +ten+ consists only of ・ or 、 and either +str+ contains "｜"
# or +last_char+ passes object_of_ruby?.
#
# @return [true, Integer, nil]
def is_sesame?(str, ten, last_char)
  only_dots = ten =~ /^[・、]+$/
  return nil unless only_dots
  str.include?("｜") || object_of_ruby?(last_char)
end

#jisage(line, num) ⇒ `Object`

行頭空白を考慮した字下げ



635
636
637

# File 'lib/converterbase.rb', line 635

# Indents +line+ in place by replacing any leading spaces/tabs with +num+
# full-width spaces.
#
# @param line [String] modified in place
# @param num [Integer] number of full-width spaces
def jisage(line, num)
  indent = "　" * num
  line.sub!(/^[ 　\t]*/) { indent }
end

#join_inner_bracket(str) ⇒ `Object`

改行を連結した文章を作る

改行がひとつもなかった場合は nil を返す

# File 'lib/converterbase.rb', line 782

# Builds a version of +str+ with its line breaks joined: a "。" is added
# after a line-ending … or ―, leading full-width spaces of each line are
# dropped, and the lines are concatenated. +str+ itself is not modified.
#
# @param str [String]
# @return [String, nil] joined text, or nil when +str+ has no line break
def join_inner_bracket(str)
  return nil unless str.include?("\n")
  terminated = str.gsub(/([…―])\n/, "\\1。\n")
  terminated.split("\n").map { |piece| piece.sub(/^　+/, "") }.join
end

#kanji_num_to_integer(string) ⇒ `Object`

# File 'lib/converterbase.rb', line 176

# Converts a kanji number that may contain large units (万/億/兆/京) into an
# Integer: each "small number + large units" segment is evaluated and the
# segments are summed.
#
# @param string [String] kanji number text
# @return [Integer]
def kanji_num_to_integer(string)
  segments = string.scan(/([#{KANJI_NUM}十百千]+)([万億兆京]*)/)
  segments.inject(0) do |sum, (num, units)|
    head = __calc_kanji_num_with_unit(num).to_s
    zeros = units.each_char.map { |c| "0" * KANJI_NUM_UNITS_DIGIT[c] }.join
    sum + (head + zeros).to_i
  end
end

#leave_author_comment_block?(line) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 746

# Decides whether +line+ ends the author-comment block currently open.
#
# An introduction ends at a line matching AUTHOR_INTRODUCTION_SPLITTER; a
# postscript ends at a page-break line.
#
# @param line [String]
# @return [Boolean]
def leave_author_comment_block?(line)
  case @in_author_comment_block
  when :introduction
    line =~ AUTHOR_INTRODUCTION_SPLITTER ? true : false
  when :postscript
    page_break?(line) ? true : false
  else
    false
  end
end

#midashi(str) ⇒ `Object`

# File 'lib/converterbase.rb', line 991

# Builds the mid-level heading annotation for +str+ and records the heading
# text in @inspector.subtitle. The half-indent annotation and leading/trailing
# whitespace (including full-width spaces) are stripped from the title.
#
# @param str [String] raw heading line
# @return [String] heading wrapped in the heading annotations
def midashi(str)
  title = str.gsub("［＃半字下げ］", "")
  title = title.gsub(/^[　\s]+/, "")
  title = title.gsub(/[　\s]+$/, "")
  @inspector.subtitle = title
  "［＃３字下げ］［＃ここから中見出し］#{title}［＃ここで中見出し終わり］"
end

#modify_kana_ni_to_kanji_ni(data) ⇒ `Object`

漢字の二じゃなくて間違えてカタカナのニを使ってるのを校正する



983
984
985

# File 'lib/converterbase.rb', line 983

# Corrects a katakana ニ that was typed in place of the kanji 二, in place:
# a ニ whose neighbours on both sides are not katakana (or ー) becomes 二.
#
# @param data [String] text to modify in place
# @return [String, nil] result of String#gsub!
def modify_kana_ni_to_kanji_ni(data)
  isolated_ni = /([^ァ-ヶー])ニ([^ァ-ヶー])/
  data.gsub!(isolated_ni) { "#{$1}二#{$2}" }
end

#narou_ruby(data) ⇒ `Object`

小説家になろうのルビ対策

# File 'lib/converterbase.rb', line 836

# Processes "Shousetsuka ni Narou" style ruby notation in +data+, in place.
#
# Outside subtitles and chapters, text followed by ≪…≫ or by a short
# kana/alphabet run in （…） is handed to to_ruby. Afterwards the text goes
# through replace_tatesen and the "［＃ルビ用縦線］" placeholder becomes "｜".
#
# @param data [String] text to modify in place
def narou_ruby(data)
  ruby_allowed = @text_type != "subtitle" && @text_type != "chapter"
  if ruby_allowed
    # Ruby written with double angle brackets.
    data.gsub!(/(.+?)≪([^≪]+?)≫/) do |whole|
      to_ruby(whole, $1, $2, ["≪", "≫"])
    end
    # Ruby written with full-width parentheses.
    data.gsub!(/(.+?)（([ぁ-んァ-ヶーゝゞ・Ａ-Ｚａ-ｚA-Za-z 　]{,20})）/) do |whole|
      to_ruby(whole, $1, $2, ["（", "）"])
    end
  end
  data.replace(replace_tatesen(data))
  data.gsub!("［＃ルビ用縦線］", "｜")
end

#num_to_kanji(data) ⇒ `Object`

アラビア数字を漢数字に

カンマ区切りの数字はアラビア数字のままにしておく。もともと漢数字なのは他の変換を受けないように退避させておく

# File 'lib/converterbase.rb', line 97

# Converts Arabic numerals in +data+ to kanji numerals (in place).
#
# Existing kanji numerals are stashed first via stash_kanji_num. Then each
# run of digits (ASCII or full-width) and commas is handled as follows:
# * no comma             -> converted to kanji digits
# * comma + ASCII digit  -> stashed unchanged via stash_hankaku_num_and_comma
#                           (full-width commas normalised to ",")
# * comma, no ASCII digit -> left as is
#
# Returns +data+.
def num_to_kanji(data)
  stash_kanji_num(data)
  data.gsub!(/[\d０-９,，]+/) do |token|
    if token !~ /[,，]/
      zenkaku_num_to_kanji(token.tr("0-9", KANJI_NUM))
    elsif token =~ /[\d]/
      stash_hankaku_num_and_comma(token.tr("，", ","))
    else
      token
    end
  end
  data
end

#object_of_ruby?(char) ⇒ `Boolean`

Returns:

(Boolean)



853
854
855

# File 'lib/converterbase.rb', line 853

# Checks whether +char+ contains a character listed in CHARACTER_OF_RUBY.
# Returns the match position (truthy) or nil.
def object_of_ruby?(char)
  ruby_target = /[#{CHARACTER_OF_RUBY}]/
  char =~ ruby_target
end

#outputs(data = "", force = false) ⇒ `Object`

# File 'lib/converterbase.rb', line 57

# Writes +data+ followed by a newline to @write_fp.
#
# Nothing is written while @request_skip_output_line is set, unless +force+
# is truthy.
def outputs(data = "", force = false)
  return if @request_skip_output_line && !force
  @write_fp.puts(data)
end

#page_break?(line) ⇒ `Boolean`

改ページある？

Returns:

(Boolean)



668
669
670

# File 'lib/converterbase.rb', line 668

# Does +line+ contain a page-break annotation?
# Returns the match position (truthy) or nil.
def page_break?(line)
  /［＃改ページ］/ =~ line
end

#process_author_comment(line) ⇒ `Object`

# File 'lib/converterbase.rb', line 687

# Tracks entry into and exit from author-comment blocks (foreword /
# postscript) for one input line and emits the matching open/close
# annotations from AUTHOR_COMMENT_CHUKI.
#
# Does nothing unless @setting.enable_author_comments is truthy.
# May clear +line+ in place and set @request_skip_output_line so that the
# separator line itself is not written out.
# NOTE(review): inclusion_author_comment_block? presumably sets
# @in_author_comment_block as a side effect, since that value is used as a
# hash key right after the call — confirm against its definition.
def process_author_comment(line)
  if @setting.enable_author_comments
    if @in_author_comment_block
      if leave_author_comment_block?(line)
        # Close the block that is currently open.
        outputs(AUTHOR_COMMENT_CHUKI[@in_author_comment_block][:close])
        if @in_author_comment_block == :introduction
          @request_skip_output_line = true
          line.clear
          @in_author_comment_block = nil
        elsif @in_author_comment_block == :postscript
          @in_author_comment_block = nil
          # A page break (the position where a foreword starts) was detected,
          # so run the foreword detection again on the same line.
          process_author_comment(line)
        end
      end
    else
      if inclusion_author_comment_block?(line)
        # Using outputs would place the annotation before the page break, so
        # delay_outputs is used to defer it until after the line is written.
        delay_outputs(AUTHOR_COMMENT_CHUKI[@in_author_comment_block][:open]) 
        if @in_author_comment_block == :postscript
          @request_skip_output_line = true
          line.clear
        end
      end
    end
  end
end

#rebuild_english_sentences(data) ⇒ `Object`

英文を再構成する

# File 'lib/converterbase.rb', line 499

# Restores stashed English sentences.
#
# For every entry of @english_sentences, the first placeholder whose id is
# convert_numbers(index) is replaced in +data+ (in place) with the stored
# sentence. Returns @english_sentences.
def rebuild_english_sentences(data)
  @english_sentences.each_with_index { |original, index|
    placeholder = "［＃英文＝#{convert_numbers(index.to_s)}］"
    data.sub!(placeholder, original)
  }
end

#rebuild_force_indent_special_chapter(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 615

# Swaps every chapter-heading-like placeholder in +data+ (in place) for the
# text stored in @force_indent_special_chapter_list under the placeholder id.
# Returns +data+, or nil when no placeholder was found.
def rebuild_force_indent_special_chapter(data)
  data.gsub!(/［＃章見出しっぽい文＝(.+?)］/) { @force_indent_special_chapter_list[Regexp.last_match(1)] }
end

#rebuild_hankaku_num_and_comma(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 120

# Swaps every half-width-number placeholder in +data+ (in place) for the
# entry of @num_and_comma_list whose key is the placeholder id as an Integer.
# Returns +data+, or nil when no placeholder was found.
def rebuild_hankaku_num_and_comma(data)
  data.gsub!(/［＃半角数字＝(.+?)］/) do
    stash_id = Regexp.last_match(1).to_i
    @num_and_comma_list[stash_id]
  end
end

#rebuild_illust(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 954

# Restores stashed illustration annotations.
#
# For every entry of @illust_chuki_list, the first placeholder whose id is
# convert_numbers(index) is replaced in +data+ (in place) with the stored
# annotation. Returns @illust_chuki_list.
def rebuild_illust(data)
  @illust_chuki_list.each_with_index { |annotation, index|
    placeholder = "［＃挿絵＝#{convert_numbers(index.to_s)}］"
    data.sub!(placeholder, annotation)
  }
end

#rebuild_kanji_num(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 136

# Swaps every kanji-numeral placeholder in +data+ (in place) for the text
# stored in @kanji_num_list under the placeholder id.
# Returns +data+, or nil when no placeholder was found.
def rebuild_kanji_num(data)
  data.gsub!(/［＃漢数字＝(.+?)］/) { @kanji_num_list[Regexp.last_match(1)] }
end

#rebuild_kome_to_gaiji(data) ⇒ `Object`

※の外字注記化

stash_kome で2つにしておいた※を外字注記化する



429
430
431

# File 'lib/converterbase.rb', line 429

# Turns each doubled reference mark ("※※") in +data+ into a single mark
# followed by its gaiji annotation (in place).
# Returns +data+, or nil when nothing was replaced.
def rebuild_kome_to_gaiji(data)
  doubled_kome = "※※"
  gaiji_chuki = "※［＃米印、1-2-8］"
  data.gsub!(doubled_kome, gaiji_chuki)
end

#rebuild_url(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 935

# Restores stashed URLs as HTML anchors.
#
# For every entry of @url_list, the first placeholder whose id is
# convert_numbers(index) is replaced in +data+ (in place) with an <a> tag;
# the href value goes through Helper.ampersand_to_entity, the link text is
# the URL as stored. Returns @url_list.
def rebuild_url(data)
  @url_list.each_with_index do |url, index|
    placeholder = "［＃ＵＲＬ＝#{convert_numbers(index.to_s)}］"
    anchor = "<a href=\"#{Helper.ampersand_to_entity(url)}\">#{url}</a>"
    data.sub!(placeholder, anchor)
  end
end

#replace_by_replace_txt(text) ⇒ `Object`

replace.txt により単純置換

# File 'lib/converterbase.rb', line 1217

# Applies the simple replacements from replace.txt.
#
# Each entry of @setting.replace_pattern is a (source, destination) pair;
# the pairs are applied to +text+ in order, in place, with gsub!.
# Returns @setting.replace_pattern.
def replace_by_replace_txt(text)
  @setting.replace_pattern.each { |from, to| text.gsub!(from, to) }
end

#replace_illust_tag(data) ⇒ `Object`

挿絵タグやimgタグ等を挿絵注釈に変換する。挿絵画像が存在しなければダウンロードして保存する。

# File 'lib/converterbase.rb', line 946

# Replaces illustration tags found by @illustration.scanner.
#
# For every annotation yielded by the scanner the block answers with:
# * "" when @setting.enable_illust is off
# * otherwise a numbered placeholder line, after appending the annotation to
#   @illust_chuki_list (the number is the annotation's index in that list)
#
# Returns whatever @illustration.scanner returns.
def replace_illust_tag(data)
  @illustration.scanner(data) do |chuki|
    if @setting.enable_illust
      @illust_chuki_list << chuki
      "［＃挿絵＝#{@illust_chuki_list.size - 1}］\n"
    else
      ""
    end
  end
end

#replace_narou_tag(data) ⇒ `Object`

小説家になろう専用タグを置換

# File 'lib/converterbase.rb', line 296

# Replaces tags specific to Shousetsuka ni Narou (in place).
#
# "【改ページ】" is removed, then <KBR> and <PBR> (case-insensitive) are each
# turned into a newline, in that order. The return value is that of the last
# gsub! (nil when no <PBR> was present).
def replace_narou_tag(data)
  data.gsub!("【改ページ】", "")
  [/<KBR>/i, /<PBR>/i].map { |break_tag| data.gsub!(break_tag, "\n") }.last
end

#replace_tatesen(str) ⇒ `Object`



876
877
878

# File 'lib/converterbase.rb', line 876

# Returns a copy of +str+ in which every full-width vertical bar is replaced
# by the vertical-bar gaiji annotation.
def replace_tatesen(str)
  str.gsub("｜") { "※［＃縦線］" }
end

#replace_url(data) ⇒ `Object`

URL っぽい文字列を一旦別のIDに置き換えてあとで復元することで、変換処理の影響を受けさせない

# File 'lib/converterbase.rb', line 928

def replace_url(data)
  data.gsub!(URI.regexp(%w(http https))) do |match|
    @url_list << match
    "［＃ＵＲＬ＝#{@url_list.size - 1}］"
  end
end

#rstrip_all_lines(data) ⇒ `Object`

すべての行の行末空白を削除



72
73
74

# File 'lib/converterbase.rb', line 72

# Returns a copy of +data+ with trailing blanks (ASCII space, full-width
# space, tab) removed from the end of every line. +data+ is not modified.
def rstrip_all_lines(data)
  trailing_blanks = /[ 　\t]+$/m
  data.gsub(trailing_blanks, "")
end

#ruby_youon_to_big(ruby) ⇒ `Object`

ルビの拗音(ぁ、ぃ等)を商業書籍のように大きくする

# File 'lib/converterbase.rb', line 909

# Enlarges small kana (ぁ, ぃ, ゃ, っ, ...) inside ruby text, the way
# commercial books typeset them.
#
# Returns +ruby+ itself when @setting.enable_ruby_youon_to_big is off,
# otherwise a new string with each small kana mapped to its full-size form.
def ruby_youon_to_big(ruby)
  return ruby unless @setting.enable_ruby_youon_to_big

  small_kana = "ぁぃぅぇぉゃゅょゎっァィゥェォャュョヮッヵヶ"
  large_kana = "あいうえおやゆよわつアイウエオヤユヨワツカケ"
  ruby.tr(small_kana, large_kana)
end

#sesame(str) ⇒ `Object`

# File 'lib/converterbase.rb', line 861

# Wraps the emphasised part of +str+ in emphasis-dot annotations.
#
# * When +str+ contains "｜", the first "｜" becomes the opening annotation.
# * Otherwise the opening annotation is inserted before the trailing run of
#   CHARACTER_OF_RUBY characters / full-width spaces; leading full-width
#   spaces of that run are kept outside the emphasis.
#
# The closing annotation is always appended. Returns a new string.
def sesame(str)
  closing = "［＃傍点終わり］"
  return str.sub("｜", "［＃傍点］") + closing if str.include?("｜")

  marked = str.sub(/([#{CHARACTER_OF_RUBY}　]+)$/) do
    target = Regexp.last_match(1)
    leading_spaces = target[/^(　+)/, 1]
    if leading_spaces
      "#{leading_spaces}［＃傍点］#{target[leading_spaces.length..-1]}"
    else
      "［＃傍点］#{target}"
    end
  end
  marked + closing
end

#stash_hankaku_num_and_comma(num) ⇒ `Object`

# File 'lib/converterbase.rb', line 113

# Stashes a comma-separated half-width number.
#
# A class-wide counter is incremented, +num+ is stored in
# @num_and_comma_list under the new counter value, and the placeholder
# carrying that value is returned.
def stash_hankaku_num_and_comma(num)
  @@num_and_comma_list_counter ||= 0
  stash_id = (@@num_and_comma_list_counter += 1)
  @num_and_comma_list[stash_id] = num
  "［＃半角数字＝#{stash_id}］"
end

#stash_kanji_num(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 126

# Replaces runs of kanji-numeral characters in +data+ (in place) with
# "［＃漢数字＝N］" placeholders and remembers the original text.
#
# N is the zero-based position of the run among all runs matched during this
# call; runs that are skipped still consume an index. The original text is
# stored in @kanji_num_list under the key convert_numbers(N.to_s), while the
# placeholder itself embeds the raw N.
# NOTE(review): presumably the digits inside the placeholder go through the
# same numeric conversion before rebuild_kanji_num looks them up — confirm.
def stash_kanji_num(data)
  data.gsub!(/[#{KANJI_NUM}十百千万億兆京]+/).with_index do |match, i|
    # Leave the run untouched when the character right before or right after
    # it is an ASCII or full-width digit.
    if "#{$`[-1]}#{$'[0]}" =~ /[\d０-９]/
      next match
    end
    @kanji_num_list[convert_numbers(i.to_s)] = match
    "［＃漢数字＝#{i}］"
  end
end

#stash_kome(data) ⇒ `Object`

先に外字注記にしてしまうと border_symbol? 等で困るので、あとで外字注記化出来るようにする



412
413
414

# File 'lib/converterbase.rb', line 412

# Doubles every reference mark ("※") in +data+ (in place) so that it can be
# turned into a gaiji annotation later.
# Returns +data+, or nil when +data+ contains no "※".
def stash_kome(data)
  data.gsub!("※") { "※※" }
end

#symbols_to_zenkaku(data) ⇒ `Object`

半角記号を全角に変換

# File 'lib/converterbase.rb', line 341

# Converts half-width symbols in +data+ to full-width ones (in place).
#
# 1. Text enclosed by single-minute characters, then text enclosed by
#    double-minute characters, is re-wrapped in 〝…〟. Single minutes are
#    turned into double ones because hardly any font can display them.
# 2. The remaining ASCII symbols are mapped one-to-one to their full-width
#    counterparts.
# 3. Backslashes become "￥".
#
# Returns +data+.
def symbols_to_zenkaku(data)
  [SINGLE_MINUTE_FAMILY, DOUBLE_MINUTE_FAMILY].each do |family|
    data.gsub!(/[#{family}]([^"\n]+)[#{family}]/, "〝\\1〟")
  end
  hankaku_symbols = "-=+/*《》'\"%$#&!?<>＜＞()|‐,._;:[]"
  zenkaku_symbols = "－＝＋／＊≪≫’”％＄＃＆！？〈〉〈〉（）｜－，．＿；：［］"
  data.tr!(hankaku_symbols, zenkaku_symbols)
  data.gsub!("\\", "￥")
  data
end

#tcy(str) ⇒ `Object`

縦中横注記取得



354
355
356

# File 'lib/converterbase.rb', line 354

# Returns +str+ wrapped in the tate-chu-yoko (horizontal-in-vertical)
# open/close annotations.
def tcy(str)
  opening = "［＃縦中横］"
  closing = "［＃縦中横終わり］"
  "#{opening}#{str}#{closing}"
end

#to_ruby(match, m1, m2, openclose_symbols) ⇒ `Object`

# File 'lib/converterbase.rb', line 880

# Decides how one "base + bracketed reading" occurrence is rendered.
#
# match             - the whole matched text (returned as is when no rule
#                     applies)
# m1                - the text preceding the opening bracket
# m2                - the text inside the brackets (ruby candidate)
# openclose_symbols - [open, close] bracket characters of the match
#
# Rules, checked in order:
# 1. m1 ends with "｜": ruby is suppressed; the bar is dropped and the
#    brackets are kept.
# 2. is_sesame? is truthy: emphasis dots via sesame.
# 3. m1 contains "｜": the last bar becomes the ruby-bar placeholder and m2
#    becomes the ruby text.
# 4. the last character of m1 is a ruby target: a ruby-bar placeholder is
#    inserted in front of the trailing run of target characters.
# 5. otherwise +match+ is returned unchanged.
#
# Rules 3 and 4 both pass m2 through ruby_youon_to_big so that the
# small-kana setting is honoured no matter how the ruby base was marked;
# previously rule 3 emitted m2 verbatim.
def to_ruby(match, m1, m2, openclose_symbols)
  last_char = m1[-1]
  case
  when last_char == "｜"
    # A bar immediately before the bracket suppresses ruby conversion.
    "#{m1[0...-1]}#{openclose_symbols[0]}#{m2}#{openclose_symbols[1]}"
  when is_sesame?(m1, m2, last_char)
    sesame(m1)
  when m1.include?("｜")
    "#{m1.sub(/｜([^｜]*)$/, "［＃ルビ用縦線］\\1")}《#{ruby_youon_to_big(m2)}》"
  when object_of_ruby?(last_char)
    # Walk back over Narou's ruby-target characters and insert the bar there,
    # absorbing the difference between the Aozora Bunko and Narou ruby specs.
    # Spaces count as ruby-target characters per Narou's rules.
    m1.sub(/([#{CHARACTER_OF_RUBY} 　]+)$/) {
      match_target = $1
      if match_target =~ /^(　+)/
        "#{$1}［＃ルビ用縦線］#{match_target[$1.length..-1]}"
      else
        "［＃ルビ用縦線］#{match_target}"
      end
    } + "《#{ruby_youon_to_big(m2)}》"
  else
    match
  end
end

#zenkaku_num_to_hankaku_num(num) ⇒ `Object`

全角数字(漢数字含む)を半角アラビア数字に



528
529
530

# File 'lib/converterbase.rb', line 528

# Returns a copy of +num+ with full-width digits and the kanji digits listed
# in KANJI_NUM mapped to ASCII digits.
def zenkaku_num_to_hankaku_num(num)
  wide_digits = "０-９#{KANJI_NUM}"
  ascii_digits = "0-90-9"
  num.tr(wide_digits, ascii_digits)
end

#zenkaku_num_to_kanji(str) ⇒ `Object`

全角アラビア数字を漢数字に



145
146
147

# File 'lib/converterbase.rb', line 145

# Returns a copy of +str+ with full-width Arabic digits mapped to the kanji
# digits listed in KANJI_NUM.
def zenkaku_num_to_kanji(str)
  zenkaku_digits = "０-９"
  str.tr(zenkaku_digits, KANJI_NUM)
end

#zenkaku_rstrip(line) ⇒ `Object`

全角版 String#rstrip!



921
922
923

# File 'lib/converterbase.rb', line 921

# Full-width-aware counterpart of String#rstrip!: removes the trailing run of
# whitespace (ASCII whitespace or full-width space) at the very end of
# +line+, in place.
# Returns +line+, or nil when there was nothing to strip.
def zenkaku_rstrip(line)
  trailing_space = /[　\s]+\z/
  line.sub!(trailing_space, "")
end

Class: ConverterBase

Constant Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(setting, inspector, illustration) ⇒ ConverterBase

Instance Attribute Details

#use_dakuten_font ⇒ Object (readonly)

Class Method Details

.rebuild_brackets(data, stack) ⇒ Object

Instance Method Details

#__calc_kanji_num_with_unit(string) ⇒ Object

#__calc_sum_unit(units) ⇒ Object

#after(io, text_type) ⇒ Object

#after_convert(io) ⇒ Object

#alphabet_to_zenkaku(data, force = false) ⇒ Object

#author_comment_force_close ⇒ Object

#auto_indent(data) ⇒ Object

#auto_join_in_brackets(data) ⇒ Object

#auto_join_line(data) ⇒ Object

#before(io, text_type) ⇒ Object

#before_convert(io) ⇒ Object

#blank_line?(line) ⇒ Boolean

#border_symbol?(line) ⇒ Boolean

#calc_cr_count(str) ⇒ Object

#comments_block?(line) ⇒ Boolean

#convert(text, text_type) ⇒ Object

#convert_arrow(data) ⇒ Object

#convert_dakuten_char_to_font(data) ⇒ Object

#convert_double_angle_quotation_to_gaiji(data) ⇒ Object

#convert_for_all_data(data) ⇒ Object

#convert_fraction_and_date(data) ⇒ Object

#convert_horizontal_ellipsis(data) ⇒ Object

#convert_kanji_num_with_unit(data, lower_digit_zero = 0) ⇒ Object

#convert_main(io) ⇒ Object

#convert_novel_rule(data) ⇒ Object

#convert_numbers(data) ⇒ Object

#convert_page_break(data) ⇒ Object

#convert_rome_numeric(data) ⇒ Object

#convert_special_characters(data) ⇒ Object

#convert_tatechuyoko(data) ⇒ Object

#delay_outputs(data = "") ⇒ Object

#delete_dust_char(data) ⇒ Object

#enchant_midashi(data) ⇒ Object

#erase_comments_block(data) ⇒ Object

#erase_introduction(data) ⇒ Object

#erase_postscript(data) ⇒ Object

#exception_reconvert_kanji_to_num(data) ⇒ Object

#find_introduction? ⇒ Boolean

#force_indent_special_chapter(data) ⇒ Object

#half_indent_bracket(data) ⇒ Object

#hankaku_num_to_zenkaku_num(data) ⇒ Object

#hankakukana_to_zenkakukana(data) ⇒ Object

#inclusion_author_comment_block?(line) ⇒ Boolean

#initialize_member_values ⇒ Object

#insert_blank_before_line_and_behind_to_special_chapter(line) ⇒ Object

#insert_blank_line_to_border_symbol(line) ⇒ Object

#insert_separate_space(data) ⇒ Object

#is_sesame?(str, ten, last_char) ⇒ Boolean

#jisage(line, num) ⇒ Object

#join_inner_bracket(str) ⇒ Object

#kanji_num_to_integer(string) ⇒ Object

#leave_author_comment_block?(line) ⇒ Boolean

#midashi(str) ⇒ Object

#modify_kana_ni_to_kanji_ni(data) ⇒ Object

#narou_ruby(data) ⇒ Object

#num_to_kanji(data) ⇒ Object

#object_of_ruby?(char) ⇒ Boolean

#outputs(data = "", force = false) ⇒ Object

#page_break?(line) ⇒ Boolean

#process_author_comment(line) ⇒ Object

#rebuild_english_sentences(data) ⇒ Object

#rebuild_force_indent_special_chapter(data) ⇒ Object

#rebuild_hankaku_num_and_comma(data) ⇒ Object

#rebuild_illust(data) ⇒ Object

#rebuild_kanji_num(data) ⇒ Object

#rebuild_kome_to_gaiji(data) ⇒ Object

#rebuild_url(data) ⇒ Object

#replace_by_replace_txt(text) ⇒ Object

#initialize(setting, inspector, illustration) ⇒ `ConverterBase`

#use_dakuten_font ⇒ `Object` (readonly)

.rebuild_brackets(data, stack) ⇒ `Object`

#__calc_kanji_num_with_unit(string) ⇒ `Object`

#__calc_sum_unit(units) ⇒ `Object`

#after(io, text_type) ⇒ `Object`

#after_convert(io) ⇒ `Object`

#alphabet_to_zenkaku(data, force = false) ⇒ `Object`

#author_comment_force_close ⇒ `Object`

#auto_indent(data) ⇒ `Object`

#auto_join_in_brackets(data) ⇒ `Object`

#auto_join_line(data) ⇒ `Object`

#before(io, text_type) ⇒ `Object`

#before_convert(io) ⇒ `Object`

#blank_line?(line) ⇒ `Boolean`

#border_symbol?(line) ⇒ `Boolean`

#calc_cr_count(str) ⇒ `Object`

#comments_block?(line) ⇒ `Boolean`

#convert(text, text_type) ⇒ `Object`

#convert_arrow(data) ⇒ `Object`

#convert_dakuten_char_to_font(data) ⇒ `Object`

#convert_double_angle_quotation_to_gaiji(data) ⇒ `Object`

#convert_for_all_data(data) ⇒ `Object`

#convert_fraction_and_date(data) ⇒ `Object`

#convert_horizontal_ellipsis(data) ⇒ `Object`

#convert_kanji_num_with_unit(data, lower_digit_zero = 0) ⇒ `Object`

#convert_main(io) ⇒ `Object`

#convert_novel_rule(data) ⇒ `Object`

#convert_numbers(data) ⇒ `Object`

#convert_page_break(data) ⇒ `Object`

#convert_rome_numeric(data) ⇒ `Object`

#convert_special_characters(data) ⇒ `Object`

#convert_tatechuyoko(data) ⇒ `Object`

#delay_outputs(data = "") ⇒ `Object`

#delete_dust_char(data) ⇒ `Object`

#enchant_midashi(data) ⇒ `Object`

#erase_comments_block(data) ⇒ `Object`

#erase_introduction(data) ⇒ `Object`

#erase_postscript(data) ⇒ `Object`

#exception_reconvert_kanji_to_num(data) ⇒ `Object`

#find_introduction? ⇒ `Boolean`

#force_indent_special_chapter(data) ⇒ `Object`

#half_indent_bracket(data) ⇒ `Object`

#hankaku_num_to_zenkaku_num(data) ⇒ `Object`

#hankakukana_to_zenkakukana(data) ⇒ `Object`

#inclusion_author_comment_block?(line) ⇒ `Boolean`

#initialize_member_values ⇒ `Object`

#insert_blank_before_line_and_behind_to_special_chapter(line) ⇒ `Object`

#insert_blank_line_to_border_symbol(line) ⇒ `Object`

#insert_separate_space(data) ⇒ `Object`

#is_sesame?(str, ten, last_char) ⇒ `Boolean`

#jisage(line, num) ⇒ `Object`

#join_inner_bracket(str) ⇒ `Object`

#kanji_num_to_integer(string) ⇒ `Object`

#leave_author_comment_block?(line) ⇒ `Boolean`

#midashi(str) ⇒ `Object`

#modify_kana_ni_to_kanji_ni(data) ⇒ `Object`

#narou_ruby(data) ⇒ `Object`

#num_to_kanji(data) ⇒ `Object`

#object_of_ruby?(char) ⇒ `Boolean`

#outputs(data = "", force = false) ⇒ `Object`

#page_break?(line) ⇒ `Boolean`

#process_author_comment(line) ⇒ `Object`

#rebuild_english_sentences(data) ⇒ `Object`

#rebuild_force_indent_special_chapter(data) ⇒ `Object`

#rebuild_hankaku_num_and_comma(data) ⇒ `Object`

#rebuild_illust(data) ⇒ `Object`

#rebuild_kanji_num(data) ⇒ `Object`

#rebuild_kome_to_gaiji(data) ⇒ `Object`

#rebuild_url(data) ⇒ `Object`

#replace_by_replace_txt(text) ⇒ `Object`

#replace_illust_tag(data) ⇒ `Object`

#replace_narou_tag(data) ⇒ `Object`

#replace_tatesen(str) ⇒ `Object`

#replace_url(data) ⇒ `Object`

#rstrip_all_lines(data) ⇒ `Object`

#ruby_youon_to_big(ruby) ⇒ `Object`

#sesame(str) ⇒ `Object`

#stash_hankaku_num_and_comma(num) ⇒ `Object`

#stash_kanji_num(data) ⇒ `Object`