Class: ConverterBase

Inherits:

Object

Object
ConverterBase

show all

Defined in: lib/converterbase.rb

Constant Summary collapse

KANJI_NUM =

"〇一二三四五六七八九"

KANJI_NUM_UNITS =

%w(万 億 兆 京).unshift("")

KANJI_KURAI =

%w(十 百 千).unshift("")

KANJI_NUM_UNITS_DIGIT =

{
  "十" => 1, "百" => 2, "千" => 3, "万" => 4, "億" => 8, "兆" => 12, "京" => 16
}

RECONVERT_KANJI_TO_NUM_PATTERN_UNIT =

"％㎜㎝㎞㎎㎏㏄㎡㎥"

ROME_NUM_ALPHABET =

%w(II III IV VI VII VIII IX ii iii iv vi vii viii ix)

ROME_NUM =

%w(Ⅱ Ⅲ Ⅳ Ⅵ Ⅶ Ⅷ Ⅸ ⅱ ⅲ ⅳ ⅵ ⅶ ⅷ ⅸ)

HALF_INDENT_TARGET =

/^[ 　\t]*([〔「『(（【〈《≪〝])/

FULL_INDENT_TARGET =

/^[ 　\t]*(――)/

AUTO_INDENT_IGNORE_INDENT_CHAR =

Inspector::IGNORE_INDENT_CHAR.sub("・", "")

AUTHOR_INTRODUCTION_SPLITTER = 前書き・後書きの検出及び処理

/^　*[\*＊]{44}$/

AUTHOR_POSTSCRIPT_SPLITTER =

/^　*[\*＊]{48}$/

AUTHOR_COMMENT_CHUKI =

{
  introduction: {
    open: "［＃ここから前書き］", close: "［＃ここで前書き終わり］"
  },
  postscript: {
    open: "［＃ここから後書き］", close: "［＃ここで後書き終わり］"
  }
}

BRACKETS =

[%w(「 」), %w(『 』)]

OPENCLOSE_REGEXPS = ネストに対応したかぎ括弧の正規表現

BRACKETS.map { |bracket|
  bo, bc = bracket
  /(?<oc>#{bo}[^#{bo+bc}]*(?:\g<oc>[^#{bo+bc}]*)*#{bc})/m
}

CHARACTER_OF_RUBY =

"一-龠Ａ-Ｚａ-ｚA-Za-z"

Instance Attribute Summary collapse

#use_dakuten_font ⇒ Object readonly

Returns the value of attribute use_dakuten_font.

Class Method Summary collapse

.rebuild_brackets(data, stack) ⇒ Object

Instance Method Summary collapse

#__calc_kanji_num_with_unit(string) ⇒ Object
#__calc_sum_unit(units) ⇒ Object
#after(io, text_type) ⇒ Object
#after_convert(io) ⇒ Object
#alphabet_to_zenkaku(data, force = false) ⇒ Object

半角アルファベットを全角に変換する.
#author_comment_force_close ⇒ Object
#auto_indent(data) ⇒ Object

行頭字下げ.
#auto_join_in_brackets(data) ⇒ Object

かぎ括弧内自動連結.
#auto_join_line(data) ⇒ Object

手動折り返しの自動連結.
#before(io, text_type) ⇒ Object
#before_convert(io) ⇒ Object
#blank_line?(line) ⇒ Boolean
#border_symbol?(line) ⇒ Boolean
#comments_block?(line) ⇒ Boolean

コメントブロックを検出する.
#convert(text, text_type) ⇒ Object
#convert_arrow(data) ⇒ Object

おかしくなりやすい矢印文字の変換.
#convert_dakuten_char_to_font(data) ⇒ Object

濁点のついてない文字に濁点をつける表現を対応.
#convert_double_angle_quotation_to_gaiji(data) ⇒ Object

ギュメを二重山括弧（の外字）に変換.
#convert_for_all_data(data) ⇒ Object

小説データ全体に対して施す変換.
#convert_fraction_and_date(data) ⇒ Object

分数表記を○分の○表記に変更、及び日付表記を検出.
#convert_horizontal_ellipsis(data) ⇒ Object

中黒(・)や句読点を並べて三点リーダーもどきにしているのを三点リーダーに変換.
#convert_kanji_num_with_unit(data, lower_digit_zero = 0) ⇒ Object

漢数字を単位を使った表現に変換.
#convert_main(io) ⇒ Object

変換処理本体.
#convert_novel_rule(data) ⇒ Object

小説のルールに沿うように変換.
#convert_numbers(data) ⇒ Object

数字の変換.
#convert_page_break(data) ⇒ Object

一定以上の連続する空行を改ページに変換.
#convert_rome_numeric(data) ⇒ Object

ローマ数字っぽいアルファベットをローマ数字に変換.
#convert_special_characters(data) ⇒ Object

特定の表現・記号を変換していく.
#convert_tatechuyoko(data) ⇒ Object

縦中横にすべき表現を変換.
#delay_outputs(data = "") ⇒ Object
#delete_dust_char(data) ⇒ Object

表示上化けてしまうゴミ削除.
#enchant_midashi(data) ⇒ Object

［＃改ページ］直後の行を見出しに設定する.
#erase_comments_block(data) ⇒ Object

コメントブロックを削除する.
#erase_introduction(data) ⇒ Object

前書きを削除する.
#erase_postscript(data) ⇒ Object

後書きを削除する.
#exception_reconvert_kanji_to_num(data) ⇒ Object

アラビア数字を使うべきところはアラビア数字に戻す.
#find_introduction? ⇒ Boolean

前書きの検出.
#force_indent_special_chapter(line) ⇒ Object

章見出しっぽい文字列を字下げして前後に空行を入れる.
#half_indent_bracket(data) ⇒ Object

行頭かぎ括弧(等)に二分アキを追加する.
#hankaku_num_to_zenkaku_num(data) ⇒ Object

半角アラビア数字の全角化.
#hankakukana_to_zenkakukana(data) ⇒ Object

半角カナと｢｣｡､･等を全角に変換.
#inclusion_author_comment_block?(line) ⇒ Boolean
#initialize(setting, inspector, illustration) ⇒ ConverterBase constructor

A new instance of ConverterBase.
#initialize_member_values ⇒ Object
#insert_blank_line_to_border_symbol(line) ⇒ Object

■などの区切りの前後には空行が必ず存在するようにする.
#insert_separate_space(data) ⇒ Object

特定の記号の直後は全角アキを挿入する.
#is_sesame?(str, ten, last_char) ⇒ Boolean
#jisage(line, num) ⇒ Object

行頭空白を考慮した字下げ.
#join_inner_bracket(str) ⇒ Object

改行を連結した文章を作る.
#kanji_num_to_integer(string) ⇒ Object
#leave_author_comment_block?(line) ⇒ Boolean
#midashi(str) ⇒ Object
#modify_kana_ni_to_kanji_ni(data) ⇒ Object

漢字の二じゃなくて間違えてカタカナのニを使ってるのを校正する.
#narou_ruby(data) ⇒ Object

小説家になろうのルビ対策.
#num_to_kanji(data) ⇒ Object

アラビア数字を漢数字に.
#object_of_ruby?(char) ⇒ Boolean
#outputs(data = "", force = false) ⇒ Object
#page_break?(line) ⇒ Boolean

改ページある？.
#process_author_comment(line) ⇒ Object
#rebuild_english_sentences(data) ⇒ Object

英文を再構成する.
#rebuild_hankaku_num_and_comma(data) ⇒ Object
#rebuild_illust(data) ⇒ Object
#rebuild_kanji_num(data) ⇒ Object
#rebuild_kome_to_gaiji(data) ⇒ Object

※の外字注記化.
#rebuild_url(data) ⇒ Object
#replace_by_replace_txt(text) ⇒ Object

replace.txt により単純置換.
#replace_illust_tag(data) ⇒ Object

挿絵タグやimgタグ等を挿絵注釈に変換する。挿絵画像が存在しなければダウンロードして保存する.
#replace_narou_tag(data) ⇒ Object

小説家になろう専用タグを置換.
#replace_tatesen(str) ⇒ Object
#replace_url(data) ⇒ Object

URL っぽい文字列を一旦別のIDに置き換えてあとで復元することで、変換処理の影響を受けさせない.
#rstrip_all_lines(data) ⇒ Object

すべての行の行末空白を削除.
#ruby_youon_to_big(ruby) ⇒ Object

ルビの拗音(ぁ、ぃ等)を商業書籍のように大きくする.
#sesame(str, ten) ⇒ Object
#stash_hankaku_num_and_comma(num) ⇒ Object
#stash_kanji_num(data) ⇒ Object
#stash_kome(data) ⇒ Object

先に外字注記にしてしまうと border_symbol? 等で困るので、あとで外字注記化出来るようにする.
#symbols_to_zenkaku(data) ⇒ Object

半角記号を全角に変換.
#tcy(str) ⇒ Object

縦中横注記取得.
#to_ruby(match, m1, m2, openclose_symbols) ⇒ Object
#zenkaku_num_to_hankaku_num(num) ⇒ Object

全角数字を半角アラビア数字に.
#zenkaku_num_to_kanji(str) ⇒ Object

全角アラビア数字を漢数字に.
#zenkaku_rstrip(line) ⇒ Object

全角版 String#rstrip!.

Constructor Details

#initialize(setting, inspector, illustration) ⇒ `ConverterBase`

Returns a new instance of ConverterBase.

# File 'lib/converterbase.rb', line 32

# Stores the collaborators handed in by the caller and resets all
# per-conversion state through initialize_member_values.
#
# @param setting [Object] kept in @setting
# @param inspector [Object] kept in @inspector
# @param illustration [Object] kept in @illustration
def initialize(setting, inspector, illustration)
  @setting = setting
  @inspector = inspector
  @illustration = illustration
  # Starts out false; convert_dakuten_char_to_font sets it to true.
  @use_dakuten_font = false
  initialize_member_values
end

Instance Attribute Details

#use_dakuten_font ⇒ `Object` (readonly)

Returns the value of attribute use_dakuten_font.



16
17
18

# File 'lib/converterbase.rb', line 16

# Reader for @use_dakuten_font.
#
# @return [Object] the current value of @use_dakuten_font
def use_dakuten_font
  @use_dakuten_font
end

Class Method Details

.rebuild_brackets(data, stack) ⇒ `Object`

# File 'lib/converterbase.rb', line 779

# Puts back the passages that were swapped out for
# "［＃かぎ括弧＝N］" placeholders.
#
# @param data [String] text containing placeholders (not modified)
# @param stack [#[]] passages looked up by the integer N of each placeholder
# @return [String] a new string with every placeholder replaced by stack[N]
def self.rebuild_brackets(data, stack)
  data.gsub(/［＃かぎ括弧＝(\d+)］/) { stack[Regexp.last_match(1).to_i] }
end

Instance Method Details

#__calc_kanji_num_with_unit(string) ⇒ `Object`

# File 'lib/converterbase.rb', line 158

# Sums a kanji numeral made of digit runs and 十/百/千 place markers.
#
# The string is scanned as consecutive (digits, places) pairs. Each pair
# contributes the digits (or 1 when there are none) followed by the zeros
# implied by the place markers. Scanning stops at the first empty pair.
#
# @param string [String] kanji numeral without 万/億/兆/京
# @return [Integer] the summed value (0 for an empty string)
def __calc_kanji_num_with_unit(string)
  sum = 0
  string.scan(/([#{KANJI_NUM}]*)([十百千]*)/) do |digits, places|
    break if digits.empty? && places.empty?
    head = digits.empty? ? "1" : digits.tr(KANJI_NUM, "0-9")
    # Drop the leading "1" of the place value to keep only its zeros.
    tail = places.empty? ? "" : __calc_sum_unit(places).to_s[1, 99]
    sum += (head + tail).to_i
  end
  sum
end

#__calc_sum_unit(units) ⇒ `Object`

# File 'lib/converterbase.rb', line 152

# Adds up the place values of the given unit characters, using the digit
# counts in KANJI_NUM_UNITS_DIGIT (e.g. "十" -> 10, "千百" -> 1100).
#
# @param units [String] unit characters that are keys of KANJI_NUM_UNITS_DIGIT
# @return [Integer] sum of 10**digits over all characters (0 when empty)
def __calc_sum_unit(units)
  place_values = units.each_char.map { |unit| ("1" + "0" * KANJI_NUM_UNITS_DIGIT[unit]).to_i }
  place_values.inject(0, :+)
end

#after(io, text_type) ⇒ `Object`



28
29
30

# File 'lib/converterbase.rb', line 28

# Post-conversion hook. This implementation returns io untouched and does
# not look at text_type.
#
# @param io [Object] the converted output
# @param text_type [Object] unused here
# @return [Object] io itself
def after(io, text_type)
  io
end

#after_convert(io) ⇒ `Object`



1055
1056
1057

# File 'lib/converterbase.rb', line 1055

# Calls #after with io and the current @text_type.
#
# @param io [Object] passed through to #after
# @return [Object] whatever #after returns
def after_convert(io)
  after(io, @text_type)
end

#alphabet_to_zenkaku(data, force = false) ⇒ `Object`

半角アルファベットを全角に変換する

force : 強制的に全アルファベットを全角にするか？

false の場合、英文章（半角スペースで区切られた2単語以上）を半角のままにする

# File 'lib/converterbase.rb', line 467

# Converts half-width ASCII letters in data to full-width, in place.
#
# With force, every run of letters is widened. Otherwise each run of word
# characters, ".,!?'" and spaces is examined: when it splits into two or
# more space-separated words it is stored in @english_sentences and
# replaced by a "［＃英文＝N］" placeholder; a single word is widened.
#
# @param data [String] text, modified in place
# @param force [Boolean] widen every letter instead of stashing sentences
# @return [String, nil] result of gsub! (nil when nothing matched)
def alphabet_to_zenkaku(data, force = false)
  widen = ->(text) { text.tr("a-zA-Z", "ａ-ｚＡ-Ｚ") }
  return data.gsub!(/[a-zA-Z]+/, &widen) if force

  data.gsub!(/[\w.,!?' ]+/) do |run|
    next widen.call(run) unless run.split(" ").count > 1
    @english_sentences << run
    "［＃英文＝#{@english_sentences.count - 1}］"
  end
end

#author_comment_force_close ⇒ `Object`

# File 'lib/converterbase.rb', line 721

# Writes the closing annotation of the author-comment block that is still
# open, if any. Does nothing when @in_author_comment_block is not set.
#
# @return [Object, nil] result of outputs, or nil when no block is open
def author_comment_force_close
  open_block = @in_author_comment_block
  return unless open_block
  outputs(AUTHOR_COMMENT_CHUKI[open_block][:close])
end

#auto_indent(data) ⇒ `Object`

行頭字下げ

必ず下げなければいけないところは強制的に字下げ。他の部分は全体的に判断して字下げ。

# File 'lib/converterbase.rb', line 558

# Indents the start of lines, in place.
#
# Lines matching FULL_INDENT_TARGET are always given one full-width space.
# When auto-indent is enabled and @inspector.inspect_indent(data) is truthy,
# every line whose first character is not in AUTO_INDENT_IGNORE_INDENT_CHAR
# is indented as well.
#
# @param data [String] text, modified in place
def auto_indent(data)
  data.gsub!(FULL_INDENT_TARGET, "　\\1")
  if @setting.enable_auto_indent && @inspector.inspect_indent(data)
    data.gsub!(/^([^#{AUTO_INDENT_IGNORE_INDENT_CHAR}])/) do
      # Handles lines that start with a run of middle dots (・・・) used in
      # place of an ellipsis.
      # https://github.com/whiteleaf7/narou/issues/35
      # A single leading middle dot is a common expression, so it is not indented.
      if $1 == "・" && $'[0] != "・"
        "・"
      else
        # A leading half- or full-width space becomes one full-width space.
        $1 == " " || $1 == "　" ? "　" : "　#{$1}"
      end
    end
  end
end

#auto_join_in_brackets(data) ⇒ `Object`

かぎ括弧内自動連結

# File 'lib/converterbase.rb', line 755

# Joins line breaks inside 「」 and 『』 passages, in place.
#
# For each bracket kind in OPENCLOSE_REGEXPS, every balanced passage is
# replaced by a "［＃かぎ括弧＝N］" placeholder while its (possibly joined)
# text is kept in a local stack; the placeholders are then restored with
# ConverterBase.rebuild_brackets. Returns early when both related settings
# are off.
#
# @param data [String] text, modified in place
def auto_join_in_brackets(data)
  if !@setting.enable_auto_join_in_brackets && !@setting.enable_inspect_invalid_openclose_brackets
    return
  end
  OPENCLOSE_REGEXPS.each_with_index do |openclose, i|
    stack = {}
    data.gsub!(openclose).with_index do |match, j|
      joined_str = join_inner_bracket(match)
      if @setting.enable_auto_join_in_brackets && joined_str
        # Keep the original passage when the inspector reports a problem
        # with the joined version.
        error = @inspector.validate_joined_inner_brackets(match, joined_str, BRACKETS[i])
        stack[j] = error ? match : joined_str
      else
        stack[j] = match
      end
      "［＃かぎ括弧＝#{j}］"
    end
    if @setting.enable_inspect_invalid_openclose_brackets
      # Only brackets that are not properly closed remain in data at this point.
      @inspector.inspect_invalid_openclose_brackets(data, BRACKETS[i], stack)
    end
    data.replace(ConverterBase.rebuild_brackets(data, stack))
  end
end

#auto_join_line(data) ⇒ `Object`

手動折り返しの自動連結

# File 'lib/converterbase.rb', line 788

# Joins a line ending in 、 with the following line when that line starts
# with a full-width space, in place.
#
# @param data [String] text, modified in place
# @return [String, nil] result of gsub! (nil when nothing matched)
def auto_join_line(data)
  # If the next line starts with an opening symbol, the break is treated as
  # intentional and the lines are not joined.
  data.gsub!(/([^、])、\n　([^「『(（【<＜〈《≪…‥―])/, "\\1、\\2")
end

#before(io, text_type) ⇒ `Object`

# File 'lib/converterbase.rb', line 18

# Pre-conversion hook operating on io.string in place.
#
# For "body" text, convert_page_break is applied first. For every type
# except "story", double newlines are collapsed to one and then any
# remaining triple newlines to two. The decision is based on @text_type.
#
# @param io [StringIO] input whose underlying string is modified
# @param text_type [String] text type supplied by the caller
# @return [StringIO] the same io
def before(io, text_type)
  text = io.string
  convert_page_break(text) if @text_type == "body"
  return io if @text_type == "story"

  [["\n\n", "\n"], ["\n\n\n", "\n\n"]].each do |from, to|
    text.gsub!(from, to)
  end
  io
end

#before_convert(io) ⇒ `Object`



1051
1052
1053

# File 'lib/converterbase.rb', line 1051

# Calls #before with io and the current @text_type.
#
# @param io [Object] passed through to #before
# @return [Object] whatever #before returns
def before_convert(io)
  before(io, @text_type)
end

#blank_line?(line) ⇒ `Boolean`

Returns:

(Boolean)



606
607
608

# File 'lib/converterbase.rb', line 606

# Tells whether line begins with a line made only of half-width spaces,
# full-width spaces and tabs (or nothing at all).
#
# @param line [String]
# @return [Integer, nil] 0 when it matches, nil otherwise
def blank_line?(line)
  line =~ /\A[ 　\t]*$/
end

#border_symbol?(line) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 599

# Tells whether line consists only of separator symbols, optionally
# preceded by spaces or tabs.
#
# The symbol set is read once from bordersymbols.txt in the preset
# directory (BOM-aware UTF-8) and cached in @@symbols.
#
# @param line [String]
# @return [Integer, nil] match position, or nil when line is not a separator
def border_symbol?(line)
  # File.open rather than Kernel#open: Kernel#open treats a path that starts
  # with "|" as a command to run.
  @@symbols ||= File.open(File.join(Narou.get_preset_dir, "bordersymbols.txt"), "r:BOM|UTF-8") { |fp|
    fp.read.strip
  }
  line =~ /^[ 　\t]*[#{@@symbols}]+$/
end

#comments_block?(line) ⇒ `Boolean`

コメントブロックを検出する

コメントブロックの定義は - のみが50回以上連続された行に囲まれている間

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 498

# Tracks comment blocks line by line.
#
# A line made of 50 or more "-" toggles @in_comment_block and is itself
# reported as part of the block. Any other line reports the current state.
#
# @param line [String]
# @return [Boolean] true for a delimiter line, otherwise @in_comment_block
def comments_block?(line)
  return @in_comment_block unless line =~ /^-{50,}$/
  @in_comment_block = !@in_comment_block
  true
end

#convert(text, text_type) ⇒ `Object`

# File 'lib/converterbase.rb', line 1059

# Runs the whole conversion pipeline on text.
#
# Trailing whitespace is stripped from every line, then the text is fed
# through before_convert, convert_main and after_convert in that order;
# each stage's result is rewound before it is passed on.
#
# @param text [String] source text
# @param text_type [String] kind of text; stored in @text_type
# @return [String] converted text ("" when text is empty)
def convert(text, text_type)
  return "" if text == ""
  @text_type = text_type
  io = StringIO.new(rstrip_all_lines(text))
  %i(before_convert convert_main after_convert).each do |stage|
    io = send(stage, io)
    io.rewind
  end
  io.read
end

#convert_arrow(data) ⇒ `Object`

おかしくなりやすい矢印文字の変換

# File 'lib/converterbase.rb', line 392

# Replaces the double arrows ⇒ and ⇐ with → and ←, in place, when the
# configured device reports kindle?. The device is fetched once through
# Narou.get_device and cached in @@device.
#
# @param data [String] text, modified in place
def convert_arrow(data)
  @@device ||= Narou.get_device
  # Only verified on Kindle PW, so for now convert only when device=kindle.
  if @@device && @@device.kindle?
    data.tr!("⇒⇐", "→←")
  end
end

#convert_dakuten_char_to_font(data) ⇒ `Object`

濁点のついてない文字に濁点をつける表現を対応

濁点つきフォントに部分的に切り替える

# File 'lib/converterbase.rb', line 429

# Handles a character followed by a standalone dakuten mark (゛ or ﾞ),
# in place.
#
# When the character is kana (or ι) and the dakuten font setting is on, the
# character is wrapped in ［＃濁点］…［＃濁点終わり］ and @use_dakuten_font is
# set to true. Otherwise the character plus ゛ is passed to tcy.
#
# @param data [String] text, modified in place
def convert_dakuten_char_to_font(data)
  data.gsub!(/(.)[゛ﾞ]/) do
    # Copy the capture before the next =~ overwrites $1.
    m1 = $1
    if m1 =~ /[ぁ-んァ-ヶι]/ && @setting.enable_dakuten_font
      @use_dakuten_font = true
      "［＃濁点］#{m1}［＃濁点終わり］"
    else
      tcy(m1 + "゛")
    end
  end
end

#convert_double_angle_quotation_to_gaiji(data) ⇒ `Object`

ギュメを二重山括弧（の外字）に変換

# File 'lib/converterbase.rb', line 410

# Replaces ≪ and ≫ with the gaiji annotations for opening and closing
# double angle brackets, in place.
#
# @param data [String] text, modified in place
# @return [String, nil] result of the second gsub!
def convert_double_angle_quotation_to_gaiji(data)
  data.gsub!("≪", "※［＃始め二重山括弧］")
  data.gsub!("≫", "※［＃終わり二重山括弧］")
end

#convert_for_all_data(data) ⇒ `Object`

小説データ全体に対して施す変換

# File 'lib/converterbase.rb', line 1023

# Applies the whole-text conversions to data, in place and in a fixed order.
#
# Some steps stash text behind placeholders (illustrations, URLs, English
# sentences, kanji numerals) so later steps do not alter it.
#
# @param data [String] the full text, modified in place
def convert_for_all_data(data)
  hankakukana_to_zenkakukana(data)
  auto_join_in_brackets(data)
  auto_join_line(data) if @setting.enable_auto_join_line
  erase_comments_block(data)
  replace_illust_tag(data)
  replace_url(data)
  replace_narou_tag(data)
  # Runs before alphabet_to_zenkaku, which would otherwise widen the letters
  # convert_rome_numeric looks for.
  convert_rome_numeric(data)
  alphabet_to_zenkaku(data, @setting.enable_alphabet_force_zenkaku)
  convert_numbers(data)
  exception_reconvert_kanji_to_num(data)
  # Unit-based kanji numerals are skipped for subtitles and chapter titles.
  if @setting.enable_convert_num_to_kanji && @text_type != "subtitle" && @text_type != "chapter" \
     && @setting.enable_kanji_num_with_units
    convert_kanji_num_with_unit(data, @setting.kanji_num_with_units_lower_digit_zero)
  end
  rebuild_kanji_num(data)
  insert_separate_space(data)
  convert_special_characters(data)
  convert_fraction_and_date(data)
  modify_kana_ni_to_kanji_ni(data)
  # Indentation is only applied to body text and text files.
  if @text_type == "body" || @text_type == "textfile"
    half_indent_bracket(data)
    auto_indent(data)
  end
  convert_dakuten_char_to_font(data)
end

#convert_fraction_and_date(data) ⇒ `Object`

分数表記を○分の○表記に変更、及び日付表記を検出

スラッシュで区切られた数字が２個なら分数、３個なら日付と定義

# File 'lib/converterbase.rb', line 239

# Rewrites slash-separated numbers in data, in place.
#
# Two numbers separated by a slash are treated as a fraction and become
# "<denominator>分の<numerator>". Three numbers are treated as a date and
# are formatted with @setting.date_format. Anything else, a disabled
# transform, or an invalid date leaves the matched text unchanged.
#
# @param data [String] text, modified in place
# @return [String, nil] result of gsub!, or nil when both transforms are off
def convert_fraction_and_date(data)
  return if !@setting.enable_transform_fraction && !@setting.enable_transform_date

  # '\d' is single-quoted on purpose: inside a double-quoted string "\d" is
  # just the letter "d", which would match "d" instead of ASCII digits.
  target_num = '\d０-９' + "#{KANJI_NUM}十百千万億兆京垓"
  data.gsub!(%r{[#{target_num}/／]+}) do |match|
    numerics = match.split(%r{[/／]})
    case numerics.count
    when 2
      # Fraction
      next match unless @setting.enable_transform_fraction
      "#{zenkaku_num_to_kanji(numerics[1])}分の#{zenkaku_num_to_kanji(numerics[0])}"
    when 3
      # Date. Return the match itself when dates are disabled; a nil block
      # result would delete the text.
      next match unless @setting.enable_transform_date
      begin
        date = Date.new(*numerics.map { |s|
          s.tr("0-9０-９#{KANJI_NUM}", "0-90-90-9").to_i
        })
      rescue ArgumentError
        # Not a valid calendar date: keep the text as written.
        next match
      end
      convert_numbers(date.strftime(@setting.date_format))
    else
      match
    end
  end
end

#convert_horizontal_ellipsis(data) ⇒ `Object`

中黒(・)や句読点を並べて三点リーダーもどきにしているのを三点リーダーに変換

# File 'lib/converterbase.rb', line 922

# Replaces runs of three or more ・, 。 or 、 with ellipsis characters (…),
# then collapses any remaining "。。" / "、、" pair into a single mark.
# Does nothing when the setting is off or the text is a subtitle or chapter.
#
# @param data [String] text, modified in place
def convert_horizontal_ellipsis(data)
  return if !@setting.enable_convert_horizontal_ellipsis || @text_type == "subtitle" || @text_type == "chapter"
  %w(・ 。 、).each do |char|
    data.gsub!(/#{char}{3,}/) do |match|
      # Characters immediately before and after the run.
      pre_char, post_char = $`[-1], $'[0]
      if pre_char == "―" || post_char == "―"
        # Runs next to a dash are left as written.
        match
      else
        # One … per three marks, rounded up to an even count.
        "…" * ((match.length / 3.0 / 2).ceil * 2)
      end
    end
  end
  data.gsub!("。。", "。")
  data.gsub!("、、", "、")
end

#convert_kanji_num_with_unit(data, lower_digit_zero = 0) ⇒ `Object`

漢数字を単位を使った表現に変換

８００万１０００といったような表現は、内部で一度 8001000 に変換する。 lower_digit_zero はこの最後の 000 に適用される

# File 'lib/converterbase.rb', line 187

# Rewrites runs of kanji digits/units as numerals that use place units
# (十百千 and 万億兆京), in place.
#
# Each run is first turned into an integer with kanji_num_to_integer. The
# run is only rewritten when its digit string ends with at least
# lower_digit_zero zeros (〇); otherwise it is left as written.
#
# @param data [String] text, modified in place
# @param lower_digit_zero [Integer] minimum number of trailing 〇 required
# @return [String, nil] result of gsub!
def convert_kanji_num_with_unit(data, lower_digit_zero = 0)
  data.gsub!(/([#{KANJI_NUM}十百千万億兆京]+)/) do |match|
    total = kanji_num_to_integer($1)
    m1 = total.to_s.tr("0-9", KANJI_NUM)
    if m1 =~ /〇{#{lower_digit_zero},}$/
      digits = m1.reverse.scan(/.{1,4}/).map(&:reverse).reverse   # split into 4-digit groups counting from the lowest digit
      # Index of the highest 4-digit group (0 when the number has 4 digits or fewer).
      keta = digits.count - 1
      digits.map.with_index { |nums, keta_i|
        four_digit_num = nums.scan(/./).map.with_index { |d, di|
          next "" if d == "〇"
          kurai = KANJI_KURAI[nums.length - di - 1]
          if d == "一"
            # A leading 一 before 千 is optional for 4-digit numbers, but is
            # kept when the number has 5 or more digits.
            # 1100 → 千百、11100 → 一万一千百
            if kurai != "" && !(keta > 0 && kurai == "千")
              d = ""
            end
          end
          d + kurai
        }.join
        # Groups that are all zero contribute nothing, not even their unit.
        if four_digit_num.length > 0
          four_digit_num + KANJI_NUM_UNITS[keta - keta_i]
        else
          ""
        end
      }.join
    else
      match
    end
  end
end

#convert_main(io) ⇒ `Object`

変換処理本体

@text_type: 渡されるテキストの種類。

subtitle, introduction, body, postscript, textfile, chapter, story

# File 'lib/converterbase.rb', line 1075

# Main conversion routine.
#
# Reads the whole input, applies convert_for_all_data, then processes the
# result line by line into @write_fp (subtitles are written as a whole).
# Finally the placeholders created earlier are restored and the
# ruby/ellipsis/cleanup passes run on the output string.
#
# @text_type selects the behaviour: subtitle, introduction, body,
# postscript, textfile, chapter, story.
#
# @param io [StringIO] input positioned at the start of the text
# @return [StringIO] @write_fp holding the converted text
def convert_main(io)
  @write_fp = StringIO.new
  # Introductions/postscripts are dropped entirely when the matching
  # erase setting is on.
  case @text_type
  when "introduction"
    return @write_fp if @setting.enable_erase_introduction
  when "postscript"
    return @write_fp if @setting.enable_erase_postscript
  end
  if @text_type == "textfile"
    # NOTE(review): io.gets returns nil at EOF, so this presumably assumes
    # the text file has at least two lines — confirm.
    @write_fp.puts(io.gets + io.gets)   # title and author lines bypass conversion
    data = io.read
  else
    data = io.read
  end
  initialize_member_values
  convert_for_all_data(data)
  if @text_type == "textfile"
    # The exact line count is only known after
    # convert_for_all_data -> replace_narou_tag has turned tags into newlines.
    progressbar = ProgressBar.new(data.count("\n") + 1)
    progressbar.output(0)
  end
  @read_fp = StringIO.new(data)
  if @text_type == "subtitle"
    @write_fp.write(data)
  else
    @read_fp.each_with_index do |line, i|
      progressbar.output(i) if @text_type == "textfile"
      @request_skip_output_line = false
      zenkaku_rstrip(line)
      # A previous line asked for a blank line after it.
      if @request_insert_blank_next_line
        outputs unless blank_line?(line)
        @request_insert_blank_next_line = false
      end
      process_author_comment(line) if @text_type == "textfile"
      insert_blank_line_to_border_symbol(line)
      force_indent_special_chapter(line)

      outputs(line)
      # Anything queued with delay_outputs is written after the line itself.
      unless @delay_outputs_buffer.empty?
        @write_fp.write(@delay_outputs_buffer)
        @before_line = @delay_outputs_buffer
        @delay_outputs_buffer = ""
      else
        @before_line = line
      end
    end
    author_comment_force_close if @text_type == "textfile"
  end

  @write_fp.rewind
  # data is @write_fp's own string, so the in-place edits below change the
  # content that is returned.
  data = @write_fp.string
  if @text_type == "textfile"
    if @setting.enable_author_comments
      erase_introduction(data) if @setting.enable_erase_introduction
      erase_postscript(data) if @setting.enable_erase_postscript
    end
    if @setting.enable_enchant_midashi
      enchant_midashi(data)
    end
  end
  rebuild_illust(data)
  rebuild_url(data)
  rebuild_english_sentences(data)
  rebuild_hankaku_num_and_comma(data)
  rebuild_kome_to_gaiji(data)
  # Ruby is processed at this point because the restored text may itself
  # receive ruby.
  narou_ruby(data) if @setting.enable_ruby
  # Ellipsis conversion runs after ruby processing because ・・・ may be used in ruby as emphasis dots.
  convert_horizontal_ellipsis(data)
  # Convert guillemets that were not turned into ruby into double angle brackets (gaiji).
  convert_double_angle_quotation_to_gaiji(data)
  delete_dust_char(data)
  data.strip!
  progressbar.clear if @text_type == "textfile"
  @write_fp
end

#convert_novel_rule(data) ⇒ `Object`

小説のルールに沿うように変換

# File 'lib/converterbase.rb', line 444

# Adjusts punctuation to follow common novel typesetting rules, in place.
#
# @param data [String] text, modified in place
# @return [String, nil] result of the last gsub!
def convert_novel_rule(data)
  # Drop a full stop placed right before a closing bracket.
  data.gsub!(/。([」』）])/) { Regexp.last_match(1) }
  # Leaders are meant to be used in pairs, so pad odd runs to an even length.
  # MEMO: ― should also come in pairs, but it is sometimes used as a
  # symbol, so it is left alone.
  ["…", "‥"].each do |leader|
    data.gsub!(/#{leader}+/) { |run| leader * (run.length + run.length % 2) }
  end
  # Typos seen every now and then.
  data.gsub!("。　", "。")
  data.gsub!(/([？！])。/) { Regexp.last_match(1) }
end

#convert_numbers(data) ⇒ `Object`

数字の変換

# File 'lib/converterbase.rb', line 76

# Converts the numbers in data, in place.
#
# A decimal point between digits becomes ・. Digits are then turned into
# kanji numerals when that setting is on and the text is not a subtitle,
# chapter or story; otherwise half-width digits are made full-width.
#
# @param data [String] text, modified in place
# @return [String] data itself
def convert_numbers(data)
  digit = "[\\d０-９#{KANJI_NUM}]"
  # Decimal point -> ・
  data.gsub!(/(#{digit}+?)[\.．](#{digit}+?)/, "\\1・\\2")
  keep_arabic = !@setting.enable_convert_num_to_kanji ||
                @text_type == "subtitle" || @text_type == "chapter" || @text_type == "story"
  if keep_arabic
    hankaku_num_to_zenkaku_num(data)
  else
    num_to_kanji(data)
  end
  data
end

#convert_page_break(data) ⇒ `Object`

一定以上の連続する空行を改ページに変換

# File 'lib/converterbase.rb', line 1004

# Turns long runs of blank lines into a page break, in place.
#
# A run of more than @setting.to_page_break_threshold newlines is replaced
# by "\n［＃改頁］\n". Does nothing when the setting is off.
#
# @param data [String] text, modified in place
# @return [String, nil] result of gsub!, or nil when disabled
def convert_page_break(data)
  return unless @setting.enable_convert_page_break

  min_newlines = @setting.to_page_break_threshold + 1
  # Auto-generated breaks use 改頁 instead of 改ページ so they are not
  # mixed up with the ones used for heading assignment and the like.
  data.gsub!(/\n{#{min_newlines},}/, "\n［＃改頁］\n")
end

#convert_rome_numeric(data) ⇒ `Object`

ローマ数字っぽいアルファベットをローマ数字に変換

※alphabet_to_zenkaku の前に実行する必要あり

# File 'lib/converterbase.rb', line 305

# Replaces ASCII spellings of Roman numerals (II, iv, ...) with the
# matching single Roman-numeral characters, in place.
#
# A spelling is only converted when it is not directly adjacent to another
# ASCII letter. Lookarounds are used so the neighbouring characters are not
# consumed: numerals separated by a single character ("II II") and numerals
# at the very start or end of the text are converted as well.
#
# Must run before alphabet_to_zenkaku.
#
# @param data [String] text, modified in place
def convert_rome_numeric(data)
  ROME_NUM_ALPHABET.each_with_index do |rome, i|
    data.gsub!(/(?<![a-zA-Z])#{rome}(?![a-zA-Z])/, ROME_NUM[i])
  end
end

#convert_special_characters(data) ⇒ `Object`

特定の表現・記号を変換していく

# File 'lib/converterbase.rb', line 314

# Runs the symbol- and expression-level conversions on data, in place and
# in a fixed order.
#
# @param data [String] text, modified in place
def convert_special_characters(data)
  stash_kome(data)
  convert_double_angle_quotation_to_gaiji(data)   # guillemets present from the start are not ruby targets, so make them gaiji annotations
  symbols_to_zenkaku(data)
  convert_tatechuyoko(data)
  convert_novel_rule(data)
  convert_arrow(data)
end

#convert_tatechuyoko(data) ⇒ `Object`

縦中横にすべき表現を変換

# File 'lib/converterbase.rb', line 352

# Wraps short runs of ！ and ？ in tate-chu-yoko annotations (via tcy),
# in place.
#
# @param data [String] text, modified in place
# @return [String, nil] result of the second gsub!
def convert_tatechuyoko(data)
  # Tate-chu-yoko for exclamation and question marks.
  # AozoraEPUB3's own tate-chu-yoko setting would make explicit annotations
  # unnecessary, but it is not applied automatically inside headings, so the
  # annotation is written explicitly.
  # ! and ? must already have been converted to full-width.
  data.gsub!(/！+/) do |match|
    # Runs touching a ？ are left for the mixed ！？ pass below.
    if "#{$`[-1]}#{$'[0]}".include?("？")
      next match
    end
    len = match.length
    if len == 3
      tcy("!!!")
    elsif len >= 4
      # 4 or more: pad to an even count (odd counts are rounded up by one)
      # and emit tate-chu-yoko pairs.
      len += 1 if len.odd?
      tcy("!!") * (len / 2)
    else
      match
    end
  end
  data.gsub!(/[！？]+/) do |match|
    case match.length
    when 2
      tcy(match.tr("！？", "!?"))
    when 3
      # Only these patterns are allowed, for visual reasons.
      if %w(！！？ ？！！).find { |v| v == match }
        tcy(match.tr("！？", "!?"))
      else
        match
      end
    else
      match
    end
  end
end

#delay_outputs(data = "") ⇒ `Object`

# File 'lib/converterbase.rb', line 60

# Appends data plus a newline to @delay_outputs_buffer, unless output of
# the current line has been suppressed with @request_skip_output_line.
#
# @param data [String] text to queue
# @return [String, nil] the buffer, or nil when skipped
def delay_outputs(data = "")
  unless @request_skip_output_line
    @delay_outputs_buffer << data + "\n"
  end
end

#delete_dust_char(data) ⇒ `Object`

表示上化けてしまうゴミ削除

# File 'lib/converterbase.rb', line 1015

# Removes characters that show up as garbage when displayed, in place.
# NOTE(review): the two string literals contain invisible characters;
# confirm the exact code points against the original file before editing.
#
# @param data [String] text, modified in place
# @return [String, nil] result of the second gsub!
def delete_dust_char(data)
  data.gsub!("︎", "")
  data.gsub!("︎", "")
end

#enchant_midashi(data) ⇒ `Object`

［＃改ページ］直後の行を見出しに設定する

# File 'lib/converterbase.rb', line 948

# Turns the line right after each ［＃改ページ］ into a heading, in place.
#
# When the page break is followed by an introduction block, the first line
# after that block becomes the heading and is moved in front of it.
#
# @param data [String] text, modified in place
# @return [String, nil] result of the second gsub!
def enchant_midashi(data)
  # Nested def: running enchant_midashi (re)defines midashi as a method.
  # midashi strips the half-indent annotation and surrounding whitespace,
  # records the title in @inspector.subtitle and returns the heading markup.
  def midashi(str)
    midashi_title = str.gsub("［＃半字下げ］", "").gsub(/^[　\s]+/, "").gsub(/[　\s]+$/, "")
    @inspector.subtitle = midashi_title
    "［＃３字下げ］［＃ここから中見出し］#{midashi_title}［＃ここで中見出し終わり］"
  end

  data.gsub!(/［＃改ページ］\n(.+?)\n/) do |match|
    # Save the match data before the next =~ overwrites it.
    m1 = $1
    rest = $'
    # A page break followed by an introduction is handled by the second pass.
    if $1 =~ /#{AUTHOR_COMMENT_CHUKI[:introduction][:open]}/
      match
    else
      # Add a blank line when the line after the heading is not blank.
      add_tail = rest =~ /\A$/ ? "" : "\n\n"
      "［＃改ページ］\n#{midashi(m1)}\n#{add_tail}"
    end
  end
  data.gsub!(/(［＃改ページ］\n)(#{AUTHOR_COMMENT_CHUKI[:introduction][:open]}.+?#{AUTHOR_COMMENT_CHUKI[:introduction][:close]}\n)(.+?\n)/m) do
    m1, m2, m3 = $1, $2, $3
    add_tail = $' =~ /\A$/ ? "" : "\n"
    "#{m1 + midashi(m3) + m2}#{add_tail}"   # swap introduction -> heading into heading -> introduction
  end
end

#erase_comments_block(data) ⇒ `Object`

コメントブロックを削除する



509
510
511

# File 'lib/converterbase.rb', line 509

# Deletes comment blocks from data, in place.
#
# A comment block starts and ends with a line made of 50 or more "-".
# The match is non-greedy so that each block is removed on its own and the
# text between two separate blocks is kept.
#
# @param data [String] text, modified in place
# @return [String, nil] result of gsub! (nil when no block was found)
def erase_comments_block(data)
  data.gsub!(/^-{50,}\n.*?^-{50,}\n/m, "")
end

#erase_introduction(data) ⇒ `Object`

前書きを削除する

# File 'lib/converterbase.rb', line 976

# Deletes every introduction block that directly follows a ［＃改ページ］
# line, in place, keeping the page break itself. When anything was removed,
# the count is reported through @inspector.info.
#
# @param data [String] text, modified in place
# @return [Object, nil] result of @inspector.info, or nil when nothing was removed
def erase_introduction(data)
  chuki = AUTHOR_COMMENT_CHUKI[:introduction]
  pattern = /(［＃改ページ］)\n#{chuki[:open]}.+?#{chuki[:close]}/m
  removed = 0
  data.gsub!(pattern) do
    removed += 1
    Regexp.last_match(1)
  end
  return unless removed > 0
  @inspector.info("前書きをすべて削除しました。削除した数は#{removed}個です。")
end

#erase_postscript(data) ⇒ `Object`

後書きを削除する

# File 'lib/converterbase.rb', line 990

# Deletes every postscript block that is followed by a ［＃改ページ］ or by
# the end of the text, in place, keeping that page break. When anything was
# removed, the count is reported through @inspector.info.
#
# @param data [String] text, modified in place
# @return [Object, nil] result of @inspector.info, or nil when nothing was removed
def erase_postscript(data)
  chuki = AUTHOR_COMMENT_CHUKI[:postscript]
  pattern = /#{chuki[:open]}.+?#{chuki[:close]}\n(［＃改ページ］|\z)/m
  removed = 0
  data.gsub!(pattern) do
    removed += 1
    Regexp.last_match(1)
  end
  return unless removed > 0
  @inspector.info("後書きをすべて削除しました。削除した数は#{removed}個です。")
end

#exception_reconvert_kanji_to_num(data) ⇒ `Object`

アラビア数字を使うべきところはアラビア数字に戻す

# File 'lib/converterbase.rb', line 224

# Turns kanji digits back into full-width Arabic digits where they sit
# next to full-width letters or unit symbols, in place. Does nothing when
# kanji-numeral conversion is disabled.
#
# @param data [String] text, modified in place
# @return [String, nil] result of the last gsub!, or nil when disabled
def exception_reconvert_kanji_to_num(data)
  return unless @setting.enable_convert_num_to_kanji
  # Digits that follow a full-width letter.
  data.gsub!(/([Ａ-Ｚａ-ｚ])([#{KANJI_NUM}・～]+)/) do   # also accepts patterns like ｖｅｒ１・０１
    $1 + $2.tr(KANJI_NUM, "０-９")
  end
  # Digits followed by a full-width letter or one of the unit symbols in
  # RECONVERT_KANJI_TO_NUM_PATTERN_UNIT.
  data.gsub!(/([#{KANJI_NUM}・～]+)([Ａ-Ｚａ-ｚ#{RECONVERT_KANJI_TO_NUM_PATTERN_UNIT}])/) do
    $1.tr(KANJI_NUM, "０-９") + $2
  end
end

#find_introduction? ⇒ `Boolean`

前書きの検出

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 678

# Looks ahead in @read_fp for an introduction splitter line.
#
# Reading starts at the current position and stops at the first page break
# or the first line matching AUTHOR_INTRODUCTION_SPLITTER. The read position
# is restored afterwards.
#
# @return [Boolean] true when a splitter is found before the next page break
def find_introduction?
  # Remember where we are so the look-ahead leaves @read_fp unchanged.
  pos = @read_fp.pos
  result = false
  @read_fp.each do |line|
    break if page_break?(line)
    if line =~ AUTHOR_INTRODUCTION_SPLITTER
      result = true
      break
    end
  end
  @read_fp.pos = pos
  result
end

#force_indent_special_chapter(line) ⇒ `Object`

章見出しっぽい文字列を字下げして前後に空行を入れる

TODO: 半角数字の縦中横注記をいれた影響で、2桁の半角数字が認識されてないのをどうにかする

# File 'lib/converterbase.rb', line 579

# Formats a line that looks like a chapter number: a number standing alone
# on the line, optionally wrapped in dashes or angle brackets.
#
# Such a line is indented by three full-width spaces, set in gothic type,
# preceded by a newline unless @before_line is blank, and a blank line is
# requested after it.
#
# TODO: two-digit half-width numbers are no longer recognised since
# tate-chu-yoko annotations were introduced for them.
#
# @param line [String] current line, modified in place
# @return [String, nil] result of sub! (nil when the line does not match)
def force_indent_special_chapter(line)
  line.sub!(/^(?:[ 　\t]|［＃二分アキ］)*([－―<＜〈-]*)([0-9０-９#{KANJI_NUM}]+)([－―>＞〉-]*)$/) do
    @request_insert_blank_next_line = true
    top, chapter, bottom = $1, $2, $3
    # A dash-like prefix is normalised to "― " / " ―" on both sides.
    if top != "" && "―－-".include?(top)
      top = "― "
      bottom = " ―"
    end
    (blank_line?(@before_line) ? "" : "\n") + "　　　［＃ゴシック体］" + \
    top + hankaku_num_to_zenkaku_num(zenkaku_num_to_hankaku_num(chapter)) + bottom + "［＃ゴシック体終わり］"
  end
end

#half_indent_bracket(data) ⇒ `Object`

行頭かぎ括弧(等)に二分アキを追加する

「や（などの前にカスタム注記（［＃二分アキ］）を追加し、半文字分字下げする(二分アキ)。 kindle paperwhite で鍵括弧のインデントがおかしいことへの対応



548
549
550

# File 'lib/converterbase.rb', line 548

# Inserts ［＃二分アキ］ in front of an opening bracket at the start of a
# line (see HALF_INDENT_TARGET), in place, when the setting is enabled.
# Leading whitespace before the bracket is dropped by the replacement.
#
# @param data [String] text, modified in place
# @return [String, nil] result of gsub!, or nil when disabled
def half_indent_bracket(data)
  data.gsub!(HALF_INDENT_TARGET, "［＃二分アキ］\\1") if @setting.enable_half_indent_bracket
end

#hankaku_num_to_zenkaku_num(data) ⇒ `Object`

半角アラビア数字の全角化

1桁、3桁以上：全角化 2桁：縦中横化

# File 'lib/converterbase.rb', line 526

# Converts half-width digit runs in data, in place.
#
# Two-digit runs are passed to tcy (tate-chu-yoko annotation), as is a
# three-digit run at the very start of a subtitle. Every other run is
# turned into full-width digits.
#
# @param data [String] text, modified in place
# @return [String] data itself
def hankaku_num_to_zenkaku_num(data)
  data.gsub!(/\d+/) do |digits|
    at_head = Regexp.last_match.pre_match.empty?
    as_tcy = digits.length == 2 ||
             (digits.length == 3 && @text_type == "subtitle" && at_head)
    as_tcy ? tcy(digits) : digits.tr("0-9", "０-９")
  end
  data
end

#hankakukana_to_zenkakukana(data) ⇒ `Object`

半角カナと｢｣｡､･等を全角に変換



326
327
328

# File 'lib/converterbase.rb', line 326

# Runs data through NKF with the "-wWX" options and replaces its content
# with the result; U+2014 is additionally mapped to ―.
# (Per the method summary this converts half-width kana and ｢｣｡､･ to
# full-width.)
#
# @param data [String] text, replaced in place
# @return [String] data
def hankakukana_to_zenkakukana(data)
  data.replace(NKF.nkf("-wWX", data).tr("\u2014", "―"))
end

#inclusion_author_comment_block?(line) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 692

# Decides whether line opens an author-comment block and records which one.
#
# A page break followed (per find_introduction?) by an introduction
# splitter opens an :introduction block; a line matching
# AUTHOR_POSTSCRIPT_SPLITTER opens a :postscript block. The kind is stored
# in @in_author_comment_block.
#
# @param line [String] current line
# @return [Boolean] true when a block was opened
def inclusion_author_comment_block?(line)
  if page_break?(line)
    return false unless find_introduction?
    @in_author_comment_block = :introduction
  elsif line =~ AUTHOR_POSTSCRIPT_SPLITTER
    @in_author_comment_block = :postscript
  else
    return false
  end
  true
end

#initialize_member_values ⇒ `Object`

# File 'lib/converterbase.rb', line 40

# Resets the per-conversion state: output-control flags, the previous-line
# and delayed-output buffers, the comment/author-comment block markers and
# the placeholder stores used by the stash/rebuild steps.
def initialize_member_values
  @request_insert_blank_next_line = false
  @request_skip_output_line = false
  @before_line = ""
  @delay_outputs_buffer = ""
  @in_comment_block = false
  @english_sentences = []
  @url_list = []
  @illust_chuki_list = []
  @kanji_num_list = {}
  @num_and_comma_list = {}
  # nil while outside an author-comment block; otherwise :introduction or :postscript.
  @in_author_comment_block = nil
end

#insert_blank_line_to_border_symbol(line) ⇒ `Object`

■などの区切りの前後には空行が必ず存在するようにする

# File 'lib/converterbase.rb', line 613

# Makes sure a separator line (see border_symbol?) has a blank line before
# and after it.
#
# For a separator line: a newline is prepended unless @before_line is
# blank, a blank line is requested after it, and the line is indented by
# four full-width spaces. Other lines are left unchanged.
#
# @param line [String] current line, modified in place
# @return [String] result of sub!
def insert_blank_line_to_border_symbol(line)
  result = ""
  if border_symbol?(line)
    unless blank_line?(@before_line)
      result << "\n"
    end
    @request_insert_blank_next_line = true
    jisage(line, 4)
  end
  # Prepend the newline (or nothing) to the line.
  line.sub!(/\A/, result)
end

#insert_separate_space(data) ⇒ `Object`

特定の記号の直後は全角アキを挿入する

# File 'lib/converterbase.rb', line 277

# Inserts a full-width space after a run of ! or ? marks, in place.
#
# A half-width space after the marks becomes a full-width one. No space is
# added when the next character is a closing bracket or quote, a full-width
# space, ☆ ★ ♪, ［ or ―.
#
# @param data [String] text, modified in place
# @return [String, nil] result of gsub!
def insert_separate_space(data)
  data.gsub!(/([!?！？]+)([^!?！？])/) do
    marks = Regexp.last_match(1)
    follower = Regexp.last_match(2)
    follower = "　" if follower == " "
    gap = follower =~ /[^」］\]』】〉》〕＞>≫)）"”’〟　☆★♪［―]/ ? "　" : ""
    "#{marks}#{gap}#{follower}"
  end
end

#is_sesame?(str, ten, last_char) ⇒ `Boolean`

Returns:

(Boolean)



817
818
819

# File 'lib/converterbase.rb', line 817

# Truthy when ten consists only of ・ and 、 and either str contains ｜ or
# last_char satisfies object_of_ruby?.
#
# @param str [String] text checked for ｜
# @param ten [String] candidate string of dots
# @param last_char [String] character tested with object_of_ruby?
# @return [Object] truthy or falsy, as described above
def is_sesame?(str, ten, last_char)
  ten =~ /^[・、]+$/ && (str.include?("｜") || object_of_ruby?(last_char))
end

#jisage(line, num) ⇒ `Object`

行頭空白を考慮した字下げ



595
596
597

# File 'lib/converterbase.rb', line 595

# Replaces the leading spaces, full-width spaces and tabs of line with num
# full-width spaces, in place.
#
# @param line [String] line, modified in place
# @param num [Integer] number of full-width spaces to indent by
# @return [String, nil] result of sub!
def jisage(line, num)
  line.sub!(/^[ 　\t]*/, "　" * num)
end

#join_inner_bracket(str) ⇒ `Object`

改行を連結した文章を作る

改行がひとつもなかった場合は nil を返す

# File 'lib/converterbase.rb', line 742

# Builds a version of str with its line breaks joined.
#
# A 。 is added after a line ending in … or ―, leading full-width spaces
# are stripped from every line, and the lines are concatenated.
#
# @param str [String] bracketed passage (not modified)
# @return [String, nil] the joined text, or nil when str has no line break
def join_inner_bracket(str)
  return nil unless str.include?("\n")

  str.gsub(/([…―])\n/, "\\1。\n")
     .split("\n")
     .map { |row| row.sub(/^　+/, "") }
     .join
end

#kanji_num_to_integer(string) ⇒ `Object`

# File 'lib/converterbase.rb', line 173

# Converts a kanji numeral that may use 万/億/兆/京 into an Integer.
#
# The string is scanned as (number, large-units) pairs. Each number is
# evaluated with __calc_kanji_num_with_unit and then scaled by appending
# the zeros of its large units (per KANJI_NUM_UNITS_DIGIT); the pieces are
# added together.
#
# @param string [String] kanji numeral
# @return [Integer] the value (0 when nothing matches)
def kanji_num_to_integer(string)
  string.scan(/([#{KANJI_NUM}十百千]+)([万億兆京]*)/).inject(0) do |sum, (num, units)|
    zeros = units.each_char.map { |c| "0" * KANJI_NUM_UNITS_DIGIT[c] }.join
    sum + (__calc_kanji_num_with_unit(num).to_s + zeros).to_i
  end
end

#leave_author_comment_block?(line) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 706

# Decides whether line ends the author-comment block that is currently
# open (@in_author_comment_block).
#
# An :introduction block ends at a line matching
# AUTHOR_INTRODUCTION_SPLITTER; a :postscript block ends at a page break.
#
# @param line [String] current line
# @return [Boolean] true when the open block ends at this line
def leave_author_comment_block?(line)
  case @in_author_comment_block
  when :introduction
    line =~ AUTHOR_INTRODUCTION_SPLITTER ? true : false
  when :postscript
    page_break?(line) ? true : false
  else
    false
  end
end

#midashi(str) ⇒ `Object`

# File 'lib/converterbase.rb', line 949

# Builds the heading markup for str.
#
# The ［＃半字下げ］ annotation and surrounding whitespace are removed, the
# resulting title is recorded in @inspector.subtitle, and the title is
# wrapped in a three-character indent plus middle-heading annotations.
#
# @param str [String] raw heading line
# @return [String] heading markup
def midashi(str)
  midashi_title = str.gsub("［＃半字下げ］", "").gsub(/^[　\s]+/, "").gsub(/[　\s]+$/, "")
  @inspector.subtitle = midashi_title
  "［＃３字下げ］［＃ここから中見出し］#{midashi_title}［＃ここで中見出し終わり］"
end

#modify_kana_ni_to_kanji_ni(data) ⇒ `Object`

漢字の二じゃなくて間違えてカタカナのニを使ってるのを校正する



941
942
943

# File 'lib/converterbase.rb', line 941

# Corrects a katakana ニ typed in place of the kanji 二, in place.
#
# A ニ is replaced only when the characters on both sides are neither
# katakana nor the long-vowel mark ー. The following character is checked
# with a lookahead so it is not consumed and can still serve as the
# preceding character of the next ニ (e.g. "十ニ、ニ十").
#
# @param data [String] text, modified in place
# @return [String, nil] result of gsub! (nil when nothing matched)
def modify_kana_ni_to_kanji_ni(data)
  data.gsub!(/([^ァ-ヶー])ニ(?=[^ァ-ヶー])/, "\\1二")
end

#narou_ruby(data) ⇒ `Object`

小説家になろうのルビ対策

# File 'lib/converterbase.rb', line 796

# Handles ruby (furigana) notation, in place.
#
# Except for subtitles and chapter titles, text followed by ≪…≫ or by a
# short （…） of kana/letters is passed to to_ruby. Afterwards every ｜ is
# escaped through replace_tatesen, and the ［＃ルビ用縦線］ placeholders are
# turned into ｜.
#
# @param data [String] text, modified in place
# @return [String, nil] result of the last gsub!
def narou_ruby(data)
  if @text_type != "subtitle" && @text_type != "chapter"
    # Ruby written with 《》.
    data.gsub!(/(.+?)≪([^≪]+?)≫/) do |match|
      to_ruby(match, $1, $2, ["≪", "≫"])
    end
    # Ruby written with （）: up to 20 kana, letters, ・ or full-width spaces.
    data.gsub!(/(.+?)（([ぁ-んァ-ヶーゝゞ・Ａ-Ｚａ-ｚA-Za-z　]{,20})）/) do |match|
      to_ruby(match, $1, $2, ["（", "）"])
    end
  end
  data.replace(replace_tatesen(data))
  data.gsub!("［＃ルビ用縦線］", "｜")
end

#num_to_kanji(data) ⇒ `Object`

アラビア数字を漢数字に

カンマ区切りの数字はアラビア数字のままにしておく。もともと漢数字なのは他の変換を受けないように退避させておく

# File 'lib/converterbase.rb', line 94

# Converts Arabic digits to kanji numerals, in place.
#
# Numerals that are already kanji are stashed first (stash_kanji_num) so
# they are not touched. Runs containing a comma together with half-width
# digits are stashed unchanged through stash_hankaku_num_and_comma (with ，
# normalised to ,); comma runs without half-width digits are left as is.
#
# @param data [String] text, modified in place
# @return [String] data itself
def num_to_kanji(data)
  stash_kanji_num(data)
  data.gsub!(/[\d０-９,，]+/) do |match|
    if match =~ /[,，]/
      if match =~ /[\d]/
        stash_hankaku_num_and_comma(match.tr("，", ","))
      else
        match
      end
    else
      # Half-width digits become kanji here; zenkaku_num_to_kanji deals
      # with the full-width ones.
      zenkaku_num_to_kanji(match.tr("0-9", KANJI_NUM))
    end
  end
  data
end

#object_of_ruby?(char) ⇒ `Boolean`

Returns:

(Boolean)



813
814
815

# File 'lib/converterbase.rb', line 813

# Tells whether char contains a character from CHARACTER_OF_RUBY.
#
# @param char [String]
# @return [Integer, nil] match position, or nil
def object_of_ruby?(char)
  char =~ /[#{CHARACTER_OF_RUBY}]/
end

#outputs(data = "", force = false) ⇒ `Object`

# File 'lib/converterbase.rb', line 54

# Writes data as a line to @write_fp, unless output of the current line has
# been suppressed with @request_skip_output_line. force overrides the
# suppression.
#
# @param data [String] text to write
# @param force [Boolean] write even when the line is being skipped
def outputs(data = "", force = false)
  if !@request_skip_output_line || force
    @write_fp.puts(data)
  end
end

#page_break?(line) ⇒ `Boolean`

改ページある？

Returns:

(Boolean)



628
629
630

# File 'lib/converterbase.rb', line 628

# Tells whether line contains a ［＃改ページ］ annotation.
#
# @param line [String]
# @return [Integer, nil] match position, or nil
def page_break?(line)
  line =~ /［＃改ページ］/
end

#process_author_comment(line) ⇒ `Object`

# File 'lib/converterbase.rb', line 647

# Tracks author-comment (introduction/postscript) blocks line by line and
# emits their opening and closing annotations. Does nothing unless
# @setting.enable_author_comments is on.
#
# @param line [String] current line; cleared when it is a splitter line
def process_author_comment(line)
  if @setting.enable_author_comments
    if @in_author_comment_block
      if leave_author_comment_block?(line)
        outputs(AUTHOR_COMMENT_CHUKI[@in_author_comment_block][:close])
        if @in_author_comment_block == :introduction
          # The splitter line itself is not written out.
          @request_skip_output_line = true
          line.clear
          @in_author_comment_block = nil
        elsif @in_author_comment_block == :postscript
          @in_author_comment_block = nil
          # A ［＃改ページ］ (where an introduction can start) was found,
          # so run introduction detection again on this line.
          process_author_comment(line)
        end
      end
    else
      if inclusion_author_comment_block?(line)
        # Using outputs would put the annotation before the page break, so
        # delay_outputs defers it until after the line has been written.
        delay_outputs(AUTHOR_COMMENT_CHUKI[@in_author_comment_block][:open]) 
        if @in_author_comment_block == :postscript
          # The postscript splitter line itself is not written out.
          @request_skip_output_line = true
          line.clear
        end
      end
    end
  end
end

#rebuild_english_sentences(data) ⇒ `Object`

英文を再構成する

# File 'lib/converterbase.rb', line 487

# Puts the stashed English sentences back in place of their
# "［＃英文＝N］" placeholders, in place.
#
# @param data [String] text, modified in place
def rebuild_english_sentences(data)
  @english_sentences.each_with_index do |sentence, id|
    # The id is passed through convert_numbers before it is searched for;
    # presumably because the digits of the placeholder in data went through
    # the same conversion — confirm.
    data.sub!("［＃英文＝#{convert_numbers(id.to_s)}］", sentence)
  end
end

#rebuild_hankaku_num_and_comma(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 117

# Replaces every "［＃半角数字＝<id>］" placeholder in +data+ (in place) with
# the entry of @num_and_comma_list stored under the integer <id>.
#
# @param data [String] text containing placeholders
# @return [String, nil] +data+, or nil when no placeholder was present
def rebuild_hankaku_num_and_comma(data)
  stash_marker = /［＃半角数字＝(.+?)］/
  data.gsub!(stash_marker) do
    stash_id = Regexp.last_match(1).to_i
    @num_and_comma_list[stash_id]
  end
end

#rebuild_illust(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 913

# Restores stashed illustration annotations into +data+ (mutated in place).
#
# Each entry of @illust_chuki_list replaces the first occurrence of its
# placeholder "［＃挿絵＝<id>］", where <id> is the list index passed through
# convert_numbers.
#
# The block form of sub! inserts the annotation verbatim. A replacement
# *string* would have its backslash sequences ("\0", "\1", "\\") interpreted,
# which would mangle annotations containing backslashes (e.g. file paths).
#
# @param data [String] text containing placeholders
# @return [Array<String>] @illust_chuki_list
def rebuild_illust(data)
  @illust_chuki_list.each_with_index do |chuki, id|
    data.sub!("［＃挿絵＝#{convert_numbers(id.to_s)}］") { chuki }
  end
end

#rebuild_kanji_num(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 133

# Replaces every "［＃漢数字＝<key>］" placeholder in +data+ (in place) with
# the value stored in @kanji_num_list under the string <key>.
#
# @param data [String] text containing placeholders
# @return [String, nil] +data+, or nil when no placeholder was present
def rebuild_kanji_num(data)
  stash_marker = /［＃漢数字＝(.+?)］/
  data.gsub!(stash_marker) do
    stash_key = Regexp.last_match(1)
    @kanji_num_list[stash_key]
  end
end

#rebuild_kome_to_gaiji(data) ⇒ `Object`

※の外字注記化

stash_kome で2つにしておいた※を外字注記化する



420
421
422

# File 'lib/converterbase.rb', line 420

# Turns each doubled "※※" in +data+ (in place) into "※" followed by the
# gaiji annotation ［＃米印、1-2-8］.
#
# @param data [String] text to convert
# @return [String, nil] +data+, or nil when nothing was replaced
def rebuild_kome_to_gaiji(data)
  data.gsub!("※※") { "※［＃米印、1-2-8］" }
end

#rebuild_url(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 894

# Restores stashed URLs into +data+ (mutated in place) as HTML anchors.
#
# Each entry of @url_list replaces the first occurrence of its placeholder
# "［＃ＵＲＬ＝<id>］", where <id> is the list index passed through
# convert_numbers, with <a href="URL">URL</a>.
#
# The block form of sub! inserts the anchor verbatim, so the stashed text can
# never be altered by sub!'s backslash-sequence handling of replacement
# strings.
#
# @param data [String] text containing placeholders
# @return [Array<String>] @url_list
def rebuild_url(data)
  @url_list.each_with_index do |url, id|
    data.sub!("［＃ＵＲＬ＝#{convert_numbers(id.to_s)}］") do
      "<a href=\"#{url}\">#{url}</a>"
    end
  end
end

#replace_by_replace_txt(text) ⇒ `Object`

replace.txt により単純置換

# File 'lib/converterbase.rb', line 1157

# Applies every (source, destination) pair from @setting.replace_pattern to
# +text+ in order, replacing all occurrences in place.
#
# @param text [String] text to rewrite
# @return [Object] @setting.replace_pattern
def replace_by_replace_txt(text)
  @setting.replace_pattern.each { |src, dst| text.gsub!(src, dst) }
end

#replace_illust_tag(data) ⇒ `Object`

挿絵タグやimgタグ等を挿絵注釈に変換。挿絵画像が存在しなければダウンロードして保存する

# File 'lib/converterbase.rb', line 905

# Runs @illustration.scanner over +data+ and supplies the text to use for
# each illustration annotation it yields.
#
# When @setting.enable_illust is off the block answers with an empty string.
# Otherwise the annotation is appended to @illust_chuki_list and a
# placeholder "［＃挿絵＝<index>］" followed by a newline is answered.
#
# @param data [String] text handed to the scanner
# @return [Object] whatever @illustration.scanner returns
def replace_illust_tag(data)
  @illustration.scanner(data) do |chuki|
    if @setting.enable_illust
      stash_index = @illust_chuki_list.count
      @illust_chuki_list << chuki
      "［＃挿絵＝#{stash_index}］\n"
    else
      ""
    end
  end
end

#replace_narou_tag(data) ⇒ `Object`

小説家になろう専用タグを置換

# File 'lib/converterbase.rb', line 292

# Rewrites the tags handled here in +data+ (in place): "【改ページ】" is
# removed, and <KBR> / <PBR> (case-insensitive) each become a newline.
#
# The substitutions run in the listed order.
#
# @param data [String] text to rewrite
# @return [String, nil] result of the final (<PBR>) substitution: +data+, or
#   nil when no <PBR> was present
def replace_narou_tag(data)
  substitutions = [
    ["【改ページ】", ""],
    [/<KBR>/i, "\n"],
    [/<PBR>/i, "\n"]
  ]
  substitutions.map { |tag, replacement| data.gsub!(tag, replacement) }.last
end

#replace_tatesen(str) ⇒ `Object`



836
837
838

# File 'lib/converterbase.rb', line 836

# Returns a copy of +str+ in which every "｜" is replaced by the annotation
# "※［＃縦線］". The receiver is not modified.
#
# @param str [String] source text
# @return [String] converted copy
def replace_tatesen(str)
  tatesen_chuki = "※［＃縦線］"
  str.gsub("｜") { tatesen_chuki }
end

#replace_url(data) ⇒ `Object`

URL っぽい文字列を一旦別のIDに置き換えてあとで復元することで、変換処理の影響を受けさせない

# File 'lib/converterbase.rb', line 887

# Stashes every URL-like string found in +data+ (mutated in place): the match
# is appended to @url_list and replaced by the placeholder
# "［＃ＵＲＬ＝<index>］" so that later conversion steps do not touch it.
#
# URI.regexp is obsolete (it warns under -w) and simply delegates to
# URI::DEFAULT_PARSER.make_regexp, so that method is called directly.
#
# @param data [String] text to scan
# @return [String, nil] +data+, or nil when no URL was found
def replace_url(data)
  data.gsub!(URI::DEFAULT_PARSER.make_regexp) do |match|
    @url_list << match
    "［＃ＵＲＬ＝#{@url_list.count - 1}］"
  end
end

#rstrip_all_lines(data) ⇒ `Object`

すべての行の行末空白を削除



69
70
71

# File 'lib/converterbase.rb', line 69

# Returns a copy of +data+ with trailing half-width spaces, full-width spaces
# and tabs removed from the end of every line. +data+ itself is not modified.
#
# @param data [String] multi-line text
# @return [String] stripped copy
def rstrip_all_lines(data)
  trailing_blanks = /[ 　\t]+$/m
  data.gsub(trailing_blanks) { "" }
end

#ruby_youon_to_big(ruby) ⇒ `Object`

ルビの拗音(ぁ、ぃ等)を商業書籍のように大きくする

# File 'lib/converterbase.rb', line 868

# Converts small kana in ruby text to their full-size counterparts when
# @setting.enable_ruby_youon_to_big is set; otherwise returns +ruby+ as is.
#
# @param ruby [String] ruby (furigana) text
# @return [String] converted copy, or +ruby+ itself when the setting is off
def ruby_youon_to_big(ruby)
  return ruby unless @setting.enable_ruby_youon_to_big

  ruby.tr("ぁぃぅぇぉゃゅょゎっァィゥェォャュョヮッヵヶ",
          "あいうえおやゆよわつアイウエオヤユヨワツカケ")
end

#sesame(str, ten) ⇒ `Object`

# File 'lib/converterbase.rb', line 821

# Wraps the emphasised part of +str+ in the ［＃傍点］ ... ［＃傍点終わり］
# annotations.
#
# If +str+ contains "｜", the first "｜" becomes the opening annotation.
# Otherwise the opening annotation is inserted in front of the trailing run
# of CHARACTER_OF_RUBY characters / full-width spaces, after any leading
# full-width spaces of that run. The closing annotation is always appended.
#
# @param str [String] text ending with the emphasised characters
# @param ten [String] emphasis marks (not used by this method)
# @return [String] annotated text
def sesame(str, ten)
  open_chuki = "［＃傍点］"
  close_chuki = "［＃傍点終わり］"
  return str.sub("｜", open_chuki) + close_chuki if str.include?("｜")

  marked = str.sub(/([#{CHARACTER_OF_RUBY}　]+)$/) do
    target = Regexp.last_match(1)
    indent = target[/^(　+)/, 1].to_s
    "#{indent}#{open_chuki}#{target[indent.length..-1]}"
  end
  marked + close_chuki
end

#stash_hankaku_num_and_comma(num) ⇒ `Object`

# File 'lib/converterbase.rb', line 110

# Stores +num+ in @num_and_comma_list under a freshly incremented id and
# returns the placeholder "［＃半角数字＝<id>］" that stands in for it.
#
# The id comes from the class variable @@num_and_comma_list_counter, which is
# lazily initialised to 0 here and incremented on every call.
#
# @param num [String] half-width number text to stash
# @return [String] placeholder
def stash_hankaku_num_and_comma(num)
  @@num_and_comma_list_counter ||= 0
  stash_id = (@@num_and_comma_list_counter += 1)
  @num_and_comma_list[stash_id] = num
  "［＃半角数字＝#{stash_id}］"
end

#stash_kanji_num(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 123

# Replaces runs of kanji numerals / units (characters of KANJI_NUM plus
# 十百千万億兆京) in +data+ (in place) with "［＃漢数字＝<i>］" placeholders
# and records the original text in @kanji_num_list.
#
# +i+ is the running index supplied by with_index; it advances for every
# match, including the ones that are skipped below. The list key is
# convert_numbers(i.to_s) while the placeholder carries the raw index.
# NOTE(review): presumably a later convert_numbers pass rewrites the digits
# inside the placeholder so that they equal the key - confirm with caller.
def stash_kanji_num(data)
  data.gsub!(/[#{KANJI_NUM}十百千万億兆京]+/).with_index do |match, i|
    # Leave the run untouched when the character directly before or after it
    # is an ASCII or full-width digit.
    # NOTE(review): relies on $` / $' reflecting the current gsub! match
    # inside the enumerator block - confirm on the supported Ruby versions.
    if "#{$`[-1]}#{$'[0]}" =~ /[\d０-９]/
      next match
    end
    @kanji_num_list[convert_numbers(i.to_s)] = match
    "［＃漢数字＝#{i}］"
  end
end

#stash_kome(data) ⇒ `Object`

先に外字注記にしてしまうと border_symbol? 等で困るので、あとで外字注記化出来るようにする



403
404
405

# File 'lib/converterbase.rb', line 403

# Doubles every "※" in +data+ (in place) so it can be recognised and turned
# into a gaiji annotation later (see the doubled-※ handling in
# rebuild_kome_to_gaiji).
#
# @param data [String] text to rewrite
# @return [String, nil] +data+, or nil when it contains no "※"
def stash_kome(data)
  data.gsub!("※") { "※※" }
end

#symbols_to_zenkaku(data) ⇒ `Object`

半角記号を全角に変換

# File 'lib/converterbase.rb', line 333

# Converts half-width symbols in +data+ to full-width ones (in place).
#
# Steps, in order:
# 1. curly quotes and 〝 〟 are normalised to ASCII " and '
# 2. "..." and '...' pairs on a single line become 〝...〟
# 3. the remaining symbols are mapped one-to-one to full-width characters
# 4. backslashes become ￥
#
# @param data [String] text to convert
# @return [String, nil] result of the final backslash substitution: +data+,
#   or nil when it contained no backslash
def symbols_to_zenkaku(data)
  data.tr!("“”‘’〝〟", %q(""''""))

  # MEMO: hardly any font can display single "nonokagi" quotes, so quoted
  # text in single quotes also gets the double form.
  [/"([^"\n]+)"/, /'([^'\n]+)'/].each do |quoted|
    data.gsub!(quoted) { "〝#{Regexp.last_match(1)}〟" }
  end

  hankaku_symbols = "-=+/*《》'\"%$#&!?<>＜＞()|‐,._;:[]"
  zenkaku_symbols = "－＝＋／＊≪≫’”％＄＃＆！？〈〉〈〉（）｜－，．＿；：［］"
  data.tr!(hankaku_symbols, zenkaku_symbols)

  data.gsub!("\\") { "￥" }
end

#tcy(str) ⇒ `Object`

縦中横注記取得



345
346
347

# File 'lib/converterbase.rb', line 345

# Wraps +str+ in the tate-chu-yoko (horizontal-in-vertical) annotations.
#
# @param str [#to_s] text to wrap
# @return [String] "［＃縦中横］" + text + "［＃縦中横終わり］"
def tcy(str)
  open_chuki = "［＃縦中横］"
  close_chuki = "［＃縦中横終わり］"
  "#{open_chuki}#{str}#{close_chuki}"
end

#to_ruby(match, m1, m2, openclose_symbols) ⇒ `Object`

# File 'lib/converterbase.rb', line 840

# Builds the ruby (furigana) markup for one "base + bracketed reading" match.
#
# Branches, checked in order:
# 1. base ends with "｜": ruby conversion is suppressed; the trailing "｜" is
#    dropped and the reading is kept inside the original brackets.
# 2. is_sesame? is true: the match is emphasis dots, delegated to sesame.
# 3. base contains "｜": the last "｜" marks the start of the ruby base.
# 4. last base character is a ruby target (object_of_ruby?): the start of the
#    base is found by walking back over ruby-target characters.
# 5. otherwise the matched text is returned unchanged.
#
# The reading goes through ruby_youon_to_big in both ruby branches (3 and 4)
# so the small-kana setting applies regardless of how the base was delimited;
# previously branch 3 skipped it.
#
# @param match [String] the complete matched text
# @param m1 [String] text before the opening bracket (ruby base candidate)
# @param m2 [String] text inside the brackets (reading)
# @param openclose_symbols [Array<String>] opening and closing bracket
# @return [String] converted text
def to_ruby(match, m1, m2, openclose_symbols)
  last_char = m1[-1]
  case
  when last_char == "｜"
    # A "｜" directly before the bracket suppresses ruby conversion.
    "#{m1[0...-1]}#{openclose_symbols[0]}#{m2}#{openclose_symbols[1]}"
  when is_sesame?(m1, m2, last_char)
    sesame(m1, m2)
  when m1.include?("｜")
    "#{m1.sub(/｜([^｜]*)$/, "［＃ルビ用縦線］\\1")}《#{ruby_youon_to_big(m2)}》"
  when object_of_ruby?(last_char)
    # Walk back over Narou's ruby-target characters and insert the bar there
    # (absorbs the difference between the Aozora Bunko and Narou ruby rules).
    m1.sub(/([#{CHARACTER_OF_RUBY}　]+)$/) {
      match_target = $1
      if match_target =~ /^(　+)/
        "#{$1}［＃ルビ用縦線］#{match_target[$1.length..-1]}"
      else
        "［＃ルビ用縦線］#{match_target}"
      end
    } + "《#{ruby_youon_to_big(m2)}》"
  else
    match
  end
end

#zenkaku_num_to_hankaku_num(num) ⇒ `Object`

全角数字を半角アラビア数字に



516
517
518

# File 'lib/converterbase.rb', line 516

# Returns a copy of +num+ with full-width digits (０-９) and the kanji digits
# of KANJI_NUM converted to half-width Arabic digits.
#
# @param num [String] text containing digits
# @return [String] converted copy
def zenkaku_num_to_hankaku_num(num)
  wide_and_kanji_digits = "０-９" + KANJI_NUM
  num.tr(wide_and_kanji_digits, "0-90-9")
end

#zenkaku_num_to_kanji(str) ⇒ `Object`

全角アラビア数字を漢数字に



142
143
144

# File 'lib/converterbase.rb', line 142

# Returns a copy of +str+ with full-width Arabic digits (０-９) replaced by
# the corresponding kanji digits of KANJI_NUM.
#
# @param str [String] text containing full-width digits
# @return [String] converted copy
def zenkaku_num_to_kanji(str)
  zenkaku_digits = "０-９"
  str.tr(zenkaku_digits, KANJI_NUM)
end

#zenkaku_rstrip(line) ⇒ `Object`

全角版 String#rstrip!



880
881
882

# File 'lib/converterbase.rb', line 880

# Full-width aware counterpart of String#rstrip!: removes trailing full-width
# spaces and whitespace from the end of +line+ in place.
#
# @param line [String] text to strip
# @return [String, nil] +line+, or nil when nothing was removed
def zenkaku_rstrip(line)
  trailing_blanks = /[　\s]+\z/
  line.sub!(trailing_blanks, "")
end

Class: ConverterBase

Constant Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(setting, inspector, illustration) ⇒ ConverterBase

Instance Attribute Details

#use_dakuten_font ⇒ Object (readonly)

Class Method Details

.rebuild_brackets(data, stack) ⇒ Object

Instance Method Details

#__calc_kanji_num_with_unit(string) ⇒ Object

#__calc_sum_unit(units) ⇒ Object

#after(io, text_type) ⇒ Object

#after_convert(io) ⇒ Object

#alphabet_to_zenkaku(data, force = false) ⇒ Object

#author_comment_force_close ⇒ Object

#auto_indent(data) ⇒ Object

#auto_join_in_brackets(data) ⇒ Object

#auto_join_line(data) ⇒ Object

#before(io, text_type) ⇒ Object

#before_convert(io) ⇒ Object

#blank_line?(line) ⇒ Boolean

#border_symbol?(line) ⇒ Boolean

#comments_block?(line) ⇒ Boolean

#convert(text, text_type) ⇒ Object

#convert_arrow(data) ⇒ Object

#convert_dakuten_char_to_font(data) ⇒ Object

#convert_double_angle_quotation_to_gaiji(data) ⇒ Object

#convert_for_all_data(data) ⇒ Object

#convert_fraction_and_date(data) ⇒ Object

#convert_horizontal_ellipsis(data) ⇒ Object

#convert_kanji_num_with_unit(data, lower_digit_zero = 0) ⇒ Object

#convert_main(io) ⇒ Object

#convert_novel_rule(data) ⇒ Object

#convert_numbers(data) ⇒ Object

#convert_page_break(data) ⇒ Object

#convert_rome_numeric(data) ⇒ Object

#convert_special_characters(data) ⇒ Object

#convert_tatechuyoko(data) ⇒ Object

#delay_outputs(data = "") ⇒ Object

#delete_dust_char(data) ⇒ Object

#enchant_midashi(data) ⇒ Object

#erase_comments_block(data) ⇒ Object

#erase_introduction(data) ⇒ Object

#erase_postscript(data) ⇒ Object

#exception_reconvert_kanji_to_num(data) ⇒ Object

#find_introduction? ⇒ Boolean

#force_indent_special_chapter(line) ⇒ Object

#half_indent_bracket(data) ⇒ Object

#hankaku_num_to_zenkaku_num(data) ⇒ Object

#hankakukana_to_zenkakukana(data) ⇒ Object

#inclusion_author_comment_block?(line) ⇒ Boolean

#initialize_member_values ⇒ Object

#insert_blank_line_to_border_symbol(line) ⇒ Object

#insert_separate_space(data) ⇒ Object

#is_sesame?(str, ten, last_char) ⇒ Boolean

#jisage(line, num) ⇒ Object

#join_inner_bracket(str) ⇒ Object

#kanji_num_to_integer(string) ⇒ Object

#leave_author_comment_block?(line) ⇒ Boolean

#midashi(str) ⇒ Object

#modify_kana_ni_to_kanji_ni(data) ⇒ Object

#narou_ruby(data) ⇒ Object

#num_to_kanji(data) ⇒ Object

#object_of_ruby?(char) ⇒ Boolean

#outputs(data = "", force = false) ⇒ Object

#page_break?(line) ⇒ Boolean

#process_author_comment(line) ⇒ Object

#rebuild_english_sentences(data) ⇒ Object

#rebuild_hankaku_num_and_comma(data) ⇒ Object

#rebuild_illust(data) ⇒ Object

#rebuild_kanji_num(data) ⇒ Object

#rebuild_kome_to_gaiji(data) ⇒ Object

#rebuild_url(data) ⇒ Object

#replace_by_replace_txt(text) ⇒ Object

#replace_illust_tag(data) ⇒ Object

#replace_narou_tag(data) ⇒ Object

#replace_tatesen(str) ⇒ Object

#initialize(setting, inspector, illustration) ⇒ `ConverterBase`

#use_dakuten_font ⇒ `Object` (readonly)

.rebuild_brackets(data, stack) ⇒ `Object`

#__calc_kanji_num_with_unit(string) ⇒ `Object`

#__calc_sum_unit(units) ⇒ `Object`

#after(io, text_type) ⇒ `Object`

#after_convert(io) ⇒ `Object`

#alphabet_to_zenkaku(data, force = false) ⇒ `Object`

#author_comment_force_close ⇒ `Object`

#auto_indent(data) ⇒ `Object`

#auto_join_in_brackets(data) ⇒ `Object`

#auto_join_line(data) ⇒ `Object`

#before(io, text_type) ⇒ `Object`

#before_convert(io) ⇒ `Object`

#blank_line?(line) ⇒ `Boolean`

#border_symbol?(line) ⇒ `Boolean`

#comments_block?(line) ⇒ `Boolean`

#convert(text, text_type) ⇒ `Object`

#convert_arrow(data) ⇒ `Object`

#convert_dakuten_char_to_font(data) ⇒ `Object`

#convert_double_angle_quotation_to_gaiji(data) ⇒ `Object`

#convert_for_all_data(data) ⇒ `Object`

#convert_fraction_and_date(data) ⇒ `Object`

#convert_horizontal_ellipsis(data) ⇒ `Object`

#convert_kanji_num_with_unit(data, lower_digit_zero = 0) ⇒ `Object`

#convert_main(io) ⇒ `Object`

#convert_novel_rule(data) ⇒ `Object`

#convert_numbers(data) ⇒ `Object`

#convert_page_break(data) ⇒ `Object`

#convert_rome_numeric(data) ⇒ `Object`

#convert_special_characters(data) ⇒ `Object`

#convert_tatechuyoko(data) ⇒ `Object`

#delay_outputs(data = "") ⇒ `Object`

#delete_dust_char(data) ⇒ `Object`

#enchant_midashi(data) ⇒ `Object`

#erase_comments_block(data) ⇒ `Object`

#erase_introduction(data) ⇒ `Object`

#erase_postscript(data) ⇒ `Object`

#exception_reconvert_kanji_to_num(data) ⇒ `Object`

#find_introduction? ⇒ `Boolean`

#force_indent_special_chapter(line) ⇒ `Object`

#half_indent_bracket(data) ⇒ `Object`

#hankaku_num_to_zenkaku_num(data) ⇒ `Object`

#hankakukana_to_zenkakukana(data) ⇒ `Object`

#inclusion_author_comment_block?(line) ⇒ `Boolean`

#initialize_member_values ⇒ `Object`

#insert_blank_line_to_border_symbol(line) ⇒ `Object`

#insert_separate_space(data) ⇒ `Object`

#is_sesame?(str, ten, last_char) ⇒ `Boolean`

#jisage(line, num) ⇒ `Object`

#join_inner_bracket(str) ⇒ `Object`

#kanji_num_to_integer(string) ⇒ `Object`

#leave_author_comment_block?(line) ⇒ `Boolean`

#midashi(str) ⇒ `Object`

#modify_kana_ni_to_kanji_ni(data) ⇒ `Object`

#narou_ruby(data) ⇒ `Object`

#num_to_kanji(data) ⇒ `Object`

#object_of_ruby?(char) ⇒ `Boolean`

#outputs(data = "", force = false) ⇒ `Object`

#page_break?(line) ⇒ `Boolean`

#process_author_comment(line) ⇒ `Object`

#rebuild_english_sentences(data) ⇒ `Object`

#rebuild_hankaku_num_and_comma(data) ⇒ `Object`

#rebuild_illust(data) ⇒ `Object`

#rebuild_kanji_num(data) ⇒ `Object`

#rebuild_kome_to_gaiji(data) ⇒ `Object`

#rebuild_url(data) ⇒ `Object`

#replace_by_replace_txt(text) ⇒ `Object`

#replace_illust_tag(data) ⇒ `Object`

#replace_narou_tag(data) ⇒ `Object`

#replace_tatesen(str) ⇒ `Object`

#replace_url(data) ⇒ `Object`

#rstrip_all_lines(data) ⇒ `Object`

#ruby_youon_to_big(ruby) ⇒ `Object`

#sesame(str, ten) ⇒ `Object`

#stash_hankaku_num_and_comma(num) ⇒ `Object`

#stash_kanji_num(data) ⇒ `Object`

#stash_kome(data) ⇒ `Object`

#symbols_to_zenkaku(data) ⇒ `Object`

#tcy(str) ⇒ `Object`