Class: ConverterBase

Inherits:

Object

Object
ConverterBase

show all

Defined in: lib/converterbase.rb

Constant Summary collapse

KANJI_NUM =

"〇一二三四五六七八九"

ENGLISH_SENTENCES_CHARACTERS =

/[\w.,!?'" &:;_-]+/

ENGLISH_SENTENCES_MIN_LENGTH = この文字数以上アルファベットが続くと半角のまま

KANJI_NUM_UNITS =

%w(万 億 兆 京).unshift("")

KANJI_KURAI =

%w(十 百 千).unshift("")

KANJI_NUM_UNITS_DIGIT =

{
  "十" => 1, "百" => 2, "千" => 3, "万" => 4, "億" => 8, "兆" => 12, "京" => 16
}

RECONVERT_KANJI_TO_NUM_PATTERN_UNIT =

"％㎜㎝㎞㎎㎏㏄㎡㎥"

ROME_NUM_ALPHABET =

%w(II III IV VI VII VIII IX ii iii iv vi vii viii ix)

ROME_NUM =

%w(Ⅱ Ⅲ Ⅳ Ⅵ Ⅶ Ⅷ Ⅸ ⅱ ⅲ ⅳ ⅵ ⅶ ⅷ ⅸ)

SINGLE_MINUTE_FAMILY = ミュート（ノノカギ）化する記号定義

%!‘’'!

DOUBLE_MINUTE_FAMILY =

%!“”〝〟"!

HALF_INDENT_TARGET =

/^[ 　\t]*((?:[〔「『(（【〈《≪〝])|(?:※［＃始め二重山括弧］))/

FULL_INDENT_TARGET =

/^[ 　\t]*(――)/

AUTO_INDENT_IGNORE_INDENT_CHAR =

Inspector::IGNORE_INDENT_CHAR.sub("・", "")

AUTHOR_INTRODUCTION_SPLITTER = 前書き・後書きの検出及び処理 ==============================

/^　*[\*＊]{44}$/

AUTHOR_POSTSCRIPT_SPLITTER =

/^　*[\*＊]{48}$/

AUTHOR_COMMENT_CHUKI =

{
  introduction: {
    open: "［＃ここから前書き］", close: "［＃ここで前書き終わり］"
  },
  postscript: {
    open: "［＃ここから後書き］", close: "［＃ここで後書き終わり］"
  }
}

BRACKETS =

[%w(「 」), %w(『 』)]

OPENCLOSE_REGEXPS = ネストに対応したかぎ括弧の正規表現

BRACKETS.map { |bracket|
  bo, bc = bracket
  /(?<oc>#{bo}[^#{bo+bc}]*(?:\g<oc>[^#{bo+bc}]*)*#{bc})/m
}

CHARACTER_OF_RUBY =

"一-龠Ａ-Ｚａ-ｚA-Za-z"

AUTO_RUBY_CHARACTERS =

"([ぁ-んァ-ヶーゝゞ・ 　]{,20})"

KANA =

"ァ-ヶー"

WORD_SEPARATOR = zws = zero width space

"［＃zws］"

DASH_FILES =

%w(singledash.png doubledash.png)

Instance Attribute Summary collapse

#current_index ⇒ Object

現在処理してる subtitles 内でのインデックス.
#data_type ⇒ Object

Returns the value of attribute data_type.
#output_text_dir ⇒ Object

Returns the value of attribute output_text_dir.
#subtitles ⇒ Object

Returns the value of attribute subtitles.
#use_dakuten_font ⇒ Object readonly

Returns the value of attribute use_dakuten_font.

Class Method Summary collapse

.rebuild_brackets(data, stack) ⇒ Object

Instance Method Summary collapse

#__calc_kanji_num_with_unit(string) ⇒ Object
#__calc_sum_unit(units) ⇒ Object
#after(io, text_type) ⇒ Object
#after_convert(io) ⇒ Object
#alphabet_to_zenkaku(data, force = false) ⇒ Object

半角アルファベットを全角に変換する.
#author_comment_force_close ⇒ Object
#auto_indent(data) ⇒ Object

行頭字下げ.
#auto_join_in_brackets(data) ⇒ Object

かぎ括弧内自動連結.
#auto_join_line(data) ⇒ Object

手動折り返しの自動連結.
#before(io, text_type) ⇒ Object
#before_convert(io) ⇒ Object
#blank_line?(line) ⇒ Boolean
#border_symbol?(line) ⇒ Boolean
#calc_cr_count(str) ⇒ Object
#comments_block?(line) ⇒ Boolean

コメントブロックを検出する.
#convert(text, text_type) ⇒ Object
#convert_arrow(data) ⇒ Object

おかしくなりやすい矢印文字の変換.
#convert_dakuten_char_to_font(data) ⇒ Object

濁点のついてない文字に濁点をつける表現を対応.
#convert_double_angle_quotation_to_gaiji(data) ⇒ Object

ギュメを二重山括弧（の外字）に変換.
#convert_for_all_data(data) ⇒ Object

小説データ全体に対して施す変換.
#convert_fraction_and_date(data) ⇒ Object

分数表記を○分の○表記に変更、及び日付表記を検出.
#convert_head_half_spaces(data) ⇒ Object

間違えて行頭字下げに半角スペースを使ってるっぽいのを全角スペースにする.
#convert_horizontal_ellipsis(data) ⇒ Object

中黒(・)や句読点を並べて三点リーダーもどきにしているのを三点リーダーに変換.
#convert_kanji_num_with_unit(data, lower_digit_zero = 0) ⇒ Object

漢数字を単位を使った表現に変換.
#convert_main(io) ⇒ Object

変換処理本体.
#convert_novel_rule(data) ⇒ Object

小説のルールに沿うように変換.
#convert_numbers(data) ⇒ Object

数字の変換.
#convert_page_break(data) ⇒ Object

一定以上の連続する空行を改ページに変換.
#convert_rome_numeric(data) ⇒ Object

ローマ数字っぽいアルファベットをローマ数字に変換.
#convert_special_characters(data) ⇒ Object

特定の表現・記号を変換していく.
#convert_tatechuyoko(data) ⇒ Object

縦中横にすべき表現を変換.
#copy_dash_images_to_local_setting_dir ⇒ Object
#dash_image_relative_paths(base_dir, output_text_dir) ⇒ Object
#delay_outputs(data = "") ⇒ Object
#delete_dust_char(data) ⇒ Object

表示上化けてしまうゴミ削除.
#double_dash_to_image(text, output_text_dir) ⇒ Object
#enchant_midashi(data) ⇒ Object

［＃改ページ］直後の行を見出しに設定する.
#erase_comments_block(data) ⇒ Object

コメントブロックを削除する.
#erase_introduction(data) ⇒ Object

前書きを削除する.
#erase_postscript(data) ⇒ Object

後書きを削除する.
#exception_reconvert_kanji_to_num(data) ⇒ Object

アラビア数字を使うべきところはアラビア数字に戻す.
#find_introduction? ⇒ Boolean

前書きの検出.
#force_indent_special_chapter(data) ⇒ Object

章見出しっぽい文字列を字下げする.
#half_indent_bracket(data) ⇒ Object

行頭かぎ括弧(等)に二分アキを追加する.
#hankaku_num_to_zenkaku_num(data) ⇒ Object

半角アラビア数字の全角化.
#hankakukana_to_zenkakukana(data) ⇒ Object

半角カナと｢｣｡､･等を全角に変換.
#inclusion_author_comment_block?(line) ⇒ Boolean
#initialize(setting, inspector, illustration) ⇒ ConverterBase constructor

A new instance of ConverterBase.
#insert_blank_before_line_and_behind_to_special_chapter(line) ⇒ Object
#insert_blank_line_to_border_symbol(line) ⇒ Object

■などの区切りの前後には空行が必ず存在するようにする.
#insert_char_separator(str) ⇒ Object

文字単位でzwsを挿入する.
#insert_separate_space(data) ⇒ Object

特定の記号の直後は全角アキを挿入する.
#insert_separator_for_selection(str) ⇒ Object

Kindle端末で単語選択がしやすいように０幅スペースを挿入する.
#insert_word_separator(str) ⇒ Object

単語単位でzwsを挿入する.
#is_sesame?(str, ten, last_char) ⇒ Boolean
#jisage(line, num) ⇒ Object

行頭空白を考慮した字下げ.
#join_inner_bracket(str) ⇒ Object

改行を連結した文章を作る.
#kanji_num_to_integer(string) ⇒ Object
#leave_author_comment_block?(line) ⇒ Boolean
#midashi(str) ⇒ Object
#modify_kana_ni_to_kanji_ni(data) ⇒ Object

漢字の二じゃなくて間違えてカタカナのニを使ってるのを校正する.
#narou_ruby(data) ⇒ Object

小説家になろうのルビ対策.
#num_to_kanji(data) ⇒ Object

アラビア数字を漢数字に.
#object_of_ruby?(char) ⇒ Boolean
#outputs(data = "", force = false) ⇒ Object
#page_break?(line) ⇒ Boolean

改ページある？.
#process_author_comment(line) ⇒ Object
#rebuild_english_sentences(data) ⇒ Object

英文を再構成する.
#rebuild_force_indent_special_chapter(data) ⇒ Object
#rebuild_hankaku_num_and_comma(data) ⇒ Object
#rebuild_illust(data) ⇒ Object
#rebuild_kanji_num(data) ⇒ Object
#rebuild_kome_to_gaiji(data) ⇒ Object

※の外字注記化.
#rebuild_url(data) ⇒ Object
#replace_by_replace_txt(text) ⇒ Object

replace.txt により単純置換.
#replace_illust_tag(data) ⇒ Object

挿絵タグやimgタグ等を挿絵注釈に変換。挿絵画像が存在しなければダウンロードして保存する.
#replace_narou_tag(data) ⇒ Object

小説家になろう専用タグを置換.
#replace_tatesen(str) ⇒ Object
#replace_url(data) ⇒ Object

URL っぽい文字列を一旦別のIDに置き換えてあとで復元することで、変換処理の影響を受けさせない.
#reset_member_values ⇒ Object

.convert が実行されるたびに呼ばれるメンバ変数リセット用メソッド。インスタンス作成時に一度だけ初期化したい場合は initialize で初期化する.
#rstrip_all_lines(data) ⇒ Object

すべての行の行末空白を削除.
#ruby_youon_to_big(ruby) ⇒ Object

ルビの拗音(ぁ、ぃ等)を商業書籍のように大きくする.
#sesame(str) ⇒ Object
#stash_hankaku_num_and_comma(num) ⇒ Object
#stash_kanji_num(data) ⇒ Object
#stash_kome(data) ⇒ Object

先に外字注記にしてしまうと border_symbol? 等で困るので、あとで外字注記化出来るようにする.
#symbols_to_zenkaku(data) ⇒ Object

半角記号を全角に変換.
#tcy(str) ⇒ Object

縦中横注記取得.
#to_ruby(match, m1, m2, openclose_symbols) ⇒ Object
#zenkaku_num_to_hankaku_num(num) ⇒ Object

全角数字(漢数字含む)を半角アラビア数字に.
#zenkaku_num_to_kanji(str) ⇒ Object

全角アラビア数字を漢数字に.
#zenkaku_rstrip(line) ⇒ Object

全角版 String#rstrip!.

Constructor Details

#initialize(setting, inspector, illustration) ⇒ `ConverterBase`

Returns a new instance of ConverterBase.

# File 'lib/converterbase.rb', line 38

# Creates a converter bound to its collaborators and sets one-time defaults.
#
# @param setting [Object] stored in @setting
# @param inspector [Object] stored in @inspector
# @param illustration [Object] stored in @illustration
def initialize(setting, inspector, illustration)
  @setting, @inspector, @illustration = setting, inspector, illustration
  # Defaults assigned once per instance.
  @use_dakuten_font = false
  @output_text_dir = @subtitles = nil
  @data_type = "text"
  @current_index = 0
  # Remaining per-conversion state is initialised by reset_member_values.
  reset_member_values
end

Instance Attribute Details

#current_index ⇒ `Object`

現在処理してる subtitles 内でのインデックス



22
23
24

# File 'lib/converterbase.rb', line 22

# Returns @current_index: the index, within subtitles, of the entry currently being processed.
def current_index
  @current_index
end

#data_type ⇒ `Object`

Returns the value of attribute data_type.



21
22
23

# File 'lib/converterbase.rb', line 21

# Returns the value of @data_type (set to "text" by the constructor).
def data_type
  @data_type
end

#output_text_dir ⇒ `Object`

Returns the value of attribute output_text_dir.



21
22
23

# File 'lib/converterbase.rb', line 21

# Returns the value of @output_text_dir (nil until assigned).
def output_text_dir
  @output_text_dir
end

#subtitles ⇒ `Object`

Returns the value of attribute subtitles.



21
22
23

# File 'lib/converterbase.rb', line 21

# Returns the value of @subtitles (nil until assigned).
def subtitles
  @subtitles
end

#use_dakuten_font ⇒ `Object` (readonly)

Returns the value of attribute use_dakuten_font.



20
21
22

# File 'lib/converterbase.rb', line 20

# Returns @use_dakuten_font; read-only, starts as false in the constructor.
def use_dakuten_font
  @use_dakuten_font
end

Class Method Details

.rebuild_brackets(data, stack) ⇒ `Object`

# File 'lib/converterbase.rb', line 842

# Expands bracket placeholders back into their stashed text.
#
# Each "［＃かぎ括弧＝N］" marker in data is replaced with stack[N].
#
# @param data [String] text containing placeholders (left unmodified)
# @param stack [Hash, Array] stashed strings looked up by integer index
# @return [String] a new string with every placeholder expanded
def self.rebuild_brackets(data, stack)
  placeholder = /［＃かぎ括弧＝(\d+)］/
  data.gsub(placeholder) { stack[Regexp.last_match(1).to_i] }
end

Instance Method Details

#__calc_kanji_num_with_unit(string) ⇒ `Object`

# File 'lib/converterbase.rb', line 174

# Sums a kanji numeral written as repeated "digits + unit characters" groups.
#
# Each group found by the scan contributes to a running total; scanning stops
# at the first group where both the digit part and the unit part are empty.
#
# @param string [String] kanji numeral fragment (digits from KANJI_NUM, units 十/百/千)
# @return [Integer] accumulated value of all groups
def __calc_kanji_num_with_unit(string)
  total = 0
  string.scan(/([#{KANJI_NUM}]*)([十百千]*)/) do |num, units|
    # Both parts empty: nothing numeric at this position, stop.
    break if num + units == ""
    # A unit with no preceding digit counts as one of that unit.
    num = "1" if num.empty?
    num_tr = num.tr(KANJI_NUM, "0-9")
    if units.empty?
      total += num_tr.to_i
    else
      # Append the unit value's digits minus its leading one (i.e. its zeros) to the digit string.
      total += (num_tr + __calc_sum_unit(units).to_s[1, 99]).to_i
    end
  end
  total
end

#__calc_sum_unit(units) ⇒ `Object`

# File 'lib/converterbase.rb', line 168

# Adds up the numeric value of each unit character in units.
#
# A unit's value is 10 raised to its digit count from KANJI_NUM_UNITS_DIGIT.
#
# @param units [String] unit characters
# @return [Integer] sum of the unit values (0 for an empty string)
def __calc_sum_unit(units)
  units.each_char.sum { |unit| 10**KANJI_NUM_UNITS_DIGIT[unit] }
end

#after(io, text_type) ⇒ `Object`



34
35
36

# File 'lib/converterbase.rb', line 34

# Post-conversion hook. This implementation returns io unchanged and does not use text_type.
#
# @param io [Object] the converted stream
# @param text_type [String] kind of text being converted
# @return [Object] the same io
def after(io, text_type)
  io
end

#after_convert(io) ⇒ `Object`



1156
1157
1158

# File 'lib/converterbase.rb', line 1156

# Invokes the #after hook with the current @text_type.
#
# @param io [Object] the converted stream
# @return [Object] whatever #after returns
def after_convert(io)
  after(io, @text_type)
end

#alphabet_to_zenkaku(data, force = false) ⇒ `Object`

半角アルファベットを全角に変換する

force : 強制的に全アルファベットを全角にするか？

false の場合、英文章（半角スペースで区切られた2単語以上）を半角のままにする
英文の定義： 1. 半角スペースで区切られた２単語以上の文章、
             2. 一定以上の長さの一文字以上アルファベットを含む文章

# File 'lib/converterbase.rb', line 497

# Converts half-width ASCII letters in data to full-width, in place.
#
# With force, every letter is widened. Otherwise a run matched by
# ENGLISH_SENTENCES_CHARACTERS is kept half-width when it looks like English
# text: it has two or more space-separated words, or it is at least
# ENGLISH_SENTENCES_MIN_LENGTH characters long and contains a letter. Such
# runs are stashed in @english_sentences and replaced by a "［＃英文＝N］" marker.
#
# @param data [String] text to modify in place
# @param force [Boolean] widen every letter unconditionally
# @return [String, nil] result of the underlying gsub!
def alphabet_to_zenkaku(data, force = false)
  widen = ->(text) { text.tr("a-zA-Z", "ａ-ｚＡ-Ｚ") }
  return data.gsub!(/[a-zA-Z]+/) { |letters| widen.call(letters) } if force

  data.gsub!(ENGLISH_SENTENCES_CHARACTERS) do |candidate|
    keep_half_width = candidate.split(" ").size >= 2 ||
                      (candidate.length >= ENGLISH_SENTENCES_MIN_LENGTH && candidate.match(/[a-z]/i))
    next widen.call(candidate) unless keep_half_width
    @english_sentences << candidate
    "［＃英文＝#{@english_sentences.size - 1}］"
  end
end

#author_comment_force_close ⇒ `Object`

# File 'lib/converterbase.rb', line 784

# Emits the closing annotation for an author-comment block that is still open.
#
# Does nothing when @in_author_comment_block is not set.
def author_comment_force_close
  return unless @in_author_comment_block
  outputs(AUTHOR_COMMENT_CHUKI[@in_author_comment_block][:close])
end

#auto_indent(data) ⇒ `Object`

行頭字下げ

必ず下げなければいけないところは強制的に字下げ。他の部分は全体的に判断して字下げ。enable_force_indent が有効なら強制字下げ

# File 'lib/converterbase.rb', line 599

# Indents line heads in data, in place.
#
# Lines matching FULL_INDENT_TARGET are always indented by one full-width space.
# The remaining lines are indented when enable_force_indent is set, or when
# enable_auto_indent is set and @inspector.inspect_indent(data) is truthy.
#
# @param data [String] text to modify in place
def auto_indent(data)
  data.gsub!(FULL_INDENT_TARGET, "　\\1")
  if @setting.enable_force_indent || (@setting.enable_auto_indent && @inspector.inspect_indent(data))
    data.gsub!(/^([^#{AUTO_INDENT_IGNORE_INDENT_CHAR}])/) do
      # Workaround for lines that start with consecutive middle dots (・・・) used in place of an ellipsis
      # https://github.com/whiteleaf7/narou/issues/35
      # A single leading middle dot is a common notation, so it is not indented
      if $1 == "・" && $'[0] != "・"
        "・"
      else
        # A leading half-/full-width space is replaced by one full-width space; other characters get one prepended.
        $1 == " " || $1 == "　" ? "　" : "　#{$1}"
      end
    end
  end
end

#auto_join_in_brackets(data) ⇒ `Object`

かぎ括弧内自動連結

# File 'lib/converterbase.rb', line 818

# Joins manually wrapped lines inside corner brackets, in place.
#
# For each bracket pair in BRACKETS, every balanced bracket span is temporarily
# replaced by a "［＃かぎ括弧＝N］" placeholder while its (possibly joined)
# text is kept in a stack; the placeholders are expanded again at the end.
# Returns immediately when both enable_auto_join_in_brackets and enable_inspect are off.
#
# @param data [String] text to modify in place
def auto_join_in_brackets(data)
  if !@setting.enable_auto_join_in_brackets && !@setting.enable_inspect
    return
  end
  OPENCLOSE_REGEXPS.each_with_index do |openclose, i|
    stack = {}
    data.gsub!(openclose).with_index do |match, j|
      joined_str = join_inner_bracket(match)
      if @setting.enable_auto_join_in_brackets && joined_str
        # Keep the original span when the inspector reports a problem with the joined text.
        error = @inspector.validate_joined_inner_brackets(match, joined_str, BRACKETS[i])
        stack[j] = error ? match : joined_str
      else
        stack[j] = match
      end
      "［＃かぎ括弧＝#{j}］"
    end
    if @setting.enable_inspect
      # Only brackets that are not properly closed remain in data at this point
      @inspector.inspect_invalid_openclose_brackets(data, BRACKETS[i], stack)
    end
    data.replace(ConverterBase.rebuild_brackets(data, stack))
  end
end

#auto_join_line(data) ⇒ `Object`

手動折り返しの自動連結

# File 'lib/converterbase.rb', line 851

# Joins a line that was manually wrapped right after a "、", in place.
#
# The break is kept when the next line starts with an opening symbol (treated
# as an intentional break) or is indented by two or more full-width spaces.
#
# @param data [String] text to modify in place
# @return [String, nil] result of gsub!
def auto_join_line(data)
  wrapped_after_comma = /([^、])、\n　([^「『(（【<＜〈《≪…‥―　])/
  data.gsub!(wrapped_after_comma) { "#{$1}、#{$2}" }
end

#before(io, text_type) ⇒ `Object`

# File 'lib/converterbase.rb', line 24

# Pre-conversion hook: applies page-break conversion and blank-line packing to io's buffer.
#
# The decisions are based on the text_type argument. Previously the argument
# was ignored in favour of @text_type; #before_convert passes @text_type, so
# that call path behaves exactly as before.
#
# @param io [StringIO] stream whose underlying string is modified in place
# @param text_type [String] kind of text ("body", "textfile", "story", ...)
# @return [StringIO] the same io
def before(io, text_type)
  data = io.string
  convert_page_break(data) if text_type == "body" || text_type == "textfile"
  if text_type != "story" && @setting.enable_pack_blank_line
    data.gsub!("\n\n", "\n")
    data.gsub!(/(^\n){3}/m, "\n\n")   # reduce three newline-only lines to two
  end
  io
end

#before_convert(io) ⇒ `Object`



1152
1153
1154

# File 'lib/converterbase.rb', line 1152

# Invokes the #before hook with the current @text_type.
#
# @param io [Object] the stream about to be converted
# @return [Object] whatever #before returns
def before_convert(io)
  before(io, @text_type)
end

#blank_line?(line) ⇒ `Boolean`

Returns:

(Boolean)



669
670
671

# File 'lib/converterbase.rb', line 669

# Tests whether line starts with a line made only of half-width spaces, full-width spaces or tabs (or nothing).
#
# @param line [String] text to test
# @return [Integer, nil] 0 (truthy) on a match, nil otherwise
def blank_line?(line)
  line =~ /\A[ 　\t]*$/
end

#border_symbol?(line) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 662

# Tests whether line consists only of border (separator) symbols, optionally preceded by blanks.
#
# The symbol set is read once from bordersymbols.txt in the preset directory
# and cached in the class variable @@symbols.
#
# @param line [String] a single line of text
# @return [Integer, nil] match offset (truthy) for a border line, nil otherwise
def border_symbol?(line)
  # File.open rather than Kernel#open: Kernel#open treats a path beginning with "|" as a command to run.
  @@symbols ||= File.open(File.join(Narou.get_preset_dir, "bordersymbols.txt"), "r:BOM|UTF-8") { |fp|
    fp.read.strip
  }
  line =~ /^[ 　\t]*[#{@@symbols}]+$/
end

#calc_cr_count(str) ⇒ `Object`

# File 'lib/converterbase.rb', line 1050

# Counts the newlines at the head of str, capped at 2.
#
# When str is empty or contains nothing but newlines, String#index returns
# nil; the whole length is then used as the count instead of raising
# NoMethodError on `nil > 2`.
#
# @param str [String] text to examine
# @return [Integer] number of leading newlines, at most 2
def calc_cr_count(str)
  head_cr_count = str.index(/[^\n]/) || str.length
  head_cr_count > 2 ? 2 : head_cr_count
end

#comments_block?(line) ⇒ `Boolean`

コメントブロックを検出する

コメントブロックの定義は - のみが50回以上連続された行に囲まれている間

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 529

# Tracks comment blocks, which are delimited by lines made only of 50 or more "-".
#
# A delimiter line toggles @in_comment_block and is itself reported as part of
# the block; any other line reports the current state.
#
# @param line [String] line to examine
# @return [Object] true for a delimiter line, otherwise the value of @in_comment_block
def comments_block?(line)
  return @in_comment_block unless line =~ /^-{50,}$/
  @in_comment_block ^= 1
  true
end

#convert(text, text_type) ⇒ `Object`

# File 'lib/converterbase.rb', line 1289

# Converts text of the given type and returns the converted string.
#
# The text is passed through before_convert, convert_main and after_convert
# (rewinding the stream after each), then through replace.txt substitution,
# selection-separator insertion and double-dash image replacement.
#
# @param text [String] source text
# @param text_type [String] kind of text; stored in @text_type
# @return [String] converted text ("" for empty input)
def convert(text, text_type)
  return "" if text == ""
  output_text_dir = @output_text_dir || @setting.archive_path
  @text_type = text_type
  io = StringIO.new(rstrip_all_lines(text))
  %i[before_convert convert_main after_convert].each do |stage|
    io = send(stage, io)
    io.rewind
  end
  converted = replace_by_replace_txt(io.read)
  converted = insert_separator_for_selection(converted)
  double_dash_to_image(converted, output_text_dir)
end

#convert_arrow(data) ⇒ `Object`

おかしくなりやすい矢印文字の変換

# File 'lib/converterbase.rb', line 415

# Replaces the double arrows ⇒ and ⇐ with → and ←, in place.
#
# Only applied when @device is set and reports kindle? (the only device this
# was checked on, according to the original note).
#
# @param data [String] text to modify in place
# @return [String, nil] result of tr!, or nil when not applied
def convert_arrow(data)
  return unless @device && @device.kindle?
  data.tr!("⇒⇐", "→←")
end

#convert_dakuten_char_to_font(data) ⇒ `Object`

濁点のついてない文字に濁点をつける表現を対応

濁点つきフォントに部分的に切り替える

# File 'lib/converterbase.rb', line 461

# Wraps a kana followed by a standalone dakuten mark in dakuten-font annotations, in place.
#
# Skipped unless enable_dakuten_font is set. Sets @use_dakuten_font to true
# whenever at least one replacement is made.
#
# @param data [String] text to modify in place
# @return [String, nil] result of gsub!, or nil when disabled
def convert_dakuten_char_to_font(data)
  if @setting.enable_dakuten_font
    data.gsub!(/([ぁ-んァ-ヶι])[゛ﾞ]/) do
      base_char = Regexp.last_match(1)
      @use_dakuten_font = true
      "［＃濁点］#{base_char}［＃濁点終わり］"
    end
  end
end

#convert_double_angle_quotation_to_gaiji(data) ⇒ `Object`

ギュメを二重山括弧（の外字）に変換

# File 'lib/converterbase.rb', line 442

# Replaces ≪ and ≫ with the external-character annotations for double angle brackets, in place.
#
# @param data [String] text to modify in place
# @return [String, nil] result of the gsub! for the closing mark
def convert_double_angle_quotation_to_gaiji(data)
  replacements = [
    ["≪", "※［＃始め二重山括弧］"],
    ["≫", "※［＃終わり二重山括弧］"]
  ]
  replacements.map { |mark, gaiji| data.gsub!(mark, gaiji) }.last
end

#convert_for_all_data(data) ⇒ `Object`

小説データ全体に対して施す変換

# File 'lib/converterbase.rb', line 1127

# Applies the whole-text conversions to data, in place, in a fixed order.
#
# Each step mutates data and later steps work on the output of earlier ones,
# so the order of the calls matters.
#
# @param data [String] full text of the piece being converted
def convert_for_all_data(data)
  hankakukana_to_zenkakukana(data)
  auto_join_in_brackets(data)
  auto_join_line(data) if @setting.enable_auto_join_line
  erase_comments_block(data)
  replace_illust_tag(data)
  replace_url(data)
  replace_narou_tag(data)
  convert_rome_numeric(data)
  alphabet_to_zenkaku(data, @setting.enable_alphabet_force_zenkaku)
  force_indent_special_chapter(data)
  convert_numbers(data)
  exception_reconvert_kanji_to_num(data)
  # Unit-style kanji numerals are skipped for subtitle and chapter text.
  if @setting.enable_convert_num_to_kanji && @text_type != "subtitle" && @text_type != "chapter" \
     && @setting.enable_kanji_num_with_units
    convert_kanji_num_with_unit(data, @setting.kanji_num_with_units_lower_digit_zero)
  end
  rebuild_kanji_num(data)
  insert_separate_space(data)
  convert_special_characters(data)
  convert_fraction_and_date(data)
  modify_kana_ni_to_kanji_ni(data)
  convert_dakuten_char_to_font(data)
end

#convert_fraction_and_date(data) ⇒ `Object`

分数表記を○分の○表記に変更、及び日付表記を検出

スラッシュで区切られた数字が２個なら分数、３個なら日付と定義

# File 'lib/converterbase.rb', line 256

# Rewrites slash-separated numbers in data, in place.
#
# Two components are treated as a fraction ("A/B" becomes "B分のA"); three
# components are treated as a date and formatted with @setting.date_format.
# Anything else is left alone.
#
# Fixes compared with the previous implementation:
# * "\d" inside a double-quoted string is just the letter "d", so the character
#   class matched "d" instead of ASCII digits; it is now written "\\d".
# * A three-component match with date transformation disabled made the block
#   return nil, which erased the matched text; the text is now kept.
# * Matches with an empty component (e.g. "／１") are no longer treated as fractions.
#
# @param data [String] text to modify in place
# @return [String, nil] result of gsub!, or nil when both transforms are disabled
def convert_fraction_and_date(data)
  return if !@setting.enable_transform_fraction && !@setting.enable_transform_date

  target_num = "\\d０-９#{KANJI_NUM}十百千万億兆京垓"
  data.gsub!(/[#{target_num}\/／]+/) do |match|
    numerics = match.split(/[\/／]/)
    # A leading or doubled slash yields an empty component: not a number expression.
    next match if numerics.any?(&:empty?)
    case numerics.size
    when 2
      # fraction
      next match unless @setting.enable_transform_fraction
      "#{zenkaku_num_to_kanji(numerics[1])}分の#{zenkaku_num_to_kanji(numerics[0])}"
    when 3
      # date
      next match unless @setting.enable_transform_date
      begin
        date = Date.new(*numerics.map { |s| s.tr("0-9０-９#{KANJI_NUM}", "0-90-90-9").to_i })
      rescue ArgumentError
        # Not a valid calendar date: leave the text as written.
        next match
      end
      convert_numbers(date.strftime(@setting.date_format))
    else
      match
    end
  end
end

#convert_head_half_spaces(data) ⇒ `Object`

間違えて行頭字下げに半角スペースを使ってるっぽいのを全角スペースにする

# File 'lib/converterbase.rb', line 425

# Replaces half-width spaces at the head of a line with full-width spaces, in place.
#
# Two half-width spaces count as one full-width space, rounding up.
#
# @param data [String] text to modify in place
# @return [String, nil] result of gsub!
def convert_head_half_spaces(data)
  data.gsub!(/^ +/) { |spaces| "　" * ((spaces.length + 1) / 2) }
end

#convert_horizontal_ellipsis(data) ⇒ `Object`

中黒(・)や句読点を並べて三点リーダーもどきにしているのを三点リーダーに変換

# File 'lib/converterbase.rb', line 1001

# Converts runs of middle dots or punctuation used as a makeshift ellipsis into "…", in place.
#
# Runs of three or more ・ 。 、 ． become an even number of "…", unless the
# run touches a "―". Afterwards doubled "。。" and "、、" are collapsed.
# Skipped when the setting is off or the text is a subtitle or chapter.
#
# @param data [String] text to modify in place
def convert_horizontal_ellipsis(data)
  return unless @setting.enable_convert_horizontal_ellipsis
  return if @text_type == "subtitle" || @text_type == "chapter"
  ["・", "。", "、", "．"].each do |dot|
    data.gsub!(/#{dot}{3,}/) do |run|
      neighbours = [$`[-1], $'[0]]
      next run if neighbours.include?("―")
      "…" * ((run.length / 3.0 / 2).ceil * 2)
    end
  end
  data.gsub!("。。", "。")
  data.gsub!("、、", "、")
end

#convert_kanji_num_with_unit(data, lower_digit_zero = 0) ⇒ `Object`

漢数字を単位を使った表現に変換

８００万１０００といったような表現は、内部で一度 8001000 に変換する。 lower_digit_zero はこの最後の 000 に適用される

# File 'lib/converterbase.rb', line 203

# Rewrites kanji numerals in data into unit-based notation (十/百/千 and 万/億/兆/京), in place.
#
# Each numeral is first turned into an integer via kanji_num_to_integer and
# then rebuilt in groups of four digits. A numeral is only rewritten when its
# positional form ends with at least lower_digit_zero zeros.
#
# @param data [String] text to modify in place
# @param lower_digit_zero [Integer] minimum number of trailing 〇 required for rewriting
# @return [String, nil] result of gsub!
def convert_kanji_num_with_unit(data, lower_digit_zero = 0)
  data.gsub!(/([#{KANJI_NUM}十百千万億兆京]+)/) do |match|
    total = kanji_num_to_integer($1)
    # Values with more digits than 京 plus four digits are left as written.
    next match if total.to_s.length > KANJI_NUM_UNITS_DIGIT["京"] + 4
    m1 = total.to_s.tr("0-9", KANJI_NUM)
    if m1 =~ /〇{#{lower_digit_zero},}$/
      digits = m1.reverse.scan(/.{1,4}/).map(&:reverse).reverse   # array of 4-digit groups, split from the lowest digit
      keta = digits.size - 1
      digits.map.with_index { |nums, keta_i|
        four_digit_num = nums.scan(/./).map.with_index { |d, di|
          next "" if d == "〇"
          kurai = KANJI_KURAI[nums.length - di - 1]
          if d == "一"
            # 一 before 千 is not required for a 4-digit number, but is kept before 千 when there are 5 or more digits
            # 1100 → 千百、11100 → 一万一千百
            if kurai != "" && !(keta > 0 && kurai == "千")
              d = ""
            end
          end
          d + kurai
        }.join
        # A group that is all zeros contributes nothing, not even its unit.
        if four_digit_num.length > 0
          four_digit_num + KANJI_NUM_UNITS[keta - keta_i]
        else
          ""
        end
      }.join
    else
      match
    end
  end
end

#convert_main(io) ⇒ `Object`

変換処理本体

@text_type: 渡されるテキストの種類。

subtitle, introduction, body, postscript, textfile, chapter, story

# File 'lib/converterbase.rb', line 1309

# Main conversion routine: reads io, converts it according to @text_type and
# returns a StringIO holding the result.
#
# @text_type is one of: subtitle, introduction, body, postscript, textfile, chapter, story.
#
# @param io [StringIO] source stream
# @return [StringIO] @write_fp, whose string is the converted text
def convert_main(io)
  @write_fp = StringIO.new
  # Introductions / postscripts that are configured to be erased produce empty output.
  case @text_type
  when "introduction"
    return @write_fp if @setting.enable_erase_introduction
  when "postscript"
    return @write_fp if @setting.enable_erase_postscript
  end
  title_and_author = nil
  if @text_type == "textfile"
    # Skip the title and author lines (they are re-attached unconverted at the end)
    title_and_author = io.gets + io.gets
    data = io.read
  else
    data = io.read
  end
  reset_member_values
  convert_for_all_data(data)
  progressbar = nil
  if @text_type == "textfile"
    # The exact number of lines is only known after
    # convert_for_all_data -> replace_narou_tag has expanded the line breaks
    progressbar = ProgressBar.new(data.count("\n") + 1)
    progressbar.output(0)
  end
  @read_fp = StringIO.new(data)
  if @text_type == "subtitle"
    # Subtitles skip the per-line processing.
    @write_fp.write(data)
  else
    @read_fp.each_with_index do |line, i|
      progressbar.output(i) if progressbar
      @request_skip_output_line = false
      zenkaku_rstrip(line)
      # A previous line asked for a blank line after it: emit one unless this line is already blank.
      if @request_insert_blank_next_line
        outputs unless blank_line?(line)
        @request_insert_blank_next_line = false
        @before_line = ""
      end
      process_author_comment(line) if @text_type == "textfile"
      insert_blank_before_line_and_behind_to_special_chapter(line)
      insert_blank_line_to_border_symbol(line)

      outputs(line)
      # Delayed output is written after the current line and becomes the "previous line".
      unless @delay_outputs_buffer.empty?
        @write_fp.write(@delay_outputs_buffer)
        @before_line = @delay_outputs_buffer
        @delay_outputs_buffer = ""
      else
        @before_line = line
      end
    end
    author_comment_force_close if @text_type == "textfile"
  end

  @write_fp.rewind
  # data now refers to the output buffer; the steps below edit it in place.
  data = @write_fp.string
  if @text_type == "textfile"
    if @setting.enable_author_comments
      erase_introduction(data) if @setting.enable_erase_introduction
      erase_postscript(data) if @setting.enable_erase_postscript
    end
    if @setting.enable_enchant_midashi
      enchant_midashi(data)
    end
  end
  rebuild_illust(data)
  rebuild_url(data)
  rebuild_english_sentences(data)
  rebuild_hankaku_num_and_comma(data)
  rebuild_kome_to_gaiji(data)
  rebuild_force_indent_special_chapter(data)
  if @text_type == "body" || @text_type == "textfile"
    half_indent_bracket(data)
    auto_indent(data)
  end
  # Ruby processing is done at this point because the rebuilt text
  # may itself receive ruby annotations
  narou_ruby(data) if @setting.enable_ruby
  # Ellipsis conversion comes after ruby processing, in case ・・・ is used as emphasis dots in ruby
  convert_horizontal_ellipsis(data)
  # Convert guillemets that were not consumed as ruby into double angle brackets (external characters)
  convert_double_angle_quotation_to_gaiji(data)
  delete_dust_char(data)
  if title_and_author
    data.replace(title_and_author + data)
  end
  data.rstrip!
  @write_fp
ensure
  # Always clear the progress bar, even when conversion raised.
  if @text_type == "textfile" && progressbar
    progressbar.clear
  end
end

#convert_novel_rule(data) ⇒ `Object`

小説のルールに沿うように変換

# File 'lib/converterbase.rb', line 472

# Normalises text to common novel typesetting rules, in place.
#
# * removes a full stop directly before a closing bracket
# * pads runs of … and ‥ to an even length ("―" is deliberately left alone,
#   since it is also used as a plain symbol)
# * fixes two frequent typos: a full-width space after "。" and "。" after ？/！
#
# @param data [String] text to modify in place
# @return [String, nil] result of the last gsub!
def convert_novel_rule(data)
  data.gsub!(/。([」』）])/) { $1 }
  ["…", "‥"].each do |leader|
    data.gsub!(/#{leader}+/) { |run| leader * (run.length + run.length % 2) }
  end
  data.gsub!("。　", "。")
  data.gsub!(/([？！])。/) { $1 }
end

#convert_numbers(data) ⇒ `Object`

数字の変換

# File 'lib/converterbase.rb', line 92

# Converts numbers in data, in place, and returns data.
#
# A decimal point between digits becomes "・". Digits are then turned into
# kanji numerals when enable_convert_num_to_kanji is set and the text is not a
# subtitle, chapter or story; otherwise half-width digits are widened.
#
# @param data [String] text to modify in place
# @return [String] data
def convert_numbers(data)
  # decimal point -> ・
  data.gsub!(/([\d０-９#{KANJI_NUM}]+?)[\.．]([\d０-９#{KANJI_NUM}]+?)/, "\\1・\\2")
  use_kanji = @setting.enable_convert_num_to_kanji &&
              !(@text_type == "subtitle" || @text_type == "chapter" || @text_type == "story")
  use_kanji ? num_to_kanji(data) : hankaku_num_to_zenkaku_num(data)
  data
end

#convert_page_break(data) ⇒ `Object`

一定以上の連続する空行を改ページに変換

# File 'lib/converterbase.rb', line 1108

# Replaces a run of blank lines at least to_page_break_threshold long with a page-break annotation, in place.
#
# The generated annotation is "［＃改頁］", distinct from "改ページ", so that
# automatically generated breaks do not interfere with heading assignment.
#
# @param data [String] text to modify in place
# @return [String, nil] result of gsub!, or nil when the feature is disabled
def convert_page_break(data)
  return unless @setting.enable_convert_page_break
  min_blank_lines = @setting.to_page_break_threshold
  data.gsub!(/(^\n){#{min_blank_lines},}/, "［＃改頁］\n")
end

#convert_rome_numeric(data) ⇒ `Object`

ローマ数字っぽいアルファベットをローマ数字に変換

※alphabet_to_zenkaku の前に実行する必要あり

# File 'lib/converterbase.rb', line 322

# Replaces ASCII spellings of Roman numerals (II, iv, ...) with the dedicated
# Roman-numeral characters, in place.
#
# Must run before alphabet_to_zenkaku. A spelling is replaced only when it is
# not adjacent to another ASCII letter. Look-arounds are used for that check so
# that the neighbouring characters are not consumed: numerals at the very
# start or end of data, and consecutive numerals separated by a single
# character, are now converted too (the previous pattern required and consumed
# one character on each side).
#
# @param data [String] text to modify in place
def convert_rome_numeric(data)
  ROME_NUM_ALPHABET.each_with_index do |rome, i|
    data.gsub!(/(?<![a-zA-Z])#{rome}(?![a-zA-Z])/, ROME_NUM[i])
  end
end

#convert_special_characters(data) ⇒ `Object`

特定の表現・記号を変換していく

# File 'lib/converterbase.rb', line 331

# Runs the symbol- and expression-level conversions on data, in place, in a fixed order.
#
# @param data [String] text to modify in place
def convert_special_characters(data)
  stash_kome(data)
  convert_double_angle_quotation_to_gaiji(data)   # guillemets present from the start are not ruby targets, so make them external-character annotations
  symbols_to_zenkaku(data)
  convert_tatechuyoko(data)
  convert_novel_rule(data)
  convert_arrow(data)
  convert_head_half_spaces(data)
end

#convert_tatechuyoko(data) ⇒ `Object`

縦中横にすべき表現を変換

# File 'lib/converterbase.rb', line 375

# Marks short runs of exclamation / question marks as tate-chu-yoko, in place.
#
# First pass: runs made only of ！. Second pass: remaining runs of ！ and ？.
#
# @param data [String] text to modify in place
# @return [String, nil] result of the second gsub!
def convert_tatechuyoko(data)
  # Tate-chu-yoko for exclamation and question marks.
  # AozoraEPUB3's own tate-chu-yoko setting makes explicit annotations unnecessary in body text,
  # but it is not applied automatically inside headings, so the annotation is written explicitly.
  # "!" and "?" are expected to have been converted to full-width beforehand.
  data.gsub!(/！+/) do |match|
    # Runs adjacent to ？ are left for the second pass.
    if "#{$`[-1]}#{$'[0]}".include?("？")
      next match
    end
    len = match.length
    if len == 3
      tcy("!!!")
    elsif len >= 4
      # For 4 or more, make the count even (rounding odd counts up by one) and annotate in pairs
      len += 1 if len.odd?
      tcy("!!") * (len / 2)
    else
      match
    end
  end
  data.gsub!(/[！？]+/) do |match|
    case match.length
    when 2
      tcy(match.tr("！？", "!?"))
    when 3
      # Only these patterns are allowed as tate-chu-yoko, for appearance reasons
      if %w(！！？ ？！！).find { |v| v == match }
        tcy(match.tr("！？", "!?"))
      else
        match
      end
    else
      match
    end
  end
end

#copy_dash_images_to_local_setting_dir ⇒ `Object`

# File 'lib/converterbase.rb', line 1450

# Copies each dash image from the preset directory into the local setting
# directory, skipping files that already exist there.
def copy_dash_images_to_local_setting_dir
  DASH_FILES.each do |name|
    destination = File.join(Narou.get_local_setting_dir, name)
    next if File.exist?(destination)
    FileUtils.copy(File.join(Narou.get_preset_dir, name), destination)
  end
end

#dash_image_relative_paths(base_dir, output_text_dir) ⇒ `Object`

# File 'lib/converterbase.rb', line 1443

# Returns the path of every dash image under base_dir, relative to output_text_dir.
#
# @param base_dir [String] directory containing the dash images
# @param output_text_dir [String] directory the paths are made relative to
# @return [Array<String>] relative paths, in DASH_FILES order
# @raise [ArgumentError] when Pathname cannot relate the two locations
def dash_image_relative_paths(base_dir, output_text_dir)
  DASH_FILES.map { |name|
    image = Pathname(File.join(base_dir, name))
    image.relative_path_from(Pathname(output_text_dir)).to_s
  }
end

#delay_outputs(data = "") ⇒ `Object`

# File 'lib/converterbase.rb', line 76

# Appends data plus a newline to @delay_outputs_buffer, unless output of the
# current line has been asked to be skipped.
#
# @param data [String] text to queue
# @return [String, nil] the buffer, or nil when skipped
def delay_outputs(data = "")
  return if @request_skip_output_line
  @delay_outputs_buffer << data + "\n"
end

#delete_dust_char(data) ⇒ `Object`

表示上化けてしまうゴミ削除

# File 'lib/converterbase.rb', line 1119

# Removes junk characters that render as garbage, in place.
#
# NOTE(review): both string literals hold non-printing characters; which code
# points they are cannot be told from the rendered source — confirm in the
# original file before editing these lines.
#
# @param data [String] text to modify in place
# @return [String, nil] result of the last gsub!
def delete_dust_char(data)
  data.gsub!("︎", "")
  data.gsub!("︎", "")
end

#double_dash_to_image(text, output_text_dir) ⇒ `Object`

# File 'lib/converterbase.rb', line 1417

# Replaces runs of two or more "―" with image annotations.
#
# Each pair becomes the double-dash image and a leftover single dash becomes
# the single-dash image. Returns text untouched when
# enable_double_dash_to_image is off.
#
# @param text [String] source text (not modified)
# @param output_text_dir [String] directory the image paths are made relative to
# @return [String] converted text
def double_dash_to_image(text, output_text_dir)
  return text unless @setting.enable_double_dash_to_image
  dash_paths =
    begin
      # AozoraEpub3 fails on anything but relative paths, so relative paths are used
      dash_image_relative_paths(Narou.get_preset_dir, output_text_dir)
    rescue ArgumentError => e
      raise unless e.message =~ /^different prefix/
      # On Windows a relative path cannot be computed when the script and the
      # novel folder are on different drives. The images are copied into the
      # .narou directory so a relative path on the same drive can be taken.
      copy_dash_images_to_local_setting_dir
      dash_image_relative_paths(Narou.get_local_setting_dir, output_text_dir)
    end
  single_image, double_image = dash_paths.map { |path| "※［＃（#{path}）］" }
  text.gsub(/―{2,}/) do |dashes|
    pairs, leftover = dashes.length.divmod(2)
    double_image * pairs + single_image * leftover
  end
end

#enchant_midashi(data) ⇒ `Object`

［＃改ページ］直後の行を見出しに設定する

# File 'lib/converterbase.rb', line 1043

# Turns the line directly after each "［＃改ページ］" into a heading, in place.
#
# When an introduction block follows the page break, the heading is moved in
# front of the introduction instead.
#
# Fix: the nested calc_cr_count no longer raises NoMethodError when the text
# after the heading is empty or consists only of newlines (e.g. the heading is
# the last line of data).
#
# @param data [String] text to modify in place
# @return [String, nil] result of the last gsub!
def enchant_midashi(data)
  # NOTE: these nested defs (re)define #midashi and #calc_cr_count as regular
  # methods every time enchant_midashi runs; kept as in the existing code.

  # Builds the heading annotation for str and records the title on the inspector.
  def midashi(str)
    midashi_title = str.gsub("［＃半字下げ］", "").gsub(/^[　\s]+/, "").gsub(/[　\s]+$/, "")
    @inspector.subtitle = midashi_title
    "［＃３字下げ］［＃中見出し］#{midashi_title}［＃中見出し終わり］"
  end

  # Counts leading newlines of str, capped at 2.
  def calc_cr_count(str)
    # index returns nil when str has no non-newline character; use the full length then.
    head_cr_count = str.index(/[^\n]/) || str.length
    head_cr_count > 2 ? 2 : head_cr_count
  end

  # Actually attach the headings
  data.gsub!(/［＃改ページ］\n(.+?)\n/) do |match|
    m1 = $1
    rest = $'
    # When an introduction follows, postpone; the heading is attached by the next pass
    if m1 =~ /#{AUTHOR_COMMENT_CHUKI[:introduction][:open]}/
      match
    else
      # Add blank lines when the line after the heading is not blank
      add_tail = "\n" * (2 - calc_cr_count(rest))
      # Two blank lines are put between the heading and the body
      "［＃改ページ］\n\n#{midashi(m1)}\n#{add_tail}"
    end
  end
  # With an introduction present, swap "introduction -> heading" into "heading -> introduction"
  data.gsub!(/(［＃改ページ］\n)(#{AUTHOR_COMMENT_CHUKI[:introduction][:open]}.+?#{AUTHOR_COMMENT_CHUKI[:introduction][:close]}\n)(.+?\n)/m) do
    m1, m2, m3 = $1, $2, $3
    add_tail = $' =~ /\A$/ ? "" : "\n"
    "#{m1 + midashi(m3) + m2}#{add_tail}"
  end
end

#erase_comments_block(data) ⇒ `Object`

コメントブロックを削除する

# File 'lib/converterbase.rb', line 540

# Deletes comment blocks (text enclosed by lines of 50 or more "-") from data, in place.
#
# Only applied when @text_type is "textfile".
#
# @param data [String] text to modify in place
# @return [String] data
def erase_comments_block(data)
  data.gsub!(/^-{50,}\n.*?^-{50,}\n/m, "") if @text_type == "textfile"
  data
end

#erase_introduction(data) ⇒ `Object`

前書きを削除する

# File 'lib/converterbase.rb', line 1080

# Removes every introduction block that directly follows a "［＃改ページ］", in place.
#
# The page-break annotation itself is kept. When anything was removed, the
# count is reported through @inspector.info.
#
# @param data [String] text to modify in place
def erase_introduction(data)
  removed = 0
  data.gsub!(/(［＃改ページ］)\n#{AUTHOR_COMMENT_CHUKI[:introduction][:open]}.+?#{AUTHOR_COMMENT_CHUKI[:introduction][:close]}/m) do
    page_break = $1
    removed += 1
    page_break
  end
  @inspector.info("前書きをすべて削除しました。削除した数は#{removed}個です。") if removed > 0
end

#erase_postscript(data) ⇒ `Object`

後書きを削除する

# File 'lib/converterbase.rb', line 1094

# Removes every postscript block that is followed by "［＃改ページ］" or the end of data, in place.
#
# The trailing page-break annotation (if any) is kept. When anything was
# removed, the count is reported through @inspector.info.
#
# @param data [String] text to modify in place
def erase_postscript(data)
  removed = 0
  data.gsub!(/#{AUTHOR_COMMENT_CHUKI[:postscript][:open]}.+?#{AUTHOR_COMMENT_CHUKI[:postscript][:close]}\n(［＃改ページ］|\z)/m) do
    terminator = $1
    removed += 1
    terminator
  end
  @inspector.info("後書きをすべて削除しました。削除した数は#{removed}個です。") if removed > 0
end

#exception_reconvert_kanji_to_num(data) ⇒ `Object`

アラビア数字を使うべきところはアラビア数字に戻す

# File 'lib/converterbase.rb', line 241

# Turns kanji digits back into full-width Arabic digits where Arabic digits are expected, in place.
#
# Applies to digit runs directly after a full-width letter (patterns such as
# ｖｅｒ１・０１ are accepted) and to digit runs directly before a full-width
# letter or one of the RECONVERT_KANJI_TO_NUM_PATTERN_UNIT symbols.
# Skipped unless enable_convert_num_to_kanji is set.
#
# @param data [String] text to modify in place
def exception_reconvert_kanji_to_num(data)
  if @setting.enable_convert_num_to_kanji
    to_arabic = ->(digits) { digits.tr(KANJI_NUM, "０-９") }
    data.gsub!(/([Ａ-Ｚａ-ｚ])([#{KANJI_NUM}・～]+)/) { $1 + to_arabic.call($2) }
    data.gsub!(/([#{KANJI_NUM}・～]+)([Ａ-Ｚａ-ｚ#{RECONVERT_KANJI_TO_NUM_PATTERN_UNIT}])/) { to_arabic.call($1) + $2 }
  end
end

#find_introduction? ⇒ `Boolean`

前書きの検出

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 741

# Looks ahead in @read_fp for an introduction splitter line.
#
# Reading stops at the next page break. The stream position is restored
# afterwards, so the caller's reading is unaffected.
#
# @return [Boolean] true when a line matching AUTHOR_INTRODUCTION_SPLITTER was found
def find_introduction?
  saved_pos = @read_fp.pos
  found = false
  @read_fp.each do |line|
    break if page_break?(line)
    next unless line =~ AUTHOR_INTRODUCTION_SPLITTER
    found = true
    break
  end
  @read_fp.pos = saved_pos
  found
end

#force_indent_special_chapter(data) ⇒ `Object`

章見出しっぽい文字列を字下げする

# File 'lib/converterbase.rb', line 618

# Replaces lines that look like chapter headings (a 1-3 digit number, optionally
# wrapped in dashes or angle brackets) with a placeholder, in place.
#
# The formatted heading is stored in @force_indent_special_chapter_list and the
# line becomes "［＃章見出しっぽい文＝NNNNNNNNNN］". Only applied to body and
# textfile text.
#
# @param data [String] text to modify in place
def force_indent_special_chapter(data)
  return unless @text_type == "body" || @text_type == "textfile"
  # Counter shared through a class variable; it provides the placeholder index.
  @@count_of_rebuild_container ||= 0
  data.gsub!(/^[ 　\t]*([－―<＜〈-]*)([0-9０-９#{KANJI_NUM}]{1,3})([－―>＞〉-]*)$/) do
    top, chapter, bottom = $1, $2, $3
    if top != "" && "―－-".include?(top)   # include? is true for an empty string (""), so the check is required
      top = "― "
      bottom = " ―"
    end
    str = "　　　［＃ゴシック体］#{top}"
    str += hankaku_num_to_zenkaku_num(chapter.tr("０-９", "0-9"))
    str += "#{bottom}［＃ゴシック体終わり］"
    # Blank lines should surround the heading, but that is done in the per-line processing loop
    symbols_to_zenkaku(str)
    index = @@count_of_rebuild_container += 1
    # The key is passed through convert_numbers, the same conversion the placeholder text undergoes later.
    @force_indent_special_chapter_list[convert_numbers(index.to_s.rjust(10,"0"))] = str
    "［＃章見出しっぽい文＝#{index.to_s.rjust(10,"0")}］"
  end
end

#half_indent_bracket(data) ⇒ `Object`

行頭かぎ括弧(等)に二分アキを追加する

「や（などの前にカスタム注記（［＃二分アキ］）を追加し、半文字分字下げする(二分アキ)。 kindle paperwhite で鍵括弧のインデントがおかしいことへの対応

# File 'lib/converterbase.rb', line 582

# Puts a half-width indent annotation in front of line-leading opening brackets, in place.
#
# Leading blanks before the bracket are dropped. With
# enable_half_indent_bracket set, "［＃二分アキ］" is inserted before the
# bracket; otherwise only the bracket is kept.
#
# @param data [String] text to modify in place
# @return [String, nil] result of gsub!
def half_indent_bracket(data)
  data.gsub!(HALF_INDENT_TARGET) do
    opener = $1
    @setting.enable_half_indent_bracket ? "［＃二分アキ］#{opener}" : opener
  end
end

#hankaku_num_to_zenkaku_num(data) ⇒ `Object`

半角アラビア数字の全角化

1桁、3桁以上：全角化、2桁：縦中横化

# File 'lib/converterbase.rb', line 560

# Converts half-width Arabic digit runs in data, in place, and returns data.
#
# Two-digit runs become tate-chu-yoko; so does a three-digit run at the very
# start of a subtitle. All other runs are converted to full-width digits.
#
# @param data [String] text to modify in place
# @return [String] data
def hankaku_num_to_zenkaku_num(data)
  data.gsub!(/\d+/) do |num|
    upright = num.length == 2 ||
              (num.length == 3 && @text_type == "subtitle" && $`.empty?)
    upright ? tcy(num) : num.tr("0-9", "０-９")
  end
  data
end

#hankakukana_to_zenkakukana(data) ⇒ `Object`

半角カナと｢｣｡､･等を全角に変換



344
345
346

# File 'lib/converterbase.rb', line 344

# Converts half-width kana (and related half-width punctuation) in data to full-width, in place.
#
# Conversion is delegated to NKF; any U+2014 in NKF's output is then replaced with "―".
#
# @param data [String] text to modify in place
# @return [String] data
def hankakukana_to_zenkakukana(data)
  widened = NKF.nkf("-wWX", data)
  data.replace(widened.tr("\u2014", "―"))
end

#inclusion_author_comment_block?(line) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 755

# Decides whether line opens an author-comment block and records which kind.
#
# A page break followed (before the next page break) by an introduction
# splitter opens an :introduction block; a postscript splitter line opens a
# :postscript block. The kind is stored in @in_author_comment_block.
#
# @param line [String] current line
# @return [Boolean] true when a block starts at this line
def inclusion_author_comment_block?(line)
  if page_break?(line)
    return false unless find_introduction?
    @in_author_comment_block = :introduction
    true
  elsif line =~ AUTHOR_POSTSCRIPT_SPLITTER
    @in_author_comment_block = :postscript
    true
  else
    false
  end
end

#insert_blank_before_line_and_behind_to_special_chapter(line) ⇒ `Object`

# File 'lib/converterbase.rb', line 644

# Ensures a chapter-heading placeholder line is surrounded by blank lines.
#
# For a line containing "［＃章見出しっぽい文＝", a newline is prepended unless
# the previous line was blank, and a blank line is requested after it.
#
# @param line [String] current line, modified in place
# @return [String] line
def insert_blank_before_line_and_behind_to_special_chapter(line)
  prefix = ""
  if line =~ /［＃章見出しっぽい文＝/
    prefix = "\n" unless blank_line?(@before_line)
    @request_insert_blank_next_line = true
  end
  line.sub!(/\A/, prefix)
end

#insert_blank_line_to_border_symbol(line) ⇒ `Object`

■などの区切りの前後には空行が必ず存在するようにする

# File 'lib/converterbase.rb', line 676

# When +line+ is a border-symbol line (per #border_symbol?), prepends a
# newline to it (unless @before_line is already blank), sets
# @request_insert_blank_next_line, and re-indents the line with
# #jisage(line, 4).
#
# @param line [String] current line (mutated)
# @return [String] +line+ (sub! on \A always matches)
def insert_blank_line_to_border_symbol(line)
  prefix = ""
  if border_symbol?(line)
    prefix = "\n" unless blank_line?(@before_line)
    @request_insert_blank_next_line = true
    jisage(line, 4)
  end
  line.sub!(/\A/, prefix)
end

#insert_char_separator(str) ⇒ `Object`

文字単位でzwsを挿入する

# File 'lib/converterbase.rb', line 1241

# Builds a copy of +str+ with WORD_SEPARATOR inserted after each ordinary
# character.
#
# Special handling, in the order the branches are tested:
# * "｜" and "［": the character is copied, then the rest of the ruby
#   (up to "》") or annotation ("＃...］") is copied verbatim when present
# * "<": a complete "<...>" tag is copied verbatim; a lone "<" counts as a symbol
# * opening brackets are copied with no separator
# * dash / ellipsis / exclamation / question / ※ count as symbols
#
# A symbol gets no separator after it; an ordinary character that directly
# follows a symbol gets one before it as well.
#
# @param str [String]
# @return [String] newly built string
def insert_char_separator(str)
  out = ""
  scanner = StringScanner.new(str)
  prev_was_symbol = false
  until scanner.eos?
    ch = scanner.getch
    is_symbol = false
    case ch
    when "｜", "［"
      out << ch
      # The two openers differ only in what may follow them.
      rest = ch == "｜" ? /.+?》/ : /^＃.+?］/
      if scanner.scan(rest)
        out << scanner.matched
      else
        prev_was_symbol = false
      end
      next
    when "<"
      if scanner.scan(/.+?>/)
        out << "<#{scanner.matched}"
        next
      end
      is_symbol = true
    when /[〔「『\(（【〈《≪〝]/
      out << ch
      prev_was_symbol = false
      next
    when /[―…!?！？※]/
      is_symbol = true
    end
    out << WORD_SEPARATOR if prev_was_symbol && !is_symbol
    out << ch
    out << WORD_SEPARATOR unless is_symbol
    prev_was_symbol = is_symbol
  end
  out
end

#insert_separate_space(data) ⇒ `Object`

特定の記号の直後は全角アキを挿入する

# File 'lib/converterbase.rb', line 294

# Inserts a full-width space after a run of ! / ? marks (half- or
# full-width) in +data+, in place.
#
# A half-width space directly after the run is first turned into a
# full-width space. No extra space is inserted when the following character
# is one of the closing brackets / quotes / symbols listed in the exclusion
# class below (which includes the full-width space itself).
#
# @param data [String] text to rewrite (mutated)
# @return [String, nil] whatever String#gsub! returns
def insert_separate_space(data)
  data.gsub!(/([!?！？]+)([^!?！？])/) do
    marks = Regexp.last_match(1)
    follower = Regexp.last_match(2)
    follower = "　" if follower == " "
    needs_gap = follower =~ /[^」］｝\]\}』】〉》〕＞>≫)）"”’〟　☆★♪［―]/
    needs_gap ? "#{marks}　#{follower}" : "#{marks}#{follower}"
  end
end

#insert_separator_for_selection(str) ⇒ `Object`

Kindle端末で単語選択がしやすいように０幅スペースを挿入する

# File 'lib/converterbase.rb', line 1165

# Dispatches to #insert_word_separator or #insert_char_separator depending on
# the settings. +str+ is returned untouched when @device is unset or does not
# report kindle?, or when @text_type is neither "body" nor "textfile".
# The word-separator setting takes precedence over the char-separator one.
#
# @param str [String]
# @return [String]
def insert_separator_for_selection(str)
  on_kindle = @device && @device.kindle?
  return str unless on_kindle
  return str unless @text_type == "body" || @text_type == "textfile"
  return insert_word_separator(str) if @setting.enable_insert_word_separator
  return insert_char_separator(str) if @setting.enable_insert_char_separator
  str
end

#insert_word_separator(str) ⇒ `Object`

単語単位でzwsを挿入する

# File 'lib/converterbase.rb', line 1180

# Builds a copy of +str+ with WORD_SEPARATOR inserted after each "word".
#
# A word is the current character plus the run that the matching branch
# scans after it: digits, hiragana, katakana, alphabet (with half-width
# spaces), kanji, or a "｜...》" ruby group. Annotations ("［＃...］") and
# complete "<...>" tags are copied verbatim, opening brackets are copied
# with no separator, and any other character is treated as a symbol: it gets
# no separator after it, and the next word gets one before it.
#
# When @text_type is "textfile", the first two newline-terminated lines are
# copied through without separators.
#
# @param str [String]
# @return [String] newly built string
def insert_word_separator(str)
  buffer = ""
  ss = StringScanner.new(str)
  before_symbol = false

  if @text_type == "textfile"
    # scan returns nil when the text does not start with two
    # newline-terminated lines; appending nil would raise TypeError.
    header = ss.scan(/(.+\n){2}/)
    buffer << header if header
  end

  while char = ss.getch
    symbol = false
    case char
    when "｜"
      ss.scan(/.+?》/)
    when "［"
      buffer << char
      if ss.scan(/^＃.+?］/)
        buffer << "#{ss.matched}"
      else
        before_symbol = false
      end
      next
    when "<"
      if ss.scan(/.+?>/)
        buffer << "<#{ss.matched}"
        next
      end
      symbol = true
    when /[\d０-９]/
      ss.scan(/[\d０-９]+/)
    when /[ぁ-んゝゞ]/
      ss.scan(/[ぁ-んゝゞー]+/)
    when /[ァ-ヶ]/
      ss.scan(/[ァ-ヶー・]+/)
    when /[Ａ-Ｚａ-ｚA-Za-z]/
      ss.scan(/[Ａ-Ｚａ-ｚA-Za-z ]+/)
    when /[一-龥朗-鶴]/
      ss.scan(/[一-龥朗-鶴]+/)
    when /[〔「『\(（【〈《≪〝]/
      buffer << char
      before_symbol = false
      next
    else
      symbol = true
    end
    if before_symbol && !symbol
      buffer << WORD_SEPARATOR
    end
    buffer << char
    unless symbol
      # matched? is false when the branch's scan found nothing after char.
      buffer << ss.matched if ss.matched?
      buffer << WORD_SEPARATOR
    end
    before_symbol = symbol
  end
  buffer
end

#is_sesame?(str, ten, last_char) ⇒ `Boolean`

Returns:

(Boolean)



884
885
886

# File 'lib/converterbase.rb', line 884

# Truthy when +ten+ consists only of "・" / "、" characters and either +str+
# contains "｜" or +last_char+ passes #object_of_ruby?.
#
# @param str [String] text preceding the bracketed part
# @param ten [String] bracketed text being tested
# @param last_char [String] last character of +str+
# @return [Object] truthy/falsy value (not normalised to true/false)
def is_sesame?(str, ten, last_char)
  dots_only = ten =~ /^[・、]+$/
  return dots_only unless dots_only
  str.include?("｜") || object_of_ruby?(last_char)
end

#jisage(line, num) ⇒ `Object`

行頭空白を考慮した字下げ



658
659
660

# File 'lib/converterbase.rb', line 658

# Replaces the leading run of half-width spaces, full-width spaces and tabs
# on +line+ (possibly empty) with +num+ full-width spaces, in place.
#
# @param line [String] line to re-indent (mutated)
# @param num [Integer] number of full-width spaces to put at the head
# @return [String, nil] whatever String#sub! returns
def jisage(line, num)
  indent = "　" * num
  line.sub!(/^[ 　\t]*/, indent)
end

#join_inner_bracket(str) ⇒ `Object`

改行を連結した文章を作る

改行がひとつもなかった場合は nil を返す

# File 'lib/converterbase.rb', line 805

# Joins the lines of +str+ into a single line.
#
# A "。" is inserted after a "…" or "―" that sits directly before a newline,
# then the text is split on newlines, leading full-width spaces are removed
# from each piece, and the pieces are concatenated. +str+ is not modified.
#
# @param str [String]
# @return [String, nil] joined text, or nil when +str+ has no newline
def join_inner_bracket(str)
  return nil if str.count("\n") == 0
  with_periods = str.gsub(/([…―])\n/, "\\1。\n")
  pieces = with_periods.split("\n").map { |piece| piece.sub(/^　+/, "") }
  pieces.join
end

#kanji_num_to_integer(string) ⇒ `Object`

# File 'lib/converterbase.rb', line 189

# Sums the value of every kanji-number group found in +string+.
#
# Each group is a run of KANJI_NUM digits / 十百千 followed by zero or more
# large units (万億兆京). The run is evaluated by __calc_kanji_num_with_unit
# and then has as many "0"s appended as KANJI_NUM_UNITS_DIGIT gives for each
# unit character before being converted to an Integer.
#
# @param string [String]
# @return [Integer] 0 when nothing matches
def kanji_num_to_integer(string)
  sum = 0
  string.scan(/([#{KANJI_NUM}十百千]+)([万億兆京]*)/) do |digits, units|
    zeros = units.each_char.map { |unit| "0" * KANJI_NUM_UNITS_DIGIT[unit] }.join
    sum += "#{__calc_kanji_num_with_unit(digits)}#{zeros}".to_i
  end
  sum
end

#leave_author_comment_block?(line) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/converterbase.rb', line 769

# Tells whether +line+ terminates the author-comment block currently recorded
# in @in_author_comment_block.
#
# * :introduction ends on a line matching AUTHOR_INTRODUCTION_SPLITTER
# * :postscript ends on a page-break line
#
# @param line [String]
# @return [Boolean] false when no block is open
def leave_author_comment_block?(line)
  case @in_author_comment_block
  when :introduction
    line =~ AUTHOR_INTRODUCTION_SPLITTER ? true : false
  when :postscript
    page_break?(line) ? true : false
  else
    false
  end
end

#midashi(str) ⇒ `Object`

# File 'lib/converterbase.rb', line 1044

# Builds the heading annotation for +str+.
#
# The half-indent annotation is removed from +str+, whitespace (including
# full-width spaces) is trimmed from the start and end of each line, the
# result is stored in @inspector.subtitle, and the title is wrapped in the
# three-character-indent and medium-heading annotations.
#
# @param str [String]
# @return [String] annotated heading
def midashi(str)
  title = str.gsub("［＃半字下げ］", "")
  title = title.gsub(/^[　\s]+/, "")
  title = title.gsub(/[　\s]+$/, "")
  @inspector.subtitle = title
  "［＃３字下げ］［＃中見出し］#{title}［＃中見出し終わり］"
end

#modify_kana_ni_to_kanji_ni(data) ⇒ `Object`

漢字の二じゃなくて間違えてカタカナのニを使ってるのを校正する

# File 'lib/converterbase.rb', line 1023

# When the enable_kana_ni_to_kanji_ni setting is on, replaces a katakana "ニ"
# with the kanji "二" wherever it has two non-KANA characters on each side,
# and reports every replacement through @inspector.info with up to ten
# characters of context before and after.
#
# @param data [String] text to rewrite (mutated)
# @return [String] the same +data+ object
def modify_kana_ni_to_kanji_ni(data)
  return data unless @setting.enable_kana_ni_to_kanji_ni
  data.gsub!(/([^#{KANA}]{2})ニ([^#{KANA}]{2})/) do
    before, after = $1, $2
    # Context shown in the report: at most the last 10 characters before the
    # match and the first 10 after it.
    leading = $`
    leading = leading[-10, 10] if leading.length > 10
    trailing = $'[0, 10]
    @inspector.info(<<-EOS % (leading + before + "ニ" + after + trailing))
カタカナのニを漢字の二に修正しました
≫≫≫ 該当箇所
...%s...
    EOS
    "#{before}二#{after}"
  end
  data
end

#narou_ruby(data) ⇒ `Object`

小説家になろうのルビ対策

# File 'lib/converterbase.rb', line 863

# Converts ruby notation in +data+ in place.
#
# Unless @text_type is "subtitle" or "chapter":
# * every "...≪...≫" occurrence is passed to #to_ruby
# * when @data_type is "text", every "...（kana）" occurrence (kana as
#   defined by AUTO_RUBY_CHARACTERS) is passed to #to_ruby as well
#
# Afterwards any "｜" still in the text is rewritten by #replace_tatesen, and
# the temporary ［＃ルビ用縦線］ markers are turned into "｜".
#
# @param data [String] text to rewrite (mutated)
# @return [String, nil] result of the final String#gsub!
def narou_ruby(data)
  unless @text_type == "subtitle" || @text_type == "chapter"
    # ruby written with ≪ ≫
    data.gsub!(/(.+?)≪([^≪]+?)≫/) do |whole|
      to_ruby(whole, $1, $2, ["≪", "≫"])
    end
    if @data_type == "text"
      # ruby written with full-width parentheses
      data.gsub!(/(.+?)（#{AUTO_RUBY_CHARACTERS}）/) do |whole|
        to_ruby(whole, $1, $2, ["（", "）"])
      end
    end
  end
  data.replace(replace_tatesen(data))
  data.gsub!("［＃ルビ用縦線］", "｜")
end

#num_to_kanji(data) ⇒ `Object`

アラビア数字を漢数字に

カンマ区切りの数字はアラビア数字のままにしておく。もともと漢数字なのは他の変換を受けないように退避させておく。

# File 'lib/converterbase.rb', line 110

# Converts Arabic numerals in +data+ to kanji numerals, in place.
#
# Existing kanji numerals are first stashed by #stash_kanji_num. Then each
# run of digits (half- or full-width) and commas is handled as follows:
# * no comma: converted to kanji digits
# * comma plus at least one half-width digit: full-width commas are
#   normalised to "," and the run is stashed via #stash_hankaku_num_and_comma
# * anything else: left unchanged
#
# @param data [String] text to rewrite (mutated)
# @return [String] the same +data+ object
def num_to_kanji(data)
  stash_kanji_num(data)
  data.gsub!(/[\d０-９,，]+/) do |run|
    if run !~ /[,，]/
      zenkaku_num_to_kanji(run.tr("0-9", KANJI_NUM))
    elsif run =~ /[\d]/
      stash_hankaku_num_and_comma(run.tr("，", ","))
    else
      run
    end
  end
  data
end

#object_of_ruby?(char) ⇒ `Boolean`

Returns:

(Boolean)



880
881
882

# File 'lib/converterbase.rb', line 880

# Tests +char+ against the CHARACTER_OF_RUBY character class.
#
# @param char [String, nil]
# @return [Integer, nil] match position, or nil when there is no match
def object_of_ruby?(char)
  ruby_target = /[#{CHARACTER_OF_RUBY}]/
  char =~ ruby_target
end

#outputs(data = "", force = false) ⇒ `Object`

# File 'lib/converterbase.rb', line 70

# Writes +data+ to @write_fp with puts, unless output is currently being
# skipped (@request_skip_output_line) and +force+ is not set.
#
# @param data [String] text to write
# @param force [Boolean] write even while output is being skipped
# @return [nil]
def outputs(data = "", force = false)
  return if @request_skip_output_line && !force
  @write_fp.puts(data)
end

#page_break?(line) ⇒ `Boolean`

改ページある？

Returns:

(Boolean)



691
692
693

# File 'lib/converterbase.rb', line 691

# Checks whether +line+ contains the page-break annotation.
#
# @param line [String]
# @return [Integer, nil] position of the annotation, or nil when absent
def page_break?(line)
  annotation = /［＃改ページ］/
  line =~ annotation
end

#process_author_comment(line) ⇒ `Object`

# File 'lib/converterbase.rb', line 710

# Tracks author-comment blocks (introduction / postscript) line by line and
# emits the matching open/close annotations from AUTHOR_COMMENT_CHUKI.
# Does nothing unless the enable_author_comments setting is on.
#
# Inside a block: when +line+ closes it, the close annotation is written.
# For an introduction the closing line itself is cleared and skipped; for a
# postscript the same line is re-processed, since it may open an introduction.
#
# Outside a block: when +line+ opens one, the open annotation is queued with
# delay_outputs. For a postscript the opening line is cleared and skipped.
#
# @param line [String] current line (may be cleared in place)
def process_author_comment(line)
  return unless @setting.enable_author_comments
  if @in_author_comment_block
    return unless leave_author_comment_block?(line)
    outputs(AUTHOR_COMMENT_CHUKI[@in_author_comment_block][:close])
    case @in_author_comment_block
    when :introduction
      @request_skip_output_line = true
      line.clear
      @in_author_comment_block = nil
    when :postscript
      @in_author_comment_block = nil
      # The postscript was closed by a page break, which is also where an
      # introduction can start, so run detection again on the same line.
      process_author_comment(line)
    end
  elsif inclusion_author_comment_block?(line)
    # Using outputs here would put the annotation before the page break, so
    # delay_outputs is used to emit it after the line itself.
    delay_outputs(AUTHOR_COMMENT_CHUKI[@in_author_comment_block][:open])
    if @in_author_comment_block == :postscript
      @request_skip_output_line = true
      line.clear
    end
  end
end

#rebuild_english_sentences(data) ⇒ `Object`

英文を再構成する

# File 'lib/converterbase.rb', line 518

# Puts the stashed English sentences back into +data+, in place: for each
# entry of @english_sentences, the first occurrence of its placeholder
# (index passed through #convert_numbers) is replaced by the sentence.
#
# @param data [String] text to rewrite (mutated)
# @return [Array<String>] @english_sentences
def rebuild_english_sentences(data)
  @english_sentences.each_with_index do |sentence, id|
    placeholder = "［＃英文＝#{convert_numbers(id.to_s)}］"
    data.sub!(placeholder, sentence)
  end
end

#rebuild_force_indent_special_chapter(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 638

# Replaces every special-chapter placeholder in +data+ with the text stored
# under its key in @force_indent_special_chapter_list, in place.
#
# @param data [String] text to rewrite (mutated)
# @return [String, nil] whatever String#gsub! returns
def rebuild_force_indent_special_chapter(data)
  data.gsub!(/［＃章見出しっぽい文＝(.+?)］/) do
    key = Regexp.last_match(1)
    @force_indent_special_chapter_list[key]
  end
end

#rebuild_hankaku_num_and_comma(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 133

# Replaces every half-width-number placeholder in +data+ with the value stored
# in @num_and_comma_list under the placeholder's id (converted with to_i),
# in place.
#
# @param data [String] text to rewrite (mutated)
# @return [String, nil] whatever String#gsub! returns
def rebuild_hankaku_num_and_comma(data)
  data.gsub!(/［＃半角数字＝(.+?)］/) do
    id = Regexp.last_match(1).to_i
    @num_and_comma_list[id]
  end
end

#rebuild_illust(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 992

# Puts the stashed illustration annotations back into +data+, in place: for
# each entry of @illust_chuki_list, the first occurrence of its placeholder
# (index passed through #convert_numbers) is replaced by the annotation.
#
# @param data [String] text to rewrite (mutated)
# @return [Array<String>] @illust_chuki_list
def rebuild_illust(data)
  @illust_chuki_list.each_with_index do |chuki, id|
    placeholder = "［＃挿絵＝#{convert_numbers(id.to_s)}］"
    data.sub!(placeholder, chuki)
  end
end

#rebuild_kanji_num(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 149

# Replaces every kanji-number placeholder in +data+ with the text stored under
# its key in @kanji_num_list, in place.
#
# @param data [String] text to rewrite (mutated)
# @return [String, nil] whatever String#gsub! returns
def rebuild_kanji_num(data)
  data.gsub!(/［＃漢数字＝(.+?)］/) do
    key = Regexp.last_match(1)
    @kanji_num_list[key]
  end
end

#rebuild_kome_to_gaiji(data) ⇒ `Object`

※の外字注記化

stash_kome で2つにしておいた※を外字注記化する



452
453
454

# File 'lib/converterbase.rb', line 452

# Turns every doubled "※※" in +data+ into "※" followed by the gaiji
# annotation for the reference mark, in place.
#
# @param data [String] text to rewrite (mutated)
# @return [String, nil] whatever String#gsub! returns
def rebuild_kome_to_gaiji(data)
  data.gsub!("※※") { "※［＃米印、1-2-8］" }
end

#rebuild_url(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 973

# Puts the stashed URLs back into +data+ as anchor tags, in place: for each
# entry of @url_list, the first occurrence of its placeholder (index passed
# through #convert_numbers) is replaced by <a href="url">url</a>.
#
# @param data [String] text to rewrite (mutated)
# @return [Array<String>] @url_list
def rebuild_url(data)
  @url_list.each_with_index do |url, id|
    placeholder = "［＃ＵＲＬ＝#{convert_numbers(id.to_s)}］"
    anchor = "<a href=\"#{url}\">#{url}</a>"
    data.sub!(placeholder, anchor)
  end
end

#replace_by_replace_txt(text) ⇒ `Object`

replace.txt により単純置換

# File 'lib/converterbase.rb', line 1406

# Applies every (source, destination) pair from @setting.replace_pattern
# followed by Narou.global_replace_pattern to a copy of +text+, in order.
#
# @param text [String] input text (not modified)
# @return [String] copy of +text+ with all replacements applied
def replace_by_replace_txt(text)
  patterns = @setting.replace_pattern + Narou.global_replace_pattern
  patterns.each_with_object(text.dup) do |(src, dst), replaced|
    replaced.gsub!(src, dst)
  end
end

#replace_illust_tag(data) ⇒ `Object`

挿絵タグやimgタグ等を挿絵注釈に変換する。挿絵画像が存在しなければダウンロードして保存する。

# File 'lib/converterbase.rb', line 984

# Hands +data+ to @illustration.scanner and supplies the replacement for each
# annotation it yields: an empty string when the enable_illust setting is
# off, otherwise a numbered placeholder (followed by a newline) after the
# annotation has been appended to @illust_chuki_list.
#
# @param data [String]
# @return [Object] whatever @illustration.scanner returns
def replace_illust_tag(data)
  @illustration.scanner(data) do |chuki|
    if @setting.enable_illust
      @illust_chuki_list << chuki
      "［＃挿絵＝#{@illust_chuki_list.size - 1}］\n"
    else
      ""
    end
  end
end

#replace_narou_tag(data) ⇒ `Object`

小説家になろう専用タグを置換

# File 'lib/converterbase.rb', line 309

# Handles site-specific tags in +data+, in place: "【改ページ】" is removed and
# <KBR> / <PBR> (case-insensitive) become newlines.
#
# @param data [String] text to rewrite (mutated)
# @return [String, nil] result of the last gsub! (the <PBR> one)
def replace_narou_tag(data)
  data.gsub!("【改ページ】", "")
  [/<KBR>/i, /<PBR>/i].map { |tag| data.gsub!(tag, "\n") }.last
end

#replace_tatesen(str) ⇒ `Object`



903
904
905

# File 'lib/converterbase.rb', line 903

# Returns a copy of +str+ in which every "｜" is replaced by "※" followed by
# the vertical-line annotation.
#
# @param str [String]
# @return [String] new string
def replace_tatesen(str)
  str.gsub("｜") { "※［＃縦線］" }
end

#replace_url(data) ⇒ `Object`

URL っぽい文字列を一旦別のIDに置き換えてあとで復元することで、変換処理の影響を受けさせない

# File 'lib/converterbase.rb', line 966

# Replaces every http/https URL in +data+ with a numbered placeholder, in
# place, and appends the URL to @url_list; the number is the URL's index in
# that list.
#
# @param data [String] text to rewrite (mutated)
# @return [String, nil] whatever String#gsub! returns
def replace_url(data)
  url_pattern = URI.regexp(%w(http https))
  data.gsub!(url_pattern) do |url|
    @url_list << url
    "［＃ＵＲＬ＝#{@url_list.size - 1}］"
  end
end

#reset_member_values ⇒ `Object`

.convert が実行されるたびに呼ばれるメンバ変数リセット用メソッド。インスタンス作成時に一度だけ初期化したい場合は initialize で初期化する。

# File 'lib/converterbase.rb', line 54

# Resets the per-conversion instance state: output-control flags, the
# previous-line and delayed-output buffers, the stash lists/hashes used by
# the stash_* / rebuild_* methods, the author-comment block marker, and the
# current device (re-read from Narou.get_device).
def reset_member_values
  # flags
  @request_insert_blank_next_line = @request_skip_output_line = @in_comment_block = false
  # buffers
  @before_line, @delay_outputs_buffer = "", ""
  # stashes addressed by list position
  @english_sentences, @url_list, @illust_chuki_list = [], [], []
  # stashes addressed by key
  @kanji_num_list, @num_and_comma_list, @force_indent_special_chapter_list = {}, {}, {}
  @in_author_comment_block = nil
  @device = Narou.get_device
end

#rstrip_all_lines(data) ⇒ `Object`

すべての行の行末空白を削除



85
86
87

# File 'lib/converterbase.rb', line 85

# Returns a copy of +data+ with trailing half-width spaces, full-width spaces
# and tabs removed from the end of every line.
#
# @param data [String] input text (not modified)
# @return [String] new string
def rstrip_all_lines(data)
  trailing_blanks = /[ 　\t]+$/m
  data.gsub(trailing_blanks, "")
end

#ruby_youon_to_big(ruby) ⇒ `Object`

ルビの拗音(ぁ、ぃ等)を商業書籍のように大きくする

# File 'lib/converterbase.rb', line 947

# When the enable_ruby_youon_to_big setting is on, returns +ruby+ with small
# kana replaced by their full-size counterparts; otherwise returns +ruby+
# itself.
#
# @param ruby [String] ruby text
# @return [String]
def ruby_youon_to_big(ruby)
  return ruby unless @setting.enable_ruby_youon_to_big
  small = "ぁぃぅぇぉゃゅょゎっァィゥェォャュョヮッヵヶ"
  large = "あいうえおやゆよわつアイウエオヤユヨワツカケ"
  ruby.tr(small, large)
end

#sesame(str) ⇒ `Object`

# File 'lib/converterbase.rb', line 888

# Wraps part of +str+ in emphasis-dot annotations.
#
# When +str+ contains "｜", the first "｜" becomes the opening annotation.
# Otherwise the opening annotation is placed before the trailing run of
# CHARACTER_OF_RUBY characters / full-width spaces, after any full-width
# spaces that lead that run. The closing annotation is always appended.
#
# @param str [String]
# @return [String] new string
def sesame(str)
  marked =
    if str.include?("｜")
      str.sub("｜", "［＃傍点］")
    else
      str.sub(/([#{CHARACTER_OF_RUBY}　]+)$/) do
        target = $1
        if target =~ /^(　+)/
          # Keep leading full-width spaces outside the emphasised span.
          "#{$1}［＃傍点］#{target[$1.length..-1]}"
        else
          "［＃傍点］#{target}"
        end
      end
    end
  marked + "［＃傍点終わり］"
end

#stash_hankaku_num_and_comma(num) ⇒ `Object`

# File 'lib/converterbase.rb', line 126

# Stores +num+ in @num_and_comma_list under a fresh id and returns the
# placeholder that refers to it. Ids come from a class-level counter that is
# created on first use and incremented on every call.
#
# @param num [String] number text to stash
# @return [String] placeholder annotation containing the id
def stash_hankaku_num_and_comma(num)
  @@num_and_comma_list_counter ||= 0
  id = (@@num_and_comma_list_counter += 1)
  @num_and_comma_list[id] = num
  "［＃半角数字＝#{id}］"
end

#stash_kanji_num(data) ⇒ `Object`

# File 'lib/converterbase.rb', line 139

# Replaces runs of kanji numerals in +data+ with numbered placeholders, in
# place, remembering the original text in @kanji_num_list.
#
# A run is left alone when the character directly before or after it is a
# digit (half- or full-width). The list key is the match index passed through
# #convert_numbers, while the placeholder carries the raw index.
#
# @param data [String] text to rewrite (mutated)
# @return [String, nil] whatever String#gsub! returns
def stash_kanji_num(data)
  data.gsub!(/[#{KANJI_NUM}十百千万億兆京]+/).with_index do |kanji, idx|
    # One character on each side of the match (either may be missing).
    neighbours = "#{$`[-1]}#{$'[0]}"
    if neighbours =~ /[\d０-９]/
      kanji
    else
      @kanji_num_list[convert_numbers(idx.to_s)] = kanji
      "［＃漢数字＝#{idx}］"
    end
  end
end

#stash_kome(data) ⇒ `Object`

先に外字注記にしてしまうと border_symbol? 等で困るので、あとで外字注記化出来るようにする



435
436
437

# File 'lib/converterbase.rb', line 435

# Doubles every "※" in +data+, in place.
#
# @param data [String] text to rewrite (mutated)
# @return [String, nil] whatever String#gsub! returns
def stash_kome(data)
  data.gsub!("※") { "※※" }
end

#symbols_to_zenkaku(data) ⇒ `Object`

半角記号を全角に変換

# File 'lib/converterbase.rb', line 355

# Converts half-width symbols in +data+ to full-width ones, in place.
#
# 1. Text enclosed by characters from SINGLE_MINUTE_FAMILY or
#    DOUBLE_MINUTE_FAMILY is re-quoted as 〝...〟.
# 2. The remaining symbols are mapped one-to-one with tr!.
# 3. Backslashes become "￥".
#
# @param data [String] text to rewrite (mutated)
# @return [String] the same +data+ object
def symbols_to_zenkaku(data)
  data.gsub!(/[#{SINGLE_MINUTE_FAMILY}]([^"\n]+?)[#{SINGLE_MINUTE_FAMILY}]/, "〝\\1〟")
  # MEMO: hardly any font can display single minute quotes, so they are
  # rendered with the double form, the same as the double family below.
  data.gsub!(/[#{DOUBLE_MINUTE_FAMILY}]([^"\n]+?)[#{DOUBLE_MINUTE_FAMILY}]/, "〝\\1〟")
  # Both sets must have the same length: String#tr pads a shorter target set
  # with its last character, which used to turn "{" and "}" into "］".
  data.tr!("-=+/*《》'\"%$#&!?<>＜＞()|‐,._;:\[\]{}",
           "－＝＋／＊≪≫’〝％＄＃＆！？〈〉〈〉（）｜－，．＿；：［］｛｝")
  data.gsub!("\\", "￥")
  data
end

#tcy(str) ⇒ `Object`

縦中横注記取得



368
369
370

# File 'lib/converterbase.rb', line 368

# Wraps +str+ in the tate-chu-yoko (horizontal-in-vertical) open and close
# annotations.
#
# @param str [String]
# @return [String] annotated text
def tcy(str)
  open_tag = "［＃縦中横］"
  close_tag = "［＃縦中横終わり］"
  "#{open_tag}#{str}#{close_tag}"
end

#to_ruby(match, m1, m2, openclose_symbols) ⇒ `Object`

# File 'lib/converterbase.rb', line 907

# Decides how one "base + bracketed reading" occurrence is rewritten.
#
# @param match [String] the whole matched text (returned when no ruby applies)
# @param m1 [String] text before the opening bracket
# @param m2 [String] text between the brackets
# @param openclose_symbols [Array<String>] opening and closing bracket
# @return [String] replacement text
def to_ruby(match, m1, m2, openclose_symbols)
  last_char = m1[-1]
  opener, closer = openclose_symbols[0], openclose_symbols[1]
  if m2[0] == " " || m2 =~ / {2,}$/
    # A leading half-width space, or two or more trailing ones, disqualifies
    # the reading (a single trailing space is allowed).
    match
  elsif last_char == "｜"
    # A "｜" directly before the bracket suppresses ruby: drop the bar and
    # keep the bracketed text as is.
    "#{m1[0...-1]}#{opener}#{m2}#{closer}"
  elsif is_sesame?(m1, m2, last_char)
    sesame(m1)
  elsif m1.include?("｜")
    # Explicit ruby start: mark the last "｜" and emit the reading.
    "#{m1.sub(/｜([^｜]*)$/, "［＃ルビ用縦線］\\1")}《#{ruby_youon_to_big(m2)}》"
  elsif object_of_ruby?(last_char)
    if opener == "≪" && m2 !~ /^#{AUTO_RUBY_CHARACTERS}$/
      # Even for ≪ ≫ ruby, automatic ruby without "｜" only applies when the
      # reading consists of AUTO_RUBY_CHARACTERS.
      match
    else
      # Walk back over the ruby-target characters (half- and full-width
      # spaces are included in the run) and insert the ruby-start marker
      # before them, after any leading full-width spaces.
      based = m1.sub(/([#{CHARACTER_OF_RUBY} 　]+)$/) do
        target = $1
        if target =~ /^(　+)/
          "#{$1}［＃ルビ用縦線］#{target[$1.length..-1]}"
        else
          "［＃ルビ用縦線］#{target}"
        end
      end
      based + "《#{ruby_youon_to_big(m2)}》"
    end
  else
    match
  end
end

#zenkaku_num_to_hankaku_num(num) ⇒ `Object`

全角数字(漢数字含む)を半角アラビア数字に



550
551
552

# File 'lib/converterbase.rb', line 550

# Returns a copy of +num+ in which full-width digits and the KANJI_NUM digit
# characters are replaced by the corresponding ASCII digits.
#
# @param num [String]
# @return [String] new string
def zenkaku_num_to_hankaku_num(num)
  wide_and_kanji = "０-９#{KANJI_NUM}"
  num.tr(wide_and_kanji, "0-90-9")
end

#zenkaku_num_to_kanji(str) ⇒ `Object`

全角アラビア数字を漢数字に



158
159
160

# File 'lib/converterbase.rb', line 158

# Returns a copy of +str+ in which full-width digits are replaced by the
# corresponding KANJI_NUM characters.
#
# @param str [String]
# @return [String] new string
def zenkaku_num_to_kanji(str)
  wide_digits = "０-９"
  str.tr(wide_digits, KANJI_NUM)
end

#zenkaku_rstrip(line) ⇒ `Object`

全角版 String#rstrip!



959
960
961

# File 'lib/converterbase.rb', line 959

# Removes trailing whitespace, including full-width spaces, from the end of
# +line+, in place.
#
# @param line [String] text to trim (mutated)
# @return [String, nil] +line+, or nil when there was nothing to remove
def zenkaku_rstrip(line)
  # Anchored at \z, so at most one run can match.
  line.sub!(/[　\s]+\z/, "")
end

Class: ConverterBase

Constant Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(setting, inspector, illustration) ⇒ ConverterBase

Instance Attribute Details

#current_index ⇒ Object

#data_type ⇒ Object

#output_text_dir ⇒ Object

#subtitles ⇒ Object

#use_dakuten_font ⇒ Object (readonly)

Class Method Details

.rebuild_brackets(data, stack) ⇒ Object

Instance Method Details

#__calc_kanji_num_with_unit(string) ⇒ Object

#__calc_sum_unit(units) ⇒ Object

#after(io, text_type) ⇒ Object

#after_convert(io) ⇒ Object

#alphabet_to_zenkaku(data, force = false) ⇒ Object

#author_comment_force_close ⇒ Object

#auto_indent(data) ⇒ Object

#auto_join_in_brackets(data) ⇒ Object

#auto_join_line(data) ⇒ Object

#before(io, text_type) ⇒ Object

#before_convert(io) ⇒ Object

#blank_line?(line) ⇒ Boolean

#border_symbol?(line) ⇒ Boolean

#calc_cr_count(str) ⇒ Object

#comments_block?(line) ⇒ Boolean

#convert(text, text_type) ⇒ Object

#convert_arrow(data) ⇒ Object

#convert_dakuten_char_to_font(data) ⇒ Object

#convert_double_angle_quotation_to_gaiji(data) ⇒ Object

#convert_for_all_data(data) ⇒ Object

#convert_fraction_and_date(data) ⇒ Object

#convert_head_half_spaces(data) ⇒ Object

#convert_horizontal_ellipsis(data) ⇒ Object

#convert_kanji_num_with_unit(data, lower_digit_zero = 0) ⇒ Object

#convert_main(io) ⇒ Object

#convert_novel_rule(data) ⇒ Object

#convert_numbers(data) ⇒ Object

#convert_page_break(data) ⇒ Object

#convert_rome_numeric(data) ⇒ Object

#convert_special_characters(data) ⇒ Object

#convert_tatechuyoko(data) ⇒ Object

#copy_dash_images_to_local_setting_dir ⇒ Object

#dash_image_relative_paths(base_dir, output_text_dir) ⇒ Object

#delay_outputs(data = "") ⇒ Object

#delete_dust_char(data) ⇒ Object

#double_dash_to_image(text, output_text_dir) ⇒ Object

#enchant_midashi(data) ⇒ Object

#erase_comments_block(data) ⇒ Object

#erase_introduction(data) ⇒ Object

#erase_postscript(data) ⇒ Object

#exception_reconvert_kanji_to_num(data) ⇒ Object

#find_introduction? ⇒ Boolean

#force_indent_special_chapter(data) ⇒ Object

#half_indent_bracket(data) ⇒ Object

#hankaku_num_to_zenkaku_num(data) ⇒ Object

#hankakukana_to_zenkakukana(data) ⇒ Object

#inclusion_author_comment_block?(line) ⇒ Boolean

#insert_blank_before_line_and_behind_to_special_chapter(line) ⇒ Object

#insert_blank_line_to_border_symbol(line) ⇒ Object

#insert_char_separator(str) ⇒ Object

#insert_separate_space(data) ⇒ Object

#insert_separator_for_selection(str) ⇒ Object

#insert_word_separator(str) ⇒ Object

#is_sesame?(str, ten, last_char) ⇒ Boolean

#jisage(line, num) ⇒ Object

#join_inner_bracket(str) ⇒ Object

#kanji_num_to_integer(string) ⇒ Object

#leave_author_comment_block?(line) ⇒ Boolean

#midashi(str) ⇒ Object

#modify_kana_ni_to_kanji_ni(data) ⇒ Object

#narou_ruby(data) ⇒ Object

#num_to_kanji(data) ⇒ Object

#object_of_ruby?(char) ⇒ Boolean

#outputs(data = "", force = false) ⇒ Object

#initialize(setting, inspector, illustration) ⇒ `ConverterBase`

#current_index ⇒ `Object`

#data_type ⇒ `Object`

#output_text_dir ⇒ `Object`

#subtitles ⇒ `Object`

#use_dakuten_font ⇒ `Object` (readonly)

.rebuild_brackets(data, stack) ⇒ `Object`

#__calc_kanji_num_with_unit(string) ⇒ `Object`

#__calc_sum_unit(units) ⇒ `Object`

#after(io, text_type) ⇒ `Object`

#after_convert(io) ⇒ `Object`

#alphabet_to_zenkaku(data, force = false) ⇒ `Object`

#author_comment_force_close ⇒ `Object`

#auto_indent(data) ⇒ `Object`

#auto_join_in_brackets(data) ⇒ `Object`

#auto_join_line(data) ⇒ `Object`

#before(io, text_type) ⇒ `Object`

#before_convert(io) ⇒ `Object`

#blank_line?(line) ⇒ `Boolean`

#border_symbol?(line) ⇒ `Boolean`

#calc_cr_count(str) ⇒ `Object`

#comments_block?(line) ⇒ `Boolean`

#convert(text, text_type) ⇒ `Object`

#convert_arrow(data) ⇒ `Object`

#convert_dakuten_char_to_font(data) ⇒ `Object`

#convert_double_angle_quotation_to_gaiji(data) ⇒ `Object`

#convert_for_all_data(data) ⇒ `Object`

#convert_fraction_and_date(data) ⇒ `Object`

#convert_head_half_spaces(data) ⇒ `Object`

#convert_horizontal_ellipsis(data) ⇒ `Object`

#convert_kanji_num_with_unit(data, lower_digit_zero = 0) ⇒ `Object`

#convert_main(io) ⇒ `Object`

#convert_novel_rule(data) ⇒ `Object`

#convert_numbers(data) ⇒ `Object`

#convert_page_break(data) ⇒ `Object`

#convert_rome_numeric(data) ⇒ `Object`

#convert_special_characters(data) ⇒ `Object`

#convert_tatechuyoko(data) ⇒ `Object`

#copy_dash_images_to_local_setting_dir ⇒ `Object`

#dash_image_relative_paths(base_dir, output_text_dir) ⇒ `Object`

#delay_outputs(data = "") ⇒ `Object`

#delete_dust_char(data) ⇒ `Object`

#double_dash_to_image(text, output_text_dir) ⇒ `Object`

#enchant_midashi(data) ⇒ `Object`

#erase_comments_block(data) ⇒ `Object`

#erase_introduction(data) ⇒ `Object`

#erase_postscript(data) ⇒ `Object`

#exception_reconvert_kanji_to_num(data) ⇒ `Object`

#find_introduction? ⇒ `Boolean`

#force_indent_special_chapter(data) ⇒ `Object`

#half_indent_bracket(data) ⇒ `Object`

#hankaku_num_to_zenkaku_num(data) ⇒ `Object`

#hankakukana_to_zenkakukana(data) ⇒ `Object`

#inclusion_author_comment_block?(line) ⇒ `Boolean`

#insert_blank_before_line_and_behind_to_special_chapter(line) ⇒ `Object`

#insert_blank_line_to_border_symbol(line) ⇒ `Object`

#insert_char_separator(str) ⇒ `Object`

#insert_separate_space(data) ⇒ `Object`

#insert_separator_for_selection(str) ⇒ `Object`

#insert_word_separator(str) ⇒ `Object`

#is_sesame?(str, ten, last_char) ⇒ `Boolean`

#jisage(line, num) ⇒ `Object`

#join_inner_bracket(str) ⇒ `Object`

#kanji_num_to_integer(string) ⇒ `Object`

#leave_author_comment_block?(line) ⇒ `Boolean`

#midashi(str) ⇒ `Object`

#modify_kana_ni_to_kanji_ni(data) ⇒ `Object`

#narou_ruby(data) ⇒ `Object`

#num_to_kanji(data) ⇒ `Object`

#object_of_ruby?(char) ⇒ `Boolean`

#outputs(data = "", force = false) ⇒ `Object`

#page_break?(line) ⇒ `Boolean`

#process_author_comment(line) ⇒ `Object`

#rebuild_english_sentences(data) ⇒ `Object`

#rebuild_force_indent_special_chapter(data) ⇒ `Object`

#rebuild_hankaku_num_and_comma(data) ⇒ `Object`

#rebuild_illust(data) ⇒ `Object`

#rebuild_kanji_num(data) ⇒ `Object`

#rebuild_kome_to_gaiji(data) ⇒ `Object`

#rebuild_url(data) ⇒ `Object`