Class: Replace

Inherits:
Object
  • Object
show all
Defined in:
lib/replace.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(string) ⇒ Replace

Returns a new instance of Replace.



8
9
10
# File 'lib/replace.rb', line 8

# Wraps the given text so the instance's methods can read and rewrite it.
#
# @param string [Object] the text to operate on; stored as-is in @string
#   (other methods of this class call String#scan on it)
def initialize(string)
  @string = string
end

Instance Attribute Details

#scanObject (readonly)

Returns the value of attribute scan.



6
7
8
# File 'lib/replace.rb', line 6

# Reader for @scan, which scan_test, scan_url and scan_image assign.
# initialize does not set it, so it is nil until one of those has run.
def scan
  @scan
end

#stringObject (readonly)

Returns the value of attribute string.



6
7
8
# File 'lib/replace.rb', line 6

# Reader for @string, the text handed to initialize and reassigned by the
# conversion methods (e.g. html2markdown, markdown2html, tw2s).
def string
  @string
end

Instance Method Details

#add_line_breakObject

增加一些必要的分行



228
229
230
231
232
233
234
235
236
# File 'lib/replace.rb', line 228

# Adds the spaces and line breaks that running Chinese text needs.
#
# The rules are handed to the `replace`/`s` helpers, which are defined outside
# this excerpt. Returns self so calls can be chained.
def add_line_break
  replace(@string) do
    # Han, colon or comma, Han: keep the mark and put one space after it.
    s(/(\p{Han})[[:blank:]]*([:,])[[:blank:]]*(\p{Han})/, '\1\2 \3')
    # Han, sentence-ending mark, Han: keep the mark and start a new line.
    s(/(\p{Han})[[:blank:]]*([。.!?;])[[:blank:]]*(\p{Han})/, "\\1\\2\n\\3")
    # Exactly one space between a Han character and an opening bracket (\p{Ps}).
    s(/(\p{Han})[[:blank:]]*(\p{Ps})/, '\1 \2')
    # Exactly one space between a closing bracket (\p{Pe}) and a Han character.
    s(/(\p{Pe})[[:blank:]]*(\p{Han})/, '\1 \2')
  end
  self
end

#ancient_literatureObject



365
366
367
368
369
370
371
# File 'lib/replace.rb', line 365

# Cleans up text that carries a "_古诗文网" site suffix and "作者:" lines.
#
# Rules go through the `replace`/`s` helpers (defined outside this excerpt).
# Finishes with del_head_blank and returns whatever that returns.
def ancient_literature
  replace(@string) do
    # Drop the site-name suffix.
    s(/_古诗文网/, '')
    # Drop "作者:" together with the rest of that line and its line ending.
    s(/作者:.*\r?\n/, '')
  end
  del_head_blank
end

#ascii2Object

双字节 (全角) ASCII 字符转为单字节 (半角) 字符 (通过验证, 危险等级: 0) 半角: !"#$%&'()*+,-./ 0123456789:;<=>? @ABCDEFGHIJKLMNO PQRSTUVWXYZ[\]^_ `abcdefghijklmno pqrstuvwxyz{|}~ 全角: !"#$%&'()*+,-./ 0123456789:;<=>? @ABCDEFGHIJKLMNO PQRSTUVWXYZ[\]^_ `abcdefghijklmno pqrstuvwxyz{|}~



203
204
205
206
207
208
209
210
211
212
213
214
# File 'lib/replace.rb', line 203

# Converts full-width ASCII variants (U+FF01..U+FF5E) to their single-byte
# ASCII counterparts.
#
# The substitution goes through the `replace`/`s` helpers (defined outside
# this excerpt). Returns self so calls can be chained.
def ascii2
  replace(@string) do
    s(/([\u{FF01}-\u{FF5E}])/) do
      # Each matched character is three UTF-8 bytes: EF, then BC or BD, then
      # 80..BF. The second byte selects which 64-character page the third
      # byte's offset belongs to.
      _lead, middle, last = $1.bytes
      page = middle - 0xBC
      [last - 0x60 + 64 * page].pack("c*")
    end
  end
  self
end

#batch_replace(regexps = {}) ⇒ Object

批量逐个替换第一个匹配项



49
50
51
52
53
54
55
56
# File 'lib/replace.rb', line 49

# For every pattern/note pair, annotates the first occurrence of the pattern
# with the note in " ^[note] " form.
#
# @param regexps [Hash] maps a regex source string to the note text that is
#   inserted after the first match of that pattern
# @return self, so calls can be chained
def batch_replace(regexps = {})
  regexps.each do |pattern_source, note_text|
    replace(@string) do
      # \G plus a lazy prefix anchors the search to the first occurrence;
      # MULTILINE lets the prefix run across line breaks.
      first_occurrence = Regexp.new("\\G(.*?)#{pattern_source}", Regexp::MULTILINE)
      # The matched key itself is not re-emitted: only the prefix and the note.
      sub!(first_occurrence, "\\1 ^[#{note_text}] ")
    end
  end
  self
end

#blankObject

删除汉字之间的空格 (通过验证, 危险等级: 3) 添加汉字与数字、英文之间的空格 del_head_blank.del_blank_line



241
242
243
244
245
246
247
248
249
250
# File 'lib/replace.rb', line 241

# Normalises spacing around Chinese text: removes blanks between Han
# characters and puts a space between Han characters and word characters.
#
# Rules go through the `replace`/`s` helpers (defined outside this excerpt).
# Finishes with del_head_blank.del_blank_line and returns that result.
def blank
  replace(@string) do
    # Remove blanks between two Han characters
    # (the original author notes that "无 法 处 理 这 种 情 况" is not handled).
    s(/(\p{Han})[[:blank:]]+(\p{Han})/, '\1\2')
    # Put a space between Han characters and digits or Latin letters.
    s(/(\p{Han})(\w)/, '\1 \2')
    s(/(\w)(\p{Han})/, '\1 \2')
  end
  del_head_blank.del_blank_line
end

#chapterObject

判定章节标题 (通过验证, 危险等级: 0)



382
383
384
385
386
387
388
389
390
391
# File 'lib/replace.rb', line 382

# Turns Chinese-numbered headings at the start of a line into heading markers.
#
# Rules go through the `replace`/`s` helpers (defined outside this excerpt).
# Returns self so calls can be chained.
def chapter
  replace(@string) do
    # "第N卷/部/篇" -> part marker
    s(/^第[一二三四五六七八九十]+[卷部篇]/, 'PART: ')
    # "第N章" -> level-1 heading
    s(/^第[一二三四五六七八九十]+[章]/, '# ')
    # "第N节" -> level-2 heading
    s(/^第[一二三四五六七八九十]+[节]/, '## ')
    # "N、" -> level-3 heading
    s(/^[一二三四五六七八九十]+、/, '### ')
    # "(N)" -> level-4 heading
    s(/^\([一二三四五六七八九十]+\)/, '#### ')
  end
  self
end

#codeObject

行内代码两边各留一个空格 (未通过验证, 危险等级: 4) jekyll_code



300
301
302
303
304
305
306
# File 'lib/replace.rb', line 300

# Pads inline code spans with a space on each side when they touch
# alphanumeric characters, then converts Jekyll code blocks via jekyll_code.
#
# The rule goes through the `replace`/`s` helpers (defined outside this
# excerpt). Returns whatever jekyll_code returns.
def code
  replace(@string) do
    # alnum`code`alnum -> alnum `code` alnum
    s(/([[:alnum:]])`([^`]+?)`([[:alnum:]])/, '\1 `\2` \3')
  end
  jekyll_code
end

#del_blank_lineObject

删除多余的空行 (通过验证, 危险等级: 0) del_tail_blank



272
273
274
275
276
277
# File 'lib/replace.rb', line 272

# Collapses runs of two or more blank (or blank-only) lines into a single
# newline, then strips trailing blanks via del_tail_blank.
#
# The rule goes through the `replace`/`s` helpers (defined outside this
# excerpt). Returns whatever del_tail_blank returns.
def del_blank_line
  replace(@string) do
    s(/(^[[:blank:]]*\r?\n){2,}/, "\n")
  end
  del_tail_blank
end

#del_head_blankObject

删除行首的空白 (通过验证, 危险等级: 3, 可能是 Markdown 缩进) 将看上去像空白的行转化为真正的空白行



254
255
256
257
258
259
# File 'lib/replace.rb', line 254

# Strips blanks at the start of every line. This also removes Markdown
# indentation, which is why the author rates it as risky.
#
# The rule goes through the `replace`/`s` helpers (defined outside this
# excerpt). Returns self so calls can be chained.
def del_head_blank
  replace(@string) do
    s(/^[[:blank:]]+/, '')
  end
  self
end

#del_italics_and_boldObject

删除加粗斜体样式 (通过验证, 危险等级: 3, 可能是 Markdown 加粗斜体)



345
346
347
348
349
350
351
# File 'lib/replace.rb', line 345

# Removes Markdown bold and italic markers while keeping the wrapped text.
#
# Rules go through the `replace`/`s` helpers (defined outside this excerpt).
# Returns self so calls can be chained.
def del_italics_and_bold
  replace(@string) do
    # Bold: ** or __ pairs. Group 2 is the opening marker and \2 requires the
    # same marker to close; groups 1 and 4 keep the surrounding characters.
    s(/([\W_]|^)(\*\*|__)(?=\S)([^\r]*?\S[\*_]*)\2([\W_]|$)/, '\1\3\4')
    # Italic: * or _ pairs, with no marker characters inside the span.
    s(/([\W_]|^)(\*|_)(?=\S)([^\r\*_]*?\S)\2([\W_]|$)/, '\1\3\4')
  end
  self
end

#del_line_breakObject

删除一些没必要的分行



217
218
219
220
221
222
223
224
225
# File 'lib/replace.rb', line 217

# Removes line breaks that split running Chinese text.
#
# Rules go through the `replace`/`s` helpers (defined outside this excerpt).
# Returns self so calls can be chained.
def del_line_break
  replace(@string) do
    # Join a Han character with a Han character on the next line
    # (author's example input: "无\n法\n处\n理\n这\n种\n情\n况").
    s(/(\p{Han})\r?\n(\p{Han})/, '\1\2')
    # Join a Han character with punctuation that starts the next line.
    s(/(\p{Han})\r?\n([[:punct:]])/, '\1\2')
    # Drop runs of three or more ellipsis characters plus the line breaks after them.
    s(/…{3,}(\r?\n)+/, '')
  end
  self
end

#del_tail_blankObject

删除行尾的空白 (通过验证, 危险等级: 0) 将看上去像空白的行转化为真正的空白行



263
264
265
266
267
268
# File 'lib/replace.rb', line 263

# Strips blanks that sit directly before a line ending and writes the line
# ending as "\n", so lines that only look empty become truly empty.
#
# The rule goes through the `replace`/`s` helpers (defined outside this
# excerpt). Returns self so calls can be chained.
def del_tail_blank
  replace(@string) do
    s(/[[:blank:]]+\r?\n/, "\n")
  end
  self
end

#footnoteObject



58
59
60
# File 'lib/replace.rb', line 58

# Builds the note dictionary with scan_note and applies it with batch_replace.
# Returns whatever batch_replace returns.
def footnote
  note_table = scan_note
  batch_replace(note_table)
end

#foreign_literatureObject



353
354
355
356
357
358
359
360
361
362
363
# File 'lib/replace.rb', line 353

# Reformats a plain-text foreign-literature dump into Markdown-style headings.
#
# Rules go through the `replace`/`s` helpers (defined outside this excerpt).
# Finishes with del_head_blank and returns whatever that returns.
def foreign_literature
  replace(@string) do
    # Every line ending (with any whitespace before it) becomes a blank line.
    s(/\s*\n/, "\n\n")
    # Four or more dollar signs mark a heading.
    s(/\${4,}\s*/, '#### ')
    # Delete the listed space character and U+001A.
    # NOTE(review): the first character of this class shows as a plain space
    # here; it may originally have been a full-width space. Confirm against
    # lib/replace.rb.
    s(/[ \u{001A}]/, '')
    # "# <digits><any char>" becomes a level-2 heading.
    # NOTE(review): the dot is unescaped, so it matches any character.
    s(/# [0-9]+.\s*/, '## ')
    # "#### 第…卷/部 <title>" becomes a part line.
    s(/#### 第[^\r\n]+[卷部]\s*(.*)\s*\n/, "PART: \\1\n\n")
    # "#### 第…章 <title>" becomes a level-1 heading.
    s(/#### 第[^\r\n]+[章]\s*(.*)\s*\n/, "# \\1\n\n")
  end
  del_head_blank
end

#format_markdownObject



401
402
403
# File 'lib/replace.rb', line 401

# Round-trips the text through Pandoc: Markdown to HTML, then HTML back to
# Markdown. Returns whatever html2markdown returns.
def format_markdown
  as_html = markdown2html
  as_html.html2markdown
end

#head_footObject

删除页眉页脚



289
290
291
292
293
294
295
296
# File 'lib/replace.rb', line 289

# Strips the page header and footer.
#
# Rules go through the `replace`/`s` helpers (defined outside this excerpt).
# Returns self so calls can be chained.
def head_foot
  replace(@string) do
    # Header: the first 11 lines of the text plus any whitespace after them.
    s(/\A(^[^\r\n]*\r?\n){11}\s*/m, '')
    # Footer: everything from a line starting with "[«" to the end of the text.
    s(/^\[«.*?\z/m, '')
    # Disabled alternative left by the author:
    # s /(^.*?\r?\n){4}\z/, ''
  end
  self
end

#helpObject



12
13
14
15
16
17
18
19
20
# File 'lib/replace.rb', line 12

# Collects, for each `def` found in the text, the run of lines containing
# "#" that directly precedes it.
#
# The scan goes through the `replace`/`s` helpers (defined outside this
# excerpt). The block's value (the captured comment text) is what `s`
# receives for each match.
#
# @return [Hash] method name (Symbol) => preceding comment text
def help
  comments_by_method = {}
  replace(@string) do
    s(/((.*#.*\r?\n)*)\s*def\s+(\w+)/) do
      # $1 is the comment block, $3 the method name.
      comments_by_method[$3.to_sym] = $1
    end
  end
  comments_by_method
end

#html2markdownObject



411
412
413
414
415
# File 'lib/replace.rb', line 411

# Converts @string from HTML to Markdown with PandocRuby and stores the
# result back in @string. Returns self so calls can be chained.
def html2markdown
  pandoc_flags = ['chapters', 'atx-headers', 'normalize', 'no-wrap']
  @string = PandocRuby.new(@string, from: :html, to: :markdown).convert(*pandoc_flags)
  self
end

#imageObject

处理插图路径 (通过验证, 危险等级: 0)



280
281
282
283
284
285
286
# File 'lib/replace.rb', line 280

# Rewrites illustration references into plain Markdown image links.
#
# Rules go through the `replace`/`s` helpers (defined outside this excerpt).
# Returns self so calls can be chained.
def image
  replace(@string) do
    # "Insert 18333figNNNN.png" followed by a "N-N. caption" line becomes
    # ![caption](18333figNNNN-tn.png).
    s(/Insert\s(18333fig\d+)\.png\s*\n.*?\d{1,2}-\d{1,2}\. (.*)/, '![\2](\1-tn.png)')
    # Strip the directory part and any quoted title from existing image links.
    s(/!\[(.*?)\]\(\S*\/(\S*?)( ".*")?\)/, '![\1](\2)')
  end
  self
end

#jekyll_codeObject

Jekyll 代码格式转为 Fenced 代码格式 (通过验证, 危险等级: 0)



309
310
311
312
313
314
315
# File 'lib/replace.rb', line 309

# Converts Jekyll highlight tags into fenced code blocks.
#
# Rules go through the `replace`/`s` helpers (defined outside this excerpt).
# Returns self so calls can be chained.
def jekyll_code
  replace(@string) do
    # Opening tag with a language name -> blank line, then a fence carrying
    # the language as a class.
    s(/\s*\{%\s*highlight\s+(\w+)\s*%\}\s*/, "\n\n```{.\\1}\n")
    # Closing tag -> closing fence followed by a blank line.
    s(/\s*\{%\s*endhighlight\s*%\}\s*/, "\n```\n\n")
  end
  self
end

#listObject



393
394
395
396
397
398
399
# File 'lib/replace.rb', line 393

# Normalises list markers at the start of a line so a tab follows the marker.
#
# Rules go through the `replace`/`s` helpers (defined outside this excerpt).
# Returns self so calls can be chained.
def list
  replace(@string) do
    # A digit plus the character after it, then a tab.
    # NOTE(review): the dot is unescaped and only one digit is matched, so
    # "12. x" matches "12" rather than "12." -- confirm whether that is intended.
    s(/^(\d.)\s*/, "\\1\t")
    # A bullet character becomes "-" plus a tab.
    s(/^[●]\s*/, "-\t")
  end
  self
end

#markdown2htmlObject



405
406
407
408
409
# File 'lib/replace.rb', line 405

# Converts @string from Markdown to HTML with PandocRuby and stores the
# result back in @string. Returns self so calls can be chained.
def markdown2html
  @string = PandocRuby
            .new(@string, from: :markdown, to: :html)
            .convert('chapters', 'indented-code-classes' => 'sourceCode')
  self
end

#paragraphObject

判定段落的起始 (通过验证, 危险等级: 0)



374
375
376
377
378
379
# File 'lib/replace.rb', line 374

# Marks paragraph starts: a line that begins with two or more blanks has that
# indentation replaced by a newline.
#
# The rule goes through the `replace`/`s` helpers (defined outside this
# excerpt). Returns self so calls can be chained.
def paragraph
  replace(@string) do
    s(/^[[:blank:]]{2,}/, "\n")
  end
  self
end

#pdftotextObject

处理 pdftotext 的转换结果 (未通过验证, 危险等级: 4) paragraph.blank.del_line_break.chapter.list.punct2.add_line_break



112
113
114
115
116
117
118
# File 'lib/replace.rb', line 112

# Cleans up pdftotext output: removes page-number lines, then runs the
# paragraph, blank, del_line_break, chapter, list, punct2 and add_line_break
# passes in that order.
#
# The rule goes through the `replace`/`s` helpers (defined outside this
# excerpt). Returns whatever add_line_break returns.
def pdftotext
  replace(@string) do
    # A line holding only digits (with optional blanks) is a page number.
    s(/^[[:blank:]]*[0-9]+[[:blank:]]*\r?\n/, '')
  end
  paragraph
    .blank
    .del_line_break
    .chapter
    .list
    .punct2
    .add_line_break
end

#post_pandoc_for_latexObject



94
95
96
97
98
99
100
101
102
# File 'lib/replace.rb', line 94

# Post-processes Pandoc's LaTeX output, then applies the theorem pass.
#
# Rules go through the `replace`/`s` helpers (defined outside this excerpt).
# Returns whatever theorem returns.
def post_pandoc_for_latex
  replace(@string) do
    # Use the capitalised Verbatim environment name.
    s(/\{verbatim\}/, '{Verbatim}')
    # A centred rule becomes a page break.
    s(/\\begin\{center\}\\rule\{(.*?)\}\{(.*?)\}\\end\{center\}/, '\newpage')
    # Remove whitespace around footnotes.
    s(/\s*\\footnote\{(.*?)\}\s*/, '\footnote{\1}')
    # Footnotes of the form "key: text" become inline bracketed notes.
    # NOTE(review): the class lists the same colon twice in this view; one of
    # them may originally have been a full-width colon.
    s(/\\footnote\{(.*?)[::]\s*(.*?)\}/, '〔{\kaishu \1: \2}〕')
  end
  theorem
end

#pre_pandoc_for_latexObject



90
91
92
# File 'lib/replace.rb', line 90

# Pre-processing step before Pandoc's LaTeX conversion: delegates to title
# and returns whatever title returns.
def pre_pandoc_for_latex
  title
end

#punct1Object

中文标点转为英文标点



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/replace.rb', line 121

def punct1
  replace(@string) do
    s //, ', '
    s /:([^\r\n])/, ":\n"'\1'
    s /;([^\r\n])/, ";\n"'\1'
    s /。([^\r\n])/, ".\n"'\1'
    s /?([^\r\n])/, "?\n"'\1'
    s /!([^\r\n])/, "!\n"'\1'
    s /:\r?\n/, ":\n"
    s /;\r?\n/, ";\n"
    s /。\r?\n/, ".\n"
    s /?\r?\n/, "?\n"
    s /!\r?\n/, "!\n"
    s //, ' ('
    s //, ') '
    s /\) ([,.])/, ')\1'
  end
  self
end

#punct2Object

中文标点转为英文标点 (通过验证, 危险等级: 3, 可能需要用中文标点) 保留部分中文符号: 、《》〈〉【】〖〗〔〕 ascii2: ?!,;:()



144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/replace.rb', line 144

# Converts Chinese punctuation to ASCII punctuation while leaving some
# Chinese symbols untouched, then hands the remaining full-width ASCII
# variants to ascii2. Returns whatever ascii2 returns.
#
# Rules go through the `replace`/`s` helpers (defined outside this excerpt).
def punct2
  replace(@string) do
    # Reference tables of punctuation and symbol characters, kept from the
    # original author:
    # ‐‑‒–—―‖‗‘’‚‛“”„‟
    # †‡•‣․‥…‧
    # ‰‱′″‴‵‶‷‸‹›※‼‽‾‿
    # ⁀⁁⁂⁃
    # ⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏
    # ⁐⁑
    # ⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞
    # ⁽⁾
    # 、。〃
    # 〈〉《》「」『』
    # 【】
    # 〔〕〖〗〘〙〚〛〜〝〞〟
    # 〰
    # 〽
    # \p{S}: $+<=>^`|~⁄⁒
    # \p{Sm}: +<=>|~⁄⁒
    # \p{Sc}: $
    # \p{Sk}: ^`
    # \p{Pi}: ‘‛“‟
    # \p{Pf}: ’”
    # Sentence-final marks: .!?;:
    # Punctuation marks: `$()''""
    # Mid-sentence marks: ,、
    # NOTE(review): this pattern is empty in the rendered listing; it looks
    # like a single character was lost (presumably the ideographic full stop,
    # given the '.' replacement) -- confirm against lib/replace.rb.
    s //, '.'
    # Curly double quotes -> straight double quote.
    s /[“”]/, '"'
    # Curly single quotes -> straight single quote.
    s /[‘’]/, "'"
    # Double box-drawing dash -> three hyphens.
    s /──/, '---'
    # NOTE(review): this pattern is also empty in the rendered listing; the
    # original character cannot be determined from here.
    s //, '--'
  end
  ascii2
end

#renameObject



80
81
82
83
84
85
86
87
88
# File 'lib/replace.rb', line 80

# Renumbers empty-alt image links of the form ![](imageN.jpg): N is
# decreased by one and written zero-padded to three digits.
#
# The dot before "jpg" is escaped so that only a literal ".jpg" extension
# matches; unescaped it would accept any character in that position
# (e.g. "image12xjpg").
#
# The rule goes through the `replace`/`s` helpers (defined outside this
# excerpt). Returns self so calls can be chained.
def rename
  replace(@string) do
    s(/!\[\]\(image(\d+)\.jpg\)/) do
      shifted = $1.to_i - 1
      "![](image%03d.jpg)" % shifted
    end
  end
  self
end

#scan_imageObject



30
31
32
# File 'lib/replace.rb', line 30

# Collects the path of every Markdown image link in @string, ignoring the alt
# text and any title after the path. The result is stored in @scan and
# returned as an array of one-element arrays (one capture group per match).
def scan_image
  image_link = /!\[.*?\]\(([^\s]+?)(?:\s+.*?)?\)/
  @scan = @string.scan(image_link)
end

#scan_noteObject

扫描注释列表生成替换字典



35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/replace.rb', line 35

# Scans the note list in @string and builds the replacement dictionary that
# footnote passes to batch_replace.
#
# Runs del_head_blank first, then looks at every line containing "〔": the
# text before it becomes the key and the text after it the value.
#
# @return [Hash] regex source string of the form "\^KEY\^" => note text
def scan_note
  del_head_blank
  note = {}
  # Earlier pattern, kept for reference:
  # @string.scan(/^[((]\d+[))]\s*(.*?)[::]\s*(.*?)\\?\r?\n/) do |key, value|
  @string.scan(/^(.*?)〔(.*?〕.*?)\r?\n/) do |key, value|
    # key_stem = key.gsub(/[((](.*?)[))]/, '')
    # The carets are escaped because batch_replace compiles this string as a
    # regular expression.
    # NOTE(review): key itself is interpolated unescaped, so regex
    # metacharacters in a key would change the pattern -- consider Regexp.escape.
    key_stem = "\\^#{key}\\^"
    # note[key_stem] = "#{key}: #{value}"
    # NOTE(review): the pattern below is empty in the rendered listing, so as
    # shown it prepends ': ' to value. A single character (presumably the
    # closing "〕") looks lost -- confirm against lib/replace.rb.
    note[key_stem] = value.sub(//, ': ')
  end
  note
end

#scan_testObject



22
23
24
# File 'lib/replace.rb', line 22

# Splits @string into its runs of word characters (\w+), stores the list in
# @scan and returns it.
def scan_test
  words = @string.scan(/\w+/)
  @scan = words
end

#scan_urlObject



26
27
28
# File 'lib/replace.rb', line 26

# Collects the value of every href attribute in @string, stores the list in
# @scan and returns it.
#
# The closing quote must be the same character as the opening quote
# (backreference \1), so a value such as href="it's.html" is no longer cut
# off at the apostrophe. The result keeps the original shape: an array of
# one-element arrays, one per href.
def scan_url
  quoted_href = /href=(['"])(.*?)\1/
  # Drop the quote capture so callers still see [[url], [url], ...].
  @scan = @string.scan(quoted_href).map { |_quote, url| [url] }
end

#simpleObject



62
63
64
65
66
67
68
# File 'lib/replace.rb', line 62

# Minimal two-rule example: "cc" -> "dd" and "aa" -> "bb".
#
# Rules go through the `replace`/`s` helpers (defined outside this excerpt).
# Returns self so calls can be chained.
def simple
  replace(@string) do
    s(/cc/, 'dd')
    s(/aa/, 'bb')
  end
  self
end

#standardObject

标准化 Markdown 文件, 处理 HTML 文件的转换结果 (未通过验证, 危险等级: 4) blank.del_line_break.punct2.code.add_line_break.format_markdown



106
107
108
# File 'lib/replace.rb', line 106

# Standardises a Markdown file by running, in order: blank, del_line_break,
# punct2, code, add_line_break and format_markdown.
# Returns whatever format_markdown returns.
def standard
  blank
    .del_line_break
    .punct2
    .code
    .add_line_break
    .format_markdown
end

#taiwanObject

台湾标点转大陆标点 (通过验证, 危险等级: 0) ascii2



180
181
182
183
184
185
186
187
188
# File 'lib/replace.rb', line 180

# Converts Taiwan-style punctuation to mainland-style punctuation, then hands
# the text to ascii2. Returns whatever ascii2 returns.
#
# Rules go through the `replace`/`s` helpers (defined outside this excerpt).
def taiwan
  replace(@string) do
    # NOTE(review): all four patterns and all four replacements are empty in
    # the rendered listing; the original single-character literals look lost
    # and cannot be determined from here. Confirm against lib/replace.rb
    # before relying on this method.
    s //, ''
    s //, ''
    s //, ''
    s //, ''
  end
  ascii2
end

#theoremObject

定理环境, LaTeX 命令 (未通过验证, 危险等级: 2)



318
319
320
321
322
323
324
325
326
327
328
329
330
331
# File 'lib/replace.rb', line 318

# Turns paragraphs that open with a keyword such as "THEOREM." or "PROOF:"
# into LaTeX environments, and "PART." / "PART:" paragraphs into a LaTeX
# command.
#
# A paragraph runs from the keyword to the next blank line or the end of the
# text; keywords are matched case-insensitively. Rules go through the
# `replace`/`s` helpers (defined outside this excerpt). Returns self.
def theorem
  replace(@string) do
    # NOTE(review): "NNOTE" in the keyword list may be a typo for "NOTE";
    # kept as-is because it determines the generated environment name.
    s(/^(ASSUMPTION|DEFINITION|CONCLUSION|ALGORITHM|EXPERIMENT|EXAMPLE|REMARK|NNOTE|THEOREM|AXIOM|LEMMA|PROPERTY|COROLLARY|PROPOSITION|CLAIM|PROBLEM|QUESTION|CONJECTURE|PROOF|SOLUTION|ANSWER|ANALYSIS)[.:](.*?)(\n(?=\n)|\Z)/mi) do
      # The lower-cased keyword is the environment name.
      environment = $1.downcase
      "\\begin{#{environment}}\n#{$2.strip}\n\\end{#{environment}}\n"
    end
  end
  replace(@string) do
    # "PART: title" becomes \part{title}.
    s(/^(PART)[.:](.*?)(\n(?=\n)|\Z)/mi) do
      "\\#{$1.downcase}{#{$2.strip}}\n"
    end
  end
  self
end

#titleObject

转换 YAML 标题信息 (通过验证, 危险等级: 0)



334
335
336
337
338
339
340
341
342
# File 'lib/replace.rb', line 334

# Replaces a leading YAML front-matter block ("---" ... "---") with a
# level-1 heading built from its "title" entry.
#
# The block yields nil when there is no usable title, exactly as before for
# front matter without a "title" key. Front matter that is empty or does not
# parse to a Hash (YAML returns false/nil, a scalar or an array) used to
# raise on doc['title']; it is now treated as "no title".
#
# The rule goes through the `replace`/`s` helpers (defined outside this
# excerpt). Returns self so calls can be chained.
def title
  replace(@string) do
    s(/\A^-{3,}\r?\n(.*?)^-{3,}\r?\n/m) do
      # NOTE(review): YAML.load on document content. If the input can be
      # untrusted, prefer YAML.safe_load (on older Psych versions YAML.load
      # can instantiate arbitrary objects).
      doc = YAML::load($1)
      "# #{doc['title']}\n\n" if doc.is_a?(Hash) && doc['title']
    end
  end
  self
end

#treeObject

处理 Shell 命令 tree 的输出 (通过验证, 危险等级: 0)



71
72
73
74
75
76
77
78
# File 'lib/replace.rb', line 71

# Converts the box-drawing characters in `tree` command output to ASCII.
#
# Rules go through the `replace`/`s` helpers (defined outside this excerpt).
# Returns self so calls can be chained.
def tree
  replace(@string) do
    # Vertical and tee pieces -> pipe.
    s(/[│├]/, '|')
    # Corner piece -> a single backslash.
    s(/[└]/, '\\')
    # Horizontal piece -> hyphen.
    s(/[─]/, '-')
  end
  self
end

#tw2sObject

台湾正体到简体 brew install opencc sudo gem install ropencc



420
421
422
423
424
# File 'lib/replace.rb', line 420

# Converts @string with Ropencc using the 'tw2s.json' configuration (Taiwan
# traditional to simplified, per the method description) and stores the
# result back in @string. Returns self so calls can be chained.
def tw2s
  @string = Ropencc.open('tw2s.json').convert(@string)
  self
end