Module: PragmaticSegmenter::Cleaner::Rules
- Included in:
- PragmaticSegmenter::Cleaner
- Defined in:
- lib/pragmatic_segmenter/cleaner/rules.rb
Defined Under Namespace
Constant Summary
- NewLineInMiddleOfWordRule =
Rubular: rubular.com/r/V57WnM9Zut
Rule.new(/\n(?=[a-zA-Z]{1,2}\n)/, '')
- DoubleNewLineWithSpaceRule =
Rubular: rubular.com/r/dMxp5MixFS
Rule.new(/\n \n/, "\r")
- DoubleNewLineRule =
Rubular: rubular.com/r/H6HOJeA8bq
Rule.new(/\n\n/, "\r")
- NewLineFollowedByPeriodRule =
Rubular: rubular.com/r/FseyMiiYFT
Rule.new(/\n(?=\.(\s|\n))/, '')
- ReplaceNewlineWithCarriageReturnRule =
Rule.new(/\n/, "\r")
- EscapedNewLineRule =
Rule.new(/\\n/, "\n")
- EscapedCarriageReturnRule =
Rule.new(/\\r/, "\r")
- TypoEscapedNewLineRule =
Rule.new(/\\\ n/, "\n")
- TypoEscapedCarriageReturnRule =
Rule.new(/\\\ r/, "\r")
- InlineFormattingRule =
Rubular: rubular.com/r/bAJrhyLNeZ
Rule.new(/\{b\^&gt;\d*&lt;b\^\}|\{b\^>\d*<b\^\}/, '')
- TableOfContentsRule =
Rubular: rubular.com/r/8mc1ArOIGy
Rule.new(/\.{5,}\s*\d+-*\d*/, "\r")
- ConsecutivePeriodsRule =
Rubular: rubular.com/r/DwNSuZrNtk
Rule.new(/\.{5,}/, ' ')
- ConsecutiveForwardSlashRule =
Rubular: rubular.com/r/IQ4TPfsbd8
Rule.new(/\/{3}/, '')
- NO_SPACE_BETWEEN_SENTENCES_REGEX =
Rubular: rubular.com/r/6dt98uI76u
/(?<=[a-z])\.(?=[A-Z])/
- NoSpaceBetweenSentencesRule =
Rule.new(NO_SPACE_BETWEEN_SENTENCES_REGEX, '. ')
- NO_SPACE_BETWEEN_SENTENCES_DIGIT_REGEX =
Rubular: rubular.com/r/l6KN6rH5XE
/(?<=\d)\.(?=[A-Z])/
- NoSpaceBetweenSentencesDigitRule =
Rule.new(NO_SPACE_BETWEEN_SENTENCES_DIGIT_REGEX, '. ')
- URL_EMAIL_KEYWORDS =
['@', 'http', '.com', 'net', 'www', '//']
- NEWLINE_IN_MIDDLE_OF_SENTENCE_REGEX =
Rubular: rubular.com/r/3GiRiP2IbD
/(?<=\s)\n(?=([a-z]|\())/
- NewLineFollowedByBulletRule =
Rubular: rubular.com/r/Gn18aAnLdZ
Rule.new(/\n(?=•)/, "\r")
- QuotationsFirstRule =
Rule.new(/''/, '"')
- QuotationsSecondRule =
Rule.new(/``/, '"')