Module: MdToNotion::Tokens
- Included in:
- Lexer
- Defined in:
- lib/md_to_notion/tokens.rb
Constant Summary collapse
- HEADING_1 =
/^# (.+)/.freeze
- HEADING_2 =
/^## (.+)/.freeze
- HEADING_3 =
/^### (.+)/.freeze
- CODE_BLOCK =
/^```(?:[^\n]+\n)?(.+?)\n```$/m.freeze
- BULLET_LIST =
/^- (.+)/.freeze
- NUMBERED_LIST =
/^([0-9]+)\. (.+)/.freeze
- IMAGE =
/!\[([^\]]+)\]\(([^)]+)\)/.freeze
- QUOTE =
/^> (.+)/.freeze
- GH_EMBED_FILE =
%r{https://user-images\.githubusercontent\.com/.+\.[a-zA-Z]+}.freeze
- EMBED_FILE_REGEXES =
[GH_EMBED_FILE].freeze
Instance Method Summary collapse
- #bold(match) ⇒ Object
- #bullet_list(match, nesting: 0) ⇒ Object
- #code(text) ⇒ Object
- #code_block(match) ⇒ Object
- #embeded_file(match) ⇒ Object
- #heading_1(match) ⇒ Object
- #heading_2(match) ⇒ Object
- #heading_3(match) ⇒ Object
- #image(match) ⇒ Object
- #italic(match) ⇒ Object
- #numbered_list(match, nesting: 0) ⇒ Object
- #paragraph(match) ⇒ Object
- #quote(match) ⇒ Object
- #strikethrough(match) ⇒ Object
- #text(match) ⇒ Object
-
#tokenize_rich_text(text) ⇒ Object
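Splits inline Markdown text (code spans, bold, italic, strikethrough and plain text) into an array of rich text objects.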
Instance Method Details
#bold(match) ⇒ Object
106 107 108 |
# File 'lib/md_to_notion/tokens.rb', line 106
# Builds a bold token from a matched bold span.
#
# @param match [String] the matched text, including its asterisk delimiters
# @return [Hash] `{ type: :bold, text: String }` with every `*` removed
def bold(match)
  # String#delete strips the literal character without running a regex.
  { type: :bold, text: match.delete("*") }
end
#bullet_list(match, nesting: 0) ⇒ Object
36 37 38 39 40 41 42 |
# File 'lib/md_to_notion/tokens.rb', line 36
# Builds a bullet list token.
#
# @param match [String] the matched list line(s); a leading "- " on each
#   line is removed before the remainder is tokenized as rich text
# @param nesting [Integer] nesting value stored on the token as given
# @return [Hash] token with :type, :rich_texts and :nesting keys
def bullet_list(match, nesting: 0)
  item_text = match.gsub(/^- /, "")
  token = { type: :bullet_list }
  token[:rich_texts] = tokenize_rich_text(item_text)
  token[:nesting] = nesting
  token
end
#code(text) ⇒ Object
98 99 100 |
# File 'lib/md_to_notion/tokens.rb', line 98
# Builds an inline code token from a backtick-delimited span.
#
# @param text [String] the matched span, including its backtick delimiters
# @return [Hash] `{ type: :code, text: String }` without the outer backticks
def code(text)
  # \A and \z anchor to the whole string; ^ and $ would also match at every
  # line boundary and strip backticks that are not the span's delimiters.
  inner = text.sub(/\A`/, "").sub(/`\z/, "")
  { type: :code, text: inner }
end
#code_block(match) ⇒ Object
28 29 30 31 32 33 34 |
# File 'lib/md_to_notion/tokens.rb', line 28
# Builds a code block token from a fenced code block.
#
# @param match [String] the matched block, from the opening fence line to the
#   closing fence
# @return [Hash] token with :type, :text (the body between the fences) and
#   :lang (whatever follows the opening fence on its line; may be empty)
def code_block(match)
  # Parse the fence once, anchored to the whole string, so body lines that
  # happen to start with three backticks (e.g. "```foo") are kept intact.
  fenced = match.match(/\A```([^\n]*)\n(.*)\n```\z/m)
  return { type: :code_block, text: fenced[2], lang: fenced[1] } if fenced

  # Input that is not a well-formed fence keeps the previous line-based stripping.
  {
    type: :code_block,
    text: match.gsub(/^```[^\n]*\n/, "").gsub(/\n```$/, ""),
    lang: match.gsub(/^```/, "").gsub(/\n.+$/m, "")
  }
end
#embeded_file(match) ⇒ Object
68 69 70 71 72 73 |
# File 'lib/md_to_notion/tokens.rb', line 68
# Builds an embedded file token.
#
# @param match [String] the matched URL, stored unchanged
# @return [Hash] `{ type: :embeded_file, url: String }`
def embeded_file(match)
  {
    type: :embeded_file,
    url: match
  }
end
#heading_1(match) ⇒ Object
16 17 18 |
# File 'lib/md_to_notion/tokens.rb', line 16
# Builds a level-1 heading token.
#
# @param match [String] the matched heading line; a leading "# " is removed
#   before the remainder is tokenized as rich text
# @return [Hash] token with :type and :rich_texts keys
def heading_1(match)
  title = match.gsub(/^# /, "")
  { type: :heading_1, rich_texts: tokenize_rich_text(title) }
end
#heading_2(match) ⇒ Object
20 21 22 |
# File 'lib/md_to_notion/tokens.rb', line 20
# Builds a level-2 heading token.
#
# @param match [String] the matched heading line; a leading "## " is removed
#   before the remainder is tokenized as rich text
# @return [Hash] token with :type and :rich_texts keys
def heading_2(match)
  title = match.gsub(/^## /, "")
  { type: :heading_2, rich_texts: tokenize_rich_text(title) }
end
#heading_3(match) ⇒ Object
24 25 26 |
# File 'lib/md_to_notion/tokens.rb', line 24
# Builds a level-3 heading token.
#
# @param match [String] the matched heading line; a leading "### " is removed
#   before the remainder is tokenized as rich text
# @return [Hash] token with :type and :rich_texts keys
def heading_3(match)
  title = match.gsub(/^### /, "")
  { type: :heading_3, rich_texts: tokenize_rich_text(title) }
end
#image(match) ⇒ Object
53 54 55 56 57 58 |
# File 'lib/md_to_notion/tokens.rb', line 53
# Builds an image token from Markdown image syntax `![alt](url)`.
#
# @param match [String] the matched image markup
# @return [Hash] `{ type: :image, url: String }`; text that does not look like
#   image markup is returned unchanged as the url
def image(match)
  # The alt text may be empty (`![](url)`), so it is matched with `*`, not `+`.
  # NOTE(review): the IMAGE constant also requires non-empty alt text; confirm
  # whether the lexer should route empty-alt images here as well.
  {
    type: :image,
    url: match.gsub(/!\[[^\]]*\]\(([^)]+)\)/, '\1')
  }
end
#italic(match) ⇒ Object
102 103 104 |
# File 'lib/md_to_notion/tokens.rb', line 102
# Builds an italic token from a matched italic span.
#
# @param match [String] the matched text, including its asterisk delimiters
# @return [Hash] `{ type: :italic, text: String }` with every `*` removed
def italic(match)
  # String#delete strips the literal character without running a regex.
  { type: :italic, text: match.delete("*") }
end
#numbered_list(match, nesting: 0) ⇒ Object
44 45 46 47 48 49 50 51 |
# File 'lib/md_to_notion/tokens.rb', line 44
# Builds a numbered list token.
#
# @param match [String] the matched list line(s); a leading "<digits>. " on
#   each line is removed before the remainder is tokenized as rich text
# @param nesting [Integer] nesting value stored on the token as given
# @return [Hash] token with :type, :rich_texts, :number and :nesting keys;
#   :number is the integer parsed from the text before the first "."
def numbered_list(match, nesting: 0)
  item_text = match.gsub(/^[0-9]+\. /, "")
  rich_texts = tokenize_rich_text(item_text)
  # Drop everything from the first "." on each line, then read the leading integer.
  position = match.gsub(/\..+$/, "").to_i
  { type: :numbered_list, rich_texts: rich_texts, number: position, nesting: nesting }
end
#paragraph(match) ⇒ Object
60 61 62 |
# File 'lib/md_to_notion/tokens.rb', line 60
# Builds a paragraph token.
#
# @param match [String] the paragraph text, tokenized as rich text unchanged
# @return [Hash] token with :type and :rich_texts keys
def paragraph(match)
  rich_texts = tokenize_rich_text(match)
  { type: :paragraph, rich_texts: rich_texts }
end
#quote(match) ⇒ Object
64 65 66 |
# File 'lib/md_to_notion/tokens.rb', line 64
# Builds a quote token.
#
# @param match [String] the matched quote line(s); a leading "> " on each
#   line is removed before the remainder is tokenized as rich text
# @return [Hash] token with :type and :rich_texts keys
def quote(match)
  quoted = match.gsub(/^> /, "")
  { type: :quote, rich_texts: tokenize_rich_text(quoted) }
end
#strikethrough(match) ⇒ Object
114 115 116 |
# File 'lib/md_to_notion/tokens.rb', line 114
# Builds a strikethrough token.
#
# @param match [String] the matched text, including its "~~" delimiters
# @return [Hash] `{ type: :strikethrough, text: String }` with every "~~" removed
def strikethrough(match)
  struck = match.gsub(/~~/, "")
  token = { type: :strikethrough }
  token[:text] = struck
  token
end
#text(match) ⇒ Object
110 111 112 |
# File 'lib/md_to_notion/tokens.rb', line 110
# Builds a plain text token.
#
# @param match [String] the text, stored unchanged
# @return [Hash] `{ type: :text, text: String }`
def text(match)
  token = { type: :text }
  token[:text] = match
  token
end
#tokenize_rich_text(text) ⇒ Object
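Splits inline Markdown text (code spans, bold, italic, strikethrough and plain text) into an array of rich text objects.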
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/md_to_notion/tokens.rb', line 77
# Splits inline Markdown into rich text tokens.
#
# Recognizes backtick code spans, `**bold**`, `*italic*` and
# `~~strikethrough~~`; everything else becomes plain text tokens.
#
# @param text [String] the inline Markdown to tokenize
# @return [Array<Hash>] tokens in source order, built by #code, #bold,
#   #italic, #strikethrough and #text
def tokenize_rich_text(text)
  # The last alternative catches a marker character that does not open a
  # complete span (e.g. the "*" in "a * b"). Without it String#scan skips the
  # character and it silently disappears from the output.
  pieces = text.scan(/`[^`]*`|\*\*[^*]*\*\*|\*[^*]*\*|~~[^~]*~~|[^`*~]+|[`*~]/)

  pieces.map do |piece|
    case piece
    when "`", "*", "~"
      # A lone marker is literal text, not formatting.
      text(piece)
    when /^`/
      code(piece)
    when /^\*\*/
      bold(piece)
    when /^\*/
      italic(piece)
    when /^~~/
      strikethrough(piece)
    else
      text(piece)
    end
  end
end