Module: MdToNotion::Tokens

Included in:
Lexer
Defined in:
lib/md_to_notion/tokens.rb

Constant Summary collapse

# Matches a line beginning with "# " and captures the text that follows.
HEADING_1 =
/^# (.+)/.freeze
# Matches a line beginning with "## " and captures the text that follows.
HEADING_2 =
/^## (.+)/.freeze
# Matches a line beginning with "### " and captures the text that follows.
HEADING_3 =
/^### (.+)/.freeze
# Matches a fenced block: a line-leading ```, an optional non-captured first
# line (presumably the language tag), the lazily captured body, then a newline
# and a closing ``` at end of line. The /m flag lets "." span newlines.
CODE_BLOCK =
/^```(?:[^\n]+\n)?(.+?)\n```$/m.freeze
# Matches a line beginning with "- " and captures the item text.
BULLET_LIST =
/^- (.+)/.freeze
# Matches a line beginning with digits, a dot and a space; captures the
# digits and the item text separately.
NUMBERED_LIST =
/^([0-9]+)\. (.+)/.freeze
# Matches "![alt](url)" anywhere in the text; captures the alt text and the
# URL. Both parts must be non-empty for the pattern to match.
IMAGE =
/!\[([^\]]+)\]\(([^)]+)\)/.freeze
# Matches a line beginning with "> " and captures the quoted text.
QUOTE =
/^> (.+)/.freeze
# Matches a URL under https://user-images.githubusercontent.com/ that contains
# a dot followed by ASCII letters (presumably a file extension).
GH_EMBED_FILE =
%r{https://user-images\.githubusercontent\.com/.+\.[a-zA-Z]+}.freeze
# Frozen list of the embed-file patterns; currently only GH_EMBED_FILE.
EMBED_FILE_REGEXES =
[GH_EMBED_FILE].freeze

Instance Method Summary collapse

Instance Method Details

#bold(match) ⇒ Object



106
107
108
# File 'lib/md_to_notion/tokens.rb', line 106

# Builds a bold token from a matched Markdown span.
#
# @param match [String] the matched span, e.g. "**word**"
# @return [Hash] a :bold token whose text is +match+ with every "*" removed
def bold(match)
  stripped = match.delete("*")
  { type: :bold, text: stripped }
end

#bullet_list(match, nesting: 0) ⇒ Object



36
37
38
39
40
41
42
# File 'lib/md_to_notion/tokens.rb', line 36

# Builds a bullet list item token.
#
# @param match [String] the matched list line, e.g. "- item"
# @param nesting [Integer] nesting level, stored on the token unchanged
# @return [Hash] a :bullet_list token; :rich_texts holds the tokenized item
#   text with the line-leading "- " marker removed
def bullet_list(match, nesting: 0)
  item_text = match.gsub(/^- /, "")
  { type: :bullet_list, rich_texts: tokenize_rich_text(item_text), nesting: nesting }
end

#code(text) ⇒ Object



98
99
100
# File 'lib/md_to_notion/tokens.rb', line 98

# Builds an inline code token.
#
# @param text [String] the matched span, e.g. "`snippet`"
# @return [Hash] a :code token whose text has a line-leading and a
#   line-trailing backtick removed
def code(text)
  without_opening = text.gsub(/^`/, "")
  snippet = without_opening.gsub(/`$/, "")
  { type: :code, text: snippet }
end

#code_block(match) ⇒ Object



28
29
30
31
32
33
34
# File 'lib/md_to_notion/tokens.rb', line 28

# Builds a code block token from a matched fenced block.
#
# Only the first fence (at the very start of the string) and the last fence
# (at the very end) are removed, so a body line that itself starts with ```
# (for example a nested fence inside a Markdown sample) is kept intact.
#
# @param match [String] the whole fenced block, e.g. "```ruby\nputs 1\n```"
# @return [Hash] a :code_block token; :text is the body between the fences and
#   :lang is the remainder of the opening fence line ("" when there is none)
def code_block(match)
  opening_fence = /\A```[^\n]*\n/
  # \Z tolerates one trailing newline; the \A alternative covers an empty
  # body, where removing the opening fence leaves only the closing one.
  closing_fence = /(?:\A|\n)```\Z/
  # First line with the leading fence stripped; never spills onto later lines.
  info_line = match.sub(/\A```/, "")[/\A[^\n]*/]

  {
    type: :code_block,
    text: match.sub(opening_fence, "").sub(closing_fence, ""),
    lang: info_line
  }
end

#embeded_file(match) ⇒ Object



68
69
70
71
72
73
# File 'lib/md_to_notion/tokens.rb', line 68

# Builds an embedded file token.
#
# @param match [Object] the matched URL; stored on the token as-is
# @return [Hash] an :embeded_file token carrying +match+ under :url
def embeded_file(match)
  { type: :embeded_file, url: match }
end

#heading_1(match) ⇒ Object



16
17
18
# File 'lib/md_to_notion/tokens.rb', line 16

# Builds a level-1 heading token.
#
# @param match [String] the matched heading line, e.g. "# Title"
# @return [Hash] a :heading_1 token; :rich_texts holds the tokenized title
#   with the line-leading "# " marker removed
def heading_1(match)
  title = match.gsub(/^# /, "")
  { type: :heading_1, rich_texts: tokenize_rich_text(title) }
end

#heading_2(match) ⇒ Object



20
21
22
# File 'lib/md_to_notion/tokens.rb', line 20

# Builds a level-2 heading token.
#
# @param match [String] the matched heading line, e.g. "## Title"
# @return [Hash] a :heading_2 token; :rich_texts holds the tokenized title
#   with the line-leading "## " marker removed
def heading_2(match)
  title = match.gsub(/^## /, "")
  { type: :heading_2, rich_texts: tokenize_rich_text(title) }
end

#heading_3(match) ⇒ Object



24
25
26
# File 'lib/md_to_notion/tokens.rb', line 24

# Builds a level-3 heading token.
#
# @param match [String] the matched heading line, e.g. "### Title"
# @return [Hash] a :heading_3 token; :rich_texts holds the tokenized title
#   with the line-leading "### " marker removed
def heading_3(match)
  title = match.gsub(/^### /, "")
  { type: :heading_3, rich_texts: tokenize_rich_text(title) }
end

#image(match) ⇒ Object



53
54
55
56
57
58
# File 'lib/md_to_notion/tokens.rb', line 53

# Builds an image token.
#
# @param match [String] the matched text containing "![alt](url)" markup
# @return [Hash] an :image token; :url is +match+ with each image markup
#   occurrence replaced by its URL part
def image(match)
  image_markup = /!\[([^\]]+)\]\(([^)]+)\)/
  # The second capture group is the URL between the parentheses.
  url = match.gsub(image_markup) { Regexp.last_match(2) }
  { type: :image, url: url }
end

#italic(match) ⇒ Object



102
103
104
# File 'lib/md_to_notion/tokens.rb', line 102

# Builds an italic token from a matched Markdown span.
#
# @param match [String] the matched span, e.g. "*word*"
# @return [Hash] an :italic token whose text is +match+ with every "*" removed
def italic(match)
  stripped = match.delete("*")
  { type: :italic, text: stripped }
end

#numbered_list(match, nesting: 0) ⇒ Object



44
45
46
47
48
49
50
51
# File 'lib/md_to_notion/tokens.rb', line 44

# Builds a numbered list item token.
#
# @param match [String] the matched list line, e.g. "3. item"
# @param nesting [Integer] nesting level, stored on the token unchanged
# @return [Hash] a :numbered_list token; :rich_texts holds the tokenized item
#   text without the "N. " prefix and :number is the integer before the dot
def numbered_list(match, nesting: 0)
  item_text = match.gsub(/^[0-9]+\. /, "")
  # Drop everything from the first dot to the end of the line, then parse
  # what is left as the item number.
  ordinal = match.gsub(/\..+$/, "").to_i

  {
    type: :numbered_list,
    rich_texts: tokenize_rich_text(item_text),
    number: ordinal,
    nesting: nesting
  }
end

#paragraph(match) ⇒ Object



60
61
62
# File 'lib/md_to_notion/tokens.rb', line 60

# Builds a paragraph token.
#
# @param match [String] the paragraph text
# @return [Hash] a :paragraph token; :rich_texts holds the tokenized text
def paragraph(match)
  rich_texts = tokenize_rich_text(match)
  { type: :paragraph, rich_texts: rich_texts }
end

#quote(match) ⇒ Object



64
65
66
# File 'lib/md_to_notion/tokens.rb', line 64

# Builds a quote token.
#
# @param match [String] the matched quote line, e.g. "> quoted"
# @return [Hash] a :quote token; :rich_texts holds the tokenized text with
#   the line-leading "> " marker removed
def quote(match)
  quoted = match.gsub(/^> /, "")
  { type: :quote, rich_texts: tokenize_rich_text(quoted) }
end

#strikethrough(match) ⇒ Object



114
115
116
# File 'lib/md_to_notion/tokens.rb', line 114

# Builds a strikethrough token from a matched Markdown span.
#
# @param match [String] the matched span, e.g. "~~word~~"
# @return [Hash] a :strikethrough token whose text is +match+ with every
#   "~~" pair removed
def strikethrough(match)
  stripped = match.gsub("~~", "")
  { type: :strikethrough, text: stripped }
end

#text(match) ⇒ Object



110
111
112
# File 'lib/md_to_notion/tokens.rb', line 110

# Builds a plain text token.
#
# @param match [Object] the text; stored on the token as-is
# @return [Hash] a :text token carrying +match+ under :text
def text(match)
  {
    type: :text,
    text: match
  }
end

#tokenize_rich_text(text) ⇒ Object

Splits inline Markdown text into an array of rich text token hashes.



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/md_to_notion/tokens.rb', line 77

# Splits inline Markdown into an array of rich text tokens.
#
# Recognised spans are inline code (`x`), bold (**x**), italic (*x*) and
# strikethrough (~~x~~); everything between them becomes text tokens.
# A marker character that does not pair up into a span (the "*" in "2 * 3",
# the "~" in "~/path") is emitted as its own text token rather than dropped.
#
# @param text [String] inline Markdown to tokenize
# @return [Array<Hash>] tokens in source order, built by #code, #bold,
#   #italic, #strikethrough and #text
def tokenize_rich_text(text)
  # Alternatives are tried in order: the styled spans, then a run of plain
  # characters, and finally a single marker character that failed to pair up.
  pieces = text.scan(/`[^`]*`|\*\*[^*]*\*\*|\*[^*]*\*|~~[^~]*~~|[^`*~]+|[`*~]/)

  pieces.map do |piece|
    case piece
    when "`", "*", "~"
      # Every styled span is at least two characters long, so a lone marker
      # can only come from the fallback alternative: keep it as literal text.
      text(piece)
    when /^`/
      code(piece)
    when /^\*\*/
      bold(piece)
    when /^\*/
      italic(piece)
    when /^~~/
      strikethrough(piece)
    else
      text(piece)
    end
  end
end