Class: Khaleesi::Generator

Inherits:
Object
  • Object
show all
Defined in:
lib/khaleesi/generator.rb

Defined Under Namespace

Classes: HTML

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ Generator

The constructor accepts all settings and keeps them as fields that stay alive for the whole processing job.



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/khaleesi/generator.rb', line 5

# Builds a generator from an options hash and prepares the state shared by
# the whole generation run.
#
# Recognised keys of opts: :src_dir, :dest_dir, :line_numbers, :css_class,
# :time_pattern, :date_pattern, :diff_plus, :highlighter, :toc_selection.
# Switch-like options are compared through their string form, so both true
# and 'true' enable them.
def initialize(opts={})
  # a switch is on only when its string form is exactly 'true'.
  enabled = ->(key) { opts[key].to_s == 'true' }

  # source and destination directory paths (both are expected to be absolute),
  # plus the directory below the source that holds the pages.
  @src_dir = opts[:src_dir].to_s
  @dest_dir = opts[:dest_dir].to_s
  @page_dir = "#{@src_dir}/_pages/"

  # full pattern renders date and time (like '2014-08-22 16:45'), short pattern
  # renders the date only (like '2014-08-22').
  # see http://www.ruby-doc.org/core-2.1.2/Time.html#strftime-method for pattern details.
  @time_pattern = opts[:time_pattern] || '%a %e %b %H:%M %Y'
  @date_pattern = opts[:date_pattern] || '%F'

  # when enabled, only pages with uncommitted Git changes are generated, which
  # keeps the turnaround short while working on a single page.
  @diff_plus = enabled.call(:diff_plus)

  # settings published as globals: whether the syntax highlighter emits line
  # numbers, the css class name used for highlighted code, whether Pygments
  # replaces the default highlighter (Rouge), and which headers receive a
  # "Table of Contents" id (empty disables TOC generation).
  $line_numbers = enabled.call(:line_numbers)
  $css_class = opts[:css_class] || 'highlight'
  $use_pygments = opts[:highlighter].to_s == 'pygments'
  $toc_selection = opts[:toc_selection].to_s

  # patterns used to read page variables, variable expressions and the
  # separator line between a page's variables and its content.
  @decrt_regexp = produce_variable_regex('decorator')
  @title_regexp = produce_variable_regex('title')
  @var_regexp = /(\p{Word}+):(\p{Word}+)/
  @doc_regexp = /^‡{6,}$/

  # cascaded stack of page variables: while a page is being handled it can
  # look up its own variables and those of the pages above it.
  @variable_stack = []

  # pages currently being processed; keeps decorator files from influencing
  # link and time generation of the real page.
  @page_stack = []
end

Class Method Details

.fetch_create_time(page_file) ⇒ Object



438
439
440
441
# File 'lib/khaleesi/generator.rb', line 438

# Returns the creation time of page_file, taken from its Git history.
def self.fetch_create_time(page_file)
  # the log lists newest commits first, so the last line ('tail') is the
  # first versioned time of the file.
  oldest_entry_cmd = 'tail'
  fetch_git_time(page_file, oldest_entry_cmd)
end

.fetch_git_time(page_file, cmd) ⇒ Object

Enters the directory that contains the file and reads the Git commit time. If the command fails (for example because Git is not installed), the current time is used instead.



451
452
453
454
455
456
457
458
459
460
461
# File 'lib/khaleesi/generator.rb', line 451

# Reads a commit time of page_file from its Git history.
#
# page_file - path of the page; the command runs inside its directory.
# cmd       - shell filter that picks one line of the log: 'head' yields the
#             newest commit time, 'tail' the oldest one.
#
# Returns a Time. When the output cannot be parsed as a time (Git missing,
# file not versioned, ...) the current time is returned instead.
def self.fetch_git_time(page_file, cmd)
  # loaded here so the method stays self-contained.
  require 'shellwords'

  Dir.chdir(File.expand_path('..', page_file)) do
    # escape the file name: names containing spaces or shell metacharacters
    # would otherwise be split or executed by the shell. The '--' keeps a
    # name that starts with a dash from being read as an option.
    file_name = Shellwords.escape(File.basename(page_file))
    commit_time = %x[git log --date=iso --pretty='%cd' -- #{file_name} 2>&1 | #{cmd} -1]
    begin
      # the rightful time looks like this : "2014-08-18 18:44:41 +0800"
      Time.parse(commit_time)
    rescue StandardError
      # best effort: anything unparsable falls back to "now".
      Time.now
    end
  end
end

.fetch_modify_time(page_file) ⇒ Object



443
444
445
446
# File 'lib/khaleesi/generator.rb', line 443

# Returns the last modification time of page_file, taken from its Git history.
def self.fetch_modify_time(page_file)
  # the log lists newest commits first, so the first line ('head') is the
  # last versioned time of the file.
  newest_entry_cmd = 'head'
  fetch_git_time(page_file, newest_entry_cmd)
end


428
429
430
431
432
433
434
435
436
# File 'lib/khaleesi/generator.rb', line 428

# Turns text into a link-friendly slug, modifying the string IN PLACE
# (callers rely on the mutation): surrounding whitespace is removed, every
# character other than letters, digits, spaces, dashes and underscores is
# dropped, spaces become dashes, repeated dashes collapse and the result is
# lower-cased.
#
# Returns the same (mutated) string. Previously the value of the final bang
# call leaked out, which is nil whenever the text was already lower case.
def self.format_as_legal_link(text)
  text.strip!
  # delete every character that is not [alpha, number, whitespace, dash, underscore].
  text.gsub!(/[^0-9a-z \-_]/i, '')
  # replace whitespace with dashes.
  text.tr!(' ', '-')
  text.squeeze!('-')
  text.downcase!
  text
end

.humanize(secs) ⇒ Object



571
572
573
574
575
576
577
578
579
# File 'lib/khaleesi/generator.rb', line 571

# Formats a duration given in seconds (Integer or Float) as text such as
# "1 minutes 2 seconds 500 milliseconds". Units whose value is zero are
# omitted; a duration of zero (or less) yields an empty string.
#
# Durations of an hour or more used to lose everything above the minutes;
# hours and days are now reported as well.
def self.humanize(secs) # http://stackoverflow.com/a/4136485/1294681
  remaining = secs * 1000
  parts = []

  [[1000, :milliseconds], [60, :seconds], [60, :minutes], [24, :hours]].each do |count, name|
    break unless remaining > 0

    remaining, amount = remaining.divmod(count)
    parts << "#{amount.to_i} #{name}" if amount.to_i > 0
  end

  # whatever is left after the hours are whole days.
  parts << "#{remaining.to_i} days" if remaining.to_i > 0

  # units were collected smallest first, print them largest first.
  parts.reverse.join(' ')
end

Instance Method Details

#extract_page_structure(page_file) ⇒ Object

Split by separators, extract page’s variables and content.



388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
# File 'lib/khaleesi/generator.rb', line 388

# Reads page_file and splits it at the first separator line (@doc_regexp).
#
# The text before the separator (the page's variables) is pushed onto
# @variable_stack; nil is pushed when the page has no separator or nothing
# precedes it, so the stack always grows by exactly one entry and callers can
# pop unconditionally.
#
# Returns the content after the separator with surrounding whitespace
# removed, or the whole document when there is no separator. A file that
# cannot be read is reported on stdout and treated as empty.
def extract_page_structure(page_file)
  begin
    document = File.read(page_file)
  rescue StandardError => e
    # only ordinary errors are handled here; interrupts and exit requests
    # must keep propagating.
    puts e.message
    document = ''
  end

  index = document.index(@doc_regexp)
  if index
    # a separator on the very first line means "no variables"; the separator
    # itself must not leak into the content.
    header = document[0, index]
    @variable_stack.push(header.empty? ? nil : header)
    document[index..-1].sub(@doc_regexp, '').strip
  else
    # we must hold the variable stack.
    @variable_stack.push(nil)
    document
  end
end


407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
# File 'lib/khaleesi/generator.rb', line 407

# Builds the link (site-absolute path) of a page.
#
# page_path - path of the page file; the part after "/_pages" decides the
#             directory of the link.
# variables - raw variable text of the page, or nil.
#
# Returns nil when the page has no title. Otherwise a non-blank "slug"
# variable is used verbatim as the file name; failing that the title is
# turned into a legal name with ".html" appended, and when the title has no
# legal characters at all the page's file name is used instead.
def gen_link(page_path, variables)
  # only generate link for title-present page.
  return unless variables

  title = variables[@title_regexp, 3]
  return unless title

  path_below_pages = page_path[/(\p{Graph}+)\/_pages(\p{Graph}+)/, 2]
  link_dir = File.dirname(path_below_pages)
  link_dir << '/' unless link_dir.end_with? '/'

  # fetch and use the pre-define page name if legal.
  slug = variables[produce_variable_regex('slug'), 3]
  if slug
    return File.expand_path(link_dir << slug) unless slug.strip.empty?
  end

  # otherwise derive the file name from the title (formatted in place).
  Generator.format_as_legal_link(title)

  # formatting may have removed every character of the title; the file name is the safe fallback.
  title = File.basename(page_path, '.*') if title.empty?

  File.expand_path(link_dir << title << '.html')
end

#generate ⇒ Object

Main entry point of the Generator. It generates all the pages of the site: it scans the source directory for files that obey the page rules, evaluates and applies all the predefined logic, and writes the final content into the destination directory, preserving the directory hierarchy.



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/khaleesi/generator.rb', line 57

# Main entry of the generator: walks every readable markdown/html file below
# @page_dir, renders the ones that declare both a decorator and a title, and
# writes each result under @dest_dir at the location computed by gen_link.
#
# With @diff_plus enabled only pages that Git reports as changed are rendered.
# Progress and timing information is printed to stdout.
def generate
  # loaded here so the method stays self-contained.
  require 'shellwords'

  start_time = Time.now

  Dir.glob("#{@page_dir}/**/*") do |page_file|
    next unless File.readable? page_file
    next unless is_valid_file page_file

    # per-page state: TOC counter and the stack of pages being processed.
    $toc_index = 0
    @page_stack.clear
    @page_stack.push File.expand_path(page_file)
    single_start_time = Time.now

    if @diff_plus
      file_status = nil
      base_name = File.basename(page_file)
      Dir.chdir(File.expand_path('..', page_file)) do
        # escape the file name so that spaces or shell metacharacters in it
        # are neither split nor executed by the shell; '--' keeps a name that
        # starts with a dash from being read as an option.
        file_status = %x[git status -s -- #{Shellwords.escape(base_name)} 2>&1]
      end
      file_status = file_status.to_s.strip

      # only haven't commit pages available, Git will return nothing if page committed.
      next if file_status.empty?

      # a correct message from Git should included the file name, may occur errors
      # in command running such as Git didn't install if not include.
      unless file_status.include? base_name
        puts file_status
        next
      end
    end

    # only the variables are needed here; the content is parsed again below.
    extract_page_structure(page_file)

    variables = @variable_stack.pop
    # page can't stand without decorator
    next unless variables and variables[@decrt_regexp, 3]

    # isn't legal page if title missing
    next unless variables[@title_regexp, 3]

    content = is_html_file(page_file) ? parse_html_file(page_file) : parse_markdown_file(page_file)

    page_path = File.expand_path(@dest_dir + gen_link(page_file, variables))
    page_dir_path = File.dirname(page_path)
    unless File.directory?(page_dir_path)
      FileUtils.mkdir_p(page_dir_path)
    end

    bytes = IO.write(page_path, content)
    puts "Done (#{Generator.humanize(Time.now - single_start_time)}) => '#{page_path}' bytes[#{bytes}]."
  end

  puts "Generator time elapsed : #{Generator.humanize(Time.now - start_time)}."
end

#handle_chain_snippet(chain_snippet) ⇒ Object

Chain, as its name suggests, takes the previous or next page from the same ordered list that a foreach snippet would produce. That list contains the page currently being generated, so we pick its neighbouring item, linking the pages together like a chain.

examples :

#if chain:prev($theme)

<div class="prev">Prev Theme : <a href="${theme:link}">${theme:title}</a></div>

#end

#if chain:next($theme)

<div class="next">Next Theme : <a href="${theme:link}">${theme:title}</a></div>

#end



342
343
344
345
346
347
348
349
350
351
352
353
354
355
# File 'lib/khaleesi/generator.rb', line 342

# Evaluates a "#if chain:prev($var) ... #end" / "#if chain:next($var) ... #end"
# snippet for the page at the bottom of @page_stack.
#
# chain_snippet - match of the chain expression: [2] is the direction
#                 ('prev' or anything else for next), [3] the variable name,
#                 [4] the snippet body.
#
# Returns the body rendered for the neighbouring page, or nil when the
# current page is not in the list or has no neighbour in that direction.
def handle_chain_snippet(chain_snippet)
  direction = chain_snippet[2]
  var_name = chain_snippet[3]
  loop_body = chain_snippet[4]

  # the ordered list is built from the directory that holds the current page.
  current_page = @page_stack.first
  siblings = take_page_array(File.expand_path('..', current_page))

  position = siblings.index { |candidate| candidate.to_s.eql? current_page }
  return nil if position.nil?

  neighbour = direction.eql?('prev') ? siblings.prev(position) : siblings.next(position)
  return nil unless neighbour

  handle_snippet_page(neighbour, loop_body, var_name)
end

#handle_foreach_snippet(foreach_snippet) ⇒ Object

The foreach loop is designed to traverse all files of a directory inside the “_pages” directory. Each time through the loop, the segment to be repeated is evaluated and output as parsed text. First we gather all the files and sort them by sequence or create time, producing an ordered list. NOTE: sub-directories are supported, and an order-by/limit mode similar to SQL can be applied to manipulate that list.

examples :

loop the whole list : <ul>

#foreach ($theme : $themes)
  <li>${theme:name}</li>
  <li>${theme:description}</li>
#end

</ul>

loop the whole list but sortby descending and limit 5 items. <ul>

#foreach ($theme : $themes desc 5)
  <li>${theme:name}</li>
  <li>${theme:description}</li>
#end

</ul>



306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
# File 'lib/khaleesi/generator.rb', line 306

# Evaluates a "#foreach ($var : $dir [asc|desc] [limit]) ... #end" snippet.
#
# foreach_snippet - match of the foreach expression: [2] is the loop variable
#                   name, [3] the directory (relative to @page_dir), [4] the
#                   optional order, [5] the optional limit, [6] the loop body.
#
# Returns the loop body rendered once per page of the directory and
# concatenated, or nil when the directory does not exist. A missing or zero
# limit means "no limit".
def handle_foreach_snippet(foreach_snippet)
  dir_path = foreach_snippet[3].prepend(@page_dir)
  # Dir.exists? was removed in Ruby 3.2; Dir.exist? works on every version.
  return unless Dir.exist?(dir_path)

  loop_body = foreach_snippet[6]
  var_name = foreach_snippet[2]
  order_by = foreach_snippet[4]
  limit = foreach_snippet[5].to_i

  page_ary = take_page_array(dir_path)
  # if sub-term enable descending order, we'll reversing the page stack.
  page_ary.reverse! if order_by.eql?('desc')
  # keep only the leading items if has limitation.
  page_ary = page_ary.first(limit) if limit > 0

  parsed_body = ''
  page_ary.each do |page|
    parsed_body << handle_snippet_page(page, loop_body, var_name)
  end
  parsed_body
end

#handle_html_content(html_content, added_scope = nil) ⇒ Object



166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# File 'lib/khaleesi/generator.rb', line 166

# Scans html_content character by character and replaces every
# "${scope:value}" expression it can resolve; everything else, including
# malformed or unknown expressions, is copied to the output unchanged.
#
# html_content - text to evaluate.
# added_scope  - optional extra scope name (the loop variable of a foreach or
#                chain snippet) that is handled like the built-in 'variable'
#                scope and additionally supports "${scope:content}".
#
# Supported scopes:
#   variable / added_scope - createtime, createdate, modifytime, modifydate
#                            and link are computed for the page on top of
#                            @page_stack; any other name is looked up in
#                            @variable_stack from the top down.
#   page                   - includes the rendered content of the first page
#                            file below @page_dir whose base name equals the
#                            value.
#
# Returns the evaluated text. An expression that is still unterminated when
# the input ends (e.g. a trailing "$" or "${name") is emitted verbatim; it
# used to be silently dropped.
def handle_html_content(html_content, added_scope=nil)
  page_file = @page_stack.last
  parsed_text = ''
  # buffer holding the candidate expression currently being collected.
  sub_script = ''

  # char by char to evaluate html content.
  html_content.each_char do |char|
    # the buffer is a candidate expression only once it starts with '${'.
    is_valid = sub_script.start_with?('${')
    case char
      when '$'
        # if met the variable expression beginner, we'll append precede characters to parsed_text
        # so the invalid part of expression still output as usual text rather than erase them.
        parsed_text << sub_script unless sub_script.empty?
        sub_script.clear << char

      when '{', ':'
        # '{' is only legal directly after '$'; ':' only after '${' plus a scope name.
        is_valid = sub_script.eql? '$' if char == '{'
        is_valid = is_valid && sub_script.length > 3 if char == ':'
        if is_valid
          sub_script << char
        else
          parsed_text << sub_script << char
          sub_script.clear
        end

      when '}'
        is_valid = is_valid && sub_script.length > 4
        sub_script << char
        if is_valid

          # parsing variable expressions such as :
          # ${variable:title}, ${variable:description}, ${custom_scope:custom_value} etc.
          form_scope = sub_script[@var_regexp, 1]
          form_value = sub_script[@var_regexp, 2]

          case form_scope
            when 'variable', added_scope

              case form_value
                when 'createtime'
                  create_time = Generator.fetch_create_time(page_file)
                  parsed_text << (create_time ? create_time.strftime(@time_pattern) : sub_script)

                when 'createdate'
                  create_time = Generator.fetch_create_time(page_file)
                  parsed_text << (create_time ? create_time.strftime(@date_pattern) : sub_script)

                when 'modifytime'
                  modify_time = Generator.fetch_modify_time(page_file)
                  parsed_text << (modify_time ? modify_time.strftime(@time_pattern) : sub_script)

                when 'modifydate'
                  modify_time = Generator.fetch_modify_time(page_file)
                  parsed_text << (modify_time ? modify_time.strftime(@date_pattern) : sub_script)

                when 'link'
                  page_link = gen_link(page_file, @variable_stack.last)
                  parsed_text << (page_link ? page_link : sub_script)

                else
                  text = nil
                  if form_value.eql?('content') and form_scope.eql?(added_scope)
                    # "${loop_var:content}" renders the whole page of the current loop item.
                    text = parse_html_file(page_file) if is_html_file(page_file)
                    text = parse_markdown_file(page_file) if is_markdown_file(page_file)

                  else
                    # plain variable: the innermost page defining it wins.
                    regexp = /^#{form_value}(\p{Blank}?):(.+)$/
                    @variable_stack.reverse_each do |var|
                      text = var[regexp, 2] if var
                      break if text
                    end

                  end

                  parsed_text << (text ? text.strip : sub_script)

              end

            when 'page'
              # include another page: take the first file whose base name matches.
              match_page = nil
              Dir.glob("#{@page_dir}/**/#{form_value}.*") do |inner_page|
                match_page = inner_page
                break
              end

              if is_html_file(match_page)
                @page_stack.push match_page
                inc_content = parse_html_file(match_page)
                @page_stack.pop
              end
              inc_content = parse_markdown_file(match_page) if is_markdown_file(match_page)

              parsed_text << (inc_content ? inc_content : sub_script)

            else
              # unknown scope: keep the expression as written.
              parsed_text << sub_script
          end

        else
          parsed_text << sub_script
        end

        sub_script.clear

      else
        # inside an expression only visible characters are allowed; anything
        # else aborts the expression and is output as usual text.
        is_valid = is_valid && char.index(/\p{Graph}/)
        if is_valid
          sub_script << char
        else
          parsed_text << sub_script << char
          sub_script.clear
        end
    end
  end

  # an expression left unterminated at the end of the input is ordinary text too.
  parsed_text << sub_script unless sub_script.empty?

  parsed_text
end

#handle_markdown(text) ⇒ Object



549
550
551
552
553
# File 'lib/khaleesi/generator.rb', line 549

# Renders markdown text to HTML with Redcarpet, using the generator's HTML
# renderer class. Nil or empty input yields an empty string.
def handle_markdown(text)
  source = text.to_s
  return '' if source.empty?

  # markdown extensions enabled for every page.
  extensions = {
    fenced_code_blocks: true,
    autolink: true,
    no_intra_emphasis: true,
    strikethrough: true,
    tables: true
  }
  Redcarpet::Markdown.new(HTML, **extensions).render(text)
end

#handle_snippet_page(page, loop_body, var_name) ⇒ Object



357
358
359
360
361
362
363
364
365
366
367
368
369
# File 'lib/khaleesi/generator.rb', line 357

# Renders loop_body for one page of a foreach/chain list.
#
# page      - list item; its @page_variables and its to_s value are used.
# loop_body - snippet text to evaluate.
# var_name  - scope name under which the page is addressed inside the body.
#
# Returns the evaluated body.
def handle_snippet_page(page, loop_body, var_name)
  # put the page's properties on top of both stacks while its body is evaluated.
  page_variables = page.instance_variable_get(:@page_variables)
  @variable_stack.push(page_variables)
  @page_stack.push(page.to_s)

  handle_html_content(loop_body, var_name).tap do
    # drop those properties again right after evaluation.
    @variable_stack.pop
    @page_stack.pop
  end
end

#is_html_file(file_path) ⇒ Object



567
568
569
# File 'lib/khaleesi/generator.rb', line 567

# Tells whether file_path names an html page, judged by its ".html" suffix.
# A nil (or false) path is returned as is.
def is_html_file(file_path)
  return file_path unless file_path

  file_path.end_with?('.html')
end

#is_markdown_file(file_path) ⇒ Object



563
564
565
# File 'lib/khaleesi/generator.rb', line 563

# Tells whether file_path names a markdown page, judged by its ".md" suffix.
# A nil (or false) path is returned as is.
def is_markdown_file(file_path)
  return file_path unless file_path

  file_path.end_with?('.md')
end

#is_valid_file(file_path) ⇒ Object



559
560
561
# File 'lib/khaleesi/generator.rb', line 559

# Tells whether file_path is a page the generator can handle, i.e. a markdown
# or an html file.
def is_valid_file(file_path)
  markdown = is_markdown_file(file_path)
  markdown || is_html_file(file_path)
end

#parse_decorator_file(bore_content) ⇒ Object



119
120
121
122
123
# File 'lib/khaleesi/generator.rb', line 119

# Wraps bore_content into the decorator named by the variables on top of
# @variable_stack. The decorator is the html file of that name inside
# "<src_dir>/_decorators". Without variables or without a decorator variable
# the content is returned untouched.
def parse_decorator_file(bore_content)
  current_variables = @variable_stack.last
  return bore_content unless current_variables

  decorator_name = current_variables[@decrt_regexp, 3]
  return bore_content unless decorator_name

  parse_html_file("#{@src_dir}/_decorators/#{decorator_name.strip}.html", bore_content)
end

#parse_html_content(html_content, bore_content) ⇒ Object



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/khaleesi/generator.rb', line 135

# Evaluates all template logic of html_content (the string is modified in
# place while snippets are replaced):
#   1. every "#foreach (...) ... #end" snippet,
#   2. every "#if chain:prev|next(...) ... #end" snippet,
#   3. the "${scope:value}" variable expressions,
#   4. the first "${decorator:content}" placeholder, which receives
#      bore_content when that is given.
#
# Replacement texts are inserted literally: the block form of sub! is used
# because a replacement STRING is scanned for backslash sequences (\0, \1,
# \\ ...), which corrupted rendered content containing backslashes.
#
# Returns the evaluated html.
def parse_html_content(html_content, bore_content)
  # http://www.ruby-doc.org/core-2.1.0/Regexp.html#class-Regexp-label-Repetition use '.+?' to disable greedy match.
  regexp = /(#foreach\p{Blank}?\(\$(\p{Graph}+)\p{Blank}?:\p{Blank}?\$(\p{Graph}+)\p{Blank}?(asc|desc)?\p{Blank}?(\d*)\)(.+?)#end)/m
  while (foreach_snippet = html_content.match(regexp))
    # because the Regexp cannot skip a unhandled foreach snippet, so we claim every
    # snippet must done successfully, and if not, we shall use blank instead.
    replacement = handle_foreach_snippet(foreach_snippet).to_s
    html_content.sub!(regexp) { replacement }
  end


  regexp = /(#if\p{Blank}chain:(prev|next)\(\$(\p{Graph}+)\)(.+?)#end)/m
  while (chain_snippet = html_content.match(regexp))
    replacement = handle_chain_snippet(chain_snippet).to_s
    html_content.sub!(regexp) { replacement }
  end


  # handle the html content after foreach and chain logical, to avoid that
  # logical included after this handle, such as including markdown files.
  html_content = handle_html_content(html_content)


  # we deal with decorator's content at final because it may slow down
  # the process even cause errors for the "foreach" and "chain" scripts.
  html_content.sub!(/\$\{decorator:content}/) { bore_content } if bore_content

  html_content
end

#parse_html_file(file_path, bore_content = nil) ⇒ Object



125
126
127
128
129
130
131
132
133
# File 'lib/khaleesi/generator.rb', line 125

# Renders the html page at file_path.
#
# file_path    - page (or decorator) file to render.
# bore_content - content to place into the file's "${decorator:content}"
#                placeholder, when the file is used as a decorator.
#
# Returns the rendered html, wrapped into the page's own decorator if it
# declares one.
def parse_html_file(file_path, bore_content=nil)
  # extracting pushes the file's variables; they stay on the stack while the
  # content and its decorator chain are evaluated.
  raw_content = extract_page_structure(file_path).to_s

  evaluated = parse_html_content(raw_content, bore_content)
  decorated = parse_decorator_file(evaluated) # recurse parse

  @variable_stack.pop
  decorated
end

#parse_markdown_file(file_path) ⇒ Object



112
113
114
115
116
117
# File 'lib/khaleesi/generator.rb', line 112

# Renders the markdown page at file_path: converts its content to html and
# wraps the result into the page's decorator if it declares one.
#
# Returns the rendered html.
def parse_markdown_file(file_path)
  # extracting pushes the page's variables; they are needed until the
  # decorator has been resolved.
  markdown_source = extract_page_structure(file_path)
  decorated = parse_decorator_file(handle_markdown(markdown_source))

  @variable_stack.pop
  decorated
end

#produce_variable_regex(var_name) ⇒ Object



555
556
557
# File 'lib/khaleesi/generator.rb', line 555

# Builds the pattern matching a "name: value" line of a page's variable
# section. Capture 3 holds the value; captures 1 and 2 hold the optional
# blank before and after the colon.
def produce_variable_regex(var_name)
  variable_line = %r{^#{var_name}(\p{Blank}?):(\p{Blank}?)(.+)$}
  variable_line
end

#take_page_array(dir_path) ⇒ Object

Searches the directory and its sub-directories, collects all valid files, then sorts them by sequence or create time before returning.



373
374
375
376
377
378
379
380
381
382
383
384
385
# File 'lib/khaleesi/generator.rb', line 373

# Collects every valid page file below dir_path (sub-directories included)
# as Page objects built from the file path and the file's variables.
#
# Returns the pages sorted in descending order of Page#<=>.
def take_page_array(dir_path)
  candidates = Dir.glob("#{dir_path}/**/*")

  pages = candidates.each_with_object([]) do |candidate, collected|
    next unless is_valid_file(candidate)

    # extracting pushes the file's variables onto the stack; take them
    # straight back off for the Page.
    extract_page_structure(candidate)
    collected.push Page.new(candidate, @variable_stack.pop)
  end

  pages.sort! { |a, b| b <=> a }
end