Class: EventDb::EventReader::MarkdownParser

Inherits:
Object
  • Object
show all
Includes:
LogUtils::Logging
Defined in:
lib/eventdb/reader.rb

Constant Summary collapse

## Maps English month names to month numbers (as strings).
##   Both the three-letter abbreviation and a few longer spellings
##   (March, April, June, July, Sept) are accepted.
##   note: key order matters - the keys get joined (in order) into the MONTH_EN regex alternation.
##   Frozen because it is a shared lookup table.
MONTH_EN_TO_MM =
{
'Jan' => '1',
'Feb' => '2',
'Mar' => '3', 'March' => '3',
'Apr' => '4', 'April' => '4',
'May' => '5',
'Jun' => '6', 'June' => '6',
'Jul' => '7', 'July' => '7',
'Aug' => '8',
'Sep' => '9', 'Sept' => '9',
'Oct' => '10',
'Nov' => '11',
'Dec' => '12' }.freeze
MONTH_EN =

e.g. ‘Jan|Feb|Mar|March|…’ (all keys of MONTH_EN_TO_MM, joined in definition order)

MONTH_EN_TO_MM.keys.join('|')
DATE_ENTRY_RE =

examples:

  • 2015 @ Salzburg, Austria; Oct/17+18

  • 2015 @ Brussels / Brussel / Bruxelles; Jan/31+Feb/1

  • 2014 @ Porto de Galinhas, Pernambuco; Apr/24-27 (formerly: Abril Pro Ruby)

/(?<year>20\d\d)   ## year
\s+
 @            ## at location
\s+
[^;]+        ##  use ; as separator between place and date
;
\s+
(?<start_month_en>#{MONTH_EN})
\/
(?<start_day>[0-9]{1,2})          ## start date
(?:
  [+\-]     ## use + for two days, - for more than two days
  (?:
    (?<end_month_en>#{MONTH_EN})
    \/
  )?   ## optional end_month
  (?<end_day>[0-9]{1,2})
)? ## optional end_date
/x
LINK_ENTRY_RE =

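example (a markdown link, i.e. link text in square brackets followed by the target in parentheses): `[euruko.org](http://euruko.org)`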

/\[
    [^\]]+
  \]
  \(
   [^\)]+
  \)
/x

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text) ⇒ MarkdownParser

Returns a new instance of MarkdownParser.



151
152
153
# File 'lib/eventdb/reader.rb', line 151

## Keeps the text to parse; nothing is parsed yet at construction time
##   (the stored text gets read when #parse runs).
def initialize( text )
  @text = text
end

Class Method Details

.parse(text) ⇒ Object



146
# File 'lib/eventdb/reader.rb', line 146

## Convenience shortcut: builds a new parser for the passed-in text
##   and returns whatever its #parse returns.
def self.parse( text )
  parser = new( text )
  parser.parse
end

Instance Method Details

#find_title_and_link(line) ⇒ Object

helper



326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
# File 'lib/eventdb/reader.rb', line 326

## Extracts the event title and the (first) link target from a link entry line.
##
##  Two title formats are tried (in order):
##   1) "new" format - title in bold at the start of the line e.g.
##      **European Ruby Konference - EuRuKo** (web: [euruko.org](http://euruko.org), t: [euruko](https://twitter.com/euruko)) - _since 2003_
##   2) "old" classic format - title is the text of a markdown link at the start of the line e.g.
##      [Oktoberfest ("Die Wiesn")](http://www.muenchen.de/veranstaltungen/oktoberfest.html)
##
##  Returns the pair [title, link]; link is nil (plus a warning gets printed) if the line has no markdown link.
##  Prints an error and terminates the process (exit 1) if no title can be found.
def find_title_and_link( line )
  bold_title = /^\*{2}([^*]+)\*{2}/.match( line )      ## note: **title** must start line
  link_title = bold_title ? nil : /^\[([^\]]+)\]/.match( line )   ## note: [title](link) must start line

  title = if bold_title
            puts "  adding (new/modern format) => #{bold_title[1]}"
            bold_title[1]
          elsif link_title
            puts "  adding (old/classic format) => #{link_title[1]}"
            link_title[1]
          else
            puts "*** !! ERROR !!: cannot find event title in #{line}"
            exit 1
          end

  ## link is always taken from the first markdown link found anywhere in the line
  ##   todo/fix: use shared markdown link regex!!!!!
  first_link = /\[[^\]]+\]\(([^\)]+)\)/.match( line )
  if first_link
    puts "                               => @ #{first_link[1]}"
  else
    puts "*** !! WARN !!: cannot find event link in #{line}"
  end

  [title, first_link && first_link[1]]
end

#parse ⇒ Object



210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
# File 'lib/eventdb/reader.rb', line 210

## Walks the outline nodes returned by OutlineReader.parse( @text ) and
## builds one Event per date entry.
##
##  - heading nodes (:h1..:h6) maintain a stack of [level, heading] pairs;
##    all headings below the first one (the page title) get appended to the event place
##  - a list item matching LINK_ENTRY_RE is remembered as the current link entry
##  - a list item matching DATE_ENTRY_RE becomes an Event; title and link
##    are taken from the last remembered link entry
##  - parsing stops at the first heading starting with More, Calendar, Thanks or Meta
##
##  Returns an array of Event objects (empty if no date entry was found).
##  Raises a RuntimeError (via fail) if a heading level steps up by more than one;
##  Date.new raises for impossible dates (e.g. Feb/30).
def parse
  events = []
  stack  = []   ## heading stack of [level, heading] pairs; note: stack.size is the current level; starts w/ 0

  last_link_entry = nil


  nodes = OutlineReader.parse( @text )
  nodes.each do |node|

    if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
      heading = node[1]
      #  stop when hitting a More, Calendar, Thanks or Meta section
      break if heading =~ /^(More|Calendar|Thanks|Meta)\b/

      # skip "pseudo" headings (for contribs etc.)
      ##  e.g. #### _Contributions welcome. Anything missing? Send in a pull request. Thanks._
      ##  note: matched case-insensitive, so "Anything missing?" (as in the example) is caught on its own too
      next if heading =~ /Contributions welcome\.|Anything missing\?/i


      level = node[0][1].to_i    ## e.g. :h3 => 3

      logger.debug " heading level: #{level}, title: >#{heading}<"

      level_diff = level - stack.size

      if level_diff > 0
        logger.debug "[EventReader]    up  +#{level_diff}"
        if level_diff > 1
          logger.error "fatal: level step must be one (+1) is +#{level_diff}"
          fail "[EventReader] level step must be one (+1) is +#{level_diff}"
        end
      elsif level_diff < 0
        logger.debug "[EventReader]    down #{level_diff}"
        ## drop the deeper levels plus the previous heading on the new level
        (level_diff.abs + 1).times { stack.pop }
      else
        ## same level - replace the previous heading
        stack.pop
      end
      stack.push( [level, heading] )
      logger.debug "  stack: #{stack.inspect}"

    elsif [:li].include?( node[0] )    ## list item
      line = node[1]

      if LINK_ENTRY_RE.match( line )
        logger.debug " link entry: #{line}"

        last_link_entry = line
      elsif m=DATE_ENTRY_RE.match( line )
        year           = m[:year].to_i

        start_month_en = m[:start_month_en]
        start_day      = m[:start_day]

        start_month    = MONTH_EN_TO_MM[ start_month_en ]
        start_date = Date.new( year, start_month.to_i, start_day.to_i )


        end_month_en   = m[:end_month_en] || start_month_en   # no end month; use same as start
        end_day        = m[:end_day]      || start_day        # no end day; single day event (use start day)

        end_month     = MONTH_EN_TO_MM[ end_month_en ]
        ## an end month before the start month (e.g. Dec/31+Jan/1) means
        ##   the event runs into the next year; otherwise end_date would precede start_date
        end_year      = end_month.to_i < start_month.to_i ? year + 1 : year
        end_date = Date.new( end_year, end_month.to_i, end_day.to_i )

        logger.debug " date entry: #{line}"
        logger.debug "   start_date: #{start_date}, year: #{year}, start_month_en: #{start_month_en}, start_month: #{start_month} start_day: #{start_day} => #{last_link_entry}"
        logger.debug "   end_date: #{end_date}, end_month_en: #{end_month_en}, end_day_en: #{end_day}"


        s = StringScanner.new( line )
        s.skip_until( /@/ )

        place = s.scan( /[^;]+/ ) ## get place (everything until ; (separator))
        place = place.strip
        logger.debug "  place: #{place}, rest: >#{s.rest}<"

        ## todo/fix: make place uniform e.g. change
        ##    Vienna, Austria  =>
        ##    Vienna › Austria       - why? why not?

        ## note: cut off heading 1 (e.g. page title); innermost heading comes first
        ##   drop(1) is safe for an empty stack (date entry before any heading) and
        ##   joining the parts avoids a trailing ", " if there are no more headings
        more_places = stack.drop( 1 ).reverse.map { |entry| entry[1] }
        place = [place, *more_places].join( ', ' )     ## was: join(' › ')
        logger.debug "  place: #{place}"


        title, link = find_title_and_link( last_link_entry )


        event = Event.new( title, link,
                           place,
                           start_date, end_date )

        events << event
      else
        logger.debug "  *** skip list item line: #{line}"
      end
    else
      logger.debug "  *** skip node:"
      pp node
    end
  end

  events
end