Class: JekyllImageData::Crawler

Inherits:
Object
  • Object
show all
Defined in:
lib/jekyll-image-data/crawler.rb

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Crawler

Returns a new instance of Crawler.



3
4
5
6
7
8
9
10
11
12
13
14
# File 'lib/jekyll-image-data/crawler.rb', line 3

def initialize
  src = %r{(?:https|http|mailto)?(?:\:/)?/\S+}
  alt = %r{[[[:alnum:]][[:space:]]`~¡!@\#\$%^&*\(\)+=\[\]\{\}\\\|;\:',\.¿\?/_-]+}

  md_image = %r{!\[(.*)\]\((.*)\)}
  md_image_ref = %r{!\[(.*)\]\[(.*)\]}
  html_image = %r{<img.*(src="(#{src})".*alt="(#{alt})"|alt="(#{alt})".*src="(#{src})"|src="(#{src})")}
  include_image = %r{\{\%\s*include\s*image.(liquid|html)\s*(src="(#{src})".*alt="(#{alt})"|alt="(#{alt})".*src="(#{src})")}

  @image = %r{#{md_image}|#{md_image_ref}|#{html_image}|#{include_image}}
  @image_ref = %r{\[(.*)\]:\s*(\S*)}
end

Instance Method Details

#crawl(content, config) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/jekyll-image-data/crawler.rb', line 16

# Collects the images referenced in +content+.
#
# Every match of @image becomes a hash with "url" and "alt". Reference-style
# images are resolved against the "[label]: target" definitions matched by
# @image_ref, and images whose URL matches the configured exclude pattern
# are dropped.
#
# @param content [String] text to scan
# @param config [Hash] configuration; the optional exclude pattern is read
#   from config["image_data"]["exclude"] and passed to Regexp.new
# @return [Array<Hash>] one { "url" => ..., "alt" => ... } hash per kept image,
#   in order of appearance
def crawl(content, config)
  images = []
  exclude = config.dig("image_data", "exclude")

  content.scan(@image) do |match|
    # Indices are the capture groups of @image; for any one match only the
    # groups of the alternative that matched are non-nil.
    src = match[1] || match[5] || match[8] || match[9] || match[12] || match[15] || ""
    alt = match[0] || match[2] || match[6] || match[7] || match[13] || match[14] || ""
    ref = match[3] || ""
    images << { "url" => src, "alt" => alt, "ref" => ref }
  end

  # label => target; when a label is defined more than once the last one wins.
  references = content.scan(@image_ref).to_h

  images.each do |image|
    # "ref" is internal bookkeeping and must not leak into the result.
    ref = image.delete("ref")
    image["url"] = references[ref] if references.key?(ref)
  end

  if exclude
    excluded = Regexp.new(exclude)
    # Filter after the pass above: deleting from the array while iterating it
    # skips the element that follows each deletion.
    images.reject! { |image| excluded.match(image["url"]) }
  end

  images
end