Class: Publishr::HtmlProcessor

Inherits:
Object
  • Object
show all
Defined in:
lib/publishr/html_processor.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(markup = '', inpath = '', metadata = {}, rails_resources_url = '') ⇒ HtmlProcessor

Returns a new instance of HtmlProcessor.



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/publishr/html_processor.rb', line 4

# Sets up the processor state for one HTML document.
#
# @param markup [String] HTML text; it is split on "\n" and processed line by line
# @param inpath [String] directory that may contain an 'html_postprocessing.rb' file
# @param metadata [Hash] stored as-is in @metadata
# @param rails_resources_url [String] stored as-is in @rails_resources_url
def initialize(markup='',inpath='',metadata={},rails_resources_url='')
  @markup = markup
  @lines = markup.split("\n")
  @line = ''
  @inpath = inpath
  @metadata = metadata
  @rails_resources_url = rails_resources_url

  # Load the optional per-project fixes. File.read closes the handle for us,
  # and File.exist? is used because File.exists? was removed in Ruby 3.2.
  # @custom_fixes stays unset (nil) when the file is absent.
  custom_fixes_path = File.join(@inpath, 'html_postprocessing.rb')
  @custom_fixes = File.read(custom_fixes_path) if File.exist?(custom_fixes_path)

  # Blockquote tracking
  @depth = 0
  @quotetype = nil

  # Footnote tracking
  @add_footnote = false
  @process_footnotes = false
  @footnote_number = 0
  @footnote_reference = ''
end

Class Method Details

.sanitize(html) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/publishr/html_processor.rb', line 22

# Wraps runs of quote-styled lines in <blockquote> markers and then strips
# the markup down to a small set of elements via Sanitize.clean.
#
# A "<blockquote>\n" entry is inserted before the first line containing one
# of the quote markers, and a "</blockquote>\n" entry before the next line
# containing one of the body-text markers. Every '<br>' becomes '</p><p>'.
# Finally, bracketed "[HK...]" annotations are removed.
#
# @param html [String] raw HTML, processed one "\n"-separated line at a time
# @return [String] the cleaned HTML
def self.sanitize(html)
  quote_markers = %w[
    MsoQuote BodyTextQuote BodyTextTranscript BodyTextEmail
    body-text-transcript body-text-email QuoteExcerpt quotetranscripts ARIAL
  ]
  body_markers = %w[MsoBodyText BodyTextMod margin-bottom]
  inside_quote = false

  pieces = html.split("\n").each_with_object([]) do |line, out|
    line.gsub!('<br>', '</p><p>')

    if !inside_quote && quote_markers.any? { |marker| line.include?(marker) }
      out << "<blockquote>\n"
      inside_quote = true
    end

    # Checked after the opener on purpose: a line carrying both kinds of
    # marker opens and immediately closes the quote.
    if inside_quote && body_markers.any? { |marker| line.include?(marker) }
      out << "</blockquote>\n"
      inside_quote = false
    end

    out << line
  end

  cleaned = Sanitize.clean(
    pieces.join("\n"),
    :elements => ['b','i','em','strong','code','br','var','p','blockquote','img'],
    :attributes => { 'img' => ['src', 'alt'] }
  )
  cleaned.gsub!(/\[HK.*?\]/, '')
  cleaned
end

Instance Method Details

#add_footnote ⇒ Object

Kindle doesn’t display <ol> list numbers when jumping to a footnote, so replace them with conventional text



116
117
118
119
# File 'lib/publishr/html_processor.rb', line 116

# Kindle doesn't display <ol> list numbers when jumping to a footnote, so
# every bare <p> in the current line is replaced by a rule plus a paragraph
# that carries the footnote's id attribute and a bold "[n]: " label.
# Clears the @add_footnote flag afterwards.
def add_footnote
  labelled_opening = "<hr><p #{ @footnote_reference }><b>[#{ @footnote_number }]</b>: "
  @line.gsub!(/<p>/, labelled_opening)
  @add_footnote = false
end

#add_image_captions ⇒ Object



136
137
138
139
140
141
142
# File 'lib/publishr/html_processor.rb', line 136

# Rewrites a paragraph that wraps a single image so that the image's alt
# text appears below it as a <code> caption inside <p class="image">.
#
# Example inputs:
#   <p class="H"><img src="image.jpg" alt="Image description" /></p>
#   <p width="50" class="H"><img src="image.jpg" alt="Image description" /></p>
#
# The rules are tried in order: paragraphs with a width attribute (the width
# is carried over to the image as a percentage), then paragraphs with a
# class, then bare paragraphs. Returns the result of the last substitution.
def add_image_captions
  caption_rules = [
    [/<p.*?width="(.*?)".*?><img src="(.*?)" alt="(.*?)".*?\/><\/p>/,
     '<p class="image"><img src="\2" width="\1%" style="width: \1%"/><br /><code>\3</code></p>'],
    [/<p.*?class="(.*?)".*?><img src="(.*?)" alt="(.*?)".*?\/><\/p>/,
     '<p class="image"><img src="\2" /><br /><code>\3</code></p>'],
    [/<p><img src="(.*?)" alt="(.*?)".*?\/><\/p>/,
     '<p class="image"><img src="\1" /><br /><code>\2</code></p>']
  ]
  caption_rules.map { |pattern, captioned| @line.gsub!(pattern, captioned) }.last
end

#annotate_blockquote ⇒ Object

Kindle doesn’t recognize <blockquote>, so add class to p tags depending on the blockquote depth



80
81
82
83
84
85
86
87
88
89
90
# File 'lib/publishr/html_processor.rb', line 80

# Kindle doesn't recognize <blockquote>, so add a class to <p> tags that
# depends on the blockquote nesting depth and on the class of the outermost
# blockquote: class="blockquote_<outer class>_<depth>".
#
# State is kept across lines in @depth (current nesting level) and
# @quotetype (MatchData for the outermost blockquote's class, or nil).
# Returns nil when the current line is outside any blockquote.
def annotate_blockquote
  if @line.include?('<blockquote')
    @depth += 1
    # Only the outermost blockquote decides the quote type.
    @quotetype = /<blockquote class="(.*?)">/.match(@line) if @depth == 1
  end

  if @line.include?('</blockquote')
    # Clamp at zero: a stray closing tag must not push the depth negative,
    # which would tag every following paragraph as "blockquote__-1".
    @depth = [@depth - 1, 0].max
    @quotetype = nil if @depth.zero?
  end

  return if @depth.zero?

  # \b keeps the match to real <p> tags; a bare /<p/ would also rewrite
  # <pre>, <param> and other tags that merely start with "p".
  @line.gsub!(/<p\b/, "<p class=\"blockquote_#{ @quotetype[1] if @quotetype }_#{ @depth }\"")
end

#change_footnote_references ⇒ Object



98
99
100
101
102
103
# File 'lib/publishr/html_processor.rb', line 98

# Turns superscript footnote references into plain bracketed links and
# prepares the footnotes container:
#
# * the <sup id="fnref:..."> wrapper and every </sup> are removed,
# * the link text of rel="footnote" anchors becomes " [text]",
# * the footnotes <div> is preceded by a page break and followed by an <h4>
#   holding @metadata['footnote_heading'].
def change_footnote_references
  # Unwrap the reference from its <sup> element.
  @line.gsub!(/<sup id="fnref:.*?">/, '')
  @line.gsub!('</sup>', '')

  # Drop the rel attribute and bracket the reference label.
  @line.gsub!(/rel="footnote">(.*?)<\/a>/, '> [\1]</a>')

  @line.gsub!(/(<div class=.footnotes.>)/) do
    container = Regexp.last_match(1)
    "<br style='page-break-before:always;'>#{ container }<h4>#{ @metadata['footnote_heading'] }</h4>"
  end
end

#change_resources_url_for_rails ⇒ Object



125
126
127
128
# File 'lib/publishr/html_processor.rb', line 125

# Prefixes stylesheet hrefs (*.css) and image sources (*.jpg) in the current
# line with @rails_resources_url.
#
# The patterns stay inside a single attribute value ([^"]) and require a
# literal dot before the extension. Without that, a value such as
# "mycss.html" matched, and a lazy match could start at an unrelated earlier
# href/src on the same line and prefix the wrong URL.
def change_resources_url_for_rails
 @line.gsub!(/href="([^"]*?\.css)/){ "href=\"#{ @rails_resources_url }#{ $1 }" }
 @line.gsub!(/src="([^"]*?\.jpg)/){ "src=\"#{ @rails_resources_url }#{ $1 }" }
end

#degrade ⇒ Object



43
44
45
46
47
48
49
50
51
# File 'lib/publishr/html_processor.rb', line 43

# Runs process_line over every line of the document and returns the result.
#
# Each line is exposed to the processing steps through @line; whatever @line
# holds after process_line is what ends up in the output.
#
# @return [String] the processed lines joined with "\n"
def degrade
  @lines.map do |source_line|
    @line = source_line
    process_line
    @line
  end.join("\n")
end

#improve_typography ⇒ Object



72
73
74
75
76
# File 'lib/publishr/html_processor.rb', line 72

# Expands the inline shorthand found in the current line:
#
#   title(Some Work)  ->  <cite>Some Work</cite>
#   name(someone)     ->  <var>someone</var>
#   (x)               ->  <i>(x)</i>   (a single word character in parentheses)
#
# Returns the result of the last substitution.
def improve_typography
  shorthand_rules = [
    [/title\((.*?)\)/, '<cite>\1</cite>'],
    [/name\((.*?)\)/, '<var>\1</var>'],
    [/(\(\w\))/, '<i>\1</i>']
  ]
  shorthand_rules.map { |pattern, markup| @line.gsub!(pattern, markup) }.last
end

#make_footnote_paragraph ⇒ Object



121
122
123
# File 'lib/publishr/html_processor.rb', line 121

# Marks every <p> tag in the current line with class='footnote'.
#
# The \b restricts the match to real <p> tags; a bare /<p/ would also
# rewrite <pre>, <param> and other tags that merely start with "p".
def make_footnote_paragraph
  @line.gsub!(/<p\b/, "<p class='footnote' ")
end

#make_uppercase ⇒ Object

Kindle doesn’t recognize text-transform: uppercase;



93
94
95
96
# File 'lib/publishr/html_processor.rb', line 93

# Kindle doesn't recognize text-transform: uppercase, so the text is
# upcased here instead:
#
# * <var>...</var> becomes <cite>...</cite> with upcased content,
# * the content of <h1> is upcased (attributes are kept) and an <hr /> is
#   appended after the heading.
def make_uppercase
  @line.gsub!(/<var>(.*?)<\/var>/) do
    "<cite>#{ Regexp.last_match(1).upcase }</cite>"
  end

  @line.gsub!(/<h1(.*?)>(.*?)<\/h1>/) do
    attributes, heading = Regexp.last_match.captures
    "<h1#{ attributes }>#{ heading.upcase }</h1><hr />"
  end
end

#mark_merge_conflicts ⇒ Object



130
131
132
133
134
# File 'lib/publishr/html_processor.rb', line 130

# Colours merge-conflict sections found in the current line:
#
# * "«««" and the rest of the line open a red span,
# * "=======" closes it and opens an orange span in a new paragraph,
# * "»»»" and the rest of the line close the span and the paragraph.
def mark_merge_conflicts
 conflict_start = /«««.*$/
 conflict_end = /»»».*$/

 @line.gsub!(conflict_start, '<span style="color:red;">')
 @line.gsub!('=======', '</span></p><p><span style="color:orange;">')
 @line.gsub!(conflict_end, '</span></p>')
end

#process_footnotes ⇒ Object



105
106
107
108
109
110
111
112
113
# File 'lib/publishr/html_processor.rb', line 105

# Flattens the footnote list markup of the current line:
#
# * attributes are stripped from <p> tags,
# * the id attribute of a footnote <li> is remembered in @footnote_reference,
# * the <li>, </li>, <ol> and </ol> tags are removed,
# * back-links to the reference (href="#fnref...") are removed.
#
# The <li> patterns stop at the end of the attribute / tag instead of
# matching greedily, so footnote text that shares a line with its <li> is
# no longer swallowed. <p\b keeps <pre> and similar tags intact.
def process_footnotes
  @line.gsub!(/<p\b[^>]*>/){ "<p>" }

  if @line.include?('<li id="fn')
    anchor = /<li (id="fn[^"]*")/.match(@line)
    # Keep the previous reference when the id attribute is malformed rather
    # than raising on a nil match.
    @footnote_reference = anchor[1] if anchor
  end

  @line.gsub!(/<li id="fn[^>]*>/, '')
  @line.gsub!(/<\/li>/, '')
  @line.gsub!(/<ol>/, '')
  @line.gsub!(/<\/ol>/, '')
  @line.gsub!(/<a href="#fnref.*<\/a>/, '')
end

#process_line ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/publishr/html_processor.rb', line 53

# Applies every processing step to the current line (@line).
#
# First the footnote state is updated: @process_footnotes is switched on by
# the footnotes <div> and off again by a </div>, and each footnote <li>
# bumps @footnote_number and arms @add_footnote. Then the optional custom
# fixes run, followed by the built-in steps in a fixed order.
def process_line
  if @line.include?('<div class="footnotes">')
    @process_footnotes = true
  end
  if @process_footnotes == true && @line.include?('</div>')
    @process_footnotes = false
  end
  if @line.include?('<li id="fn')
    @add_footnote = true
    @footnote_number += 1
  end

  # NOTE(review): this evaluates the contents of html_postprocessing.rb with
  # full access to this object. Only safe while that file is trusted.
  eval(@custom_fixes, binding) if @custom_fixes

  # Steps that apply to every line.
  annotate_blockquote
  improve_typography
  make_uppercase
  change_footnote_references
  mark_merge_conflicts

  # Steps that depend on the line's content or on configuration.
  add_image_captions if @line.include?('<img')
  change_resources_url_for_rails unless @rails_resources_url.to_s.empty?

  # Footnote steps; the order matters because process_footnotes normalizes
  # the <p> tags the following two steps look for.
  process_footnotes if @process_footnotes == true
  add_footnote if @add_footnote == true && @line.include?('<p>')
  make_footnote_paragraph if @process_footnotes == true && @line.include?('<p')
end