Class: Publishr::HtmlProcessor
- Inherits:
-
Object
- Object
- Publishr::HtmlProcessor
- Defined in:
- lib/publishr/html_processor.rb
Class Method Summary collapse
Instance Method Summary collapse
-
#add_footnote ⇒ Object
Kindle doesn’t display <ol> list numbers when jumping to a footnote, so replace them with conventional text.
- #add_image_captions ⇒ Object
-
#annotate_blockquote ⇒ Object
Kindle doesn’t recognize <blockquote>, so add class to p tags depending on the blockquote depth.
- #change_footnote_references ⇒ Object
- #change_resources_url_for_rails ⇒ Object
- #degrade ⇒ Object
- #improve_typography ⇒ Object
-
#initialize(markup = '', inpath = '', metadata = {}, rails_resources_url = '') ⇒ HtmlProcessor
constructor
A new instance of HtmlProcessor.
- #make_footnote_paragraph ⇒ Object
-
#make_uppercase ⇒ Object
Kindle doesn’t recognize text-transform: uppercase;.
- #mark_merge_conflicts ⇒ Object
- #process_footnotes ⇒ Object
- #process_line ⇒ Object
Constructor Details
#initialize(markup = '', inpath = '', metadata = {}, rails_resources_url = '') ⇒ HtmlProcessor
Returns a new instance of HtmlProcessor.
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
# File 'lib/publishr/html_processor.rb', line 4

# Returns a new instance of HtmlProcessor.
#
# @param markup [String] HTML to process; it is split on "\n" into @lines
# @param inpath [String] directory checked for an optional
#   'html_postprocessing.rb' file
# @param metadata [Hash] document metadata; change_footnote_references reads
#   the 'footnote_heading' key
# @param rails_resources_url [String] prefix used by
#   change_resources_url_for_rails; skipped by process_line when empty
def initialize(markup = '', inpath = '', metadata = {}, rails_resources_url = '')
  @markup = markup
  @lines = markup.split("\n")
  @line = ''
  @inpath = inpath
  @metadata = metadata
  @rails_resources_url = rails_resources_url

  # Optional per-book Ruby snippet; process_line evals it for every line.
  # File.read closes the handle, and File.exist? replaces File.exists?,
  # which no longer exists in Ruby 3.2+.
  custom_fixes_path = File.join(@inpath, 'html_postprocessing.rb')
  @custom_fixes = File.read(custom_fixes_path) if File.exist?(custom_fixes_path)

  # Blockquote state used by annotate_blockquote.
  @depth = 0
  @quotetype = nil

  # Footnote state used by process_line, process_footnotes and add_footnote.
  @add_footnote = false
  @process_footnotes = false
  @footnote_number = 0
  @footnote_reference = ''
end
Class Method Details
.sanitize(html) ⇒ Object
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/publishr/html_processor.rb', line 22

# Wraps quote-styled lines in <blockquote>, then reduces the markup to a small
# whitelist of elements with Sanitize.clean and removes "[HK...]" markers.
#
# A line containing one of the quote markers opens a blockquote; a later line
# containing one of the body markers closes it. Every "<br>" is turned into a
# paragraph break first.
#
# @param html [String] raw HTML, processed line by line
# @return [String] the sanitized HTML
def self.sanitize(html)
  quote_markers = %w[MsoQuote BodyTextQuote BodyTextTranscript BodyTextEmail
                     body-text-transcript body-text-email QuoteExcerpt
                     quotetranscripts ARIAL]
  body_markers = %w[MsoBodyText BodyTextMod margin-bottom]

  modified_lines = []
  quote_enabled = false

  html.split("\n").each do |line|
    line.gsub!('<br>', '</p><p>')

    if !quote_enabled && quote_markers.any? { |marker| line.include?(marker) }
      modified_lines << "<blockquote>\n"
      quote_enabled = true
    end

    # Checked on the same line as the opening test, so a line carrying both
    # kinds of marker opens and immediately closes the quote.
    if quote_enabled && body_markers.any? { |marker| line.include?(marker) }
      modified_lines << "</blockquote>\n"
      quote_enabled = false
    end

    modified_lines << line
  end

  # Close a quote that is still open when the input ends.
  modified_lines << "</blockquote>\n" if quote_enabled

  sanitized_html = Sanitize.clean(
    modified_lines.join("\n"),
    :elements => ['b', 'i', 'em', 'strong', 'code', 'br', 'var', 'p', 'blockquote', 'img'],
    :attributes => { 'img' => ['src', 'alt'] }
  )
  sanitized_html.gsub(/\[HK.*?\]/, '')
end
Instance Method Details
#add_footnote ⇒ Object
Kindle doesn’t display <ol> list numbers when jumping to a footnote, so replace them with conventional text
116 117 118 119 |
# File 'lib/publishr/html_processor.rb', line 116

# Kindle doesn't display <ol> list numbers when jumping to a footnote, so
# replace them with conventional text.
#
# Rewrites each "<p>" in @line into a rule followed by a paragraph that
# carries @footnote_reference as its attributes and starts with the bold
# footnote number, then clears the @add_footnote flag.
def add_footnote
  @line.gsub!(/<p>/, "<hr><p #{@footnote_reference}><b>[#{@footnote_number}]</b>: ")
  @add_footnote = false
end
#add_image_captions ⇒ Object
136 137 138 139 140 141 142 |
# File 'lib/publishr/html_processor.rb', line 136

# Rewrites a paragraph that consists of a single image into an "image"
# paragraph with the alt text shown underneath as a <code> caption.
#
# Three input shapes are handled, most specific first:
#   <p width="50" class="H"><img src="image.jpg" alt="Image description" /></p>
#   <p class="H"><img src="image.jpg" alt="Image description" /></p>
#   <p><img src="image.jpg" alt="Image description" /></p>
# A width attribute on the paragraph becomes a percentage width on the image.
def add_image_captions
  @line.gsub!(/<p.*?width="(.*?)".*?><img src="(.*?)" alt="(.*?)".*?\/><\/p>/,
              '<p class="image"><img src="\2" width="\1%" style="width: \1%"/><br /><code>\3</code></p>')
  @line.gsub!(/<p.*?class="(.*?)".*?><img src="(.*?)" alt="(.*?)".*?\/><\/p>/,
              '<p class="image"><img src="\2" /><br /><code>\3</code></p>')
  @line.gsub!(/<p><img src="(.*?)" alt="(.*?)".*?\/><\/p>/,
              '<p class="image"><img src="\1" /><br /><code>\2</code></p>')
end
#annotate_blockquote ⇒ Object
Kindle doesn’t recognize <blockquote>, so add class to p tags depending on the blockquote depth
80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/publishr/html_processor.rb', line 80

# Kindle doesn't recognize <blockquote>, so add a class to p tags depending on
# the blockquote depth.
#
# Tracks nesting in @depth across lines. The class of the outermost
# blockquote is remembered in @quotetype and becomes part of the generated
# class name: "blockquote_<type>_<depth>".
def annotate_blockquote
  if @line.include?('<blockquote')
    @depth += 1
    @quotetype = /<blockquote class="(.*?)">/.match(@line) if @depth == 1
  end

  if @line.include?('</blockquote')
    # Never drop below zero: a stray closing tag must not leave every later
    # paragraph tagged with a negative depth.
    @depth = [@depth - 1, 0].max
    @quotetype = nil if @depth.zero?
  end

  return if @depth.zero?

  # The lookahead restricts the match to real <p> tags so that <pre>,
  # <param> and similar are left intact.
  @line.gsub!(/<p(?=[\s>])/, "<p class=\"blockquote_#{@quotetype[1] if @quotetype}_#{@depth}\"")
end
#change_footnote_references ⇒ Object
98 99 100 101 102 103 |
# File 'lib/publishr/html_processor.rb', line 98

# Turns superscript footnote references into bracketed inline links and puts
# a page break plus a heading in front of the footnotes section.
#
# The heading text is read from @metadata['footnote_heading'].
def change_footnote_references
  # Unwrap only the <sup> that belongs to a footnote reference; removing
  # every "</sup>" would break ordinary superscripts such as x<sup>2</sup>.
  @line.gsub!(/<sup id="fnref:.*?">(.*?)<\/sup>/, '\1')
  @line.gsub!(/rel="footnote">(.*?)<\/a>/, '> [\1]</a>')
  @line.gsub!(/(<div class=.footnotes.>)/) do
    "<br style='page-break-before:always;'>#{$1}<h4>#{@metadata['footnote_heading']}</h4>"
  end
end
#change_resources_url_for_rails ⇒ Object
125 126 127 128 |
# File 'lib/publishr/html_processor.rb', line 125

# Prefixes stylesheet (.css) hrefs and JPEG (.jpg) image sources in @line
# with @rails_resources_url.
def change_resources_url_for_rails
  # [^"]* keeps the match inside one attribute value, and the escaped dot
  # requires a real file extension rather than any character before "css".
  @line.gsub!(/href="([^"]*?\.css)/) { "href=\"#{@rails_resources_url}#{$1}" }
  @line.gsub!(/src="([^"]*?\.jpg)/) { "src=\"#{@rails_resources_url}#{$1}" }
end
#degrade ⇒ Object
43 44 45 46 47 48 49 50 51 |
# File 'lib/publishr/html_processor.rb', line 43

# Runs process_line over every line of the markup and returns the result.
#
# Each line is exposed to the processing steps through @line.
#
# @return [String] the processed lines joined with "\n"
def degrade
  processed_lines = @lines.map do |current|
    @line = current
    process_line
    @line
  end
  processed_lines.join("\n")
end
#improve_typography ⇒ Object
72 73 74 75 76 |
# File 'lib/publishr/html_processor.rb', line 72

# Expands the lightweight inline markers found in @line:
#   title(...) becomes <cite>...</cite>
#   name(...)  becomes <var>...</var>
#   a single word character in parentheses, e.g. (a), is set in italics
def improve_typography
  @line.gsub!(/title\((.*?)\)/, '<cite>\1</cite>')
  @line.gsub!(/name\((.*?)\)/, '<var>\1</var>')
  @line.gsub!(/(\(\w\))/, '<i>\1</i>')
end
#make_footnote_paragraph ⇒ Object
121 122 123 |
# File 'lib/publishr/html_processor.rb', line 121

# Adds class='footnote' to every paragraph tag in @line.
def make_footnote_paragraph
  # The lookahead limits the match to <p> itself; a bare /<p/ would also
  # rewrite <pre> into "<p class='footnote' re>".
  @line.gsub!(/<p(?=[\s>])/, "<p class='footnote' ")
end
#make_uppercase ⇒ Object
Kindle doesn’t recognize text-transform: uppercase;
93 94 95 96 |
# File 'lib/publishr/html_processor.rb', line 93

# Kindle doesn't recognize text-transform: uppercase;, so the text is
# uppercased here instead.
#
# <var>...</var> becomes an uppercased <cite>...</cite>, and the content of
# every <h1> is uppercased and followed by a horizontal rule.
def make_uppercase
  # Uppercase only the text between tags and entities. A plain upcase would
  # turn "&nbsp;" into "&NBSP;" and change attribute values such as hrefs.
  # With one capture group, split returns text and markup alternately, so
  # the odd positions are the tags and entities to keep as they are.
  upcase_text = lambda do |html|
    html.split(/(<[^>]*>|&#?\w+;)/).each_with_index.map do |part, index|
      index.odd? ? part : part.upcase
    end.join
  end

  @line.gsub!(/<var>(.*?)<\/var>/) do
    inner = $1
    "<cite>#{upcase_text.call(inner)}</cite>"
  end
  @line.gsub!(/<h1(.*?)>(.*?)<\/h1>/) do
    attributes = $1
    title = $2
    "<h1#{attributes}>#{upcase_text.call(title)}</h1><hr />"
  end
end
#mark_merge_conflicts ⇒ Object
130 131 132 133 134 |
# File 'lib/publishr/html_processor.rb', line 130

# Makes leftover merge-conflict markers visible in the output: text after the
# opening marker is wrapped in a red span, and text after the "======="
# separator in an orange span that the closing marker ends.
def mark_merge_conflicts
  @line.gsub!(/«««.*$/, '<span style="color:red;">')
  @line.gsub!('=======', '</span></p><p><span style="color:orange;">')
  @line.gsub!(/»»».*$/, '</span></p>')
end
#process_footnotes ⇒ Object
105 106 107 108 109 110 111 112 113 |
# File 'lib/publishr/html_processor.rb', line 105

# Flattens the footnote list markup in @line: paragraph attributes are
# dropped, the <ol>/<li> wrappers and the "#fnref" back-links are removed,
# and the id attribute of a footnote <li> is stored in @footnote_reference
# so that add_footnote can attach it to the paragraph.
def process_footnotes
  # Match <p> tags only; a bare "<p(.*?)>" would also turn <pre ...> into <p>.
  @line.gsub!(/<p(?:\s[^>]*)?>/) { '<p>' }

  # Keep the capture inside the <li> tag so quotes later on the line are not
  # pulled into the reference, and skip the assignment when nothing matches.
  reference = /<li (id="fn[^>]*")/.match(@line)
  @footnote_reference = reference[1] if reference

  # [^>]* stops at the end of the <li> tag, so a paragraph sharing the line
  # with it survives.
  @line.gsub!(/<li id="fn[^>]*>/, '')
  @line.gsub!(/<\/li>/, '')
  @line.gsub!(/<ol>/, '')
  @line.gsub!(/<\/ol>/, '')
  # Non-greedy: remove each back-link on its own, not everything up to the
  # last </a> on the line.
  @line.gsub!(/<a href="#fnref.*?<\/a>/, '')
end
#process_line ⇒ Object
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/publishr/html_processor.rb', line 53

# Applies every degradation step to the current @line, in a fixed order.
#
# Footnote state is updated first: @process_footnotes is switched on by the
# opening footnotes <div> and off by a "</div>", and every footnote <li>
# requests a numbered paragraph and advances @footnote_number.
def process_line
  @process_footnotes = true if @line.include?('<div class="footnotes">')
  @process_footnotes = false if @process_footnotes && @line.include?('</div>')

  if @line.include?('<li id="fn')
    @add_footnote = true
    @footnote_number += 1
  end

  # NOTE(review): this evals the contents of html_postprocessing.rb from the
  # input directory with full access to this object. Only safe when that
  # directory is trusted; confirm before processing foreign documents.
  eval(@custom_fixes, binding) if @custom_fixes

  annotate_blockquote
  improve_typography
  make_uppercase
  change_footnote_references
  mark_merge_conflicts
  add_image_captions if @line.include?('<img')
  change_resources_url_for_rails unless @rails_resources_url.to_s.empty?
  process_footnotes if @process_footnotes
  add_footnote if @add_footnote && @line.include?('<p>')
  make_footnote_paragraph if @process_footnotes && @line.include?('<p')
end