Class: Display

Inherits:

Object

Object
Display

show all

Includes:: CharacterCleaner

Defined in:: lib/display.rb

Overview

This class formats feeds and entries as plain text for human consumption.

Instance Attribute Summary collapse

#width ⇒ Object readonly

Returns the value of attribute width.

Instance Method Summary collapse

#display_entries(entries) ⇒ Object
#display_entry(entry, show_title_and_feed = true) ⇒ Object
#display_feed(entry) ⇒ Object
#display_raw_entry_content(entry) ⇒ Object
#display_title(entry) ⇒ Object
#display_title_and_feed(entry) ⇒ Object
#divider ⇒ Object
#html_to_text(html) ⇒ Object
#initialize(options = {}) ⇒ Display constructor

Stores the given options hash and sets the display width from `:width` (default 80). In the future, may allow more configuration options here, so that you can customize the Display object.
#links_to_footnotes(html) ⇒ Object
#list_feeds(feeds) ⇒ Object
#normalize_blank_lines(text) ⇒ Object

Make sure there is no more than one blank line anywhere.
#tags_to_text(html) ⇒ Object
#unwrap(text) ⇒ Object
#wrap_text(txt, col = @width) ⇒ Object

From blog.macromates.com/2006/wrapping-text-with-regular-expressions/.

Methods included from CharacterCleaner

#process_entities_and_utf

Constructor Details

#initialize(options = {}) ⇒ `Display`

Stores the given options hash and sets the display width from `:width` (default 80). In the future, may allow more configuration options here, so that you can customize the Display object.

# File 'lib/display.rb', line 13

# Builds a Display from an options hash.
#
# options - Hash of settings, kept as-is in @options. Keys read by the
#           methods on this page: :width (here), :curses, :simple and
#           :no_links.
#
# The output width falls back to 80 columns when options[:width] is unset.
def initialize(options={})
  @options = options
  @width = @options[:width] || 80
end

Instance Attribute Details

#width ⇒ `Object` (readonly)

Returns the value of attribute width.



10
11
12

# File 'lib/display.rb', line 10

# Returns the output width stored in @width (set in the constructor from
# options[:width], defaulting to 80).
def width
  @width
end

Instance Method Details

#display_entries(entries) ⇒ `Object`

# File 'lib/display.rb', line 28

# Shows a list of entries.
#
# entries - collection of entry objects.
#
# With the :curses option set, the work is handed to a CursesController.
# Otherwise each entry is printed to stdout, preceded by a row of dashes
# @width characters wide.
def display_entries(entries)
  return CursesController.new(self).show_entries(entries) if @options[:curses]

  entries.each do |entry|
    puts '-' * @width
    puts display_entry(entry)
  end
end

#display_entry(entry, show_title_and_feed = true) ⇒ `Object`

# File 'lib/display.rb', line 39

# Renders one entry as plain text.
#
# entry               - object responding to feed, last_updated, title,
#                       description, content, url and categories.
# show_title_and_feed - when true, start with the "feed | date" line and
#                       the wrapped title.
#
# Returns a String whose sections are separated by blank lines.
def display_entry(entry, show_title_and_feed = true)
  sections = []

  if show_title_and_feed
    sections.push("#{entry.feed.title} | #{entry.last_updated.strftime('%A, %B %d %Y')}")
    sections.push(wrap_text(html_to_text(entry.title.strip)))
  end

  # A description shorter than the content is treated as a summary and is
  # shown under its own heading before the content.
  has_summary = entry.description && entry.content &&
                (entry.description.strip.length < entry.content.strip.length)
  if has_summary
    sections.push("Entry Summary:\n\n" + html_to_text(entry.description).strip)
    sections.push(divider.strip)
    sections.push("Entry Content:")
  end

  # Without any content, the description stands in as the main text.
  content_missing = entry.content.nil? || entry.content.strip == ''
  main_html = content_missing ? (entry.description || '') : (entry.content || '')
  sections.push(html_to_text(main_html).strip)

  sections.push(entry.url) unless @options[:simple]
  sections.push("Categories: #{entry.categories.join(", ")}") unless entry.categories.empty?

  sections.join("\n\n")
end

#display_feed(entry) ⇒ `Object`



80
81
82

# File 'lib/display.rb', line 80

# Builds the "feed title | publication date" line for an entry.
#
# entry - object responding to feed (which responds to title) and to
#         date_published (which responds to strftime).
#
# Returns a String such as "My Feed | Thursday, January 02 2020".
def display_feed(entry)
  feed_title = entry.feed.title
  published_on = entry.date_published.strftime('%A, %B %d %Y')
  "#{feed_title} | #{published_on}"
end

#display_raw_entry_content(entry) ⇒ `Object`

# File 'lib/display.rb', line 84

# Renders an entry's description and content without converting HTML to
# text; each part is only passed through process_entities_and_utf.
#
# entry - object responding to description and content (either may be nil).
#
# Returns the wrapped, stripped String. The summary section, when present,
# starts with a divider, and sections are joined by a divider.
def display_raw_entry_content(entry)
  sections = []

  if entry.description
    summary = process_entities_and_utf(entry.description)
    sections.push(divider + "Entry Summary\n\n" + summary)
  end

  if entry.content
    body = process_entities_and_utf(entry.content)
    sections.push("Entry Content\n\n" + body)
  end

  wrapped = wrap_text(sections.join(divider))
  wrapped.strip
end

#display_title(entry) ⇒ `Object`



76
77
78

# File 'lib/display.rb', line 76

# Returns the entry's title converted from HTML to text and wrapped to the
# display width.
#
# entry - object whose title responds to strip.
def display_title(entry)
  plain_title = html_to_text(entry.title.strip)
  wrap_text(plain_title)
end

#display_title_and_feed(entry) ⇒ `Object`

# File 'lib/display.rb', line 69

# Builds the two-part heading for an entry: the "feed title | last updated"
# line, a blank line, then the wrapped plain-text title.
#
# entry - object responding to feed, last_updated and title.
#
# Returns a String.
def display_title_and_feed(entry)
  feed_line = "#{entry.feed.title} | #{entry.last_updated.strftime('%A, %B %d %Y')}"
  title_text = wrap_text(html_to_text(entry.title.strip))
  [feed_line, title_text].join("\n\n")
end

#divider ⇒ `Object`



246
247
248

# File 'lib/display.rb', line 246

# Returns a horizontal rule: a newline, @width dashes, and a newline.
def divider
  "\n#{'-' * @width}\n"
end

#html_to_text(html) ⇒ `Object`

# File 'lib/display.rb', line 95

# Converts an HTML fragment (or plain text) into wrapped plain text, with
# link targets listed as footnotes after the text.
#
# html - String of HTML or plain text; nil is treated as an empty string.
#
# Returns a new, stripped String. The argument itself is never modified.
def html_to_text(html)
  LOGGER.debug("html_to_text:\n #{html}")

  # Work on a stripped copy. The previous in-place strip! altered the
  # caller's string (e.g. an entry's description) and raised on frozen or
  # nil input.
  html = html.to_s.strip

  # Entity / UTF clean-up, provided by CharacterCleaner.
  html = process_entities_and_utf(html)

  # Multi-line text without any tags: wrap what looks like paragraphs in <p>
  # tags so the paragraph rule in tags_to_text applies. A closing tag also
  # matches /<[^>]+>/, so a single tag test is sufficient.
  if html =~ /\n/ && html !~ /<[^>]+>/
    html = html.split("\n\n").map { |para| "<p>#{para.strip}</p>" }.join("\n")
  end

  # links_to_footnotes returns [converted_html, *footnote_lines].
  html, *links = links_to_footnotes(html)
  html = tags_to_text(html)
  html = normalize_blank_lines(html)
  html = wrap_text(html)

  # Footnotes follow the body after a blank line; the final strip removes the
  # separator when there are none.
  [html, links.join("\n")].join("\n\n").strip
end

#links_to_footnotes(html) ⇒ `Object`

# File 'lib/display.rb', line 218

# Replaces every <a href> element in the HTML with plain text.
#
# html - String of HTML, parsed with Hpricot.
#
# With the :no_links option, a link becomes "[link text]" (or nothing when
# the text is empty). Otherwise the link text is followed by a numbered
# marker such as "[1]" and a matching "  [1] href" footnote is collected.
#
# Returns an Array: the converted HTML first, then one String per footnote.
def links_to_footnotes(html)
  doc = Hpricot(html)
  notes = []

  doc.search('//a[@href]') do |anchor|
    label = anchor.inner_text
    if @options[:no_links]
      anchor.swap(label == '' ? '' : "[#{label}]")
    else
      notes << "  [#{notes.size + 1}] #{anchor.attributes['href']}"
      anchor.swap(label + "[#{notes.size}]")
    end
  end

  [doc.to_s] + notes
end

#list_feeds(feeds) ⇒ `Object`

# File 'lib/display.rb', line 19

# Lists the given feeds.
#
# feeds - collection of objects accepted by display_feed.
#
# With the :curses option set, the work is handed to a CursesController.
# Otherwise one line per feed is printed to stdout. Previously the string
# built by display_feed was discarded, so this branch displayed nothing.
#
# NOTE(review): display_feed calls .feed and .date_published on its argument;
# confirm the objects passed here respond to both.
#
# Returns the CursesController result, or feeds itself in the plain case.
def list_feeds(feeds)
  return CursesController.new(self).show_feeds(feeds) if @options[:curses]

  feeds.each { |feed| puts display_feed(feed) }
end

#normalize_blank_lines(text) ⇒ `Object`

Make sure there is no more than one blank line anywhere

# File 'lib/display.rb', line 120

# Make sure there is no more than one blank line between pieces of text.
#
# text - String to clean up.
#
# Returns a new String.
def normalize_blank_lines(text)
  # get rid of ms line feeds
  unix_text = text.gsub(/\r\n/, "\n")
  # turn whitespace-only lines into bare newlines
  blank_marked = unix_text.gsub(/^\s*$/, "\n")
  # split on every run of two or more newlines and rejoin the pieces with
  # exactly one blank line between them
  pieces = blank_marked.split(/\n\n\n*/)
  pieces.join("\n\n")
end

#tags_to_text(html) ⇒ `Object`

# File 'lib/display.rb', line 130

# Converts HTML markup into plain-text conventions.
#
# html - String of HTML, parsed with Hpricot.
#
# The document is rewritten in place, one family of tags at a time: each
# search block swaps the matched element for a text rendering (or removes
# it). The steps run in the order written; block-level containers (lists,
# paragraphs, divs) are handled last, after the elements inside them.
#
# Returns the rewritten document as a String (doc.to_s).
def tags_to_text(html)
  doc = Hpricot(html)

  # Drop HTML comments.
  doc.search('//comment()').remove

  # Quoted / preformatted blocks: collapse whitespace, wrap 4 columns
  # narrower than the display and indent every line by 4 spaces.
  doc.search('blockquote, pre') do |x|
    # compress extra spaces
    text = x.inner_text.squeeze(' ').strip
    # collapse the spacing in the text
    text.gsub!(/\s{2,}/, ' ')
    text = wrap_text(text, @width - 4).gsub(/^/, '    ') # indent 4 spaces
    x.swap("\n\n" + text + "\n\n")
  end

  # Headings become a single "= Heading" line; empty headings are dropped.
  doc.search('big, h1,h2,h3,h4') do |p|
    if p.inner_text.strip == ''
      p.swap('')
    else
      p.swap( "\n\n= #{unwrap(p.inner_text)}\n\n" )
    end
  end
  # Images are replaced by a fixed "(img)" placeholder.
  doc.search('//img') do |img|
    img.swap( "(img)" )
  end
  
  # Elements with no text rendering here are removed entirely.
  doc.search('svg, object').remove
  doc.search('table').remove
  doc.search('script').remove

  # Line breaks become newlines.
  doc.search('//br') do |p|
    p.swap( "\n" )
  end

  # Emphasis is marked with surrounding asterisks.
  doc.search('i, b, strong, em') do |p|
    p.swap( "*#{p.inner_text}*" )
  end
  # Abbreviations and inline code are marked with surrounding plus signs.
  doc.search('abbr, code') do |p|
    p.swap( "+#{p.inner_text}+" )
  end
  #
  # anchor tags are processed after real links 
  # (html_to_text calls links_to_footnotes, which rewrites a[@href], before
  # calling this method); any <a> still present is reduced to its text.
  doc.search('a') do |p|
    p.swap( "#{p.inner_text}" )
  end
  # Definition terms keep their inner HTML and gain a trailing colon.
  doc.search('dt') do |x|
    x.swap( "#{x.inner_html}:\n" )
  end
  # This could be improved to ensure an indentation even if there are nested
  # tag elements
  doc.search('dd') do |x|
    x.swap( "#{x.inner_text}\n" )
  end


  # Spans are reduced to their text.
  doc.search('//span') do |s|
    s.swap( s.inner_text )
  end
  # Horizontal rules become a row of dashes @width characters wide.
  doc.search('hr') do |s|
    s.swap( '-' * @width)
  end
  # Do this before erasing the enclosing <ul> or <ol> tags
  doc.search('li') do |s|
    text = s.inner_text.strip
    # wrap the text and indent it 2 spaces
    text = wrap_text(text, @width - 2).gsub(/^/, '  ') 
    # don't indent 1st line
    text.lstrip!
    s.swap( "* " + text  + "\n" )
  end
  # List containers are reduced to the (already converted) text inside them.
  doc.search('ul, ol, dl') do |s|
    s.swap(  s.inner_text.strip  )
  end

  # Paragraphs are joined onto one line and set off by blank lines.
  doc.search('p') do |p|
    p.swap( "\n\n" + unwrap(p.inner_text) + "\n\n" )
  end

  # Divs keep their inner line breaks but are also set off by blank lines.
  doc.search('div') do |p|
    p.swap( "\n\n" + p.inner_text.strip + "\n\n" )
  end

  doc.to_s
end

#unwrap(text) ⇒ `Object`



214
215
216

# File 'lib/display.rb', line 214

# Joins multi-line text into a single line.
#
# text - String to flatten.
#
# Newlines become spaces, runs of spaces are squeezed to one, and leading
# and trailing whitespace is removed. Returns a new String.
def unwrap(text)
  single_line = text.tr("\n", ' ')
  single_line.squeeze(' ').strip
end

#wrap_text(txt, col = @width) ⇒ `Object`

From blog.macromates.com/2006/wrapping-text-with-regular-expressions/



241
242
243

# File 'lib/display.rb', line 241

# Wraps text so that no line is longer than col characters.
# From blog.macromates.com/2006/wrapping-text-with-regular-expressions/
#
# txt - String to wrap.
# col - maximum line length (defaults to the display width).
#
# The first alternative of the pattern takes up to col characters that end
# at a run of spaces or at a line end, so lines break between words; the
# second alternative hard-breaks a word longer than col. Every matched
# piece is emitted followed by a newline.
#
# gsub is called on the String itself: on Ruby 1.9+ String#chars returns an
# Array/Enumerator, which has no gsub, so the former txt.chars.gsub raised
# NoMethodError.
#
# Returns the wrapped String.
def wrap_text(txt, col = @width)
  txt.gsub(/(.{1,#{col}})( +|$\n?)|(.{1,#{col}})/, "\\1\\3\n")
end

Class: Display

Overview

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from CharacterCleaner

Constructor Details

#initialize(options = {}) ⇒ Display

Instance Attribute Details

#width ⇒ Object (readonly)

Instance Method Details

#display_entries(entries) ⇒ Object

#display_entry(entry, show_title_and_feed = true) ⇒ Object

#display_feed(entry) ⇒ Object

#display_raw_entry_content(entry) ⇒ Object

#display_title(entry) ⇒ Object

#display_title_and_feed(entry) ⇒ Object

#divider ⇒ Object

#html_to_text(html) ⇒ Object

#links_to_footnotes(html) ⇒ Object

#list_feeds(feeds) ⇒ Object

#normalize_blank_lines(text) ⇒ Object

#tags_to_text(html) ⇒ Object

#unwrap(text) ⇒ Object

#wrap_text(txt, col = @width) ⇒ Object