Class: Display

Inherits:
Object
  • Object
show all
Includes:
CharacterCleaner
Defined in:
lib/display.rb

Overview

This class formats feed entries and feed listings as plain text for human consumption.

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from CharacterCleaner

#process_entities_and_utf

Constructor Details

#initialize(options = {}) ⇒ Display

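Accepts an options hash; the class currently reads :width, :curses, :simple and :no_links from it. More configuration options may be added in the future so that you can further customize the Display object.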



13
14
15
16
17
# File 'lib/display.rb', line 13

# Builds a Display.
#
# options - Hash of settings. It is kept as-is in @options so other methods
#           can consult it later (this class reads :curses, :simple and
#           :no_links from it). options[:width] sets the wrap width; when it
#           is missing, nil or false the width falls back to 80 columns.
def initialize(options = {})
  @width = options[:width] || 80
  @options = options
end

Instance Attribute Details

#width ⇒ Object (readonly)

Returns the value of attribute width.



10
11
12
# File 'lib/display.rb', line 10

# Read-only accessor for @width, the column count used by this class when
# wrapping text and drawing divider lines. The constructor sets it from
# options[:width], falling back to 80.
def width
  @width
end

Instance Method Details

#display_entries(entries) ⇒ Object



28
29
30
31
32
33
34
35
36
37
# File 'lib/display.rb', line 28

# Shows a collection of entries.
#
# When @options[:curses] is set, the work is handed to a CursesController
# built around this Display, and its result is returned. Otherwise every
# entry is written to standard output, preceded by a rule of @width dashes,
# and the entries collection itself is returned.
def display_entries(entries)
  return CursesController.new(self).show_entries(entries) if @options[:curses]

  entries.each do |entry|
    # The rule is printed before the entry is rendered, so it still appears
    # if rendering raises.
    puts('-' * @width)
    puts(display_entry(entry))
  end
end

#display_entry(entry, show_title_and_feed = true) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/display.rb', line 39

# Renders one entry as plain text and returns it as a single String whose
# sections are separated by blank lines.
#
# entry               - object responding to feed, last_updated, title,
#                       description, content, url and categories.
# show_title_and_feed - when true (the default) the output starts with a
#                       "feed title | date" line followed by the entry title.
#
# Sections, in order: optional header, optional summary block, the main text,
# the entry URL (skipped when @options[:simple] is set) and a category list
# (skipped when there are no categories).
def display_entry(entry, show_title_and_feed = true)
  sections = []

  if show_title_and_feed
    sections.push(
      "#{entry.feed.title} | #{entry.last_updated.strftime('%A, %B %d %Y')}",
      wrap_text(html_to_text(entry.title.strip))
    )
  end

  # A description shorter than the content is treated as a summary and is
  # shown separately, ahead of the full content.
  description_is_summary = entry.description && entry.content &&
                           (entry.description.strip.length < entry.content.strip.length)
  if description_is_summary
    sections << "Entry Summary:\n\n" + html_to_text(entry.description).strip
    sections << divider.strip
    sections << "Entry Content:"
  end

  # With no usable content, the description stands in as the main text.
  content_missing = entry.content.nil? || entry.content.strip == ''
  main_source = content_missing ? entry.description : entry.content
  sections << html_to_text(main_source || '').strip

  sections << entry.url unless @options[:simple]
  sections << "Categories: #{entry.categories.join(", ")}" unless entry.categories.empty?

  sections.join("\n\n")
end

#display_feed(entry) ⇒ Object



80
81
82
# File 'lib/display.rb', line 80

# Returns a one-line header of the form "<feed title> | <publication date>"
# for the given entry. The date comes from entry.date_published and is
# formatted as weekday, month name, day and year.
def display_feed(entry)
  feed_title = entry.feed.title
  published_on = entry.date_published.strftime('%A, %B %d %Y')
  "#{feed_title} | #{published_on}"
end

#display_raw_entry_content(entry) ⇒ Object



84
85
86
87
88
89
90
91
92
93
# File 'lib/display.rb', line 84

# Returns the entry's description and content without HTML-to-text
# conversion: each present part is passed through process_entities_and_utf
# only, labelled, separated by divider lines, wrapped and stripped.
#
# A missing description or content is simply left out; with neither present
# the result is whatever wrap_text yields for an empty string, stripped.
def display_raw_entry_content(entry)
  rule = divider
  sections = []

  summary = entry.description
  sections.push(rule + "Entry Summary\n\n" + process_entities_and_utf(summary)) if summary

  body = entry.content
  sections.push("Entry Content\n\n" + process_entities_and_utf(body)) if body

  wrap_text(sections.join(rule)).strip
end

#display_title(entry) ⇒ Object



76
77
78
# File 'lib/display.rb', line 76

# Returns the entry's title as wrapped plain text: the title is stripped,
# converted with html_to_text and then passed through wrap_text at the
# default width.
def display_title(entry)
  trimmed_title = entry.title.strip
  plain_title = html_to_text(trimmed_title)
  wrap_text(plain_title)
end

#display_title_and_feed(entry) ⇒ Object



69
70
71
72
73
74
# File 'lib/display.rb', line 69

# Returns a two-part header for an entry: a "<feed title> | <last updated
# date>" line and the entry title as wrapped plain text, separated by a
# blank line.
def display_title_and_feed(entry)
  feed_line = "#{entry.feed.title} | #{entry.last_updated.strftime('%A, %B %d %Y')}"
  title_text = wrap_text(html_to_text(entry.title.strip))
  [feed_line, title_text].join("\n\n")
end

#divider ⇒ Object



246
247
248
# File 'lib/display.rb', line 246

# Returns a horizontal rule: a line of @width dashes with a newline before
# and after it.
def divider
  "\n#{'-' * @width}\n"
end

#html_to_text(html) ⇒ Object



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/display.rb', line 95

# Converts an HTML (or plain-text) fragment into wrapped plain text.
#
# html - String to convert. It is not modified; frozen strings are accepted.
#
# Steps: strip, run process_entities_and_utf (from CharacterCleaner;
# presumably normalizes entities and UTF-8 - confirm there), wrap tagless
# multi-line text in <p> tags, pull links out as footnotes, turn the remaining
# tags into text conventions, tidy blank lines and wrap to the default width.
#
# Returns the converted text followed by a blank line and the footnote lines
# (one per line), with surrounding whitespace stripped.
def html_to_text(html)
  LOGGER.debug("html_to_text:\n #{html}")

  # Work on a stripped copy: an in-place strip! would alter the caller's
  # string and raise FrozenError for frozen input.
  html = process_entities_and_utf(html.strip)

  # If there are no tags and the text spans several lines, wrap what looks
  # like paragraphs in <p> tags so the paragraph rule in tags_to_text applies.
  # (The single pattern also covers closing tags such as </p>.)
  if html =~ /\n/ && html !~ /<[^>]+>/
    html = html.split("\n\n").collect { |x| "<p>#{x.strip}</p>" }.join("\n")
  end

  html, *links = links_to_footnotes(html)
  html = tags_to_text(html)
  html = normalize_blank_lines(html)
  html = wrap_text(html)

  [html, links.join("\n")].join("\n\n").strip
end


#links_to_footnotes(html) ⇒ Object



218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# File 'lib/display.rb', line 218

# Replaces every <a href=...> element in the HTML with plain text.
#
# With @options[:no_links] set, a link becomes "[link text]" (or nothing when
# it has no text) and no footnotes are collected. Otherwise the link becomes
# "link text[N]" and a footnote line "  [N] <href>" is recorded, numbered
# from 1 in document order.
#
# Returns an Array whose first element is the rewritten HTML and whose
# remaining elements are the footnote lines.
def links_to_footnotes(html)
  doc = Hpricot(html)
  footnotes = []

  doc.search('//a[@href]') do |link|
    label = link.inner_text
    if @options[:no_links]
      link.swap(label == '' ? '' : "[#{label}]")
    else
      footnotes << "  [#{footnotes.size + 1}] #{link.attributes['href']}"
      # footnotes.size is now this link's own number.
      link.swap(label + "[#{footnotes.size}]")
    end
  end

  [doc.to_s, *footnotes]
end

#list_feeds(feeds) ⇒ Object



19
20
21
22
23
24
25
# File 'lib/display.rb', line 19

# Lists the given feeds.
#
# When @options[:curses] is set, the work is handed to a CursesController
# built around this Display and its result is returned. Otherwise one line
# per item, produced by display_feed, is written to standard output and the
# feeds collection is returned.
#
# Previously the non-curses branch built each line and threw it away, so
# nothing was shown; the lines are now printed, mirroring display_entries.
#
# NOTE(review): display_feed reads item.feed.title and item.date_published,
# i.e. it is written against entry-like objects - confirm the objects passed
# here respond to those methods.
def list_feeds(feeds)
  return CursesController.new(self).show_feeds(feeds) if @options[:curses]

  feeds.each { |feed| puts display_feed(feed) }
end

#normalize_blank_lines(text) ⇒ Object

Make sure there is no more than one blank line anywhere



120
121
122
123
124
125
126
127
128
# File 'lib/display.rb', line 120

# Collapses runs of blank lines so that paragraphs are separated by exactly
# one blank line.
#
# Windows line endings are converted to "\n" first; whitespace-only lines are
# then reduced to bare newlines, and every run of two or more newlines becomes
# a single blank line.
def normalize_blank_lines(text)
  unix_text = text.gsub("\r\n", "\n")
  # Whitespace-only lines become empty lines, so they join the newline runs.
  emptied = unix_text.gsub(/^\s*$/, "\n")
  paragraphs = emptied.split(/\n{2,}/)
  paragraphs.join("\n\n")
end

#tags_to_text(html) ⇒ Object



130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# File 'lib/display.rb', line 130

# Rewrites the HTML elements in the given fragment as plain-text conventions
# and returns the resulting document as a String.
#
# The fragment is parsed with Hpricot and elements are replaced in place, one
# selector at a time. The order of the passes matters: inner elements (list
# items, inline markup) are flattened before the elements that enclose them.
#
# Conventions produced:
#   comments, svg, object, table, script -> removed
#   blockquote, pre   -> whitespace collapsed, wrapped, indented 4 spaces
#   big, h1-h4        -> "= heading" on its own paragraph (dropped if empty)
#   img               -> "(img)"
#   br                -> newline
#   i, b, strong, em  -> *text*
#   abbr, code        -> +text+
#   a, span           -> inner text
#   dt / dd           -> "term:" line / definition line
#   hr                -> line of @width dashes
#   li                -> "* " bullet, wrapped, continuation lines indented 2
#   ul, ol, dl        -> stripped inner text
#   p, div            -> text surrounded by blank lines
def tags_to_text(html)
  doc = Hpricot(html)

  doc.search('//comment()').remove

  doc.search('blockquote, pre') do |x|
    # compress extra spaces
    text = x.inner_text.squeeze(' ').strip
    # collapse the spacing in the text; \s also matches newlines, so any line
    # structure inside the element is flattened here
    text.gsub!(/\s{2,}/, ' ')
    text = wrap_text(text, @width - 4).gsub(/^/, '    ') # indent 4 spaces
    x.swap("\n\n" + text + "\n\n")
  end

  # Headings: empty ones vanish, others become "= text" paragraphs
  doc.search('big, h1,h2,h3,h4') do |p|
    if p.inner_text.strip == ''
      p.swap('')
    else
      p.swap( "\n\n= #{unwrap(p.inner_text)}\n\n" )
    end
  end
  # Images are replaced by a fixed placeholder
  doc.search('//img') do |img|
    img.swap( "(img)" )
  end

  # Elements that have no text rendering here are dropped entirely
  doc.search('svg, object').remove
  doc.search('table').remove
  doc.search('script').remove

  doc.search('//br') do |p|
    p.swap( "\n" )
  end

  # Inline emphasis and code get lightweight text markers
  doc.search('i, b, strong, em') do |p|
    p.swap( "*#{p.inner_text}*" )
  end
  doc.search('abbr, code') do |p|
    p.swap( "+#{p.inner_text}+" )
  end
  #
  # anchor tags are processed after real links
  doc.search('a') do |p|
    p.swap( "#{p.inner_text}" )
  end
  # Definition terms keep their inner HTML and gain a trailing colon
  doc.search('dt') do |x|
    x.swap( "#{x.inner_html}:\n" )
  end
  # This could be improved to ensure an indentation even if there are nested
  # tag elements
  doc.search('dd') do |x|
    x.swap( "#{x.inner_text}\n" )
  end


  doc.search('//span') do |s|
    s.swap( s.inner_text )
  end
  doc.search('hr') do |s|
    s.swap( '-' * @width)
  end
  # Do this before erasing the enclosing <ul> or <ol> tags
  doc.search('li') do |s|
    text = s.inner_text.strip
    # wrap the text and indent it 2 spaces
    text = wrap_text(text, @width - 2).gsub(/^/, '  ')
    # don't indent 1st line
    text.lstrip!
    s.swap( "* " + text  + "\n" )
  end
  # The list containers themselves reduce to their (already rewritten) text
  doc.search('ul, ol, dl') do |s|
    s.swap(  s.inner_text.strip  )
  end

  # Paragraphs are unwrapped to a single line and set off by blank lines
  doc.search('p') do |p|
    p.swap( "\n\n" + unwrap(p.inner_text) + "\n\n" )
  end

  doc.search('div') do |p|
    p.swap( "\n\n" + p.inner_text.strip + "\n\n" )
  end

  doc.to_s
end

#unwrap(text) ⇒ Object



214
215
216
# File 'lib/display.rb', line 214

# Joins multi-line text into a single line: newlines become spaces, runs of
# spaces are squeezed to one and surrounding whitespace is removed.
def unwrap(text)
  single_line = text.tr("\n", ' ')
  single_line.squeeze(' ').strip
end

#wrap_text(txt, col = @width) ⇒ Object



241
242
243
# File 'lib/display.rb', line 241

# Wraps text so that no line is longer than col characters.
#
# txt - String to wrap.
# col - maximum line width; defaults to @width.
#
# Lines are broken at runs of spaces where possible (the spaces at the break
# are dropped); a run of more than col characters without a space is split
# hard every col characters. Every emitted line, including the last, ends
# with a newline. Existing newlines are kept as line breaks.
#
# gsub is called on the String itself: String#chars returns an Array on
# Ruby 1.9 and later, which has no gsub, so the former txt.chars.gsub raised
# NoMethodError there. Regexp matching on a String is already per-character.
def wrap_text(txt, col = @width)
  txt.gsub(/(.{1,#{col}})( +|$\n?)|(.{1,#{col}})/, "\\1\\3\n")
end