Class: String

Inherits:
Object
  • Object
show all
Defined in:
lib/gistgen/string.rb

Instance Method Summary collapse

Instance Method Details

#extract_passage(start_index = 0, length = 500) ⇒ Object

Returns a passage of at most `length` characters, starting from the sentence at index `start_index`.



3
4
5
6
7
8
9
# File 'lib/gistgen/string.rb', line 3

# Returns a passage of at most +length+ characters built from consecutive
# sentences, beginning with the sentence at index +start_index+.
#
# Sentences are joined with '. ' for as long as the joined text stays within
# +length+. Only the first line of the joined text is kept, leading non-word
# characters are stripped, and the result is passed through #limit.
#
# start_index - index of the first sentence to include (default 0)
# length      - maximum size of the returned passage (default 500)
#
# Returns '' when there is no sentence at +start_index+ or the passage is empty.
def extract_passage(start_index = 0, length = 500)
  sentences = split_sentences
  # Nothing to extract when the start lies past the last sentence
  # (also covers the empty-text case).
  return '' if start_index >= sentences.size

  # First index at which the joined run of sentences would exceed the limit.
  overflow = ((start_index + 1)...sentences.size).detect do |i|
    sentences[start_index..i].join('. ').size > length
  end

  # Inclusive index of the last sentence that still fits; everything fits
  # when no overflow index was found.
  last = overflow ? overflow - 1 : sentences.size - 1

  # The range must be inclusive: +last+ is itself part of the passage.
  first_line = sentences[start_index..last].join('. ').split("\n").first
  first_line ? first_line.sub(/\A[^\w]+/, '').limit(length) : ''
end

#limit(length) ⇒ Object

Constrains a string to at most `length` characters. When the string has to be truncated, everything from the last punctuation mark (one of `, : ; ) / \ |`) before the length limit onward is discarded; the regexp uses a lookahead to find that last punctuation mark.



36
37
38
# File 'lib/gistgen/string.rb', line 36

# Caps the receiver at +length+ characters.
#
# A string that already fits is returned unchanged. Otherwise the string is
# cut to its first +length+ characters, and the last punctuation mark in that
# cut (one of , : ; ) / \ |) is removed together with the rest of its line.
# A cut that contains no such punctuation is returned as is.
def limit(length)
  return self unless size > length

  head = self[0...length]
  # The negative lookahead rejects any punctuation mark that still has another
  # one somewhere after it, so only the final mark (and its tail) matches.
  head.gsub(/(?![\s\S]+?[,:;)\/\\\|])([,:;)\/\\\|].*)/, '')
end

#split_sentences ⇒ Object

Splits text into sentences, taking into account that title abbreviations such as Mr., Ms., Dr. and Mrs. do not mark the end of a sentence.



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/gistgen/string.rb', line 12

# Splits the text into an array of sentences.
#
# The text is first broken into paragraphs on newlines, then each paragraph
# is split wherever a period is followed by a non-word character. Chunks that
# end in a title abbreviation (Dr, Mr, Ms, Mrs) are not real sentence ends, so
# they are re-joined with the chunk that follows.
#
# The returned sentences do not carry their terminating period. The last chunk
# of a paragraph that does not end in a period keeps a trailing "\n".
#
# Returns an Array of Strings (empty for an empty text).
def split_sentences
  # Break the text by paragraph, then into chunks delimited by a period;
  # these are not quite sentences yet.
  chunks = split(/\n+/).flat_map { |para| "#{para}\n".split(/\.(?:[^\w])/) }

  pending = '' # text accumulated so far for a sentence split at a title
  sentences = []
  chunks.each do |chunk|
    combined = pending.empty? ? chunk : "#{pending}. #{chunk}"
    # \b ensures only the standalone abbreviation counts, not any word that
    # merely ends in the same letters (e.g. "ATMs").
    # TODO: initials such as "John F. Kennedy" are still split.
    if chunk =~ /\b(?:Dr|Mr|Ms|Mrs)$/
      pending = combined
    else
      sentences << combined
      pending = ''
    end
  end

  # The text may end right after a title; keep that fragment instead of
  # dropping it.
  sentences << pending unless pending.empty?
  sentences
end