Class: String
- Inherits:
-
Object
- Object
- String
- Defined in:
- lib/gistgen/string.rb
Instance Method Summary collapse
-
#extract_passage(start_index = 0, length = 500) ⇒ Object
Returns a passage of at most `length` characters, starting from the sentence at index `start_index`.
-
#limit(length) ⇒ Object
Constrains a string to a fixed length or less: discards everything after the last punctuation mark that occurs right before the length limit. The regexp locates that last punctuation mark by checking that no further punctuation follows it.
-
#split_sentences ⇒ Object
Splits text into sentences, taking into account that Mr.|Ms.
Instance Method Details
#extract_passage(start_index = 0, length = 500) ⇒ Object
Returns a passage of at most `length` characters, starting from the sentence at index `start_index`.
3 4 5 6 7 8 9 |
# File 'lib/gistgen/string.rb', line 3
#
# Returns the first line of a passage built from consecutive sentences,
# starting at sentence +start_index+, that is at most +length+ characters long.
#
# Sentences come from #split_sentences and are re-joined with '. '. As many
# whole sentences as fit within +length+ are kept; if even the first sentence
# is too long it is kept alone and shortened by #limit.
#
# @param start_index [Integer] index of the first sentence to include
# @param length [Integer] maximum size of the returned passage
# @return [String] the passage, or '' when there is no sentence at +start_index+
def extract_passage(start_index = 0, length = 500)
  sentences = split_sentences

  # Index of the first sentence whose inclusion would push the joined passage
  # past +length+; nil when every remaining sentence fits.
  overflow = ((start_index + 1)...sentences.size).find do |i|
    sentences[start_index..i].join('. ').size > length
  end

  # Keep everything before the overflowing sentence (exclusive range), or all
  # remaining sentences when nothing overflows. Array() absorbs the nil that
  # Array#[] returns when start_index lies beyond the last sentence.
  selected = Array(overflow ? sentences[start_index...overflow] : sentences[start_index..-1])

  # Only the first line of the joined text is used as the passage.
  first_line = selected.join('. ').split("\n").first
  return '' unless first_line

  # Strip leading non-word characters, then enforce the size limit.
  first_line.gsub(/^[^\w]+/, '').limit(length)
end
#limit(length) ⇒ Object
Constrains a string to a fixed length or less: discards everything after the last punctuation mark that occurs right before the length limit. The regexp locates that last punctuation mark by checking that no further punctuation follows it.
36 37 38 |
# File 'lib/gistgen/string.rb', line 36
#
# Constrains the string to +length+ characters or fewer.
#
# A string that already fits is returned unchanged. Otherwise it is cut to
# +length+ characters, and the last punctuation mark in the cut text
# (one of , : ; ) / \ |) is removed together with everything after it, so the
# result does not end on a dangling fragment. If the cut text contains no
# such punctuation it is returned as cut.
#
# @param length [Integer] maximum size of the result
# @return [String] the receiver itself when it fits, otherwise a shortened copy
def limit(length)
  # `size` is used because the parameter shadows String#length here.
  return self unless size > length

  # Match the final punctuation mark and the punctuation-free tail up to the
  # very end of the string. The negated class also matches newlines, so text
  # on later lines is discarded as well.
  self[0...length].sub(/[,:;)\/\\|][^,:;)\/\\|]*\z/, '')
end
#split_sentences ⇒ Object
Splits text into sentences, taking into account that Mr.|Ms. endings are not the end of a sentence.
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/gistgen/string.rb', line 12
#
# Splits the text into sentences, treating the abbreviations Dr., Mr., Ms.
# and Mrs. as part of a sentence rather than as its end.
#
# The returned sentences do not include their terminating period.
#
# @return [Array<String>] the sentences, in order of appearance
def split_sentences
  # Break the text by paragraph first, then into chunks delimited by a period
  # followed by a non-word character. A newline is appended to each paragraph
  # so that a period ending the paragraph is also recognized as a delimiter.
  # These chunks are not quite sentences yet.
  chunks = split(/\n+/).flat_map { |paragraph| "#{paragraph}\n".split(/\.(?:[^\w])/) }

  sentences = []
  pending = nil # text cut off at a title, waiting to be joined with what follows

  chunks.each do |chunk|
    candidate = pending ? "#{pending}. #{chunk}" : chunk

    # A chunk ending in a title was split in the middle of a sentence.
    # The word boundary keeps words such as "PMs" from being read as "Ms".
    # TODO: initials such as "John F. Kennedy" ([A-Z]) are still split.
    if chunk =~ /\b(?:Dr|Mr|Ms|Mrs)$/
      pending = candidate
    else
      sentences << candidate
      pending = nil
    end
  end

  # Text that ends on a title has nothing left to be joined with; keep it
  # rather than silently dropping it.
  sentences << pending if pending
  sentences
end