Module: Charles::InternalAttributes

Included in:
Document
Defined in:
lib/charles/internal_attributes.rb

Instance Method Summary collapse

Instance Method Details

#clean_title ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/charles/internal_attributes.rb', line 9

# Returns the document title with boilerplate words trimmed from both ends.
#
# Trimming only happens when at least five sample titles were supplied via
# @options[:sample_titles]; otherwise the raw title is returned untouched.
# The raw title is also returned when it is nil or when every token would be
# stripped.
#
# Tokens come from Charles::Misc.string_to_tokens_raw and are expected to
# respond to #text, #start and #end; #start/#end are used as offsets into the
# title string.
#
# @return [String, nil] the trimmed title, or the raw title (possibly nil)
def clean_title
  samples = @options[:sample_titles]
  return title if title.nil? || !samples || samples.size < 5

  tokens = Charles::Misc.string_to_tokens_raw(title, :no_stop_words)
  filtered_words = words_to_filter_from_sample_titles

  # Drop filtered words from the front, then from the back, of the token list.
  tokens.shift while tokens.first && filtered_words.include?(tokens.first.text)
  tokens.pop while tokens.last && filtered_words.include?(tokens.last.text)

  # Everything was stripped: fall back to the unmodified title.
  return title if tokens.empty?

  first_offset = tokens.first.start
  core = title.slice(first_offset, tokens.last.end - first_offset)

  # Widen the match to any non-whitespace characters touching the core text,
  # so symbols or punctuation surrounding the title are kept.
  title.match(/[^\s\302\240]*#{Regexp.escape(core)}[^\s\302\240]*/)[0].strip
end

#title ⇒ Object



3
4
5
6
7
8
# File 'lib/charles/internal_attributes.rb', line 3

# Returns the cleaned inner text of the first node found by
# @document.search('title'), or nil when no such node exists.
# A truthy result is cached in @title and reused on later calls.
def title
  @title ||= begin
    title_node = @document.search('title').first
    title_node.clean_inner_text if title_node
  end
end