Module: Ircbot::Utils::HtmlParser
- Defined in:
- lib/ircbot/utils/html_parser.rb
Instance Method Summary
Instance Method Details
#get_title(html) ⇒ Object
6 7 8 9 |
# File 'lib/ircbot/utils/html_parser.rb', line 6

# Extracts the text of the first <title> element found in +html+.
#
# The match is case-insensitive, may span several lines, and tolerates
# attributes on the opening tag (e.g. <title lang="en">).
#
# NOTE(review): the extracted listing showed the final call as "(title)" with
# the callee name missing; it is restored here as trim_tags, the name given by
# the sibling method's heading. Confirm against the real source file.
#
# @param html [String, nil] raw HTML markup; nil is treated as "no title"
# @return [String] the whitespace-stripped title passed through trim_tags,
#   or "" when no <title> element is present
def get_title(html)
  # Explicit MatchData instead of the $1 global, which any later regexp
  # match would silently overwrite.
  match = %r{<title\b[^>]*>(.*?)</title>}mi.match(html.to_s)
  return "" unless match

  trim_tags(match[1].strip)
end
#trim_tags(html) ⇒ Object
11 12 13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/ircbot/utils/html_parser.rb', line 11

# Reduces an HTML fragment to plain text.
#
# Steps, in order:
# 1. drop <head>, <script>, <style> and <noscript> elements with their content;
# 2. drop comments and <!...> declarations (e.g. a doctype);
# 3. drop every remaining tag, keeping the text between tags;
# 4. collapse whitespace runs to a single space and strip both ends;
# 5. decode HTML entities with CGI.unescapeHTML.
#
# The argument is never modified, so frozen strings are accepted.
#
# @param html [String] raw HTML markup
# @return [String] the plain-text rendering of +html+
def trim_tags(html)
  removals = [
    %r{<head.*?>.*?</head>}mi,
    %r{<script.*?>.*?</script>}mi,
    %r{<style.*?>.*?</style>}mi,
    %r{<noscript.*?>.*?</noscript>}mi,
    # Comments and declarations go before the generic tag pattern; otherwise
    # a comment containing ">" or nested tags is cut at the first ">" and its
    # remaining body leaks into the output.
    %r{<\!--.*?-->}mi,
    %r{<\!\w.*?>}mi,
    # The m flag lets a tag whose attributes span several lines match.
    %r{</?.*?>}m
  ]

  # Non-destructive gsub: each step yields a new string, leaving the
  # caller's object untouched.
  text = removals.reduce(html) { |acc, pattern| acc.gsub(pattern, '') }
  CGI.unescapeHTML(text.gsub(/\s+/m, ' ').strip)
end