Module: Abbreviato
- Defined in:
- lib/abbreviato/version.rb,
lib/abbreviato/abbreviato.rb
Constant Summary collapse
- VERSION =
'3.1.1'
- DEFAULT_OPTIONS =
{ max_length: 30, tail: '…', fragment: true }.freeze
Class Method Summary collapse
-
.truncate(source = '', user_options = {}) ⇒ [String] the truncated string, [boolean] whether the string was truncated
Truncates the source XML string and returns the truncated XML and a boolean flag indicating whether any truncation took place.
Class Method Details
.truncate(source = '', user_options = {}) ⇒ [String] the truncated string, [boolean] whether the string was truncated
Truncates the source XML string and returns the truncated XML and a boolean flag indicating whether any truncation took place. It will keep a valid XML structure and insert a tail text indicating the position where content was removed (…).
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/abbreviato/abbreviato.rb', line 25

# Truncates the source markup and reports whether any truncation took place.
#
# @param source [String, nil] the markup to truncate
# @param user_options [Hash] overrides merged on top of DEFAULT_OPTIONS; a
#   truthy :truncate_incomplete_row additionally prunes table rows after
#   truncation (see the comments in the body)
# @return [Array] a two-element array: the truncated string (nil when source
#   is nil) and a flag telling whether the string was truncated
def self.truncate(source = '', user_options = {})
  return [nil, false] if source.nil?

  truncated_sax_document = TruncatedSaxDocument.new(DEFAULT_OPTIONS.merge(user_options))
  parser = Nokogiri::HTML::SAX::Parser.new(truncated_sax_document)
  # Ask the parser context not to replace entities while parsing.
  parser.parse(source) { |context| context.replace_entities = false }

  # Row clean-up only runs when something was truncated and the caller asked for it.
  if truncated_sax_document.truncated && user_options[:truncate_incomplete_row]
    # Fallback result, returned whenever there is no table/row/cell to work with.
    parsed_results = [truncated_sax_document.truncated_string.strip, truncated_sax_document.truncated]

    html_fragment = Nokogiri::HTML.fragment(truncated_sax_document.truncated_string.strip)
    return parsed_results if html_fragment.nil?

    last_table_in_doc = html_fragment.xpath('.//table').last
    return parsed_results unless last_table_in_doc

    first_row = last_table_in_doc.xpath('.//tr').first
    return parsed_results unless first_row

    cols_in_first_row = first_row.xpath('.//td').length
    return parsed_results unless cols_in_first_row.positive?

    # Drop every row of the last table whose <td> count differs from the first row's.
    last_table_in_doc.xpath('.//tr').each do |row|
      row.remove if row.xpath('.//td').length != cols_in_first_row
    end

    return [html_fragment.to_html, truncated_sax_document.truncated]
  end

  [truncated_sax_document.truncated_string.strip, truncated_sax_document.truncated]
end