Class: SpreadsheetBuilder::HtmlParser

Inherits:

Object

Object
SpreadsheetBuilder::HtmlParser

show all

Defined in: lib/spreadsheet_builder/html_parser.rb

Instance Attribute Summary collapse

#doc ⇒ Object readonly

Returns the value of attribute doc.

Class Method Summary collapse

Instance Method Summary collapse

#build(force_level = :none) ⇒ Object
#css ⇒ Object
#initialize(html, *css_paths) ⇒ HtmlParser constructor

A new instance of HtmlParser.
#to_data(force_level = :none) ⇒ Object

TODO: clean this up.

Constructor Details

#initialize(html, *css_paths) ⇒ `HtmlParser`

Returns a new instance of HtmlParser.

# File 'lib/spreadsheet_builder/html_parser.rb', line 17

# Builds a parser around an HTML string.
#
# @param html [String] the HTML markup to parse
# @param css_paths [Array] zero or more stylesheet paths; kept as the
#   initial list of CSS load paths
def initialize(html, *css_paths)
  # TODO merge inline style tags into CssParser
  @html           = html
  @css_load_paths = css_paths
  # Parse once up front; the resulting document is exposed through #doc.
  @doc            = Nokogiri::HTML(html)
end

Instance Attribute Details

#doc ⇒ `Object` (readonly)

Returns the value of attribute doc.



16
17
18

# File 'lib/spreadsheet_builder/html_parser.rb', line 16

# Read-only accessor for the parsed document.
#
# @return [Object] the value stored in @doc (assigned by the constructor
#   from Nokogiri::HTML)
def doc
  @doc
end

Class Method Details

.from_erb(file) ⇒ `Object`

# File 'lib/spreadsheet_builder/html_parser.rb', line 8

# Builds a parser from an ERB template file.
#
# The file is read, evaluated as ERB with the default binding, and the
# rendered markup is handed to the constructor (no CSS paths are passed).
#
# @param file [String] path to the ERB template
# @return [HtmlParser] a parser for the rendered HTML
def self.from_erb(file)
  rendered = ERB.new(File.read(file)).result

  new(rendered)
end

.from_slim(file, options = {}, context = self, &block) ⇒ `Object`

# File 'lib/spreadsheet_builder/html_parser.rb', line 3

# Builds a parser from a Slim template file.
#
# @param file [String] path to the Slim template
# @param options [Hash] options forwarded to Slim::Template.new
# @param context [Object] object the template is rendered against
#   (defaults to the receiver of this call)
# @param block [Proc] optional block forwarded to the template's render
# @return [HtmlParser] a parser for the rendered HTML
def self.from_slim(file, options = {}, context = self, &block)
  template = Slim::Template.new(file, options)
  new(template.render(context, &block))
end

Instance Method Details

#build(force_level = :none) ⇒ `Object`



24
25
26

# File 'lib/spreadsheet_builder/html_parser.rb', line 24

# Converts the parsed HTML into a spreadsheet.
#
# @param force_level [Symbol] passed through unchanged to #to_data
# @return [Object] whatever SpreadsheetBuilder.from_data returns for the
#   extracted data
def build(force_level = :none)
  data = to_data(force_level)
  SpreadsheetBuilder.from_data(data)
end

#css ⇒ `Object`

# File 'lib/spreadsheet_builder/html_parser.rb', line 28

# Lazily builds and memoizes the CSS parser used for this document.
#
# On the first call, the href of every <link rel="stylesheet"> element in
# the parsed document is appended to @css_load_paths with its leading
# slashes stripped. Links that carry no href attribute are skipped. The
# collected load paths are not yet handed to the parser: it is currently
# constructed with an empty list (see the commented-out line below).
#
# @return [SpreadsheetBuilder::CssParser] the memoized parser
def css
  return @css if @css

  @doc.css('link[rel=stylesheet]').each do |link|
    href = link["href"]
    next unless href # nothing to load for a <link> without an href

    # This will have to be updated later with a host
    @css_load_paths << href.sub(%r{\A/+}, '')
  end
  #@css = SpreadsheetBuilder::CssParser.new(@css_load_paths)
  # Figure out the best way to load this
  @css = SpreadsheetBuilder::CssParser.new([])
end

#to_data(force_level = :none) ⇒ `Object`

TODO: clean this up

# File 'lib/spreadsheet_builder/html_parser.rb', line 42

# Walks the parsed document and collects cell and merge information in the
# hash shape passed to SpreadsheetBuilder.from_data by #build.
#
# The base format is taken from the first <table> in the document; every
# <tr> in the document (not only those of that table) is then visited, and
# each of its <td>/<th> nodes is handed to add_td_to_cells together with the
# merged table + row format.
#
# The custom rowheight/colwidth attributes are currently ignored.
# TODO: decide whether they are still needed now that CSS is parsed.
#
# @param force_level [Symbol] forwarded to the CSS parser's reset
# @return [Hash] { cells: Array, merges: { 0 => Array } } where each merge is
#   [first_row, first_col, last_row, last_col]; the 0 key is presumably the
#   sheet index -- TODO confirm against SpreadsheetBuilder.from_data
def to_data(force_level = :none)
  cells  = []
  merges = []

  css.reset(force_level)

  # ignoring specified formats for anything other than table tr td/th
  tb_format = css.format_from_node(doc.css('table').first)

  row = 0
  doc.css('tr').each do |tr|
    tr_format = tb_format.merge(css.format_from_node(tr))

    increment = true
    tr.css('td, th').each_with_index do |td, col|
      rowspan = td.attributes["rowspan"]
      colspan = td.attributes["colspan"]

      rowspan &&= rowspan.value.to_i
      colspan &&= colspan.value.to_i

      add_td_to_cells(row, col, td, tr_format, cells)

      if colspan
        # Repeat the cell across the extra columns it spans.
        1.upto(colspan - 1) do |offset|
          add_td_to_cells(row, col + offset, td, tr_format, cells)
        end
      end

      if rowspan
        # NOTE(review): a rowspan of n repeats the cell on n - 2 further rows,
        # merges n - 1 rows in total, and keeps the row counter from
        # advancing for this <tr>. This looks tailored to a specific markup
        # convention -- confirm before changing the arithmetic.
        1.upto(rowspan - 2) do |offset|
          add_td_to_cells(row + offset, col, td, tr_format, cells)
        end
        increment = false
      end

      next unless colspan || rowspan

      merges << [
        row, col, row + (rowspan || 2) - 2, col + (colspan || 1) - 1
      ]
    end

    row += 1 if increment
  end

  { cells: cells, merges: { 0 => merges } }
end

Class: SpreadsheetBuilder::HtmlParser

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(html, *css_paths) ⇒ HtmlParser

Instance Attribute Details

#doc ⇒ Object (readonly)

Class Method Details

.from_erb(file) ⇒ Object

.from_slim(file, options = {}, context = self, &block) ⇒ Object

Instance Method Details

#build(force_level = :none) ⇒ Object

#css ⇒ Object

#to_data(force_level = :none) ⇒ Object

#initialize(html, *css_paths) ⇒ `HtmlParser`

#doc ⇒ `Object` (readonly)

.from_erb(file) ⇒ `Object`

.from_slim(file, options = {}, context = self, &block) ⇒ `Object`

#build(force_level = :none) ⇒ `Object`

#css ⇒ `Object`

#to_data(force_level = :none) ⇒ `Object`