Class: WordToMarkdown::Document

Inherits:
Object
  • Object
show all
Defined in:
lib/word-to-markdown/document.rb

Defined Under Namespace

Classes: NotFoundError

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ Document

Returns a new instance of Document.

Raises:

  • (NotFoundError) — if no file exists at the expanded path

7
8
9
10
# File 'lib/word-to-markdown/document.rb', line 7

# Build a document for a file on disk
#
# path - path to the file; it is expanded to an absolute path relative to
#        the current working directory and stored in @path
#
# Raises NotFoundError if nothing exists at the expanded path
def initialize(path)
  @path = File.expand_path(path, Dir.pwd)
  return if File.exist?(@path)

  raise NotFoundError, "File #{@path} does not exist"
end

Instance Attribute Details

#path ⇒ Object (readonly)

Returns the value of attribute path.



5
6
7
# File 'lib/word-to-markdown/document.rb', line 5

# Returns the value of @path, the expanded path assigned by the constructor
def path
  @path
end

Instance Method Details

#encoding(html) ⇒ Object

Determine the document encoding

html - the raw html export

Returns the encoding, defaulting to “UTF-8”



39
40
41
42
43
44
45
46
# File 'lib/word-to-markdown/document.rb', line 39

# Determine the document encoding
#
# html - the raw html export
#
# The encoding is taken from the first `charset=` declaration found in the
# markup. The value may be bare (as inside a
# `content="text/html; charset=..."` attribute) or wrapped in single or
# double quotes (as in `<meta charset="utf-8">`); it ends at the first
# quote, whitespace, `;`, `/` or `>` so trailing markup is never captured.
#
# Returns the encoding, defaulting to "UTF-8"
def encoding(html)
  # Drop bytes that are invalid in, or have no mapping to, UTF-8 so that the
  # regexp match below cannot raise on malformed or binary input.
  scrubbed = html.encode("UTF-8", invalid: :replace, undef: :replace, replace: "")
  match = scrubbed.match(/charset=["']?([^"'\s;>\/]+)/i)
  return "UTF-8" unless match

  # Translate the "macintosh" charset label to "MacRoman".
  match[1].sub("macintosh", "MacRoman")
end

#extension ⇒ Object



12
13
14
# File 'lib/word-to-markdown/document.rb', line 12

# Returns the extension of the document's path, including the leading dot
# (e.g. ".docx"), or an empty String when the file name has none
def extension
  File.extname(path)
end

#html ⇒ Object

Returns the html representation of the document



25
26
27
# File 'lib/word-to-markdown/document.rb', line 25

# Returns the html representation of the document
#
# The tree is serialized with to_html, then the newline directly following
# each closing </li> tag is removed.
def html
  serialized = tree.to_html
  serialized.gsub("</li>\n", "</li>")
end

#to_s ⇒ Object

Returns the markdown representation of the document



30
31
32
# File 'lib/word-to-markdown/document.rb', line 30

# Returns the markdown representation of the document
#
# The html is converted with ReverseMarkdown and passed through
# scrub_whitespace; the result is cached in @markdown and reused by later
# calls.
def to_s
  return @markdown if @markdown

  converted = ReverseMarkdown.convert(html, WordToMarkdown::REVERSE_MARKDOWN_OPTIONS)
  @markdown = scrub_whitespace(converted)
end

#tree ⇒ Object



16
17
18
19
20
21
22
# File 'lib/word-to-markdown/document.rb', line 16

# Returns the document parsed by Nokogiri::HTML from the normalized raw
# html, with every <title> element removed
#
# The parsed document is cached in @tree and reused by later calls.
def tree
  return @tree if @tree

  document = Nokogiri::HTML(normalize(raw_html))
  document.css("title").remove
  @tree = document
end