Class: AntiWordR::DocFile

Inherits:
Object
  • Object
show all
Defined in:
lib/antiwordr.rb

Overview

Provides facilities for converting Word documents to text from Ruby code.

Direct Known Subclasses

DocFilePath, DocFileUrl

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(input_path, target_path = nil) ⇒ DocFile

Returns a new instance of DocFile.



31
32
33
34
# File 'lib/antiwordr.rb', line 31

# Creates a DocFile for the given source document.
#
# @param input_path  stored as @path; #convert passes it to antiword as the
#   file to read
# @param target_path stored as @target; defaults to nil
def initialize(input_path, target_path = nil)
  @path, @target = input_path, target_path
end

Instance Attribute Details

#formatObject (readonly)

Returns the value of attribute format.



29
30
31
# File 'lib/antiwordr.rb', line 29

# Reader for @format, the antiword option string that #convert inserts into
# the command line. It is nil until a conversion method such as
# #convert_to_docbook assigns it.
#
# @return [String, nil] the current value of @format
def format; @format; end

#pathObject (readonly)

Returns the value of attribute path.



27
28
29
# File 'lib/antiwordr.rb', line 27

# Reader for @path, the input path given to the constructor.
#
# @return the current value of @path
def path; @path; end

#targetObject (readonly)

Returns the value of attribute target.



28
29
30
# File 'lib/antiwordr.rb', line 28

# Reader for @target, the optional target path given to the constructor.
#
# @return the current value of @target (nil when none was given)
def target; @target; end

Instance Method Details

#convertObject

Convert the Word document by running antiword with the currently selected format options. Returns the output as a string.



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/antiwordr.rb', line 37

# Runs the external `antiword` program on the document at @path, passing the
# option string returned by #format, and returns what it printed.
#
# The command is started from an argument list rather than a shell command
# line, so quotes, `$`, backticks or `;` in the path are passed to antiword
# verbatim instead of being interpreted as shell syntax. stderr is merged into
# stdout, so antiword's diagnostics are part of the captured text.
#
# NOTE(review): the error checks below scan the whole captured text, so a
# document whose own content contains "Error:" would also be reported as a
# failure.
#
# @return [String] combined stdout/stderr of antiword
# @raise [AntiWordRError] if the antiword executable cannot be found, if the
#   output says the source is not a Word document, or if the output contains
#   "Error:" (the first output line becomes the message)
def convert
  # The option string (e.g. "-x db") is split on whitespace into separate
  # arguments; a nil format contributes no arguments.
  argv = ["antiword", *format.to_s.split, @path]

  begin
    output = IO.popen(argv, :err => [:child, :out]) { |io| io.read }
  rescue Errno::ENOENT
    # Detect a missing executable from the spawn failure itself instead of
    # relying on a shell-specific "command not found" message.
    raise AntiWordRError, "AntiWordR requires antiword to be installed"
  end

  if output.include?("is not a Word Document")
    raise AntiWordRError, "Source document is not a Word Document"
  elsif output.include?("Error:")
    raise AntiWordRError, output.split("\n").first.to_s.chomp
  end

  output
end

#convert_to_docbookObject



61
62
63
64
# File 'lib/antiwordr.rb', line 61

# Selects the "-x db" antiword option string and runs #convert with it.
# The option string stays stored in @format afterwards.
#
# @return whatever #convert returns
def convert_to_docbook
  @format = "-x db"
  convert
end

#convert_to_docbook_documentObject

Convert the Word document to DocBook XML. Returns a Nokogiri::XML::Document



57
58
59
# File 'lib/antiwordr.rb', line 57

# Converts the document with #convert_to_docbook and hands the resulting text
# to Nokogiri::XML.parse.
#
# @return the parsed document produced by Nokogiri::XML.parse
def convert_to_docbook_document
  docbook_xml = convert_to_docbook
  Nokogiri::XML.parse(docbook_xml)
end