Class: Yomu

Inherits:
Object
  • Object
show all
Defined in:
lib/yomu.rb,
lib/yomu/version.rb

Constant Summary collapse

GEMPATH =
File.dirname(File.dirname(__FILE__))
JARPATH =
File.join(Yomu::GEMPATH, 'jar', 'tika-app-1.5.jar')
VERSION =
"0.1.10"

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(input) ⇒ Yomu

Create a new instance of Yomu with a given document.

Using a file path:

Yomu.new 'sample.pages'

Using a URL:

Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'

From a stream or an object which responds to read:

Yomu.new File.open('sample.pages')


61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/yomu.rb', line 61

# Create a new instance of Yomu with a given document.
#
# input - a String holding a URI or a file path, or any object that
#         responds to +read+ (for example an open File).
#
# Raises Errno::ENOENT if a String input neither matches a URI nor names
#   an existing file.
# Raises TypeError if input is neither a String nor responds to +read+.
def initialize(input)
  if input.is_a? String
    # URI detection runs first, so a String that matches the URI pattern is
    # never treated as a local path.
    if input =~ URI.regexp
      @uri = URI.parse input
    # File.exists? was deprecated and then removed in Ruby 3.2;
    # File.exist? is the supported spelling on every Ruby version.
    elsif File.exist? input
      @path = input
    else
      raise Errno::ENOENT.new "missing file or invalid URI - #{input}"
    end
  elsif input.respond_to? :read
    @stream = input
  else
    raise TypeError.new "can't read from #{input.class.name}"
  end
end

Class Method Details

.read(type, data) ⇒ Object

Read text or metadata from a data buffer.

data = File.read 'sample.pages'
text = Yomu.read :text, data
metadata = Yomu.read :metadata, data


17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/yomu.rb', line 17

# Read text or metadata from a data buffer by piping it through the
# bundled Tika jar (Yomu::JARPATH) in a Java subprocess.
#
# type - one of :text, :html, :metadata or :mimetype.
# data - the raw document contents, written to the subprocess's stdin.
#
# Returns the subprocess output unchanged for :text and :html, the
# YAML-parsed output for :metadata, and the first MIME::Types match for the
# parsed 'Content-Type' entry for :mimetype. For any other type the command
# is still run (with no switch) and nil is returned.
def self.read(type, data)
  # :mimetype shares '-m' with :metadata because the mimetype is looked up
  # from the metadata's 'Content-Type' entry further down.
  flag = { :text => '-t', :html => '-h', :metadata => '-m', :mimetype => '-m' }[type]

  command = "#{java} -Djava.awt.headless=true -jar #{Yomu::JARPATH} #{flag}"
  output = IO.popen(command, 'r+') do |pipe|
    pipe.write data
    pipe.close_write # close our write end so the child sees end-of-input
    pipe.read
  end

  # NOTE(review): `quote` is defined outside this excerpt; presumably it
  # pre-processes the raw output so it parses as YAML — confirm.
  # NOTE(review): YAML.load is applied to subprocess output derived from the
  # document; consider whether a safe loader is appropriate here.
  case type
  when :text, :html
    output
  when :metadata
    YAML.load quote(output)
  when :mimetype
    parsed = YAML.load(quote(output))
    MIME::Types[parsed['Content-Type']].first
  end
end

Instance Method Details

#data ⇒ Object

Returns the raw/unparsed content of the Yomu document.

yomu = Yomu.new 'sample.pages'
yomu.data


155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/yomu.rb', line 155

# Returns the raw/unparsed content of the Yomu document.
#
# The content is fetched once, from whichever source the document was
# created with (file path, URI or stream), and cached in @data. If none of
# the source predicates holds, nothing is cached and nil is returned.
def data
  unless defined? @data
    if path?
      @data = File.read(@path)
    elsif uri?
      @data = Net::HTTP.get(@uri)
    elsif stream?
      @data = @stream.read
    end
  end

  @data
end

#html ⇒ Object

Returns the text content of the Yomu document in HTML.

yomu = Yomu.new 'sample.pages'
yomu.html


93
94
95
96
97
# File 'lib/yomu.rb', line 93

# Returns the text content of the Yomu document in HTML.
#
# The result is cached in @html. It must not share the @text cache used by
# the plain-text reader, otherwise whichever of the two is called first
# would be returned by both.
def html
  return @html if defined? @html

  @html = Yomu.read :html, data
end

#metadata ⇒ Object

Returns the metadata hash of the Yomu document.

yomu = Yomu.new 'sample.pages'
yomu.metadata['Content-Type']


104
105
106
107
108
# File 'lib/yomu.rb', line 104

# Returns the metadata hash of the Yomu document.
#
# The result of Yomu.read :metadata is cached in @metadata, so the document
# is only processed once.
def metadata
  return @metadata if defined? @metadata

  @metadata = Yomu.read :metadata, data
end

#mimetype ⇒ Object

Returns the mimetype object of the Yomu document.

yomu = Yomu.new 'sample.docx'
yomu.mimetype.content_type #=> 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
yomu.mimetype.extensions #=> ['docx']


116
117
118
119
120
# File 'lib/yomu.rb', line 116

# Returns the mimetype object of the Yomu document.
#
# The type is looked up in MIME::Types using the 'Content-Type' entry of
# the document's metadata; the first match is cached in @mimetype.
def mimetype
  return @mimetype if defined? @mimetype

  # Index with the metadata's Content-Type value, not a literal array.
  @mimetype = MIME::Types[metadata['Content-Type']].first
end

#path? ⇒ Boolean

Returns true if the Yomu document was specified using a file path.

yomu = Yomu.new 'sample.pages'
yomu.path? #=> true

Returns:

  • (Boolean)


127
128
129
# File 'lib/yomu.rb', line 127

# Returns true if the Yomu document was specified using a file path,
# false otherwise.
def path?
  # A bare `defined? @path` yields the String "instance-variable" or nil;
  # ask the object directly so callers get a real Boolean.
  instance_variable_defined?(:@path)
end

#stream? ⇒ Boolean

Returns true if the Yomu document was specified from a stream or an object which responds to read.

file = File.open('sample.pages')
yomu = Yomu.new file
yomu.stream? #=> true

Returns:

  • (Boolean)


146
147
148
# File 'lib/yomu.rb', line 146

# Returns true if the Yomu document was specified from a stream or an
# object which responds to read, false otherwise.
def stream?
  # A bare `defined? @stream` yields the String "instance-variable" or nil;
  # ask the object directly so callers get a real Boolean.
  instance_variable_defined?(:@stream)
end

#text ⇒ Object

Returns the text content of the Yomu document.

yomu = Yomu.new 'sample.pages'
yomu.text


82
83
84
85
86
# File 'lib/yomu.rb', line 82

# Returns the text content of the Yomu document.
#
# The document is read through Yomu.read :text on first use only; later
# calls return the value cached in @text (even when that value is nil).
def text
  @text = Yomu.read(:text, data) unless defined? @text

  @text
end

#uri? ⇒ Boolean

Returns true if the Yomu document was specified using a URI.

yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
yomu.uri? #=> true

Returns:

  • (Boolean)


136
137
138
# File 'lib/yomu.rb', line 136

# Returns true if the Yomu document was specified using a URI,
# false otherwise.
def uri?
  # A bare `defined? @uri` yields the String "instance-variable" or nil;
  # ask the object directly so callers get a real Boolean.
  instance_variable_defined?(:@uri)
end