Class: RTika::UrlParser

Inherits:
GenericParser show all
Defined in:
lib/rtika.rb

Instance Method Summary collapse

Methods inherited from GenericParser

parse, #parse, #remove_boilerplate?

Constructor Details

#initialize(url, content, opts = {}) ⇒ UrlParser

Returns a new instance of UrlParser.



115
116
117
118
119
# File 'lib/rtika.rb', line 115

# Builds a parser for content that was obtained from a URL.
#
# @param url [Object] location the content came from; stored as @url
# @param content [Object] the already-fetched body; stored as @url_content
# @param opts [Hash] options handed unchanged to the superclass initializer
def initialize(url, content, opts = {})
  # Superclass setup runs first, before this class stores its own state.
  super(opts)
  @url, @url_content = url, content
end

Instance Method Details

#process ⇒ Object



121
122
123
124
125
126
127
128
129
# File 'lib/rtika.rb', line 121

# Feeds the stored URL content to @parser through an in-memory Java stream.
#
# Records the last path component of @url under the "filename" metadata key,
# then calls @parser.parse with the stream, @content and @metadata.
#
# @return [Array] two-element array: [@content, @metadata]
def process
  # NOTE(review): to_java.get_bytes goes through a java.lang.String, so the
  # resulting bytes presumably depend on the JVM default charset -- confirm
  # this is acceptable for non-text (binary) payloads.
  input_stream = java.io.ByteArrayInputStream.new(@url_content.to_java.get_bytes)

  begin
    @metadata.set("filename", File.basename(@url))
    @parser.parse(input_stream, @content, @metadata)
  ensure
    # Close the stream even when metadata setup or parsing raises.
    input_stream.close
  end

  [@content, @metadata]
end