Class: RTika::GenericParser
- Inherits:
-
Object
- Object
- RTika::GenericParser
show all
- Defined in:
- lib/rtika.rb
Class Method Summary
collapse
Instance Method Summary
collapse
Constructor Details
Returns a new instance of GenericParser.
52
53
54
55
56
57
58
59
60
61
62
63
|
# File 'lib/rtika.rb', line 52
# Builds the content handler and metadata holder used by a parse run.
#
# The last positional argument is stored as the options object; it is
# consulted through +remove_boilerplate?+ to pick the handler chain.
#
# @param args [Array] constructor arguments; only the last one is read here
def initialize(*args)
  @options = args.last
  @content =
    if remove_boilerplate?
      # Keep a reference to the inner write-out handler: #parse hands it to
      # the result when boilerplate removal is enabled.
      # NOTE(review): -1 presumably disables Tika's write limit -- confirm.
      @writeout_content = RTika::WriteOutContentHandler.new(-1)
      RTika::BoilerpipeContentHandler.new(@writeout_content)
    else
      RTika::BodyContentHandler.new(-1)
    end
  @metadata = RTika::Metadata.new
end
|
Class Method Details
.parse(*args) ⇒ Object
44
45
46
|
# File 'lib/rtika.rb', line 44
# Convenience entry point: instantiates the parser with the given arguments
# and immediately runs #parse on it.
#
# @param args [Array] forwarded unchanged to the constructor
# @return [Object] whatever the instance-level #parse returns
def self.parse(*args)
  instance = new(*args)
  instance.parse
end
|
Instance Method Details
#parse ⇒ Object
65
66
67
68
69
70
71
72
73
74
|
# File 'lib/rtika.rb', line 65
# Runs the parse and wraps the outcome in a ParsedResult.
#
# A fresh AutoDetectParser is stored in @parser, then #process is called and
# is expected to return a two-element [content, metadata] pair. When
# boilerplate removal is enabled the result is built from @writeout_content
# rather than from the content returned by #process.
#
# @return [RTika::ParsedResult]
def parse
  @parser = RTika::AutoDetectParser.new
  @content, @metadata = process
  handler = remove_boilerplate? ? @writeout_content : @content
  RTika::ParsedResult.new(handler, @metadata)
end
|
#process ⇒ Object
76
77
78
|
# File 'lib/rtika.rb', line 76
# Template hook for concrete parsers. This base implementation always raises;
# an overriding method must return the content and metadata pair that #parse
# destructures.
#
# @raise [RuntimeError] always, in this base implementation
def process
  raise RuntimeError, "override this in your parser, return content and metadata"
end
|
#remove_boilerplate? ⇒ Boolean
48
49
50
|
# File 'lib/rtika.rb', line 48
# Whether boilerplate removal was requested through the options.
#
# Only a Hash whose :remove_boilerplate value is exactly +true+ enables it.
# @options is whatever the constructor received as its last argument, so it
# may be nil (no arguments) or a non-Hash value such as a String; those are
# treated as "not requested" instead of raising on the +[]+ lookup.
#
# @return [Boolean] always +true+ or +false+, never nil
def remove_boilerplate?
  @options.is_a?(Hash) && @options[:remove_boilerplate] == true
end
|