Class: ChupaText::Data
- Inherits:
-
Object
- Object
- ChupaText::Data
- Defined in:
- lib/chupa-text/data.rb
Direct Known Subclasses
Instance Attribute Summary collapse
-
#attributes ⇒ Attributes
readonly
The attributes of the data.
-
#body ⇒ String?
The content of the data,
nil
if the data doesn't have any content. -
#expected_screenshot_size ⇒ Array<Integer, Integer>
The expected screenshot size.
-
#need_screenshot ⇒ Bool
writeonly
The specified value.
-
#path ⇒ String?
The path associated with the content of the data,
nil
if the data isn't associated with any file. -
#screenshot ⇒ Screenshot?
The screenshot of the data.
-
#size ⇒ Integer?
The byte size of the data,
nil
if the data doesn't have any content. -
#source ⇒ Data?
The source of the data.
-
#uri ⇒ URI?
The URI of the data if the data is for remote or local file,
nil
if the data isn't associated with any URIs.
Instance Method Summary collapse
- #[](name) ⇒ Object
- #[]=(name, value) ⇒ Object
-
#extension ⇒ String?
Normalized extension as String if #uri is not
nil
,nil
otherwise. -
#initialize(options = {}) ⇒ Data
constructor
A new instance of Data.
- #initialize_copy(object) ⇒ Object
-
#merge!(data) ⇒ void
Merges metadata from data.
- #mime_type ⇒ String?
- #mime_type=(type) ⇒ Object
-
#need_screenshot? ⇒ Bool
true
when screenshot is needed if available. - #open {|StringIO.new(body)| ... } ⇒ Object
-
#text? ⇒ Bool
True if MIME type is "text/XXX", false otherwise.
-
#text_plain? ⇒ Bool
True if MIME type is "text/plain", false otherwise.
Constructor Details
#initialize(options = {}) ⇒ Data
Returns a new instance of Data.
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/chupa-text/data.rb', line 66 def initialize(options={}) @uri = nil @body = nil @size = nil @path = nil @mime_type = nil @attributes = Attributes.new @source = nil @screenshot = nil @need_screenshot = true @expected_screenshot_size = [200, 200] @options = options || {} source_data = @options[:source_data] if source_data merge!(source_data) @source = source_data end end |
Instance Attribute Details
#attributes ⇒ Attributes (readonly)
Returns The attributes of the data.
48 49 50 |
# File 'lib/chupa-text/data.rb', line 48 def attributes @attributes end |
#body ⇒ String?
Returns The content of the data, nil
if the data
doesn't have any content.
30 31 32 |
# File 'lib/chupa-text/data.rb', line 30 def body @body end |
#expected_screenshot_size ⇒ Array<Integer, Integer>
Returns the expected screenshot size.
64 65 66 |
# File 'lib/chupa-text/data.rb', line 64 def expected_screenshot_size @expected_screenshot_size end |
#need_screenshot=(value) ⇒ Bool (writeonly)
Returns the specified value.
61 62 63 |
# File 'lib/chupa-text/data.rb', line 61 def need_screenshot=(value) @need_screenshot = value end |
#path ⇒ String?
Returns The path associated with the content of
the data, nil
if the data isn't associated with any file.
The path may not be related to the original content. For
example, "/tmp/XXX.txt"
may be returned for the data of
"http://example.com/XXX.txt"
.
This value is useful when an external command is used to extract text and metadata.
45 46 47 |
# File 'lib/chupa-text/data.rb', line 45 def path @path end |
#screenshot ⇒ Screenshot?
Returns The screenshot of the data. For example, the first page image for a PDF file.
57 58 59 |
# File 'lib/chupa-text/data.rb', line 57 def screenshot @screenshot end |
#size ⇒ Integer?
Returns The byte size of the data, nil
if the data
doesn't have any content.
34 35 36 |
# File 'lib/chupa-text/data.rb', line 34 def size @size end |
#source ⇒ Data?
Returns The source of the data. For example, text
data (hello.txt
) in archive data (hello.tar
) has the
archive data in #source.
53 54 55 |
# File 'lib/chupa-text/data.rb', line 53 def source @source end |
#uri ⇒ URI?
Returns The URI of the data if the data is for remote
or local file, nil
if the data isn't associated with any
URIs.
26 27 28 |
# File 'lib/chupa-text/data.rb', line 26 def uri @uri end |
Instance Method Details
#[](name) ⇒ Object
141 142 143 |
# File 'lib/chupa-text/data.rb', line 141 def [](name) @attributes[name] end |
#[]=(name, value) ⇒ Object
145 146 147 |
# File 'lib/chupa-text/data.rb', line 145 def []=(name, value) @attributes[name] = value end |
#extension ⇒ String?
Returns Normalized extension as String if #uri
is not nil
, nil
otherwise. The normalized extension uses
lower case like pdf
not PDF
.
167 168 169 170 171 172 173 174 |
# File 'lib/chupa-text/data.rb', line 167 def extension return nil if @uri.nil? if @uri.is_a?(URI::HTTP) and @uri.path.end_with?("/") "html" else File.extname(@uri.path).downcase.gsub(/\A\./, "") end end |
#initialize_copy(object) ⇒ Object
85 86 87 88 89 |
# File 'lib/chupa-text/data.rb', line 85 def initialize_copy(object) super @attributes = @attributes.dup self end |
#merge!(data) ⇒ void
This method returns an undefined value.
Merges metadata from data.
96 97 98 99 100 101 102 103 104 105 106 107 108 |
# File 'lib/chupa-text/data.rb', line 96 def merge!(data) self.uri = data.uri self.path = data.path data.attributes.each do |name, value| self[name] = value end if data.mime_type self["source-mime-types"] ||= [] self["source-mime-types"].unshift(data.mime_type) end self.need_screenshot = data.need_screenshot? self.expected_screenshot_size = data.expected_screenshot_size end |
#mime_type ⇒ String?
153 154 155 |
# File 'lib/chupa-text/data.rb', line 153 def mime_type @mime_type || guess_mime_type end |
#mime_type=(type) ⇒ Object
160 161 162 |
# File 'lib/chupa-text/data.rb', line 160 def mime_type=(type) @mime_type = type end |
#need_screenshot? ⇒ Bool
Returns true
when screenshot is needed if available.
189 190 191 |
# File 'lib/chupa-text/data.rb', line 189 def need_screenshot? @need_screenshot end |
#open {|StringIO.new(body)| ... } ⇒ Object
137 138 139 |
# File 'lib/chupa-text/data.rb', line 137 def open yield(StringIO.new(body)) end |
#text? ⇒ Bool
Returns true if MIME type is "text/XXX", false otherwise.
178 179 180 |
# File 'lib/chupa-text/data.rb', line 178 def text? (mime_type || "").start_with?("text/") end |
#text_plain? ⇒ Bool
Returns true if MIME type is "text/plain", false otherwise.
184 185 186 |
# File 'lib/chupa-text/data.rb', line 184 def text_plain? mime_type == "text/plain" end |