Class: DJNML
- Inherits:
-
Object
- Object
- DJNML
- Defined in:
- lib/djnml.rb,
lib/djnml/codes.rb,
lib/djnml/delete.rb,
lib/djnml/modification.rb
Defined Under Namespace
Classes: Codes, Delete, FileError, Modification
Instance Attribute Summary collapse
-
#accession_number ⇒ Object
readonly
Returns the value of attribute accession_number.
-
#brand ⇒ Object
readonly
Returns the value of attribute brand.
-
#company_address ⇒ Object
readonly
Returns the value of attribute company_address.
-
#company_city ⇒ Object
readonly
Returns the value of attribute company_city.
-
#company_code ⇒ Object
readonly
Returns the value of attribute company_code.
-
#company_name ⇒ Object
readonly
Returns the value of attribute company_name.
-
#company_zip ⇒ Object
readonly
Returns the value of attribute company_zip.
-
#content_code ⇒ Object
readonly
Returns the value of attribute content_code.
-
#copyright_holder ⇒ Object
readonly
Returns the value of attribute copyright_holder.
-
#copyright_year ⇒ Object
readonly
Returns the value of attribute copyright_year.
-
#delete ⇒ Object
readonly
Returns the value of attribute delete.
-
#destination ⇒ Object
readonly
Returns the value of attribute destination.
-
#display_date ⇒ Object
readonly
Returns the value of attribute display_date.
-
#dist_id ⇒ Object
readonly
Returns the value of attribute dist_id.
-
#doc_date ⇒ Object
readonly
Returns the value of attribute doc_date.
-
#function_code ⇒ Object
readonly
Returns the value of attribute function_code.
-
#geo_code ⇒ Object
readonly
Returns the value of attribute geo_code.
-
#government_code ⇒ Object
readonly
Returns the value of attribute government_code.
-
#headline ⇒ Object
readonly
Returns the value of attribute headline.
-
#headline_brand ⇒ Object
readonly
Returns the value of attribute headline_brand.
-
#hot ⇒ Object
readonly
Returns the value of attribute hot.
-
#html ⇒ Object
readonly
Returns the value of attribute html.
-
#industry_code ⇒ Object
readonly
Returns the value of attribute industry_code.
-
#isin_code ⇒ Object
readonly
Returns the value of attribute isin_code.
-
#journal_code ⇒ Object
readonly
Returns the value of attribute journal_code.
-
#lang ⇒ Object
readonly
Returns the value of attribute lang.
-
#language ⇒ Object
readonly
Returns the value of attribute language.
-
#market_code ⇒ Object
readonly
Returns the value of attribute market_code.
-
#md5 ⇒ Object
readonly
Returns the value of attribute md5.
-
#modifications ⇒ Object
readonly
Returns the value of attribute modifications.
-
#msize ⇒ Object
readonly
Returns the value of attribute msize.
-
#news_source ⇒ Object
readonly
Returns the value of attribute news_source.
-
#origin ⇒ Object
readonly
Returns the value of attribute origin.
-
#original_source ⇒ Object
readonly
Returns the value of attribute original_source.
-
#page_citation ⇒ Object
readonly
Returns the value of attribute page_citation.
-
#page_code ⇒ Object
readonly
Returns the value of attribute page_code.
-
#product ⇒ Object
readonly
Returns the value of attribute product.
-
#product_code ⇒ Object
readonly
Returns the value of attribute product_code.
-
#publisher ⇒ Object
readonly
Returns the value of attribute publisher.
-
#retention ⇒ Object
readonly
Returns the value of attribute retention.
-
#routing_code ⇒ Object
readonly
Returns the value of attribute routing_code.
-
#seq ⇒ Object
readonly
Returns the value of attribute seq.
-
#service_id ⇒ Object
readonly
Returns the value of attribute service_id.
-
#stat_code ⇒ Object
readonly
Returns the value of attribute stat_code.
-
#subject_code ⇒ Object
readonly
Returns the value of attribute subject_code.
-
#sys_id ⇒ Object
readonly
Returns the value of attribute sys_id.
-
#temp_perm ⇒ Object
readonly
Returns the value of attribute temp_perm.
-
#text ⇒ Object
readonly
Returns the value of attribute text.
-
#transmission_date ⇒ Object
readonly
Returns the value of attribute transmission_date.
-
#urgency ⇒ Object
readonly
Returns the value of attribute urgency.
-
#website ⇒ Object
readonly
Returns the value of attribute website.
Class Method Summary collapse
Instance Method Summary collapse
- #has_content? ⇒ Boolean
-
#initialize(data = {}) ⇒ DJNML
constructor
A new instance of DJNML.
- #load(filename) ⇒ Object
Constructor Details
#initialize(data = {}) ⇒ DJNML
Returns a new instance of DJNML.
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/djnml.rb', line 54
#
# Builds a DJNML instance from a Hash of already-extracted values.
#
# Every key is a String named after the attribute it fills ('msize', 'md5',
# 'doc_date', 'industry_code', ...). All keys are optional:
#   * 'msize' and 'seq' are converted with #to_i (missing => 0)
#   * 'transmission_date', 'doc_date' and 'display_date' are run through
#     Time.parse when present and stay nil when absent
#   * the eleven '*_code' lists below are wrapped into Codes objects
#     (missing => [])
#   * everything else is stored as given
# @delete and @modifications are not assigned here; only #load sets them.
#
# @param data [Hash] attribute values keyed by attribute name
# @return [DJNML] a new instance of DJNML
# @raise [ArgumentError] if a supplied date string is rejected by Time.parse
def initialize(data = {})
  # Time.parse(nil) raises TypeError, which made the default `data = {}`
  # unusable. A missing date now simply leaves the attribute nil.
  parse_time = ->(value) { value.nil? ? nil : Time.parse(value) }
  # Array() maps nil to [] exactly like nil.to_a did, and also accepts a
  # single scalar code instead of failing on String#to_a.
  build_codes = ->(value) { Array(value).map { |c| Codes.new(c) } }

  @msize = data['msize'].to_i
  @md5 = data['md5']
  @sys_id = data['sys_id']
  @destination = data['destination']
  @dist_id = data['dist_id']
  @transmission_date = parse_time.call(data['transmission_date'])
  @publisher = data['publisher']
  @doc_date = parse_time.call(data['doc_date'])
  @product = data['product']
  @seq = data['seq'].to_i
  @lang = data['lang']
  @news_source = data['news_source']
  @origin = data['origin']
  @service_id = data['service_id']
  @urgency = data['urgency']
  @brand = data['brand']
  @temp_perm = data['temp_perm']
  @retention = data['retention']
  @hot = data['hot']
  @original_source = data['original_source']
  @accession_number = data['accession_number']
  @page_citation = data['page_citation']
  @display_date = parse_time.call(data['display_date'])
  @company_code = data['company_code']
  @isin_code = data['isin_code']
  @page_code = data['page_code']

  # Each list is read from the key matching its own attribute name. The
  # previous hand-written version filled @journal_code from 'stat_code'.
  %w[industry_code government_code subject_code market_code product_code geo_code
     stat_code journal_code routing_code content_code function_code].each do |key|
    instance_variable_set("@#{key}", build_codes.call(data[key]))
  end

  @headline = data['headline']
  @headline_brand = data['headline_brand']
  @html = data['html']
  @text = data['text']
  @copyright_year = data['copyright_year']
  @copyright_holder = data['copyright_holder']
  @website = data['website']
  @company_name = data['company_name']
  @company_address = data['company_address']
  @company_zip = data['company_zip']
  @company_city = data['company_city']
  @language = data['language']
end
Instance Attribute Details
#accession_number ⇒ Object (readonly)
Returns the value of attribute accession_number.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @accession_number; #load takes it from the djn-mdata
# element's 'accession-number' attribute.
def accession_number
  @accession_number
end
#brand ⇒ Object (readonly)
Returns the value of attribute brand.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @brand; #load takes it from the djn-mdata element's
# 'brand' attribute.
def brand
  @brand
end
#company_address ⇒ Object (readonly)
Returns the value of attribute company_address.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @company_address; #load sets it to the second capture of
# its "Company:" pattern when that pattern matches the story text.
def company_address
  @company_address
end
#company_city ⇒ Object (readonly)
Returns the value of attribute company_city.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @company_city; #load sets it to the fourth capture of its
# "Company:" pattern when that pattern matches the story text.
def company_city
  @company_city
end
#company_code ⇒ Object (readonly)
Returns the value of attribute company_code.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @company_code; #load fills it with the stripped text of
# every djn-coding/djn-company/c element.
def company_code
  @company_code
end
#company_name ⇒ Object (readonly)
Returns the value of attribute company_name.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @company_name; #load sets it to the first capture of its
# "Company:" pattern when that pattern matches the story text.
def company_name
  @company_name
end
#company_zip ⇒ Object (readonly)
Returns the value of attribute company_zip.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @company_zip; #load sets it to the third (digits-only)
# capture of its "Company:" pattern when that pattern matches the text.
def company_zip
  @company_zip
end
#content_code ⇒ Object (readonly)
Returns the value of attribute content_code.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @content_code; #load fills it with one Codes object per
# djn-coding/djn-content/c element.
def content_code
  @content_code
end
#copyright_holder ⇒ Object (readonly)
Returns the value of attribute copyright_holder.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @copyright_holder; #load takes it from the copyright
# element's 'holder' attribute.
def copyright_holder
  @copyright_holder
end
#copyright_year ⇒ Object (readonly)
Returns the value of attribute copyright_year.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @copyright_year; #load converts the copyright element's
# 'year' attribute to an Integer.
def copyright_year
  @copyright_year
end
#delete ⇒ Object (readonly)
Returns the value of attribute delete.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @delete; #load fills it with one Delete object per
# administration/doc-delete element. #initialize does not assign it.
def delete
  @delete
end
#destination ⇒ Object (readonly)
Returns the value of attribute destination.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @destination; #load takes it from the doc element's
# 'destination' attribute.
def destination
  @destination
end
#display_date ⇒ Object (readonly)
Returns the value of attribute display_date.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @display_date; #load parses it with Time.parse from the
# djn-mdata element's 'display-date' attribute.
def display_date
  @display_date
end
#dist_id ⇒ Object (readonly)
Returns the value of attribute dist_id.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @dist_id; #load takes it from the doc element's 'distId'
# attribute.
def dist_id
  @dist_id
end
#doc_date ⇒ Object (readonly)
Returns the value of attribute doc_date.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @doc_date; #load parses it with Time.parse from the djnml
# element's 'docdate' attribute.
def doc_date
  @doc_date
end
#function_code ⇒ Object (readonly)
Returns the value of attribute function_code.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @function_code; #load fills it with one Codes object per
# djn-coding/djn-function/c element.
def function_code
  @function_code
end
#geo_code ⇒ Object (readonly)
Returns the value of attribute geo_code.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @geo_code; #load fills it with one Codes object per
# djn-coding/djn-geo/c element.
def geo_code
  @geo_code
end
#government_code ⇒ Object (readonly)
Returns the value of attribute government_code.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @government_code; #load fills it with one Codes object per
# djn-coding/djn-government/c element.
def government_code
  @government_code
end
#headline ⇒ Object (readonly)
Returns the value of attribute headline.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @headline; #load sets it to the stripped text of the
# body/headline element.
def headline
  @headline
end
#headline_brand ⇒ Object (readonly)
Returns the value of attribute headline_brand.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @headline_brand; #load copies the headline element's
# 'brand-display' attribute when that attribute is present.
def headline_brand
  @headline_brand
end
#hot ⇒ Object (readonly)
Returns the value of attribute hot.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @hot; #load takes it from the djn-mdata element's 'hot'
# attribute.
def hot
  @hot
end
#html ⇒ Object (readonly)
Returns the value of attribute html.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @html; #load sets it to the XML serialization of the
# body/text element's children.
def html
  @html
end
#industry_code ⇒ Object (readonly)
Returns the value of attribute industry_code.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @industry_code; #load fills it with one Codes object per
# djn-coding/djn-industry/c element.
def industry_code
  @industry_code
end
#isin_code ⇒ Object (readonly)
Returns the value of attribute isin_code.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @isin_code; #load fills it with the stripped text of every
# djn-coding/djn-isin/c element.
def isin_code
  @isin_code
end
#journal_code ⇒ Object (readonly)
Returns the value of attribute journal_code.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @journal_code; #load fills it with one Codes object per
# djn-coding/djn-journal/c element.
def journal_code
  @journal_code
end
#lang ⇒ Object (readonly)
Returns the value of attribute lang.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @lang; #load takes it from the djnml element's 'lang'
# attribute.
def lang
  @lang
end
#language ⇒ Object (readonly)
Returns the value of attribute language.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @language; #load sets it to whatever
# LanguageDetector.instance.detect returns for the story text.
def language
  @language
end
#market_code ⇒ Object (readonly)
Returns the value of attribute market_code.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @market_code; #load fills it with one Codes object per
# djn-coding/djn-market/c element.
def market_code
  @market_code
end
#md5 ⇒ Object (readonly)
Returns the value of attribute md5.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @md5; #load takes it from the doc element's 'md5'
# attribute.
def md5
  @md5
end
#modifications ⇒ Object (readonly)
Returns the value of attribute modifications.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @modifications; #load fills it with one Modification per
# doc-modify/modify-replace element. #initialize does not assign it.
def modifications
  @modifications
end
#msize ⇒ Object (readonly)
Returns the value of attribute msize.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @msize; #load converts the doc element's 'msize' attribute
# to an Integer.
def msize
  @msize
end
#news_source ⇒ Object (readonly)
Returns the value of attribute news_source.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @news_source; #load takes it from the djn-newswires
# element's 'news-source' attribute.
def news_source
  @news_source
end
#origin ⇒ Object (readonly)
Returns the value of attribute origin.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @origin; #load takes it from the djn-newswires element's
# 'origin' attribute.
def origin
  @origin
end
#original_source ⇒ Object (readonly)
Returns the value of attribute original_source.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @original_source; #load takes it from the djn-mdata
# element's 'original-source' attribute.
def original_source
  @original_source
end
#page_citation ⇒ Object (readonly)
Returns the value of attribute page_citation.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @page_citation; #load takes it from the djn-mdata
# element's 'page-citation' attribute.
def page_citation
  @page_citation
end
#page_code ⇒ Object (readonly)
Returns the value of attribute page_code.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @page_code; #load fills it with the stripped text of every
# djn-coding/djn-page/c element.
def page_code
  @page_code
end
#product ⇒ Object (readonly)
Returns the value of attribute product.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @product; #load takes it from the djnml element's
# 'product' attribute.
def product
  @product
end
#product_code ⇒ Object (readonly)
Returns the value of attribute product_code.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @product_code; #load fills it with one Codes object per
# djn-coding/djn-product/c element.
def product_code
  @product_code
end
#publisher ⇒ Object (readonly)
Returns the value of attribute publisher.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @publisher; #load takes it from the djnml element's
# 'publisher' attribute.
def publisher
  @publisher
end
#retention ⇒ Object (readonly)
Returns the value of attribute retention.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @retention; #load takes it from the djn-mdata element's
# 'retention' attribute.
def retention
  @retention
end
#routing_code ⇒ Object (readonly)
Returns the value of attribute routing_code.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @routing_code; #load fills it with one Codes object per
# djn-coding/djn-routing/c element.
def routing_code
  @routing_code
end
#seq ⇒ Object (readonly)
Returns the value of attribute seq.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @seq; #load converts the djnml element's 'seq' attribute
# to an Integer.
def seq
  @seq
end
#service_id ⇒ Object (readonly)
Returns the value of attribute service_id.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @service_id; #load takes it from the djn-newswires
# element's 'service-id' attribute.
def service_id
  @service_id
end
#stat_code ⇒ Object (readonly)
Returns the value of attribute stat_code.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @stat_code; #load fills it with one Codes object per
# djn-coding/djn-stat/c element.
def stat_code
  @stat_code
end
#subject_code ⇒ Object (readonly)
Returns the value of attribute subject_code.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @subject_code; #load fills it with one Codes object per
# djn-coding/djn-subject/c element.
def subject_code
  @subject_code
end
#sys_id ⇒ Object (readonly)
Returns the value of attribute sys_id.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @sys_id; #load takes it from the doc element's 'sysId'
# attribute.
def sys_id
  @sys_id
end
#temp_perm ⇒ Object (readonly)
Returns the value of attribute temp_perm.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @temp_perm; #load takes it from the djn-mdata element's
# 'temp-perm' attribute.
def temp_perm
  @temp_perm
end
#text ⇒ Object (readonly)
Returns the value of attribute text.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @text; #load sets it to the stripped text content of the
# body/text element's children. #has_content? tests it for nil.
def text
  @text
end
#transmission_date ⇒ Object (readonly)
Returns the value of attribute transmission_date.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @transmission_date; #load parses it with Time.parse from
# the doc element's 'transmission-date' attribute.
def transmission_date
  @transmission_date
end
#urgency ⇒ Object (readonly)
Returns the value of attribute urgency.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @urgency; #load sets it to an Integer derived from the
# djn-urgency element's text.
def urgency
  @urgency
end
#website ⇒ Object (readonly)
Returns the value of attribute website.
38 39 40 |
# File 'lib/djnml.rb', line 38
# Reader for @website; #load sets it to the stripped remainder of the
# first story-text line matching "Internet:".
def website
  @website
end
Class Method Details
Instance Method Details
#has_content? ⇒ Boolean
458 459 460 |
# File 'lib/djnml.rb', line 458
# Tells whether this story carries body text.
#
# @return [Boolean] true when #text is anything other than nil (an empty
#   String still counts as content), false when it is nil
def has_content?
  !text.nil?
end
#load(filename) ⇒ Object
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 |
# File 'lib/djnml.rb', line 118
#
# Reads a DJNML XML file and fills this instance's attributes from it.
#
# Extraction is best effort: each section of the document is read inside
# its own begin/rescue, so a missing element or unparsable value leaves the
# attributes of that section (from the failing assignment onward) untouched
# and processing continues with the next section. Only the "Company:" text
# match and the doc-modify section at the end run outside a rescue.
#
# @param filename [String] path of the XML file to read
# @return [DJNML] self
# @raise [FileError] if +filename+ does not exist
def load(filename)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  raise FileError.new("#{filename}: no such file!") unless File.exist?(filename)

  # The block form closes the handle once parsing is done. File.open, unlike
  # Kernel#open, never interprets a leading "|" in the name as a command.
  parser = File.open(filename) { |io| Nokogiri::XML(io) }

  # doc tag
  begin
    node = parser.search('/doc').first
    @msize = node['msize'].to_i
    @md5 = node['md5']
    @sys_id = node['sysId']
    @destination = node['destination']
    @dist_id = node['distId']
    @transmission_date = Time.parse(node['transmission-date'])
  rescue StandardError
    # ignore errors
  end

  # djnml tag
  begin
    node = parser.search('/doc/djnml').first
    @publisher = node['publisher']
    @doc_date = Time.parse(node['docdate'])
    @product = node['product']
    @seq = node['seq'].to_i
    @lang = node['lang']
  rescue StandardError
    # ignore errors
  end

  newswires_path = '/doc/djnml/head/docdata/djn/djn-newswires'

  # djn-newswires tag
  begin
    node = parser.search(newswires_path).first
    @news_source = node['news-source']
    @origin = node['origin']
    @service_id = node['service-id']
  rescue StandardError
    # ignore errors
  end

  # djn-urgency tag
  begin
    node = parser.search("#{newswires_path}/djn-urgency").first
    # No String#squeeze here: without arguments it collapses every run of
    # repeated characters, so "11" would be read as 1.
    @urgency = node.text.strip.to_i
  rescue StandardError
    # ignore errors
  end

  # djn-mdata tag
  begin
    node = parser.search("#{newswires_path}/djn-mdata").first
    @brand = node['brand']
    @temp_perm = node['temp-perm']
    @retention = node['retention']
    @hot = node['hot']
    @original_source = node['original-source']
    @accession_number = node['accession-number']
    @page_citation = node['page-citation']
    @display_date = Time.parse(node['display-date'])
  rescue StandardError
    # ignore errors
  end

  coding_path = "#{newswires_path}/djn-mdata/djn-coding"

  # coding / company, isin, page: kept as plain stripped strings
  %w[company isin page].each do |kind|
    begin
      values = parser.search("#{coding_path}/djn-#{kind}/c").map { |tag| tag.text.strip }
      instance_variable_set("@#{kind}_code", values)
    rescue StandardError
      # ignore errors
    end
  end

  # remaining coding sections: each <c> entry is wrapped in a Codes object
  %w[industry government subject market product geo
     stat journal routing content function].each do |kind|
    begin
      codes = parser.search("#{coding_path}/djn-#{kind}/c").map { |tag| Codes.new(tag.text.strip) }
      instance_variable_set("@#{kind}_code", codes)
    rescue StandardError
      # ignore errors
    end
  end

  # body / headline
  begin
    node = parser.search('/doc/djnml/body/headline').first
    @headline = node.text.strip
    @headline_brand = node['brand-display'] if node['brand-display']
  rescue StandardError
    # ignore errors
  end

  # body / text
  begin
    node = parser.search('/doc/djnml/body/text').first
    @html = node.children.to_xml
    @text = node.children.text.strip
  rescue StandardError
    # ignore errors
  end

  # copyright
  begin
    node = parser.search('/doc/djnml/head/copyright').first
    @copyright_year = node['year'].to_s.strip.to_i
    @copyright_holder = node['holder']
  rescue StandardError
    # ignore errors
  end

  # website: rest of the first line of the story text following "Internet:"
  begin
    @website = Regexp.last_match(1).strip if @text =~ /Internet:\s+(.+?)$/
  rescue StandardError
    # ignore errors
  end

  # company block in the story text: name, address, zip and city captures
  if @text =~ /Company:\s+(\S.+?)\s*\n+\s+(\b.+?)\n+\s+(\d+)\s+(\b.+?)\n+/
    company = Regexp.last_match
    @company_name = company[1].strip
    @company_address = company[2].strip
    @company_zip = company[3].strip
    @company_city = company[4].strip
  end

  # language
  begin
    @language = LanguageDetector.instance.detect(@text)
  rescue StandardError
    # ignore errors
  end

  # stories to delete
  begin
    @delete = []
    parser.search('/doc/djnml/administration/doc-delete').each do |dd|
      @delete << Delete.new(:product => dd['product'],
                            :doc_date => dd['docdate'],
                            :seq => dd['seq'],
                            :publisher => dd['publisher'],
                            :reason => dd['reason'])
    end
  rescue StandardError
    # ignore errors
  end

  # replacements: every modify-replace child shares its parent doc-modify's
  # attributes. Not rescued, so errors raised here reach the caller.
  @modifications = []
  doc_modify = parser.search('/doc/djnml/administration/doc-modify').first
  parser.search('/doc/djnml/administration/doc-modify/modify-replace').each do |replacement|
    @modifications << Modification.new(:doc_date => doc_modify['docdate'],
                                       :product => doc_modify['product'],
                                       :publisher => doc_modify['publisher'],
                                       :seq => doc_modify['seq'],
                                       :xml => replacement)
  end

  self
end