Class: DJNML
- Inherits:
-
Object
- Object
- DJNML
- Defined in:
- lib/djnml.rb,
lib/djnml/codes.rb,
lib/djnml/delete.rb,
lib/djnml/modification.rb
Defined Under Namespace
Classes: Codes, Delete, FileError, Modification
Instance Attribute Summary collapse
-
#accession_number ⇒ Object
readonly
Returns the value of attribute accession_number.
-
#brand ⇒ Object
readonly
Returns the value of attribute brand.
-
#company_address ⇒ Object
readonly
Returns the value of attribute company_address.
-
#company_city ⇒ Object
readonly
Returns the value of attribute company_city.
-
#company_code ⇒ Object
readonly
Returns the value of attribute company_code.
-
#company_name ⇒ Object
readonly
Returns the value of attribute company_name.
-
#company_zip ⇒ Object
readonly
Returns the value of attribute company_zip.
-
#content_code ⇒ Object
readonly
Returns the value of attribute content_code.
-
#copyright_holder ⇒ Object
readonly
Returns the value of attribute copyright_holder.
-
#copyright_year ⇒ Object
readonly
Returns the value of attribute copyright_year.
-
#delete ⇒ Object
readonly
Returns the value of attribute delete.
-
#destination ⇒ Object
readonly
Returns the value of attribute destination.
-
#display_date ⇒ Object
readonly
Returns the value of attribute display_date.
-
#dist_id ⇒ Object
readonly
Returns the value of attribute dist_id.
-
#doc_date ⇒ Object
readonly
Returns the value of attribute doc_date.
-
#function_code ⇒ Object
readonly
Returns the value of attribute function_code.
-
#geo_code ⇒ Object
readonly
Returns the value of attribute geo_code.
-
#government_code ⇒ Object
readonly
Returns the value of attribute government_code.
-
#headline ⇒ Object
readonly
Returns the value of attribute headline.
-
#headline_brand ⇒ Object
readonly
Returns the value of attribute headline_brand.
-
#hot ⇒ Object
readonly
Returns the value of attribute hot.
-
#html ⇒ Object
readonly
Returns the value of attribute html.
-
#industry_code ⇒ Object
readonly
Returns the value of attribute industry_code.
-
#isin_code ⇒ Object
readonly
Returns the value of attribute isin_code.
-
#journal_code ⇒ Object
readonly
Returns the value of attribute journal_code.
-
#lang ⇒ Object
readonly
Returns the value of attribute lang.
-
#language ⇒ Object
readonly
Returns the value of attribute language.
-
#market_code ⇒ Object
readonly
Returns the value of attribute market_code.
-
#md5 ⇒ Object
readonly
Returns the value of attribute md5.
-
#modifications ⇒ Object
readonly
Returns the value of attribute modifications.
-
#msize ⇒ Object
readonly
Returns the value of attribute msize.
-
#news_source ⇒ Object
readonly
Returns the value of attribute news_source.
-
#origin ⇒ Object
readonly
Returns the value of attribute origin.
-
#original_source ⇒ Object
readonly
Returns the value of attribute original_source.
-
#page_citation ⇒ Object
readonly
Returns the value of attribute page_citation.
-
#page_code ⇒ Object
readonly
Returns the value of attribute page_code.
-
#product ⇒ Object
readonly
Returns the value of attribute product.
-
#product_code ⇒ Object
readonly
Returns the value of attribute product_code.
-
#publisher ⇒ Object
readonly
Returns the value of attribute publisher.
-
#retention ⇒ Object
readonly
Returns the value of attribute retention.
-
#routing_code ⇒ Object
readonly
Returns the value of attribute routing_code.
-
#seq ⇒ Object
readonly
Returns the value of attribute seq.
-
#service_id ⇒ Object
readonly
Returns the value of attribute service_id.
-
#stat_code ⇒ Object
readonly
Returns the value of attribute stat_code.
-
#subject_code ⇒ Object
readonly
Returns the value of attribute subject_code.
-
#sys_id ⇒ Object
readonly
Returns the value of attribute sys_id.
-
#temp_perm ⇒ Object
readonly
Returns the value of attribute temp_perm.
-
#text ⇒ Object
readonly
Returns the value of attribute text.
-
#transmission_date ⇒ Object
readonly
Returns the value of attribute transmission_date.
-
#urgency ⇒ Object
readonly
Returns the value of attribute urgency.
-
#website ⇒ Object
readonly
Returns the value of attribute website.
Class Method Summary collapse
Instance Method Summary collapse
- #has_content? ⇒ Boolean
-
#initialize(data = {}) ⇒ DJNML
constructor
A new instance of DJNML.
- #load(filename) ⇒ Object
Constructor Details
#initialize(data = {}) ⇒ DJNML
Returns a new instance of DJNML.
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/djnml.rb', line 54
#
# Builds a DJNML object from a hash of already-extracted values.
#
# Each attribute is read from +data+ under a String key with the same name
# as the attribute (e.g. data['md5'] -> #md5). Missing keys simply leave the
# attribute nil (or 0 for the integer fields, or [] for the code lists).
# @delete and @modifications are not assigned here; #load sets them.
#
# @param data [Hash{String => Object}] attribute values keyed by attribute name
# @raise [ArgumentError] if a present date value cannot be parsed by Time.parse
def initialize(data = {})
  # Time.parse(nil) raises TypeError, which made the default `DJNML.new`
  # (data = {}) unusable. Absent dates now stay nil; present ones are parsed
  # exactly as before.
  parse_time = lambda { |key| data[key].nil? ? nil : Time.parse(data[key]) }

  # Array() accepts nil, a single value or a list, where the former `.to_a`
  # raised NoMethodError for a lone String. Routing every code list through
  # one helper keyed by name also prevents reading the wrong key.
  build_codes = lambda { |key| Array(data[key]).map { |c| Codes.new(c) } }

  # attributes of the <doc> tag
  @msize             = data['msize'].to_i
  @md5               = data['md5']
  @sys_id            = data['sys_id']
  @destination       = data['destination']
  @dist_id           = data['dist_id']
  @transmission_date = parse_time.call('transmission_date')

  # attributes of the <djnml> tag
  @publisher = data['publisher']
  @doc_date  = parse_time.call('doc_date')
  @product   = data['product']
  @seq       = data['seq'].to_i
  @lang      = data['lang']

  # newswire information
  @news_source = data['news_source']
  @origin      = data['origin']
  @service_id  = data['service_id']
  @urgency     = data['urgency']

  # metadata
  @brand            = data['brand']
  @temp_perm        = data['temp_perm']
  @retention        = data['retention']
  @hot              = data['hot']
  @original_source  = data['original_source']
  @accession_number = data['accession_number']
  @page_citation    = data['page_citation']
  @display_date     = parse_time.call('display_date')

  # codes stored as given
  @company_code = data['company_code']
  @isin_code    = data['isin_code']
  @page_code    = data['page_code']

  # codes wrapped in Codes objects
  @industry_code   = build_codes.call('industry_code')
  @government_code = build_codes.call('government_code')
  @subject_code    = build_codes.call('subject_code')
  @market_code     = build_codes.call('market_code')
  @product_code    = build_codes.call('product_code')
  @geo_code        = build_codes.call('geo_code')
  @stat_code       = build_codes.call('stat_code')
  # Previously read from data['stat_code'], so journal codes were silently
  # replaced by a copy of the stat codes.
  @journal_code    = build_codes.call('journal_code')
  @routing_code    = build_codes.call('routing_code')
  @content_code    = build_codes.call('content_code')
  @function_code   = build_codes.call('function_code')

  # body
  @headline       = data['headline']
  @headline_brand = data['headline_brand']
  @html           = data['html']
  @text           = data['text']

  # copyright and company details
  @copyright_year   = data['copyright_year']
  @copyright_holder = data['copyright_holder']
  @website          = data['website']
  @company_name     = data['company_name']
  @company_address  = data['company_address']
  @company_zip      = data['company_zip']
  @company_city     = data['company_city']

  @language = data['language']
end
Instance Attribute Details
#accession_number ⇒ Object (readonly)
Returns the value of attribute accession_number.
38 39 40 |
# File 'lib/djnml.rb', line 38 def accession_number @accession_number end |
#brand ⇒ Object (readonly)
Returns the value of attribute brand.
38 39 40 |
# File 'lib/djnml.rb', line 38 def brand @brand end |
#company_address ⇒ Object (readonly)
Returns the value of attribute company_address.
38 39 40 |
# File 'lib/djnml.rb', line 38 def company_address @company_address end |
#company_city ⇒ Object (readonly)
Returns the value of attribute company_city.
38 39 40 |
# File 'lib/djnml.rb', line 38 def company_city @company_city end |
#company_code ⇒ Object (readonly)
Returns the value of attribute company_code.
38 39 40 |
# File 'lib/djnml.rb', line 38 def company_code @company_code end |
#company_name ⇒ Object (readonly)
Returns the value of attribute company_name.
38 39 40 |
# File 'lib/djnml.rb', line 38 def company_name @company_name end |
#company_zip ⇒ Object (readonly)
Returns the value of attribute company_zip.
38 39 40 |
# File 'lib/djnml.rb', line 38 def company_zip @company_zip end |
#content_code ⇒ Object (readonly)
Returns the value of attribute content_code.
38 39 40 |
# File 'lib/djnml.rb', line 38 def content_code @content_code end |
#copyright_holder ⇒ Object (readonly)
Returns the value of attribute copyright_holder.
38 39 40 |
# File 'lib/djnml.rb', line 38 def copyright_holder @copyright_holder end |
#copyright_year ⇒ Object (readonly)
Returns the value of attribute copyright_year.
38 39 40 |
# File 'lib/djnml.rb', line 38 def copyright_year @copyright_year end |
#delete ⇒ Object (readonly)
Returns the value of attribute delete.
38 39 40 |
# File 'lib/djnml.rb', line 38 def delete @delete end |
#destination ⇒ Object (readonly)
Returns the value of attribute destination.
38 39 40 |
# File 'lib/djnml.rb', line 38 def destination @destination end |
#display_date ⇒ Object (readonly)
Returns the value of attribute display_date.
38 39 40 |
# File 'lib/djnml.rb', line 38 def display_date @display_date end |
#dist_id ⇒ Object (readonly)
Returns the value of attribute dist_id.
38 39 40 |
# File 'lib/djnml.rb', line 38 def dist_id @dist_id end |
#doc_date ⇒ Object (readonly)
Returns the value of attribute doc_date.
38 39 40 |
# File 'lib/djnml.rb', line 38 def doc_date @doc_date end |
#function_code ⇒ Object (readonly)
Returns the value of attribute function_code.
38 39 40 |
# File 'lib/djnml.rb', line 38 def function_code @function_code end |
#geo_code ⇒ Object (readonly)
Returns the value of attribute geo_code.
38 39 40 |
# File 'lib/djnml.rb', line 38 def geo_code @geo_code end |
#government_code ⇒ Object (readonly)
Returns the value of attribute government_code.
38 39 40 |
# File 'lib/djnml.rb', line 38 def government_code @government_code end |
#headline ⇒ Object (readonly)
Returns the value of attribute headline.
38 39 40 |
# File 'lib/djnml.rb', line 38 def headline @headline end |
#headline_brand ⇒ Object (readonly)
Returns the value of attribute headline_brand.
38 39 40 |
# File 'lib/djnml.rb', line 38 def headline_brand @headline_brand end |
#hot ⇒ Object (readonly)
Returns the value of attribute hot.
38 39 40 |
# File 'lib/djnml.rb', line 38 def hot @hot end |
#html ⇒ Object (readonly)
Returns the value of attribute html.
38 39 40 |
# File 'lib/djnml.rb', line 38 def html @html end |
#industry_code ⇒ Object (readonly)
Returns the value of attribute industry_code.
38 39 40 |
# File 'lib/djnml.rb', line 38 def industry_code @industry_code end |
#isin_code ⇒ Object (readonly)
Returns the value of attribute isin_code.
38 39 40 |
# File 'lib/djnml.rb', line 38 def isin_code @isin_code end |
#journal_code ⇒ Object (readonly)
Returns the value of attribute journal_code.
38 39 40 |
# File 'lib/djnml.rb', line 38 def journal_code @journal_code end |
#lang ⇒ Object (readonly)
Returns the value of attribute lang.
38 39 40 |
# File 'lib/djnml.rb', line 38 def lang @lang end |
#language ⇒ Object (readonly)
Returns the value of attribute language.
38 39 40 |
# File 'lib/djnml.rb', line 38 def language @language end |
#market_code ⇒ Object (readonly)
Returns the value of attribute market_code.
38 39 40 |
# File 'lib/djnml.rb', line 38 def market_code @market_code end |
#md5 ⇒ Object (readonly)
Returns the value of attribute md5.
38 39 40 |
# File 'lib/djnml.rb', line 38 def md5 @md5 end |
#modifications ⇒ Object (readonly)
Returns the value of attribute modifications.
38 39 40 |
# File 'lib/djnml.rb', line 38 def modifications @modifications end |
#msize ⇒ Object (readonly)
Returns the value of attribute msize.
38 39 40 |
# File 'lib/djnml.rb', line 38 def msize @msize end |
#news_source ⇒ Object (readonly)
Returns the value of attribute news_source.
38 39 40 |
# File 'lib/djnml.rb', line 38 def news_source @news_source end |
#origin ⇒ Object (readonly)
Returns the value of attribute origin.
38 39 40 |
# File 'lib/djnml.rb', line 38 def origin @origin end |
#original_source ⇒ Object (readonly)
Returns the value of attribute original_source.
38 39 40 |
# File 'lib/djnml.rb', line 38 def original_source @original_source end |
#page_citation ⇒ Object (readonly)
Returns the value of attribute page_citation.
38 39 40 |
# File 'lib/djnml.rb', line 38 def page_citation @page_citation end |
#page_code ⇒ Object (readonly)
Returns the value of attribute page_code.
38 39 40 |
# File 'lib/djnml.rb', line 38 def page_code @page_code end |
#product ⇒ Object (readonly)
Returns the value of attribute product.
38 39 40 |
# File 'lib/djnml.rb', line 38 def product @product end |
#product_code ⇒ Object (readonly)
Returns the value of attribute product_code.
38 39 40 |
# File 'lib/djnml.rb', line 38 def product_code @product_code end |
#publisher ⇒ Object (readonly)
Returns the value of attribute publisher.
38 39 40 |
# File 'lib/djnml.rb', line 38 def publisher @publisher end |
#retention ⇒ Object (readonly)
Returns the value of attribute retention.
38 39 40 |
# File 'lib/djnml.rb', line 38 def retention @retention end |
#routing_code ⇒ Object (readonly)
Returns the value of attribute routing_code.
38 39 40 |
# File 'lib/djnml.rb', line 38 def routing_code @routing_code end |
#seq ⇒ Object (readonly)
Returns the value of attribute seq.
38 39 40 |
# File 'lib/djnml.rb', line 38 def seq @seq end |
#service_id ⇒ Object (readonly)
Returns the value of attribute service_id.
38 39 40 |
# File 'lib/djnml.rb', line 38 def service_id @service_id end |
#stat_code ⇒ Object (readonly)
Returns the value of attribute stat_code.
38 39 40 |
# File 'lib/djnml.rb', line 38 def stat_code @stat_code end |
#subject_code ⇒ Object (readonly)
Returns the value of attribute subject_code.
38 39 40 |
# File 'lib/djnml.rb', line 38 def subject_code @subject_code end |
#sys_id ⇒ Object (readonly)
Returns the value of attribute sys_id.
38 39 40 |
# File 'lib/djnml.rb', line 38 def sys_id @sys_id end |
#temp_perm ⇒ Object (readonly)
Returns the value of attribute temp_perm.
38 39 40 |
# File 'lib/djnml.rb', line 38 def temp_perm @temp_perm end |
#text ⇒ Object (readonly)
Returns the value of attribute text.
38 39 40 |
# File 'lib/djnml.rb', line 38 def text @text end |
#transmission_date ⇒ Object (readonly)
Returns the value of attribute transmission_date.
38 39 40 |
# File 'lib/djnml.rb', line 38 def transmission_date @transmission_date end |
#urgency ⇒ Object (readonly)
Returns the value of attribute urgency.
38 39 40 |
# File 'lib/djnml.rb', line 38 def urgency @urgency end |
#website ⇒ Object (readonly)
Returns the value of attribute website.
38 39 40 |
# File 'lib/djnml.rb', line 38 def website @website end |
Class Method Details
Instance Method Details
#has_content? ⇒ Boolean
458 459 460 |
# File 'lib/djnml.rb', line 458
#
# Reports whether a body text has been set on this object.
#
# @return [Boolean] true when #text is anything other than nil
#   (an empty string still counts as content), false when it is nil
def has_content?
  !text.nil?
end
#load(filename) ⇒ Object
|
# File 'lib/djnml.rb', line 118
#
# Populates this object from a DJNML XML file on disk.
#
# The file is parsed with Nokogiri and each group of attributes is filled in
# on a best-effort basis: when a tag is absent or a value cannot be parsed,
# the error is swallowed and the attributes of that group which were not yet
# assigned keep whatever value they had before the call.
#
# @param filename [String] path of the XML file to read
# @return [DJNML] self
# @raise [FileError] if +filename+ does not exist
def load(filename)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  raise FileError, "#{filename}: no such file!" unless File.exist?(filename)

  # Block-form File.open closes the handle once parsing is done (the former
  # bare `open` leaked it) and never interprets a leading "|" as a command.
  parser = File.open(filename) { |io| Nokogiri::XML(io) }

  # doc tag
  begin
    doc = parser.search('/doc').first
    @msize = doc['msize'].to_i
    @md5 = doc['md5']
    @sys_id = doc['sysId']
    @destination = doc['destination']
    @dist_id = doc['distId']
    @transmission_date = Time.parse(doc['transmission-date'])
  rescue StandardError
    # ignore errors
  end

  # djnml tag
  begin
    djnml = parser.search('/doc/djnml').first
    @publisher = djnml['publisher']
    @doc_date = Time.parse(djnml['docdate'])
    @product = djnml['product']
    @seq = djnml['seq'].to_i
    @lang = djnml['lang']
  rescue StandardError
    # ignore errors
  end

  newswires_path = '/doc/djnml/head/docdata/djn/djn-newswires'

  # djn-newswires tag
  begin
    newswires = parser.search(newswires_path).first
    @news_source = newswires['news-source']
    @origin = newswires['origin']
    @service_id = newswires['service-id']
  rescue StandardError
    # ignore errors
  end

  # The djn-press-cutout tag used to be searched here, but the result was
  # discarded immediately, so the lookup has been dropped.

  # djn-urgency tag
  begin
    urgency = parser.search("#{newswires_path}/djn-urgency").first
    @urgency = urgency.text.strip.squeeze.to_i
  rescue StandardError
    # ignore errors
  end

  # djn-mdata
  begin
    mdata = parser.search("#{newswires_path}/djn-mdata").first
    @brand = mdata['brand']
    @temp_perm = mdata['temp-perm']
    @retention = mdata['retention']
    @hot = mdata['hot']
    @original_source = mdata['original-source']
    @accession_number = mdata['accession-number']
    @page_citation = mdata['page-citation']
    @display_date = Time.parse(mdata['display-date'])
  rescue StandardError
    # ignore errors
  end

  coding_path = "#{newswires_path}/djn-mdata/djn-coding"

  # coding / company, isin, page: kept as plain stripped strings
  %w[company isin page].each do |kind|
    begin
      values = parser.search("#{coding_path}/djn-#{kind}/c").map { |tag| tag.text.strip }
      instance_variable_set("@#{kind}_code", values)
    rescue StandardError
      # ignore errors
    end
  end

  # coding / everything else: each <c> entry is wrapped in a Codes object.
  # The list is only assigned when every entry converted successfully.
  %w[industry government subject market product geo stat journal routing content function].each do |kind|
    begin
      values = parser.search("#{coding_path}/djn-#{kind}/c").map { |tag| Codes.new(tag.text.strip) }
      instance_variable_set("@#{kind}_code", values)
    rescue StandardError
      # ignore errors
    end
  end

  # body / headline
  begin
    headline = parser.search('/doc/djnml/body/headline').first
    @headline = headline.text.strip
    @headline_brand = headline['brand-display'] if headline['brand-display']
  rescue StandardError
    # ignore errors
  end

  # body / text
  begin
    text = parser.search('/doc/djnml/body/text').first
    @html = text.children.to_xml
    @text = text.children.text.strip
  rescue StandardError
    # ignore errors
  end

  # copyright
  begin
    copyright = parser.search('/doc/djnml/head/copyright').first
    @copyright_year = copyright['year'].to_s.strip.to_i
    @copyright_holder = copyright['holder']
  rescue StandardError
    # ignore errors
  end

  # website: taken from an "Internet: ..." line of the body text
  begin
    if @text =~ /Internet:\s+(.+?)$/
      @website = $1.strip
    end
  rescue StandardError
    # ignore errors
  end

  # company block of the body text: name, address, then zip and city
  if @text =~ /Company:\s+(\S.+?)\s*\n+\s+(\b.+?)\n+\s+(\d+)\s+(\b.+?)\n+/
    @company_name = $1.strip
    @company_address = $2.strip
    @company_zip = $3.strip
    @company_city = $4.strip
  end

  # language
  begin
    @language = LanguageDetector.instance.detect(@text)
  rescue StandardError
    # ignore errors
  end

  # stories to delete
  begin
    @delete = []
    parser.search('/doc/djnml/administration/doc-delete').each do |dd|
      @delete << Delete.new(:product => dd['product'],
                            :doc_date => dd['docdate'],
                            :seq => dd['seq'],
                            :publisher => dd['publisher'],
                            :reason => dd['reason'])
    end
  rescue StandardError
    # ignore errors
  end

  # replacements: every modify-replace entry shares the attributes of the
  # enclosing doc-modify tag
  @modifications = []
  doc_modify = parser.search('/doc/djnml/administration/doc-modify').first
  parser.search('/doc/djnml/administration/doc-modify/modify-replace').each do |m|
    @modifications << Modification.new(:doc_date => doc_modify['docdate'],
                                       :product => doc_modify['product'],
                                       :publisher => doc_modify['publisher'],
                                       :seq => doc_modify['seq'],
                                       :xml => m)
  end

  self
end