Module: Document::Corpus
- Defined in:
- lib/rbbt/document/corpus.rb,
lib/rbbt/document/corpus/pubmed.rb
Constant Summary collapse
- PUBMED_NAMESPACE =
"PMID"
Class Attribute Summary collapse
- .claims ⇒ Object
Returns the value of attribute claims.
Class Method Summary collapse
- .claim(namespace, &block) ⇒ Object
- .setup(corpus) ⇒ Object
Instance Method Summary collapse
- #[](*args) ⇒ Object
- #add_document(document) ⇒ Object
- #add_pmid(pmid, type = :title_and_abstract, update = false) ⇒ Object
- #add_pubmed_query(query, max = 3000, type = nil) ⇒ Object
- #docids(*prefix) ⇒ Object
- #documents(*prefix) ⇒ Object
Class Attribute Details
.claims ⇒ Object
Returns the value of attribute claims.
62 63 64 |
# File 'lib/rbbt/document/corpus.rb', line 62
#
# Reader for the class-level +claims+ attribute.
#
# Returns whatever is currently stored in @claims; this is nil until
# something has assigned it (for example a call to .claim).
def claims
  @claims
end
Class Method Details
.claim(namespace, &block) ⇒ Object
63 64 65 66 |
# File 'lib/rbbt/document/corpus.rb', line 63
#
# Registers +block+ as the handler for +namespace+.
#
# The handler is stored in the class-level claims registry under
# namespace.to_s, so Symbols and Strings address the same entry.
# Registering the same namespace again replaces its previous handler.
#
# namespace - namespace identifier (anything responding to #to_s).
# block     - the handler to store for that namespace.
#
# Returns the stored block.
def claim(namespace, &block)
  # Create the registry only once: re-initialising it on every call would
  # silently discard the handlers registered for other namespaces.
  @claims ||= {}
  @claims[namespace.to_s] = block
end
.setup(corpus) ⇒ Object
6 7 8 9 10 11 12 |
# File 'lib/rbbt/document/corpus.rb', line 6
#
# Prepares +corpus+ to be used as a Document::Corpus.
#
# corpus - either a String, which is opened through
#          Persist.open_tokyocabinet(corpus, false, :single, "BDB"),
#          or an already-open store object.
#
# The store is extended with Document::Corpus and Persist::TSVAdapter when
# it does not already include them, and is closed before being returned.
#
# Returns the prepared store object.
def self.setup(corpus)
  corpus = Persist.open_tokyocabinet(corpus, false, :single, "BDB") if String === corpus

  # Mix in each module only once; the order matches the original statements.
  [Document::Corpus, Persist::TSVAdapter].each do |mixin|
    corpus.extend(mixin) unless mixin === corpus
  end

  corpus.close
  corpus
end
Instance Method Details
#[](*args) ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/rbbt/document/corpus.rb', line 37
#
# Fetches the entry stored under a docid.
#
# The underlying lookup (super) runs inside read_and_close. When more than
# one argument is given, the stored value is returned right after its
# encoding is set to Encoding.default_external, with no further processing.
#
# With a single argument the docid is split on ":" into namespace, id and
# type. If nothing is stored and Document::Corpus.claims has a handler for
# the namespace, that handler is run with instance_exec(id, type) to produce
# the text. A non-nil result is tagged with Encoding.default_external and
# passed to Document.setup(text, namespace, id, type, self).
#
# Returns the text, or nil when it is neither stored nor claimed.
def [](*args)
  docid = args.first

  text = read_and_close { super(*args) }
  text.force_encoding(Encoding.default_external) if text
  return text if args.length > 1

  namespace, id, type = docid.split(":")

  if text.nil?
    handlers = Document::Corpus.claims
    handler_key = namespace.to_s
    if handlers && handlers.include?(handler_key)
      text = instance_exec(id, type, &handlers[handler_key])
    end
  end

  return text if text.nil?

  # Text produced by a claim handler has not had its encoding set yet.
  text.force_encoding(Encoding.default_external)
  Document.setup(text, namespace, id, type, self)
  text
end
#add_document(document) ⇒ Object
14 15 16 17 18 19 20 21 22 |
# File 'lib/rbbt/document/corpus.rb', line 14
#
# Stores +document+ under its docid unless that docid is already present.
#
# document - object responding to #docid; it is stored as the value.
#
# Returns the existing entry (self[docid]) when the docid is already
# included; otherwise returns the result of the write_and_close call that
# performs the assignment.
def add_document(document)
  key = document.docid

  read_and_close do
    # Returns from add_document itself, not just from the block.
    return self[key] if include?(key)
  end

  write_and_close { self[key] = document }
end
#add_pmid(pmid, type = :title_and_abstract, update = false) ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/rbbt/document/corpus/pubmed.rb', line 5
#
# Fetches PubMed articles with PubMed.get_article and adds their text to
# this corpus through add_document.
#
# pmid   - a single PubMed id, or an Array of ids.
# type   - which text to store: :abstract, :title or :title_and_abstract
#          (Symbol or String). Any other value selects the article's full
#          text. nil and "" both mean :title_and_abstract.
# update - when truthy, the lookup for an already-stored document is
#          skipped and the article is fetched again.
#
# For a single id with update falsy, a document already listed by
# documents("PMID:<pmid>:<type>") is returned without fetching anything.
#
# Returns the document for a single id, or, for an Array of ids, the array
# of documents after it has been passed to Document.setup with the corpus
# of the first document.
# Raises RuntimeError when full text is requested but the article has none.
def add_pmid(pmid, type = :title_and_abstract, update = false)
  # Normalise the type before it is used anywhere: an empty string used to
  # be turned into nil after the default was applied, which made the later
  # to_sym call fail.
  type = nil if String === type && type.empty?
  type = :title_and_abstract if type.nil?

  unless update || Array === pmid
    id = [PUBMED_NAMESPACE, pmid, type].collect { |e| e.to_s } * ":"
    existing = documents(id)
    return existing.first if existing.any?
  end

  type = type.to_sym
  pmids = Array === pmid ? pmid : [pmid]

  res = PubMed.get_article(pmids).collect do |article_pmid, article|
    document =
      case type
      when :abstract
        Document.setup(article.abstract || "", PUBMED_NAMESPACE, article_pmid, type, self, :corpus => self)
      when :title
        Document.setup(article.title || "", PUBMED_NAMESPACE, article_pmid, type, self)
      when :title_and_abstract
        title = article.title
        abstract = article.abstract
        if title.nil? || title.empty?
          text = abstract || ""
        else
          # Start from the title alone so an article without an abstract
          # still yields text instead of nil.
          text = title.end_with?(".") ? title : title + "."
          text = text + " " + abstract if abstract && !abstract.empty?
        end
        Document.setup(text, PUBMED_NAMESPACE, article_pmid, type, self)
      else
        raise "No FullText available for #{ article_pmid }" if article.full_text.nil?
        Document.setup(article.full_text, PUBMED_NAMESPACE, article_pmid, :fulltext, self, :corpus => self)
      end

    Log.debug "Loading pmid #{article_pmid}"
    add_document(document) if document
    document
  end

  if Array === pmid
    corpus = res.first.corpus if res.first
    Document.setup(res, :corpus => corpus)
  else
    res = res.first
  end

  res
end
#add_pubmed_query(query, max = 3000, type = nil) ⇒ Object
55 56 57 58 |
# File 'lib/rbbt/document/corpus/pubmed.rb', line 55
#
# Runs a PubMed query and adds the articles it finds to this corpus.
#
# query - query passed to PubMed.query.
# max   - second argument passed to PubMed.query (default 3000).
# type  - document type forwarded to add_pmid (default nil).
#
# Returns the result of add_pmid for the ids returned by the query.
def add_pubmed_query(query, max = 3000, type = nil)
  add_pmid(PubMed.query(query, max), type)
end
#docids(*prefix) ⇒ Object
24 25 26 27 28 29 30 31 |
# File 'lib/rbbt/document/corpus.rb', line 24
#
# Lists the docids stored in this corpus that start with a prefix.
#
# prefix - prefix parts, joined with ":" (so :all and "all" both give
#          "all"). The value "all" lists every key; any other value gets a
#          trailing ":" if it lacks one and is passed to self.prefix.
#
# The key listing runs inside read_and_close.
#
# Returns the keys after DocID.setup(keys, :corpus => self).
def docids(*prefix)
  key_prefix = prefix * ":"
  list_all = key_prefix == "all"
  key_prefix += ":" unless list_all || key_prefix[-1] == ":"

  ids = read_and_close do
    list_all ? keys : self.prefix(key_prefix)
  end

  DocID.setup(ids, :corpus => self)
end
#documents(*prefix) ⇒ Object
33 34 35 |
# File 'lib/rbbt/document/corpus.rb', line 33
#
# Returns the documents for the docids matching +prefix+.
#
# prefix - prefix parts, forwarded unchanged to docids.
#
# Returns the result of calling #document on what docids returns.
def documents(*prefix)
  matching_ids = docids(*prefix)
  matching_ids.document
end