Module: Cul::Scv::Hydra::Solrizer::ScvModsFieldable
- Extended by:
- ActiveSupport::Concern
- Included in:
- Datastreams::ModsDocument
- Defined in:
- lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb
Defined Under Namespace
Modules: ClassMethods
Constant Summary collapse
- MODS_NS =
{'mods'=>'http://www.loc.gov/mods/v3'}
Class Method Summary collapse
Instance Method Summary collapse
- #collections ⇒ Object
- #dates(node = mods) ⇒ Object
- #formats(node = mods) ⇒ Object
- #main_title(node = mods) ⇒ Object
- #mods ⇒ Object
- #names(role_authority = nil, role = nil) ⇒ Object
- #projects ⇒ Object
- #repositories(node = mods) ⇒ Object
- #shelf_locators(node = mods) ⇒ Object
- #sort_title(node = mods) ⇒ Object
- #titles(node = mods) ⇒ Object
- #to_solr(solr_doc = {}) ⇒ Object
- #zero_pad_year(year) ⇒ Object
Class Method Details
.normalize(t, strip_punctuation = false) ⇒ Object
196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 |
# File 'lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb', line 196

# Normalizes a text value for indexing: trims it, collapses every run of
# whitespace to a single space and, optionally, removes wrapping and leading
# punctuation.
#
# @param t [String, nil] the raw text
# @param strip_punctuation [Boolean] when true, also remove one layer of each
#   kind of paired delimiter that wraps the whole value -- (), {}, [], "", '',
#   <> -- followed by any leading punctuation
# @return [String, nil] a new normalized string (the argument is never
#   mutated), or nil when +t+ is nil
def self.normalize(t, strip_punctuation=false)
  # #collections and #projects pass the result of #main_title, which is nil
  # when a node has no untyped titleInfo; pass nil through rather than raise.
  return nil if t.nil?

  # strip outer whitespace, then collapse intermediate whitespace
  n_t = t.strip.gsub(/\s+/, ' ')

  if strip_punctuation
    # pull off paired punctuation, one kind at a time, in this order
    paired = [
      /^\((.*)\)$/,
      /^\{(.*)\}$/,
      /^\[(.*)\]$/,
      /^"(.*)"$/,
      /^'(.*)'$/,
      /^<(.*)>$/
    ]
    paired.each { |pattern| n_t = n_t.sub(pattern, "\\1") }
    #n_t = n_t.sub(/^\p{Ps}(.*)\p{Pe}/u, "\\1")

    # ...and any leading punctuation
    n_t = n_t.sub(/^[[:punct:]]+/, '')
    # this may have 'created' leading/trailing space, so strip
    n_t.strip!
  end
  n_t
end
Instance Method Details
#collections ⇒ Object
35 36 37 38 39 |
# File 'lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb', line 35

# Titles of the collections this record belongs to, taken from the
# relatedItem elements with type 'host' and displayLabel 'Collection'.
#
# @return [Array<String>] the normalized, punctuation-stripped main title of
#   each matching relatedItem
def collections
  hosts = mods.xpath("./mods:relatedItem[@type='host' and @displayLabel='Collection']", MODS_NS)
  # main_title is nil for a relatedItem with no untyped titleInfo; skip those
  # instead of handing nil to normalize.
  host_titles = hosts.collect { |p_node| main_title(p_node) }.compact
  host_titles.collect { |title| ScvModsFieldable.normalize(title, true) }
end
#dates(node = mods) ⇒ Object
95 96 97 |
# File 'lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb', line 95 def dates(node=mods) # get all the dateIssued with keyDate = 'yes', but not point = 'end' end |
#formats(node = mods) ⇒ Object
99 100 101 102 103 104 |
# File 'lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb', line 99

# Form values from physicalDescription whose authority is not 'marcform'.
#
# NOTE(review): in XPath 1.0 a comparison against a missing attribute is
# false, so form elements with no authority attribute appear to be excluded
# as well -- confirm that is intended.
#
# @param node the node to search; defaults to the result of #mods
# @return [Array<String>] normalized text of each matching form element
def formats(node=mods)
  form_nodes = node.xpath("./mods:physicalDescription/mods:form[@authority != 'marcform']", MODS_NS)
  form_nodes.map { |form| ScvModsFieldable.normalize(form.text) }
end
#main_title(node = mods) ⇒ Object
55 56 57 58 59 60 61 62 63 |
# File 'lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb', line 55

# Normalized text of the first titleInfo child that has no type attribute.
#
# @param node the node to search; defaults to the result of #mods
# @return [String, nil] the normalized title, or nil when the node has no
#   untyped titleInfo
def main_title(node=mods)
  title_info = node.xpath('./mods:titleInfo[not(@type)]', MODS_NS).first
  return nil unless title_info

  ScvModsFieldable.normalize(title_info.text)
end
#mods ⇒ Object
25 26 27 |
# File 'lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb', line 25

# The first /mods:mods element of this datastream's ng_xml document.
#
# @return the first matching node, or nil when there is none
def mods
  roots = ng_xml.xpath('/mods:mods', MODS_NS)
  roots.first
end
#names(role_authority = nil, role = nil) ⇒ Object
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
# File 'lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb', line 72

# Collects names from the record: first the name elements directly under the
# mods root, then the name elements nested in subject elements.
#
# Each name is the space-joined text of its namePart children (role terms and
# other children are left out), normalized with punctuation stripping.
#
# @param role_authority [#to_s, nil] when given, keep only names carrying a
#   roleTerm with this authority attribute
# @param role [#to_s, nil] when given together with +role_authority+, also
#   require the roleTerm text to equal this value; it has no effect when
#   +role_authority+ is nil
# @return [Array<String>] top-level names followed by subject names
def names(role_authority=nil, role=nil)
  # Path of a name element relative to its parent, optionally restricted to
  # names that carry a matching roleTerm.
  name_path = "/mods:name"
  unless role_authority.nil?
    predicate = "@authority='#{role_authority.to_s}'"
    predicate += " and normalize-space(text()) = '#{role.to_s.strip}'" unless role.nil?
    # select the roleTerm, then climb back up to the name that owns it
    name_path += "/mods:role/mods:roleTerm[#{predicate}]/ancestor::mods:name"
  end

  [".#{name_path}", "./mods:subject#{name_path}"].flat_map do |xpath|
    mods.xpath(xpath, MODS_NS).collect do |node|
      base_text = node.xpath('./mods:namePart', MODS_NS).collect { |c| c.text }.join(' ')
      ScvModsFieldable.normalize(base_text, true)
    end
  end
end
#projects ⇒ Object
29 30 31 32 33 |
# File 'lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb', line 29

# Titles of the projects this record belongs to, taken from the relatedItem
# elements with type 'host' and displayLabel 'Project'.
#
# @return [Array<String>] the normalized, punctuation-stripped main title of
#   each matching relatedItem
def projects
  hosts = mods.xpath("./mods:relatedItem[@type='host' and @displayLabel='Project']", MODS_NS)
  # main_title is nil for a relatedItem with no untyped titleInfo; skip those
  # instead of handing nil to normalize.
  host_titles = hosts.collect { |p_node| main_title(p_node) }.compact
  host_titles.collect { |title| ScvModsFieldable.normalize(title, true) }
end
#repositories(node = mods) ⇒ Object
106 107 108 109 110 111 |
# File 'lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb', line 106

# Repository codes: every location/physicalLocation whose authority attribute
# is 'marcorg'.
#
# @param node the node to search; defaults to the result of #mods
# @return [Array<String>] normalized text of each matching physicalLocation
def repositories(node=mods)
  locations = node.xpath("./mods:location/mods:physicalLocation[@authority = 'marcorg']", MODS_NS)
  locations.map { |location| ScvModsFieldable.normalize(location.text) }
end
#shelf_locators(node = mods) ⇒ Object
113 114 115 116 117 |
# File 'lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb', line 113

# Shelf locators found under location/shelfLocator.
#
# @param node the node to search; defaults to the result of #mods
# @return [Array<String>] normalized, punctuation-stripped text of each
#   shelfLocator
def shelf_locators(node=mods)
  locators = node.xpath("./mods:location/mods:shelfLocator", MODS_NS)
  locators.map { |locator| ScvModsFieldable.normalize(locator.text, true) }
end
#sort_title(node = mods) ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb', line 41

# Sortable form of the main title: the text of the first untyped titleInfo
# with its nonSort children left out, normalized with punctuation stripping.
#
# @param node the node to search; defaults to the result of #mods
# @return [String, nil] the sort title, or nil when nothing is left after
#   normalization (including when there is no untyped titleInfo)
def sort_title(node=mods)
  title_info = node.xpath('./mods:titleInfo[not(@type)]', MODS_NS).first
  # keep every child except the nonSort ones
  sortable = title_info ? title_info.children.reject { |child| child.name == 'nonSort' } : []
  joined = sortable.map { |child| child.text }.join
  normalized = ScvModsFieldable.normalize(joined, true)
  normalized.empty? ? nil : normalized
end
#titles(node = mods) ⇒ Object
65 66 67 68 69 70 |
# File 'lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb', line 65

# Every titleInfo that is a direct child of the node, typed or untyped; the
# titles of nested relatedItems are not visited.
#
# @param node the node to search; defaults to the result of #mods
# @return [Array<String>] normalized text of each titleInfo
def titles(node=mods)
  title_nodes = node.xpath('./mods:titleInfo', MODS_NS)
  title_nodes.map { |title_info| ScvModsFieldable.normalize(title_info.text) }
end
#to_solr(solr_doc = {}) ⇒ Object
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
# File 'lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb', line 119

# Adds the library-specific fields to a Solr document.
#
# Starts from the document produced by +super+ when a super implementation
# exists (otherwise from +solr_doc+), adds the title, collection, project,
# name, format, repository and shelf-locator fields, derives start/end year
# fields from the originInfo date fields already in the document, and finally
# passes every field the class maps (self.class.maps_field?) through
# self.class.map_value.
#
# @param solr_doc [Hash] the document to extend
# @return [Hash] the extended document
def to_solr(solr_doc={})
  solr_doc = (defined? super) ? super : solr_doc

  solr_doc["title_si"] = sort_title
  solr_doc["title_ssm"] = titles
  solr_doc["lib_collection_sim"] = collections
  solr_doc["lib_project_sim"] = projects
  solr_doc["lib_name_sim"] = names
  solr_doc["lib_name_ssm"] = solr_doc["lib_name_sim"]
  solr_doc["lib_author_sim"] = names(:marcrelator, 'aut')
  solr_doc["lib_recipient_sim"] = names(:marcrelator, 'rcp')
  solr_doc["lib_format_sim"] = formats
  solr_doc["lib_repo_sim"] = repositories
  solr_doc["lib_shelf_sim"] = shelf_locators

  # Create convenient start and end date values based on one of the many
  # possible originInfo/dateX elements. The first field present wins.
  possible_start_date_fields = ['origin_info_date_issued_ssm', 'origin_info_date_issued_start_ssm', 'origin_info_date_created_ssm', 'origin_info_date_created_start_ssm', 'origin_info_date_other_ssm', 'origin_info_date_other_start_ssm']
  possible_end_date_fields = ['origin_info_date_issued_end_ssm', 'origin_info_date_created_end_ssm', 'origin_info_date_other_end_ssm']

  start_key = possible_start_date_fields.find { |key| solr_doc.has_key?(key) }
  end_key = possible_end_date_fields.find { |key| solr_doc.has_key?(key) }
  start_date = start_key ? solr_doc[start_key][0] : nil
  end_date = end_key ? solr_doc[end_key][0] : nil

  if start_date.present?
    # a record with no end date covers a single point in time
    end_date = start_date if end_date.blank?

    #solr_doc["lib_start_date_ss"] = start_date
    #solr_doc["lib_end_date_ss"] = end_date

    year_regex = /^(-?\d{1,4}).*/
    start_year_match = start_date.match(year_regex)
    end_year_match = end_date.match(year_regex)

    # Pad only when a year was actually found: zero_pad_year(nil) returns
    # "0000", which would otherwise index year 0 for dates such as "unknown".
    start_year = zero_pad_year(start_year_match.captures[0]) if start_year_match
    end_year = zero_pad_year(end_year_match.captures[0]) if end_year_match

    #solr_doc["lib_start_date_year_ssi"] = start_year if start_year
    solr_doc["lib_start_date_year_itsi"] = start_year.to_i if start_year # TrieInt version for searches
    #solr_doc["lib_end_date_year_ssi"] = end_year if end_year
    solr_doc["lib_end_date_year_itsi"] = end_year.to_i if end_year # TrieInt version for searches

    # the range needs both ends
    solr_doc["lib_date_year_range_si"] = start_year + '-' + end_year if start_year && end_year
  end

  # Only existing keys are reassigned here, so the hash is safe to update
  # while it is being iterated.
  solr_doc.each do |k, v|
    if self.class.maps_field? k
      solr_doc[k] = self.class.map_value(k, v)
    end
  end

  solr_doc
end
#zero_pad_year(year) ⇒ Object
185 186 187 188 189 190 191 192 193 194 |
# File 'lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb', line 185

# Left-pads a year with zeros to at least four digits, keeping a leading
# minus sign in front of the padded digits.
#
# @param year [#to_s] the year, e.g. 5, "75" or "-50"
# @return [String] e.g. "0005", "0075" or "-0050"; values that already have
#   four or more digits come back unchanged
def zero_pad_year(year)
  text = year.to_s
  sign = text.start_with?('-') ? '-' : ''
  digits = sign.empty? ? text : text[1, text.length]
  digits = digits.rjust(4, '0') if digits.length < 4
  sign + digits
end