Module: Commonmeta::AuthorUtils
- Included in:
- MetadataUtils
- Defined in:
- lib/commonmeta/author_utils.rb
Instance Method Summary
- #author_name_identifiers(id) ⇒ Object
- #authors_as_string(authors) ⇒ Object
- #cleanup_author(author) ⇒ Object
- #get_affiliations(affiliations) ⇒ Object
-
#get_authors(authors) ⇒ Object
parse array of author strings into commonmeta format.
- #get_one_author(author) ⇒ Object
-
#is_personal_name?(name: nil) ⇒ Boolean
check if given name is in the database of known given names: github.com/bmuller/gender_detector.
-
#name_exists?(name) ⇒ Boolean
recognize given name if we have loaded ::NameDetector data, e.g. in a Rails initializer.
Instance Method Details
#author_name_identifiers(id) ⇒ Object
194 195 196 197 198 199 200 201 202 |
# File 'lib/commonmeta/author_utils.rb', line 194

# Build name-identifier hashes for one or more author identifiers.
#
# Every identifier is labelled with the ORCID scheme; this method emits no
# other scheme.
#
# @param id [Object, Array, nil] a single identifier or a list of identifiers
# @return [Array<Hash>, nil] one hash per non-blank identifier, or nil when
#   nothing usable was given
def author_name_identifiers(id)
  return nil unless id.present?

  Array.wrap(id).map do |identifier|
    # skip nil/blank entries so we never emit a hash without a usable nameIdentifier
    next if identifier.blank?

    {
      'nameIdentifier' => identifier,
      'nameIdentifierScheme' => 'ORCID',
      'schemeUri' => 'https://orcid.org'
    }
  end.compact.presence
end
#authors_as_string(authors) ⇒ Object
160 161 162 163 164 165 166 167 168 169 170 |
# File 'lib/commonmeta/author_utils.rb', line 160

# Join a list of author hashes into a single " and "-separated string.
#
# * an author with a familyName is rendered as "familyName, givenName"
#   (just "familyName" when no givenName is present)
# * an author of type 'Person' without a familyName is rendered by its name
# * any other author with a name is rendered as "{name}"
# * authors with nothing to render are skipped
#
# @param authors [Hash, Array<Hash>, nil] author hashes with string keys
# @return [String, nil] the joined string, or nil when it would be empty
def authors_as_string(authors)
  Array.wrap(authors).map do |a|
    if a['familyName'].present?
      # drop a missing givenName instead of leaving a trailing ", "
      [a['familyName'], a['givenName']].select(&:present?).join(', ')
    elsif a['type'] == 'Person'
      a['name'].presence
    elsif a['name'].present?
      "{#{a['name']}}"
    end
  end.compact.join(' and ').presence
end
#cleanup_author(author) ⇒ Object
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
# File 'lib/commonmeta/author_utils.rb', line 99

# Normalise a raw author name string before it is parsed.
#
# @param author [String, nil] raw author name
# @return [String, nil] the cleaned-up name, or nil when no name was given
def cleanup_author(author)
  return nil unless author.present?

  # replace non-standard space characters and trim first: String#strip does not
  # remove characters such as U+00A0, and the ', ' split below expects plain spaces
  author = author.gsub(/[[:space:]]/, ' ').strip

  # detect pattern "Smith J.", but not "Smith, John K."
  unless author.include?(',')
    author = author.gsub(/[[:space:]]([A-Z]\.)?(-?[A-Z]\.)$/, ', \1\2')
  end

  # strip suffixes, e.g. "John Smith, MD" as the name parser doesn't handle them
  author = author.split(',').first if %w[MD PhD].include?(author.split(', ').last)

  # remove email addresses
  email = validate_email(author)
  author = author.gsub(email, '') if email.present?

  # removing an email address can leave spaces at either end
  author = author.strip

  # remove parentheses around names
  author = author[1..-2] if author[0] == '(' && author[-1] == ')'

  # remove spaces around hyphens
  author.gsub(' - ', '-')
end
#get_affiliations(affiliations) ⇒ Object
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
# File 'lib/commonmeta/author_utils.rb', line 172

# Convert affiliation strings or hashes into 'id'/'name' hashes.
#
# A String entry becomes the (whitespace-squished) name. A Hash entry takes its
# name from 'name' or '__content__', and its id from 'affiliationIdentifier';
# when a 'schemeURI' is given and the identifier does not already start with
# 'https://', the two are joined before being passed to normalize_id.
# Entries of any other class, or without id and name, are dropped.
#
# @param affiliations [String, Hash, Array, nil]
# @return [Array<Hash>, nil] hashes with optional 'id' and 'name' keys, or nil
#   when nothing usable remains
def get_affiliations(affiliations)
  return nil unless affiliations.present?

  Array.wrap(affiliations).map do |a|
    affiliation_identifier = nil
    name = nil

    if a.is_a?(String)
      name = a.squish.presence
    elsif a.is_a?(Hash)
      if a['affiliationIdentifier'].present?
        affiliation_identifier = a['affiliationIdentifier']
        scheme_uri = a['schemeURI'].presence
        scheme_uri = "#{scheme_uri}/" if scheme_uri && !scheme_uri.end_with?('/')
        if scheme_uri && !affiliation_identifier.to_s.start_with?('https://')
          # interpolate rather than concatenate so a non-String identifier does not raise TypeError
          affiliation_identifier = "#{scheme_uri}#{affiliation_identifier}"
        end
        affiliation_identifier = normalize_id(affiliation_identifier)
      end
      name = (a['name'] || a['__content__']).to_s.squish.presence
    end

    { 'id' => affiliation_identifier, 'name' => name }.compact.presence
  end.compact.presence
end
#get_authors(authors) ⇒ Object
parse array of author strings into commonmeta format
156 157 158 |
# File 'lib/commonmeta/author_utils.rb', line 156

# Parse an array of authors into commonmeta format.
#
# Each entry is passed to get_one_author; entries for which that returns nil
# are dropped.
#
# @param authors [Object, Array, nil] a single author or a list of authors
# @return [Array] the parsed authors (empty when none could be parsed)
def get_authors(authors)
  Array.wrap(authors).map { |author| get_one_author(author) }.compact
end
#get_one_author(author) ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/commonmeta/author_utils.rb', line 7

# Parse a single author (a name string or a metadata hash) into commonmeta format.
#
# The caller's hash is only read, never modified.
#
# @param author [String, Hash, nil] author name, or a hash with string keys such as
#   'name', 'creatorName', 'givenName', 'familyName', 'id', 'nameIdentifiers',
#   'ORCID', 'type', 'affiliation' and 'contributorType'
# @return [Hash, nil] hash with the non-nil entries among 'id', 'type', 'name',
#   'givenName', 'familyName', 'affiliation' and 'contributorType'; nil for blank
#   input or when 'name' is an Array
def get_one_author(author)
  # basic sanity checks
  return nil if author.blank?

  # author is a string
  author = { 'name' => author } if author.is_a?(String)

  # malformed XML
  return nil if author.fetch('name', nil).is_a?(Array)

  # parse author name attributes
  name = parse_attributes(author.fetch('name', nil)) ||
         parse_attributes(author.fetch('creatorName', nil)) ||
         parse_attributes(author.fetch('contributorName', nil))
  given_name = parse_attributes(author.fetch('givenName', nil)) ||
               parse_attributes(author.fetch('given', nil))
  family_name = parse_attributes(author.fetch('familyName', nil)) ||
                parse_attributes(author.fetch('family', nil))
  name = cleanup_author(name)

  # parse author identifier
  id = parse_attributes(author.fetch('id', nil), first: true) ||
       parse_attributes(author.fetch('identifier', nil), first: true) ||
       parse_attributes(author.fetch('sameAs', nil), first: true)

  if id.nil? && author['nameIdentifiers'].present?
    # DataCite metadata
    id = Array.wrap(author['nameIdentifiers']).find do |ni|
      ni['nameIdentifierScheme'] == 'ORCID'
    end
    id = id['nameIdentifier'] if id.present?
  elsif id.nil? && author['ORCID'].present?
    # Crossref metadata
    id = author.fetch('ORCID')
  end
  id = normalize_orcid(id) || normalize_ror(id)

  # parse author type, i.e. "Person", "Organization" or not specified
  type = author.fetch('type', nil)
  type = type.first if type.is_a?(Array)
  # DataCite metadata: strip the trailing "al", e.g. "Organizational" => "Organization"
  type = type[0..-3] if type.is_a?(String) && type.end_with?('al')

  # kept in a local so the ROR case below can override it without writing to the input hash
  affiliation = author.fetch('affiliation', nil)

  # no usable type given: infer it from identifier and name
  if type.blank?
    id_host = id.is_a?(String) ? URI.parse(id).host : nil

    if name.blank? && id_host == 'ror.org'
      # a ROR id without a name is treated as the affiliation of a person
      type = 'Person'
      affiliation = { 'affiliationIdentifier' => id }
      id = nil
    elsif id_host == 'ror.org'
      type = 'Organization'
    elsif id_host == 'orcid.org'
      type = 'Person'
    elsif given_name.present? || family_name.present?
      type = 'Person'
    elsif name.present?
      # a personal name containing ';' is left without a type
      if !is_personal_name?(name: name)
        type = 'Organization'
      elsif name.to_s.exclude?(';')
        type = 'Person'
      end
    end
  end

  # parse author contributor role
  contributor_type = parse_attributes(author.fetch('contributorType', nil))

  # split name for type Person into given/family name if not already provided
  if type == 'Person' && name.present? && given_name.blank? && family_name.blank?
    Namae.options[:include_particle_in_family] = true
    parsed_name = Namae.parse(name).first

    if parsed_name.present?
      given_name = parsed_name.given
      family_name = parsed_name.family
    else
      given_name = nil
      family_name = nil
    end
  end

  # return author in commonmeta format, using name vs. given/family name
  # depending on type
  {
    'id' => id,
    'type' => type,
    'name' => type == 'Person' ? nil : name,
    'givenName' => type == 'Organization' ? nil : given_name,
    'familyName' => type == 'Organization' ? nil : family_name,
    'affiliation' => get_affiliations(affiliation),
    'contributorType' => contributor_type
  }.compact
end
#is_personal_name?(name: nil) ⇒ Boolean
check if given name is in the database of known given names: github.com/bmuller/gender_detector
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
# File 'lib/commonmeta/author_utils.rb', line 129

# Check whether a name looks like the name of a person.
#
# Uses, in order: the database of known given names
# (github.com/bmuller/gender_detector) via name_exists?, a single-word check,
# the "MD"/"PhD" suffixes, and finally whether Namae can extract a given name.
#
# @param name [String, nil] the name to check
# @return [Boolean] false for a missing or blank name
def is_personal_name?(name: nil)
  # the keyword defaults to nil, so coerce before any String method is called
  name = name.to_s
  return false if name.blank?

  return true if name_exists?(name.split.first) || name_exists?(name.split(', ').last)

  # check if a name has only one word, e.g. "FamousOrganization", not including commas
  return false if name.split(' ').size == 1 && name.exclude?(',')

  # check for suffixes, e.g. "John Smith, MD"
  return true if %w[MD PhD].include?(name.split(', ').last)

  # check if name can be parsed into given/family name
  Namae.options[:include_particle_in_family] = true
  parsed_name = Namae.parse(name).first
  return true if parsed_name && parsed_name.given

  false
end
#name_exists?(name) ⇒ Boolean
recognize given name if we have loaded ::NameDetector data, e.g. in a Rails initializer
149 150 151 152 153 |
# File 'lib/commonmeta/author_utils.rb', line 149

# Look up a given name in the name detector, if one is available.
#
# @param name [String, nil] the given name to look up
# @return [Boolean] false when no name detector is present, otherwise the
#   result of the detector's own name_exists? lookup
def name_exists?(name)
  name_detector.present? ? name_detector.name_exists?(name) : false
end