Class: AnyStyle::Normalizer::Names

Inherits:
AnyStyle::Normalizer show all
Defined in:
lib/anystyle/normalizer/names.rb

Instance Attribute Summary collapse

Attributes inherited from AnyStyle::Normalizer

#keys, #skip

Instance Method Summary collapse

Methods inherited from AnyStyle::Normalizer

#append, #detect_language, #detect_scripts, #doi_extract, #each_value, #keys_for, #map_values, #name, #skip?

Constructor Details

#initialize(**opts) ⇒ Names

Returns a new instance of Names.



12
13
14
15
16
17
18
19
20
21
# File 'lib/anystyle/normalizer/names.rb', line 12

# Builds the normalizer and the Namae parser used to split name strings.
#
# All options are forwarded unchanged to the superclass initializer. The
# parser is stored in @namae and is configured as follows:
#
# * commas are preferred as the separator between names;
# * "and", "AND", "&", ";", "und", "UND", "y" and "e" (each followed by
#   whitespace) are recognized as separators between names;
# * the appellation and title patterns are regular expressions that can
#   never match (they require an "x" at a position where the look-ahead
#   forbids one) -- presumably to switch off Namae's appellation and title
#   detection; confirm against the Namae documentation.
#
# @param opts [Hash] options passed through to the superclass
def initialize(**opts)
  super(**opts)

  parser_options = {
    prefer_comma_as_separator: true,
    separator: /\A(and|AND|&|;|und|UND|y|e)\s+/,
    appellation: /\A(?!x)x/,
    title: /\A(?!x)x/
  }

  @namae = Namae::Parser.new(parser_options)
end

Instance Attribute Details

#namae ⇒ Object

Returns the value of attribute namae.



10
11
12
# File 'lib/anystyle/normalizer/names.rb', line 10

# Reader for the name parser held in @namae.
#
# @return [Object] the value of @namae; #initialize assigns a
#   Namae::Parser instance to it
def namae
  @namae
end

Instance Method Details

#normalize(item, prev: [], **opts) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/anystyle/normalizer/names.rb', line 23

# Normalizes the name values of +item+.
#
# Each value yielded by #map_values is first cleaned of a leading "(" or
# "[" and of trailing ",", ";", ":", ")" and "]" characters. The cleaned
# string is then handled in one of two ways:
#
# * If it looks like a repeater mark (see #repeater?) and +prev+ is not
#   empty, the first entry stored under the same key in the last element
#   of +prev+ is returned, falling back to that element's first :author
#   and then first :editor entry. If none of these exist the result is nil.
# * Otherwise the string is passed through #strip and #parse. If that
#   raises any StandardError (for instance the ArgumentError #parse raises
#   for an empty string) a single literal name is returned instead.
#
# The incoming string is never modified: cleaning works on a copy, so
# frozen values are handled like any other instead of raising FrozenError
# before the literal fallback can apply.
#
# NOTE(review): the block result is handed back to #map_values, which
# presumably stores it in place of the raw value -- confirm against
# AnyStyle::Normalizer#map_values.
#
# @param item [Object] the item whose values are passed to #map_values
# @param prev [Array] previously normalized items; only the last one is
#   consulted, and only for repeater marks
# @param opts [Hash] accepted for interface compatibility; not used here
# @return [Object] whatever #map_values returns
def normalize(item, prev: [], **opts)
  map_values(item) do |key, value|
    # Non-destructive gsub: leave the caller's string untouched.
    name = value.gsub(/(^[\(\[]|[,;:\)\]]+$)/, '')

    if repeater?(name) && !prev.empty?
      previous = prev[-1]
      previous.dig(key, 0) || previous.dig(:author, 0) || previous.dig(:editor, 0)
    else
      begin
        parse(strip(name))
      rescue
        # Unparseable input is kept verbatim as a literal name.
        [{ literal: name.strip }]
      end
    end
  end
end

#parse(value) ⇒ Object

Raises:

  • (ArgumentError)


65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/anystyle/normalizer/names.rb', line 65

def parse(value)
  raise ArgumentError if value.empty?

  others = value.sub!(
    /(,\s+)?((\&\s+)?\bet\s+(al|coll)\b|\bu\.\s*a\b|(\band|\&)\s+others).*$/, ''
  ) || value.sub!(/\.\.\.|…/, '')

  # Add surname/initial punctuation separator for Vancouver-style names
  # E.g. Rang HP, Dale MM, Ritter JM, Moore PK
  if value.match(/^(\p{Lu}[^\s,.]+)\s+([\p{Lu}][\p{Lu}\-]{0,3})(,|[.]?$)/)
    value.gsub!(/\b(\p{Lu}[^\s,.]+)\s+([\p{Lu}][\p{Lu}\-]{0,3})(,|[.]?$)/, '\1, \2\3')
  end

  names = namae.parse!(value).map { |name|
    name.normalize_initials
    name.to_h.reject { |_, v| v.nil? }
  }

  names << { others: true } unless others.nil?
  names
end

#repeater?(value) ⇒ Boolean

Returns:

  • (Boolean)


39
40
41
# File 'lib/anystyle/normalizer/names.rb', line 39

# Tests whether +value+ begins with a repeater mark, i.e. a placeholder
# standing in for a repeated name.
#
# A repeater mark is either a run of at least two characters -- the first
# a dash (any Unicode dash punctuation), underscore or asterisk, the rest
# drawn from the same set plus spaces -- or a single private-use
# character. It must be followed by ",", ":", "." or the end of the line.
#
# @param value [String] the string to test
# @return [Integer, nil] the offset of the match (truthy) or nil if
#   +value+ does not start with a repeater mark
def repeater?(value)
  repeater_mark = /^([\p{Pd}_*][\p{Pd}_* ]+|\p{Co})(,|:|\.|$)/
  repeater_mark =~ value
end

#strip(value) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/anystyle/normalizer/names.rb', line 43

# Removes role markers and other noise from a name string before parsing.
#
# Returns a new string; +value+ is not modified. In order, the following
# are removed or rewritten:
#
# * a leading "In" / "in" (optionally followed by ":") and whitespace;
# * editor markers: "ed.", "eds.", "editor(s)", "edited", "éditeur(s)",
#   "édité", optionally followed by "by" / "par";
# * "Hrsg.", "Hg.", "Hgg.", "Herausgeber" and "herausgegeben von";
# * German translator markers such as "Übers. v.", "übersetzt von",
#   "Übertragung von" (optionally preceded by "der" / "einer");
# * "trans.", "transl.", "translated", "translation" (optionally + "by");
# * "traducteur(s)", "traduit", "trad." (the latter two optionally + "par");
# * "dir." / "directed", "prod." / "producer" / "produced" and
#   "perf." / "performer" / "performed" (each optionally + "by");
# * asterisks;
# * text in parentheses and text in square brackets, including groups
#   that are never closed;
# * ";" and ":" become ",";
# * leading non-letters and trailing whitespace followed by non-letters;
# * trailing whitespace, commas and periods;
# * runs of commas collapse to one, and whitespace before a period is
#   dropped.
#
# @param value [String] the raw name string
# @return [String] the cleaned name string
def strip(value)
  value
    .gsub(/^[Ii]n:?\s+/, '')
    .gsub(/\b[EÉeé]d(s?\.|itors?\.?|ited|iteurs?|ité)(\s+(by|par)\s+|\b|$)/, '')
    .gsub(/\b([Hh](rsg|gg?)\.|Herausgeber)\s+/, '')
    .gsub(/\b[Hh]erausgegeben von\s+/, '')
    .gsub(/\b((d|ein)er )?[Üü]ber(s\.|setzt|setzung|tragen|tragung) v(\.|on)\s+/, '')
    .gsub(/\b[Tt]rans(l?\.|lated|lation)(\s+by\b)?\s*/, '')
    .gsub(/\b[Tt]rad(ucteurs?|(uit|\.)(\s+par\b)?)\s*/, '')
    .gsub(/\b([Dd]ir(\.|ected))(\s+by)?\s+/, '')
    .gsub(/\b([Pp]rod(\.|uce[rd]))(\s+by)?\s+/, '')
    .gsub(/\b([Pp]erf(\.|orme[rd]))(\s+by)?\s+/, '')
    .gsub(/\*/, '')
    .gsub(/\([^\)]*\)?/, '')
    .gsub(/\[[^\]]*\]?/, '') # consume the closing "]" too, so no stray bracket is left behind
    .gsub(/[;:]/, ',')
    .gsub(/^\p{^L}+|\s+\p{^L}+$/, '')
    .gsub(/[\s,\.]+$/, '')
    .gsub(/,{2,}/, ',')
    .gsub(/\s+\./, '.')
end