Module: MathMetadata
- Defined in:
- lib/math_metadata_lookup/lookup.rb,
lib/math_metadata_lookup/site.rb,
lib/math_metadata_lookup/tools.rb,
lib/math_metadata_lookup/author.rb,
lib/math_metadata_lookup/entity.rb,
lib/math_metadata_lookup/result.rb,
lib/math_metadata_lookup/article.rb,
lib/math_metadata_lookup/sites/mr.rb,
lib/math_metadata_lookup/reference.rb,
lib/math_metadata_lookup/sites/zbl.rb,
lib/math_metadata_lookup/sites/dmlcz.rb,
lib/math_metadata_lookup/sites/bas-bg.rb,
lib/math_metadata_lookup/sites/cedram.rb,
lib/math_metadata_lookup/sites/numdam.rb
Overview
vi: fenc=utf-8:expandtab:ts=2:sw=2:sts=2
Defined Under Namespace
Classes: Article, Author, BasBg, CEDRAM, DMLCZ, Entity, Lookup, MR, NUMDAM, Reference, Result, Site, ZBL
Constant Summary
collapse
- SITES =
[]
- ACCENT_REPL =
{
"`" => "\u0300", "'" => "\u0301", "^" => "\u0302", '"' => "\u0308", "~" => "\u0303", "H" => "\u030b", "c" => "\u0327", "=" => "\u0304", "." => "\u0307", "r" => "\u030a", "u" => "\u0306", "v" => "\u030c" }
Class Method Summary
collapse
Class Method Details
.latex_to_utf8(s) ⇒ Object
102
103
104
105
106
107
108
|
# File 'lib/math_metadata_lookup/tools.rb', line 102
# Replaces LaTeX accent commands in +s+ with the corresponding accented
# characters.
#
# Recognised forms are <tt>\'a</tt>, <tt>\'{a}</tt> and <tt>\'{\i}</tt>, where
# the character after the backslash is a key of ACCENT_REPL. The base letter
# and the combining mark from ACCENT_REPL are concatenated and passed through
# Unicode.normalize_KC.
#
# Accents whose command name is itself a letter (c, H, r, u, v) are only
# applied when the argument is braced. In LaTeX a backslash followed by
# several letters is a single control word, so <tt>\cdot</tt>, <tt>\rho</tt>
# or <tt>\vec</tt> must be left untouched rather than read as an accent
# applied to the second letter.
#
# @param s [String] text possibly containing LaTeX accent commands
# @return [String] a new string with the recognised accents converted;
#   everything else is copied verbatim
def latex_to_utf8( s )
  s.gsub( /\\(.)(?:([a-zA-Z])|\{([a-zA-Z])\}|\{\\([a-zA-Z])\})/ ) do |match|
    # Copy the captures first: the regexp test below overwrites $1..$4.
    command, bare, braced, escaped = $1, $2, $3, $4
    accent = ACCENT_REPL[command]
    # A letter command glued to an unbraced letter is a control word, not an accent.
    control_word = bare && command =~ /[a-zA-Z]/
    if accent.nil? || control_word
      match
    else
      Unicode.normalize_KC( (bare || braced || escaped) + accent )
    end
  end
end
|
.levenshtein_distance(s1, s2) ⇒ Object
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
# File 'lib/math_metadata_lookup/tools.rb', line 11
# Computes a normalised similarity of two strings based on their Levenshtein
# (edit) distance.
#
# Despite the method name, the value returned is not the raw distance but
# <tt>1 - distance / max(length(s1), length(s2))</tt>, with lengths counted
# in characters. Identical strings give 1.0; strings that share nothing give 0.0.
#
# @param s1 [String] first string
# @param s2 [String] second string
# @return [Float] similarity between 0.0 and 1.0
def levenshtein_distance( s1, s2 )
  # Also covers the case of two empty strings, avoiding a division by zero below.
  return 1.0 if s1 == s2

  chars1 = s1.split(//u)
  chars2 = s2.split(//u)

  # Only two rows of the dynamic-programming table are kept at any time.
  # previous[i] is the distance between the first i characters of s1 and the
  # prefix of s2 that has been processed so far.
  previous = (0..chars1.size).to_a

  chars2.each_with_index do |c2, j|
    current = [j + 1]
    chars1.each_with_index do |c1, i|
      cost = if c1 == c2
        previous[i]
      else
        # deletion, insertion, substitution
        [current[i], previous[i + 1], previous[i]].min + 1
      end
      current << cost
    end
    previous = current
  end

  1 - (previous.last.to_f / [chars1.size, chars2.size].max)
end
|
.normalize_mscs(mscs) ⇒ Object
46
47
48
|
# File 'lib/math_metadata_lookup/tools.rb', line 46
# Splits a list of classification strings into individual codes.
#
# Every element of +mscs+ is split on commas and semicolons; from each piece
# anything that looks like a markup tag (<...>) is removed and the first run
# of characters that are neither whitespace nor parentheses is kept.
#
# @param mscs [Array<String>] raw classification strings
# @return [Array<String, nil>] extracted codes in input order; a piece that
#   contains no such run yields nil
def normalize_mscs( mscs )
  pieces = mscs.flat_map { |entry| entry.split(/,|;/) }
  pieces.map do |piece|
    without_tags = piece.gsub(/<.*?>/,'')
    without_tags[/\s*\(?([^\s\)\(]+)\)?\s*/, 1]
  end
end
|
.normalize_name(name) ⇒ Object
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
# File 'lib/math_metadata_lookup/tools.rb', line 51
# Normalises a personal name for comparison.
#
# Steps, in order:
# 1. convert LaTeX accents with latex_to_utf8,
# 2. call String#to_ascii on the result (defined outside this listing;
#    presumably an ASCII transliteration - confirm against its provider),
# 3. replace the first "jr." (case-insensitive) with a space,
# 4. replace the first single-character initial such as "J." with a space,
# 5. make sure every period that is directly followed by a character other
#    than whitespace or a comma is followed by a space ("Jan.Petr" -> "Jan. Petr").
#
# @param name [#to_s] the raw name; nil is treated as an empty string
# @return [String] the normalised name
def normalize_name( name )
  trans = latex_to_utf8(name.to_s)
  trans = trans.to_ascii
  trans.sub! %r{\bjr\.(\b|$)}i, ' '
  trans.sub! %r{(\W|^)\w\.}i, ' '
  # Must be the destructive gsub!: plain gsub returns a copy, which was
  # previously thrown away, so this step had no effect.
  trans.gsub!( /([^\s,])?\.([^\s,])/, '\1. \2' )
  trans
end
|
.normalize_range(range) ⇒ Object
41
42
43
|
# File 'lib/math_metadata_lookup/tools.rb', line 41
# Converts a page (or similar) range to use a plain hyphen as separator.
#
# @param range [#to_s] the range; it is converted with to_s first, so nil
#   becomes an empty string
# @return [String] the text with every en dash and every double hyphen
#   replaced by a single "-"
def normalize_range( range )
  text = range.to_s
  text.gsub(/–|--/) { '-' }
end
|
.normalize_text(s) ⇒ Object
77
78
79
80
81
82
83
84
85
|
# File 'lib/math_metadata_lookup/tools.rb', line 77
# Normalises free text (e.g. a title) for comparison.
#
# Steps, in order:
# 1. convert LaTeX accents with latex_to_utf8,
# 2. call String#to_ascii (defined outside this listing; presumably an ASCII
#    transliteration - confirm against its provider) and downcase,
# 3. drop periods and commas with remove_punctuation,
# 4. replace every run of non-word characters with a single space,
# 5. remove the words "the", "a" and "of" when they are followed by whitespace,
# 6. collapse whitespace and strip both ends.
#
# @param s [#to_s] the raw text; nil is treated as an empty string
# @return [String] the normalised text
def normalize_text( s )
  str = latex_to_utf8(s.to_s)
  str = str.to_ascii.downcase
  str = remove_punctuation(str)
  str.gsub!(%r{\W+}, ' ')
  # Must be the destructive gsub!: plain gsub returns a copy, which was
  # previously thrown away, so the words were never removed.
  str.gsub!(%r{(?: the| a| of|^a|^the|^of)\s+}i, ' ')
  str.gsub!(%r{\s+}, ' ')
  str.strip
end
|
.remove_punctuation(s) ⇒ Object
70
71
72
73
74
|
# File 'lib/math_metadata_lookup/tools.rb', line 70
def remove_punctuation( s )
str = s.gsub %r{(\w)[.,]+( |$)}i, '\1 '
str.gsub! %r{(\s)[.,]+( |$)}i, '\1 '
str.strip
end
|