Class: Taxamatch::Authmatch
- Inherits:
- Object
- Inheritance hierarchy: Object → Taxamatch::Authmatch
- Defined in:
- lib/taxamatch_rb/authmatch.rb
Class Method Summary collapse
- .authmatch(authors1, authors2, years1, years2) ⇒ Object
- .compare_years(years1, years2) ⇒ Object
- .fuzzy_match_authors(author1, author2) ⇒ Object
- .get_score(authors1, unique_authors1, authors2, unique_authors2, year_diff) ⇒ Object
- .remove_duplicate_authors(authors1, authors2) ⇒ Object
Class Method Details
.authmatch(authors1, authors2, years1, years2) ⇒ Object
7 8 9 10 11 12 13 |
# File 'lib/taxamatch_rb/authmatch.rb', line 7
#
# Scores how well two author/year citations agree.
#
# Strips the authors the two lists have in common (remove_duplicate_authors),
# measures the distance between the years (compare_years) and hands both
# results, together with the untouched author lists, to get_score.
#
# NOTE(review): the identifier names below were restored from the method
# signatures in the class method summary; confirm against the real file.
#
# @param authors1 [Array] authors of the first citation
# @param authors2 [Array] authors of the second citation
# @param years1 [Array] years of the first citation
# @param years2 [Array] years of the second citation
# @return [Object] whatever get_score returns for these inputs
def self.authmatch(authors1, authors2, years1, years2)
  unique_authors1, unique_authors2 = remove_duplicate_authors(authors1, authors2)
  year_difference = compare_years(years1, years2)
  get_score(authors1, unique_authors1, authors2, unique_authors2, year_difference)
end
.compare_years(years1, years2) ⇒ Object
94 95 96 97 98 99 100 |
# File 'lib/taxamatch_rb/authmatch.rb', line 94
#
# Measures how far apart two year lists are.
#
# @param years1 [Array] years of the first citation
# @param years2 [Array] years of the second citation
# @return [Integer, nil] 0 when both lists are empty; the absolute difference
#   of the two years (converted with to_i) when each list holds exactly one
#   year; nil in every other case
def self.compare_years(years1, years2)
  return 0 if years1.empty? && years2.empty?
  # Only a one-to-one pairing of years can be compared.
  return nil unless years1.size == 1 && years2.size == 1

  (years1[0].to_i - years2[0].to_i).abs
end
.fuzzy_match_authors(author1, author2) ⇒ Object
84 85 86 87 88 89 90 91 92 |
# File 'lib/taxamatch_rb/authmatch.rb', line 84
#
# Decides whether two author strings are close enough to be treated as the
# same author, based on DamerauLevenshtein.distance.
#
# The strings match when all of the following hold:
# * the edit distance is at most 3;
# * the shorter string is longer than twice the edit distance;
# * the edit distance is below 2, or both strings start with the same
#   character.
#
# NOTE(review): the identifier names below were restored from the method
# signature in the class method summary; confirm against the real file.
#
# @param author1 [String] first author
# @param author2 [String] second author
# @return [Boolean] true when the authors are considered a fuzzy match
def self.fuzzy_match_authors(author1, author2)
  shortest_length = [author1.size, author2.size].min
  # get around a bug in C code, but it really has to be fixed
  # NOTE(review): the trailing 1 and 3 are presumably the block size and the
  # maximum distance of the damerau-levenshtein gem -- confirm.
  ed = DamerauLevenshtein.distance(author1, author2, 1, 3)
  ed <= 3 && shortest_length > ed * 2 && (ed < 2 || author1[0] == author2[0])
end
.get_score(authors1, unique_authors1, authors2, unique_authors2, year_diff) ⇒ Object
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/taxamatch_rb/authmatch.rb', line 15
#
# Turns the outcome of author de-duplication and year comparison into a
# score between 0 and 100.
#
# * No unmatched authors on either side: 100 for equal years, 54 for years
#   one apart, 94 when the years could not be compared (nil), otherwise 0.
# * Unmatched authors on one side only: 91, 51 and 90 for the same three
#   year situations, otherwise 0.
# * Unmatched authors on both sides: the percentage of authors that were
#   matched, kept only when the years are equal or not comparable.
#
# Any score that is not above 50 is reported as 0.
#
# NOTE(review): the identifier names below were restored from the method
# signature in the class method summary; confirm against the real file.
#
# @param authors1 [Array] all authors of the first citation
# @param unique_authors1 [Array] authors of the first citation left unmatched
# @param authors2 [Array] all authors of the second citation
# @param unique_authors2 [Array] authors of the second citation left unmatched
# @param year_diff [Integer, nil] difference between the years, nil if unknown
# @return [Integer] 0, or a score greater than 50
def self.get_score(authors1, unique_authors1, authors2, unique_authors2, year_diff)
  count_before = authors1.size + authors2.size
  count_after = unique_authors1.size + unique_authors2.size

  score =
    if count_after.zero?
      case year_diff
      when nil then 94
      when 0 then 100
      when 1 then 54
      else 0
      end
    elsif unique_authors1.empty? || unique_authors2.empty?
      case year_diff
      when nil then 90
      when 0 then 91
      when 1 then 51
      else 0
      end
    else
      matched_percent = ((1 - count_after.to_f / count_before.to_f) * 100).round
      # A partial author match only counts when the years do not disagree.
      year_diff.nil? || year_diff == 0 ? matched_percent : 0
    end

  score > 50 ? score : 0
end
.remove_duplicate_authors(authors1, authors2) ⇒ Object
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
# File 'lib/taxamatch_rb/authmatch.rb', line 47
#
# Returns copies of both author lists with the authors that have a
# counterpart in the other list removed. The arguments are not modified.
#
# Every author of the first list is compared with every author of the
# second list:
# * equal strings, or a prefix match where the shorter string has at least
#   3 characters, remove both authors;
# * a prefix match with a shorter prefix removes only the prefix itself;
# * otherwise both are removed when each has more than one character and
#   fuzzy_match_authors accepts the pair.
#
# NOTE(review): the identifier names below were restored from the method
# signatures in the class method summary; confirm against the real file.
#
# @param authors1 [Array<String>] authors of the first citation
# @param authors2 [Array<String>] authors of the second citation
# @return [Array(Array<String>, Array<String>)] unmatched authors of the
#   first and of the second list
def self.remove_duplicate_authors(authors1, authors2)
  unique_authors1 = authors1.dup
  unique_authors2 = authors2.dup
  authors1.each do |au1|
    authors2.each do |au2|
      au1_match = au2_match = false
      if au1 == au2
        au1_match = au2_match = true
      elsif au1 == au2[0...au1.size]
        # au1 is a prefix (abbreviation) of au2
        au1_match = true
      elsif au1[0...au2.size] == au2
        # au2 is a prefix (abbreviation) of au1
        au2_match = true
      end

      if (au1.size >= 3 && au1_match) || (au2.size >= 3 && au2_match) || (au1_match && au2_match)
        unique_authors1.delete au1
        unique_authors2.delete au2
      elsif au1_match
        unique_authors1.delete au1
      elsif au2_match
        unique_authors2.delete au2
      elsif au1.size > 1 && au2.size > 1 && fuzzy_match_authors(au1, au2)
        # TODO: masking a bug in the Damerau-Levenshtein module which appears
        # when comparing a 1-letter string to a longer string
        unique_authors1.delete au1
        unique_authors2.delete au2
      end
    end
  end
  [unique_authors1, unique_authors2]
end