Module: WikidataDiffAnalyzer
- Defined in:
- lib/wikidata-diff-analyzer.rb,
lib/wikidata-diff-analyzer/version.rb
Defined Under Namespace
Classes: Error
Constant Summary collapse
- VERSION =
"2.0.2"
Class Method Summary collapse
-
.analyze(revision_ids) ⇒ Object
Analyzes each of the given revision ids and returns the per-revision diffs, the aggregated totals, and the ids that could not be analyzed.
Class Method Details
.analyze(revision_ids) ⇒ Object
Analyzes each of the given revision ids and returns the per-revision diffs, the aggregated totals, and the ids that could not be analyzed.
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/wikidata-diff-analyzer.rb', line 10

# Analyzes every revision id in +revision_ids+ and aggregates the results.
#
# The caller's array is never modified: revision id 0 is filtered into a
# local copy rather than deleted in place, so frozen or shared arrays are safe.
#
# @param revision_ids [Array<Integer>] revision ids to analyze
# @return [Hash] with the keys
#   - +:diffs_analyzed_count+ number of revisions that were analyzed
#   - +:diffs_not_analyzed+ ids that could not be analyzed (0 first, if given)
#   - +:diffs+ revision id => result of RevisionAnalyzer.analyze_diff
#   - +:total+ counters accumulated by Total.accumulate_totals
def self.analyze(revision_ids)
  diffs_analyzed = []
  diffs_not_analyzed = []
  diffs = {}
  total = {
    claims_added: 0, claims_removed: 0, claims_changed: 0,
    references_added: 0, references_removed: 0, references_changed: 0,
    qualifiers_added: 0, qualifiers_removed: 0, qualifiers_changed: 0,
    aliases_added: 0, aliases_removed: 0, aliases_changed: 0,
    labels_added: 0, labels_removed: 0, labels_changed: 0,
    descriptions_added: 0, descriptions_removed: 0, descriptions_changed: 0,
    sitelinks_added: 0, sitelinks_removed: 0, sitelinks_changed: 0,
    lemmas_added: 0, lemmas_removed: 0, lemmas_changed: 0,
    forms_added: 0, forms_removed: 0, forms_changed: 0,
    representations_added: 0, representations_removed: 0, representations_changed: 0,
    formclaims_added: 0, formclaims_removed: 0, formclaims_changed: 0,
    senses_added: 0, senses_removed: 0, senses_changed: 0,
    glosses_added: 0, glosses_removed: 0, glosses_changed: 0,
    senseclaims_added: 0, senseclaims_removed: 0, senseclaims_changed: 0,
    merge_to: 0, merge_from: 0, redirect: 0, undo: 0, restore: 0,
    clear_item: 0, create_item: 0, create_property: 0, create_lexeme: 0
  }

  # Revision id 0 can never be analyzed: report it once as not analyzed and
  # keep it out of the batch. Filtering builds a new array, so the caller's
  # array is left as it was passed in.
  diffs_not_analyzed << 0 if revision_ids.include?(0)
  ids_to_fetch = revision_ids.reject { |id| id == 0 }

  # NOTE(review): 50 is passed through as the batch size; presumably it
  # matches an upstream API limit -- confirm before changing.
  result = LargeBatchesAnalyzer.handle_large_batches(ids_to_fetch, 50)

  result.each do |revision_id, revision_data|
    # Revisions without current content are skipped here and end up in the
    # not-analyzed list below.
    next unless revision_data[:current_content]

    diff = RevisionAnalyzer.analyze_diff(revision_data)
    diffs[revision_id] = diff
    Total.accumulate_totals(diff, total)
    diffs_analyzed << revision_id
  end

  # Every requested id that produced no diff is reported as not analyzed.
  diffs_not_analyzed += ids_to_fetch - diffs_analyzed

  {
    diffs_analyzed_count: diffs_analyzed.size,
    diffs_not_analyzed: diffs_not_analyzed,
    diffs: diffs,
    total: total
  }
end