Class: Wikipedia::VandalismDetection::Instances

Inherits:
Object
  • Object
show all
Defined in:
lib/wikipedia/vandalism_detection/instances.rb

Constant Summary collapse

# Integer indices of the known classes; they serve as the keys of CLASSES
# and CLASSES_SHORT below.
REGULAR_CLASS_INDEX =
0
VANDALISM_CLASS_INDEX =
1
NOT_KNOWN_INDEX =
2
# NOTE(review): presumably the name of the class attribute (the dataset
# builders in this file pass :class directly) - confirm against callers.
CLASS =
'class'
# Long class labels; these are the values of CLASSES.
VANDALISM =
'vandalism'
REGULAR =
'regular'
NOT_KNOWN =
'?'
# Outlier label, taken verbatim from Weka's OneClassClassifier.
OUTLIER =
Weka::Classifiers::Meta::OneClassClassifier::OUTLIER_LABEL
# Short class labels; these are the values of CLASSES_SHORT.
VANDALISM_SHORT =
'V'
REGULAR_SHORT =
'R'
# Names of the revision id attributes; empty_for_test_feature converts them
# to symbols and declares them as numeric attributes.
OLD_REVISION_ID =
'oldrevisionid'
NEW_REVISION_ID =
'newrevisionid'
# Maps each class index to its long label.
CLASSES =
{
    REGULAR_CLASS_INDEX => REGULAR,
    VANDALISM_CLASS_INDEX => VANDALISM,
    NOT_KNOWN_INDEX => NOT_KNOWN
}
# Maps each class index to its short label. The unknown class has no
# dedicated short form and reuses NOT_KNOWN ('?').
CLASSES_SHORT =
{
    REGULAR_CLASS_INDEX => REGULAR_SHORT,
    VANDALISM_CLASS_INDEX => VANDALISM_SHORT,
    NOT_KNOWN_INDEX => NOT_KNOWN
}

Class Method Summary collapse

Class Method Details

.empty ⇒ Object

Returns an empty instances dataset of type Java::WekaCore::Instances::Base. This dataset is used for feature computation and classification for Wikipedia vandalism detection while training.

Examples:

dataset = Wikipedia::VandalismDetection::Instances.empty
=> #<Java::WekaCore::Instances::Base:0xf0f9a00
   @positions=[
     #<Java::WekaCore::Attribute:0x17207a76>,
     #<Java::WekaCore::Attribute:0x5547e4d6>,
     #<Java::WekaCore::Attribute:0x6300c957>,
     ...,
     #<Java::WekaCore::Attribute:0x5a74fae4>]>


52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/wikipedia/vandalism_detection/instances.rb', line 52

# Builds an empty dataset with one numeric attribute per configured feature
# followed by a nominal :class attribute.
#
# Feature names come from Wikipedia::VandalismDetection.configuration.features;
# blanks in a name are replaced by underscores before it becomes the
# attribute name. The class index is set to the number of features, i.e. the
# position of the :class attribute that is declared last.
#
# @return [Object] the newly created Core::Type::Instances::Base dataset
def self.empty
  feature_names = Wikipedia::VandalismDetection.configuration.features
  class_labels = dataset_classes

  # Resolve everything into locals first: the block below calls the
  # numeric/nominal DSL methods on an implicit receiver, so it only relies
  # on captured local variables.
  attribute_names = feature_names.map { |feature| feature.gsub(' ', '_').to_sym }

  instances = Core::Type::Instances::Base.new do
    attribute_names.each { |attribute| numeric attribute }
    nominal :class, class_labels
  end

  instances.tap { |set| set.class_index = attribute_names.size }
end

.empty_for_feature(name) ⇒ Object

Returns an empty instances dataset of type Java::WekaCore::Instances::Base that contains only the given feature and the class attribute. This dataset is used for feature computation and classification for Wikipedia vandalism detection while training.

Examples:

dataset = Wikipedia::VandalismDetection::Instances.empty_for_feature(name)
=> #<Java::WekaCore::Instances::Base:0xf0f9a00
   @positions=[
     #<Java::WekaCore::Attribute:0x17207a76>,
     #<Java::WekaCore::Attribute:0x5547e4d6>]>


77
78
79
80
81
82
83
84
85
86
87
# File 'lib/wikipedia/vandalism_detection/instances.rb', line 77

# Builds an empty dataset for a single feature: one numeric attribute named
# after the feature (blanks replaced by underscores) followed by a nominal
# :class attribute. The class index is set to 1, the position of :class.
#
# @param name [String] the feature name
# @return [Object] the newly created Core::Type::Instances::Base dataset
def self.empty_for_feature(name)
  class_labels = dataset_classes
  feature_attribute = name.gsub(' ', '_').to_sym

  instances = Core::Type::Instances::Base.new do
    numeric feature_attribute
    nominal :class, class_labels
  end

  instances.tap { |set| set.class_index = 1 }
end

.empty_for_test_class ⇒ Object

Returns an empty instances dataset of type Java::WekaCore::Instances::Base. This dataset is used for creating the ground truth classification.



110
111
112
113
114
115
116
117
118
# File 'lib/wikipedia/vandalism_detection/instances.rb', line 110

# Builds an empty dataset whose only attribute is the nominal :class
# attribute, populated with the labels returned by dataset_classes.
# No class index is assigned here.
#
# @return [Object] the newly created Core::Type::Instances::Base dataset
def self.empty_for_test_class
  class_labels = dataset_classes

  Core::Type::Instances::Base.new { nominal :class, class_labels }
end

.empty_for_test_feature(name) ⇒ Object

Returns an empty instances dataset of type Java::WekaCore::Instances::Base. This dataset is used for feature computation and classification for Wikipedia vandalism detection while testing.

Examples:

dataset = Wikipedia::VandalismDetection::Instances.empty_for_test_feature(name)
=> #<Java::WekaCore::Instances::Base:0xf0f9a00
   @positions=[
     #<Java::WekaCore::Attribute:0x17207a76>]>


98
99
100
101
102
103
104
105
106
# File 'lib/wikipedia/vandalism_detection/instances.rb', line 98

# Builds an empty dataset for a single feature at test time. It declares
# three numeric attributes in this order: the feature (blanks in the name
# replaced by underscores), OLD_REVISION_ID and NEW_REVISION_ID.
# No class attribute is declared and no class index is assigned.
#
# @param name [String] the feature name
# @return [Object] the newly created Core::Type::Instances::Base dataset
def self.empty_for_test_feature(name)
  Core::Type::Instances::Base.new do
    feature_attribute = name.gsub(' ', '_').to_sym

    [feature_attribute, OLD_REVISION_ID.to_sym, NEW_REVISION_ID.to_sym].each do |attribute|
      numeric attribute
    end
  end
end