Class: Wikipedia::VandalismDetection::DefaultConfiguration

Inherits:
Hash
  • Object
show all
Defined in:
lib/wikipedia/vandalism_detection/configuration.rb

Overview

This class represents the default configuration, which is merged with the customized configuration from the config YAML file.

Constant Summary collapse

# Built-in configuration values, used as the base that a custom config YAML
# file is merged with. Entries set to nil have no built-in default.
DEFAULTS = {
  # Resolved once, when this constant is defined.
  'source' => Dir.pwd,

  # Names of the features that are enabled by default.
  'features' => [
    'anonymity',
    'anonymity previous',
    'all wordlists frequency',
    'all wordlists impact',
    'article size',
    'bad frequency',
    'bad impact',
    'biased frequency',
    'biased impact',
    'blanking',
    'character sequence',
    'character diversity',
    'comment length',
    'comment biased frequency',
    'comment pronoun frequency',
    'comment vulgarism frequency',
    'compressibility',
    'copyedit',
    'digit ratio',
    'edits per user',
    'emoticons frequency',
    'emoticons impact',
    'inserted size',
    'inserted words',
    'inserted character distribution',
    'inserted external links',
    'inserted internal links',
    'longest word',
    'markup frequency',
    'markup impact',
    'non-alphanumeric ratio',
    'personal life',
    'pronoun frequency',
    'pronoun impact',
    'removed size',
    'removed words',
    'removed all wordlists frequency',
    'removed bad frequency',
    'removed biased frequency',
    'removed character distribution',
    'removed emoticons frequency',
    'removed markup frequency',
    'removed pronoun frequency',
    'removed sex frequency',
    'removed vulgarism frequency',
    'replacement similarity',
    'reverted',
    'revisions character distribution',
    'sex frequency',
    'sex impact',
    'same editor',
    'size increment',
    'size ratio',
    'term frequency',
    'time interval',
    'time of day',
    'upper case ratio',
    'upper case words ratio',
    'upper to lower case ratio',
    'user reputation',
    'vulgarism frequency',
    'vulgarism impact',
    'weekday',
    'words increment'
  ],

  # Input corpora locations; none are preset.
  'corpora' => {
    'base_directory' => nil,
    'training' => {
      'base_directory' => nil,
      'edits_file' => nil,
      'annotations_file' => nil,
      'revisions_directory' => nil
    },
    'test' => {
      'base_directory' => nil,
      'edits_file' => nil,
      'revisions_directory' => nil,
      'ground_truth_file' => nil
    }
  },

  # Output file names, below a "build" directory in the working directory.
  'output' => {
    'base_directory' => File.join(Dir.pwd, 'build'),
    'training' => {
      'arff_file' => 'training.arff',
      'index_file' => 'training_index.yml'
    },
    'test' => {
      'arff_file' => 'test.arff',
      'index_file' => 'test_index.yml',
      'classification_file' => 'classification.txt'
    }
  },

  # Classifier settings; type and options have no built-in default.
  'classifier' => {
    'type' => nil,
    'options' => nil,
    'cross-validation-fold' => 10,
    'training-data-options' => 'unbalanced',
    'replace-missing-values' => nil
  }
}

Instance Method Summary collapse

Instance Method Details

#config_file ⇒ Object

Looks for a custom config file in <app_root>/config/ and then in <app_root>/lib/config/; if neither contains one, falls back to a search of the parent paths of this file's directory.



277
278
279
280
281
282
283
284
# File 'lib/wikipedia/vandalism_detection/configuration.rb', line 277

# Determines which config file to use.
#
# Candidates are checked in this order:
# 1. <source>/config/config.yml
# 2. <source>/lib/config/config.yml
# 3. whatever find_first_parent_path_for returns for this file's directory
#
# The parent-path lookup only runs when neither file below +source+ exists,
# so no extra filesystem search happens once a config has been found.
#
# @return [Object] path of the first existing candidate, otherwise the result
#   of find_first_parent_path_for (NOTE(review): presumably nil when nothing
#   is found - confirm against that helper)
def config_file
  config_file_path = 'config/config.yml'
  candidates = [
    File.join(source, config_file_path),
    File.join(source, 'lib', config_file_path)
  ]

  candidates.find { |path| File.exist?(path) } ||
    find_first_parent_path_for(File.expand_path(File.dirname(__FILE__)), config_file_path)
end

#load_config_file(file) ⇒ Object



286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# File 'lib/wikipedia/vandalism_detection/configuration.rb', line 286

# Loads the YAML configuration stored at +file+.
#
# The file is parsed only when it exists and its path matches /config\.yml/.
# In every other case a hint is printed via +warn+ and nothing is loaded.
#
# @param file [String, nil] path of the config file; nil is handled like a
#   missing file
# @return [Object, nil] the value returned by YAML.load_file, or nil when no
#   matching file exists
def load_config_file(file)
  # File.exists? was removed in Ruby 3.2; File.exist? is available everywhere.
  # The nil guard lets a missing path reach the warning below instead of
  # raising TypeError inside File.exist?.
  if file && File.exist?(file) && file =~ /config\.yml/
    YAML.load_file(file)
  else
    warn %Q{

      Configuration file not found in
      #{source}/config,
      #{source}/lib/config directory
      or any other parent path.

      To customize the system, create a config.yml file.

    }
  end
end

#source ⇒ Object



271
272
273
# File 'lib/wikipedia/vandalism_detection/configuration.rb', line 271

# Returns the value stored under the 'source' key of DEFAULTS.
# In DEFAULTS that entry is set to Dir.pwd, evaluated when the constant is defined.
def source
  DEFAULTS['source']
end