Class: FiletypeValidation::PlaintextValidator

Inherits:
BaseValidator show all
Defined in:
lib/filetype_validation/plaintext_validator.rb

Overview

Validator that classifies a file as plaintext or binary.

Constant Summary collapse

LINE_FEED =

counting constants

10
CARRIAGE_RETURN =
13
CTRL_CHAR =
0...32
ASCII_CHAR =
33...128
BIN_CHAR_8 =
129...(2**8)
LINE_TH =

heuristic magic nums

1000
CTRL_TH =
0.1
BIN_TH =
0.05
LINE_NORM =
1 / 10_000
BYTES_TO_READ =

file reading

4096

Instance Attribute Summary

Attributes inherited from BaseValidator

#file, #options

Instance Method Summary collapse

Constructor Details

#initialize(file, options = {}) ⇒ PlaintextValidator

Returns a new instance of PlaintextValidator.



22
23
24
25
26
27
28
29
30
# File 'lib/filetype_validation/plaintext_validator.rb', line 22

# Builds a validator for +file+ and prepares the state used by the
# plaintext/binary heuristic.
#
# @param file the file to inspect; forwarded unchanged to BaseValidator
# @param options [Hash] validator options; forwarded unchanged to BaseValidator
def initialize(file, options = {})
  super(file, options)

  # Line-length trackers both start from zero.
  @line_len = @max_line_len = 0

  # Per-category character tallies, all starting empty.
  @char_counts = { ascii: 0, ctrl: 0, bin_8: 0 }

  # Taken last, after the counters above are in place.
  @file_sample = make_sample_file
end

Instance Method Details

#valid? ⇒ Boolean

Evaluates the given file to determine if it's binary.

Returns:

  • (Boolean)

    true if it’s plaintext



35
36
37
38
39
40
41
42
43
# File 'lib/filetype_validation/plaintext_validator.rb', line 35

# Reports whether the file is considered plaintext.
#
# When no sample is available (@file_sample is nil) the file is accepted
# without running any counting. Otherwise the counts are computed and the
# result of the threshold check decides the outcome.
#
# @return [Boolean] true if the file is treated as plaintext, false otherwise
def valid?
  if @file_sample.nil?
    true
  else
    calculate_counts
    # Coerce to a strict boolean so callers always get true or false.
    within_threshold? ? true : false
  end
end