Class: Uchardet::CLI

Inherits:
Object
  • Object
show all
Defined in:
lib/uchardet/cli.rb

Class Method Summary collapse

Class Method Details

.detect ⇒ Object



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/uchardet/cli.rb', line 64

# Detects the character set of the file at +@options[:path]+ and writes one
# "ENCODING (confidence N%)" line per match to +@stdout+.
#
# Reads these keys from +@options+:
# * +:input_filtered+    - assigned to the detector's +input_filtered+.
# * +:declared_encoding+ - assigned to the detector's +declared_encoding+.
# * +:detect_all+        - when truthy, every match from +detect_all+ is
#   printed; otherwise only the single result of +detect+.
# * +:path+              - path of the file whose content is examined.
#
# Ordinary failures (any StandardError, e.g. a missing file) are reported on
# STDERR as "ERROR: ..." rather than raised.
def self.detect
  detector = ICU::UCharsetDetector.new
  detector.input_filtered = @options[:input_filtered]
  detector.declared_encoding = @options[:declared_encoding]

  # File.read, unlike IO.read, never interprets a path starting with "|" as
  # a command to run, so a hostile file name cannot spawn a subprocess.
  source = File.read(@options[:path])
  matches =
    if @options[:detect_all]
      detector.detect_all(source)
    else
      [detector.detect(source)]
    end

  matches.each do |match|
    @stdout.puts "#{match[:encoding]} (confidence #{match[:confidence]}%)"
  end
rescue StandardError => ex
  # Deliberately not `rescue Exception`: SystemExit, Interrupt and similar
  # must propagate so the process can actually be stopped.
  STDERR.puts "ERROR: #{ex}"
end

.execute(stdout, args = []) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/uchardet/cli.rb', line 6

# Command-line entry point: parses +args+, stores the resulting settings in
# +@options+ and runs the requested action.
#
# With no arguments the usage text is written to +stdout+. Otherwise the
# options are parsed and the last remaining argument is taken as the path of
# the file to examine, which is then handed to +detect+.
#
# @param stdout [#puts] stream that receives normal output (help, version,
#   charset list, detection results).
# @param args [Array<String>] command-line arguments; option switches are
#   removed from this array in place by +OptionParser#parse!+.
# @return [OptionParser] the parser instance built for this invocation.
# @raise [SystemExit] status 0 after --list, --help and --version; status 1
#   when the options cannot be parsed or no file path is given.
def self.execute(stdout, args=[])
  @stdout = stdout
  @options = {
    input_filtered: false,
    declared_encoding: nil,
    detect_all: false,
    path: nil
  }

  OptionParser.new do |opts|
    opts.banner = <<-BANNER.gsub(/^\s*/, '')
      Usage: #{File.basename($0)} [options] file
    BANNER

    opts.on("-l", "--list",
            "Display list of detectable character sets."
            ) { self.list; exit }
    opts.on("-s", "--strip",
            "Strip HTML or XML markup before detection."
            ) { @options[:input_filtered] = true }
    # The ENCODING placeholder makes the switch take a mandatory argument;
    # without it OptionParser treats -e as a flag and yields +true+.
    opts.on("-e", "--encoding ENCODING",
            "Hint the charset detector about possible encoding."
            ) { |arg| @options[:declared_encoding] = arg }
    opts.on("-a", "--all",
            "Show all matching encodings."
            ) { @options[:detect_all] = true }
    opts.on("-h", "--help",
            "Show this help message."
            ) { @stdout.puts opts; exit }
    opts.on("-v", "--version",
            "Show version."
            ) { @stdout.puts Uchardet::VERSION; exit }

    if args.empty?
      @stdout.puts opts
    else
      begin
        opts.parse!(args)
      rescue OptionParser::ParseError => ex
        STDERR.puts "ERROR: #{ex}. See #{File.basename($0)} --help"
        # Non-zero status so shells and scripts can tell the run failed.
        exit 1
      end

      # parse! has stripped the switches; what remains are positional args.
      @options[:path] = args.last
      if @options[:path].nil? || @options[:path].empty?
        STDERR.puts "ERROR: please specify a file path."
        exit 1
      end

      self.detect
    end
  end
end

.list ⇒ Object



60
61
62
# File 'lib/uchardet/cli.rb', line 60

# Writes the name of every charset reported by
# ICU::UCharsetDetector.detectable_charsets to +@stdout+, one per line,
# with duplicates removed and names in sorted order.
def self.list
  charsets = ICU::UCharsetDetector.detectable_charsets
  charsets.uniq.sort.each do |charset|
    @stdout.puts charset
  end
end