Module: RDoc::Encoding

Defined in:
lib/rdoc/encoding.rb

Overview

This module is a wrapper around File IO and Encoding that helps RDoc load files and convert them to the correct encoding.

Class Method Summary collapse

Class Method Details

.read_file(filename, encoding, force_transcode = false) ⇒ Object

Reads the contents of filename and handles any encoding directives in the file.

The content will be converted to the given encoding. If the file cannot be converted, a warning will be printed and nil will be returned.

If force_transcode is true, the document will be transcoded and any character that is invalid or undefined in the target encoding will be replaced with ‘?’.



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/rdoc/encoding.rb', line 20

##
# Reads the contents of +filename+ and handles any encoding directives in
# the file.
#
# The content is converted to +encoding+ (Encoding.default_external is used
# when +encoding+ is nil).  If the file cannot be converted a warning is
# printed and nil is returned.
#
# If +force_transcode+ is true the document is transcoded and any character
# that is invalid or undefined in the target encoding is replaced with '?'.
#
# Returns nil without a warning when +filename+ is a directory or does not
# exist.  An ArgumentError other than "unknown encoding name" is re-raised.

def self.read_file filename, encoding, force_transcode = false
  # File.open rather than Kernel#open: Kernel#open can treat a name that
  # starts with "|" as a command to run instead of a file to read.
  content = File.open(filename, "rb") { |f| f.read }
  content.gsub!("\r\n", "\n") if RUBY_PLATFORM =~ /mswin|mingw/

  # The file was read in binary mode, so the BOM pattern must be a binary
  # (/n) regexp.  Without the flag the literal takes the source file's
  # encoding and, when that is UTF-8, matching it against binary content
  # holding non-ASCII bytes raises Encoding::CompatibilityError.
  utf8 = content.sub!(/\A\xef\xbb\xbf/n, '')

  # Applies (and strips) a magic-comment encoding, if the file has one.
  RDoc::Encoding.set_encoding content

  if Object.const_defined? :Encoding then
    begin
      encoding ||= Encoding.default_external
      orig_encoding = content.encoding

      if not orig_encoding.ascii_compatible? then
        content.encode! encoding
      elsif utf8 then
        # A BOM was present, so the bytes are known to be UTF-8.
        content.force_encoding Encoding::UTF_8
        content.encode! encoding
      else
        # assume the content is in our output encoding
        content.force_encoding encoding
      end

      unless content.valid_encoding? then
        # revert and try to transcode
        content.force_encoding orig_encoding
        content.encode! encoding
      end

      unless content.valid_encoding? then
        warn "unable to convert #{filename} to #{encoding}, skipping"
        content = nil
      end
    rescue Encoding::InvalidByteSequenceError,
           Encoding::UndefinedConversionError => e
      if force_transcode then
        # Retry from the original encoding, substituting '?' for anything
        # that cannot be represented instead of failing.
        content.force_encoding orig_encoding
        content.encode!(encoding,
                        :invalid => :replace, :undef => :replace,
                        :replace => '?')
        return content
      else
        warn "unable to convert #{e.message} for #{filename}, skipping"
        return nil
      end
    end
  end

  content
rescue ArgumentError => e
  # Only unknown encoding names are expected here; anything else is a bug.
  raise unless e.message =~ /unknown encoding name - (.*)/
  warn "unknown encoding name \"#{$1}\" for #{filename}, skipping"
  nil
rescue Errno::EISDIR, Errno::ENOENT
  nil
end

.set_encoding(string) ⇒ Object

Sets the encoding of string based on the magic comment



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/rdoc/encoding.rb', line 80

##
# Sets the encoding of +string+ based on the magic comment.
#
# Only the first line (or the line after a leading shebang line) is
# inspected.  It may be an XML declaration with an +encoding+ attribute or
# any line containing <tt>coding:</tt>/<tt>coding=</tt> followed by a name.
# When a name is found that line is removed from +string+ in place and
# +string+ is tagged with the named encoding.
#
# Returns nil when no encoding name is found.  Encoding.find raises
# ArgumentError for a name it does not know.

def self.set_encoding string
  header = /\A(?:#!.*\n)?(.*\n)/.match(string)
  candidate = header && header[1]

  encoding_name =
    if candidate =~ /^<\?xml[^?]*encoding=(["'])(.*?)\1/
      $2
    elsif candidate =~ /\b(?:en)?coding[=:]\s*([^\s;]+)/i
      $1
    end

  return unless encoding_name

  # Drop the magic-comment line from the content itself.
  string.sub! candidate, ''

  if Object.const_defined? :Encoding
    found = Encoding.find encoding_name
    string.force_encoding found if found
  end
end