Module: RDoc::Encoding

Defined in:
lib/rdoc/encoding.rb

Overview

This module is a wrapper around File IO and Encoding that helps RDoc load files and convert them to the correct encoding.

Constant Summary collapse

# Matches a run of one or more consecutive header lines, where each line is
# one of:
#
# * a shebang line (only at the very start of the string),
# * a frozen-string-literal magic comment,
# * a coding/encoding magic comment, whose value is captured as +name+,
# * an XML declaration with an encoding attribute, whose value is also
#   captured as +name+.
#
# The pattern is compiled in extended mode (so literal '#' is written as
# '\#' and layout whitespace is ignored) and is case-insensitive.
HEADER_REGEXP =
/^
  (?:
    \A\#!.*\n
    |
    ^\#\s+frozen[-_]string[-_]literal[=:].+\n
    |
    ^\#[^\n]+\b(?:en)?coding[=:]\s*(?<name>[^\s;]+).*\n
    |
    <\?xml[^?]*encoding=(?<quote>["'])(?<name>.*?)\k<quote>.*\n
  )+
/xi

Class Method Summary collapse

Class Method Details

.change_encoding(text, encoding) ⇒ Object

Changes the encoding of text to encoding without converting its bytes, and returns a new string.


123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/rdoc/encoding.rb', line 123

# Re-tags +text+ with +encoding+ without transcoding its bytes.
#
# An RDoc::Comment is handled by calling its own +encode!+ and the result of
# that call is returned. Any other +text+ is copied into a new String tagged
# with +encoding+ (on Ruby older than 2.3 the receiver itself is re-tagged
# with +force_encoding+ instead).
def self.change_encoding text, encoding
  return text.encode! encoding if text.kind_of? RDoc::Comment

  # TODO: Remove this condition after Ruby 2.2 EOL
  # Compare the version components numerically: comparing the raw version
  # strings is lexicographic and would rank e.g. "10.0.0" below "2.3.0".
  if (RUBY_VERSION.split('.').map(&:to_i) <=> [2, 3, 0]) < 0
    text.force_encoding encoding
  else
    String.new text, encoding: encoding
  end
end

.detect_encoding(string) ⇒ Object

Detects the encoding of string based on the magic comment


103
104
105
106
107
108
# File 'lib/rdoc/encoding.rb', line 103

# Looks up the encoding named in the header lines of +string+.
#
# Returns nil when HEADER_REGEXP does not match or the match carries no
# +name+ capture; otherwise returns the result of Encoding.find for the
# captured name.
def self.detect_encoding string
  header = HEADER_REGEXP.match string
  return nil unless header

  declared = header[:name]
  return nil unless declared

  Encoding.find declared
end

.read_file(filename, encoding, force_transcode = false) ⇒ Object

Reads the contents of filename and handles any encoding directives in the file.

The content will be converted to the given encoding. If the file cannot be converted, a warning will be printed and nil will be returned.

If force_transcode is true, the document will be transcoded and any character that is invalid or undefined in the target encoding will be replaced with '?'.


32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/rdoc/encoding.rb', line 32

# Reads the contents of +filename+ and handles any encoding directives in
# the file.
#
# +encoding+ is the target encoding; when it is nil,
# Encoding.default_external is used. Returns the converted content.
#
# Returns nil (after printing a warning) when the content cannot be
# converted or when an "unknown encoding name" ArgumentError is raised, and
# returns nil silently when +filename+ is a directory or does not exist.
# Any other ArgumentError is re-raised.
#
# If +force_transcode+ is true, a failed conversion is retried with invalid
# and undefined characters replaced by '?' instead of returning nil.
def self.read_file filename, encoding, force_transcode = false
  content = File.open filename, "rb" do |f| f.read end
  content.gsub!("\r\n", "\n") if RUBY_PLATFORM =~ /mswin|mingw/

  # Detect and strip a leading UTF-8 byte order mark by comparing raw bytes.
  # A regexp literal containing these bytes takes its encoding from the
  # source file, and in a UTF-8 source it cannot be matched against binary
  # content holding non-ASCII bytes; a binary prefix check has no such
  # dependency.
  bom = [0xEF, 0xBB, 0xBF].pack('C*')
  utf8 = content.start_with? bom
  content = content.byteslice(bom.bytesize..-1) if utf8

  enc = RDoc::Encoding.detect_encoding content
  content = RDoc::Encoding.change_encoding content, enc if enc

  begin
    encoding ||= Encoding.default_external
    orig_encoding = content.encoding

    if not orig_encoding.ascii_compatible? then
      content = content.encode encoding
    elsif utf8 then
      # a BOM was present, so the bytes are UTF-8 whatever was declared
      content = RDoc::Encoding.change_encoding content, Encoding::UTF_8
      content = content.encode encoding
    else
      # assume the content is in our output encoding
      content = RDoc::Encoding.change_encoding content, encoding
    end

    unless content.valid_encoding? then
      # revert and try to transcode
      content = RDoc::Encoding.change_encoding content, orig_encoding
      content = content.encode encoding
    end

    unless content.valid_encoding? then
      warn "unable to convert #{filename} to #{encoding}, skipping"
      content = nil
    end
  rescue Encoding::InvalidByteSequenceError,
         Encoding::UndefinedConversionError => e
    if force_transcode then
      # retry from the original encoding, substituting unconvertible bytes
      content = RDoc::Encoding.change_encoding content, orig_encoding
      content = content.encode(encoding,
                               :invalid => :replace,
                               :undef => :replace,
                               :replace => '?')
      return content
    else
      warn "unable to convert #{e.message} for #{filename}, skipping"
      return nil
    end
  end

  content
rescue ArgumentError => e
  # only the "unknown encoding name" failure is handled here
  raise unless e.message =~ /unknown encoding name - (.*)/
  warn "unknown encoding name \"#{$1}\" for #{filename}, skipping"
  nil
rescue Errno::EISDIR, Errno::ENOENT
  nil
end

.remove_frozen_string_literal(string) ⇒ Object


89
90
91
92
93
94
95
96
97
98
# File 'lib/rdoc/encoding.rb', line 89

# Drops a frozen-string-literal magic comment from the head of +string+.
#
# The line inspected is the first line, or the line following a leading
# shebang line. When it is a frozen-string-literal comment, a copy of
# +string+ with that line removed is returned; otherwise +string+ itself is
# returned.
def self.remove_frozen_string_literal string
  matched = string =~ /\A(?:#!.*\n)?(.*\n)/
  candidate = matched ? Regexp.last_match(1) : nil

  directive = candidate && candidate =~ /\A# +frozen[-_]string[-_]literal[=:].+$/i
  return string unless directive

  string.sub candidate, ''
end

.remove_magic_comment(string) ⇒ Object

Blanks out the magic comments and shebang matched by HEADER_REGEXP, keeping their newlines so the line count of string is unchanged.


113
114
115
116
117
# File 'lib/rdoc/encoding.rb', line 113

# Returns a copy of +string+ in which the first run of header lines matched
# by HEADER_REGEXP is emptied: every character except the newlines is
# dropped, so the number of lines stays the same.
def self.remove_magic_comment string
  string.sub(HEADER_REGEXP) { |header| header.delete("^\n") }
end