module RDoc::Encoding

This class is a wrapper around File IO and Encoding that helps RDoc load files and convert them to the correct encoding.

Public Class Methods

read_file(filename, encoding, force_transcode = false) click to toggle source

Reads the contents of filename and handles any encoding directives in the file.

The content will be converted to the encoding. If the file cannot be converted a warning will be printed and nil will be returned.

If force_transcode is true the document will be transcoded and any unknown character in the target encoding will be replaced with '?'

# File lib/rdoc/encoding.rb, line 20
def self.read_file filename, encoding, force_transcode = false
  content = open filename, "rb" do |f| f.read end
  content.gsub!("\r\n", "\n") if RUBY_PLATFORM =~ /mswin|mingw/

  utf8 = content.sub!(/\A\xef\xbb\xbf/, '')

  RDoc::Encoding.set_encoding content

  begin
    encoding ||= Encoding.default_external
    orig_encoding = content.encoding

    if not orig_encoding.ascii_compatible? then
      content.encode! encoding
    elsif utf8 then
      content.force_encoding Encoding::UTF_8
      content.encode! encoding
    else
      # assume the content is in our output encoding
      content.force_encoding encoding
    end

    unless content.valid_encoding? then
      # revert and try to transcode
      content.force_encoding orig_encoding
      content.encode! encoding
    end

    unless content.valid_encoding? then
      warn "unable to convert #{filename} to #{encoding}, skipping"
      content = nil
    end
  rescue Encoding::InvalidByteSequenceError,
         Encoding::UndefinedConversionError => e
    if force_transcode then
      content.force_encoding orig_encoding
      content.encode!(encoding,
                      :invalid => :replace, :undef => :replace,
                      :replace => '?')
      return content
    else
      warn "unable to convert #{e.message} for #{filename}, skipping"
      return nil
    end
  end

  content
rescue ArgumentError => e
  raise unless e.message =~ /unknown encoding name - (.*)/
  warn "unknown encoding name \"#{$1}\" for #{filename}, skipping"
  nil
rescue Errno::EISDIR, Errno::ENOENT
  nil
end
remove_frozen_string_literal(string) click to toggle source
# File lib/rdoc/encoding.rb, line 75
def self.remove_frozen_string_literal string
  string =~ /\A(?:#!.*\n)?(.*\n)/
  first_line = $1

  if first_line =~ /\A# +frozen[-_]string[-_]literal[=:].+$/i
    string.sub! first_line, ''
  end
end
set_encoding(string) click to toggle source

Sets the encoding of string based on the magic comment

# File lib/rdoc/encoding.rb, line 87
def self.set_encoding string
  remove_frozen_string_literal string

  string =~ /\A(?:#!.*\n)?(.*\n)/

  first_line = $1

  name = case first_line
         when /^<\?xml[^?]*encoding=(["'])(.*?)\1/ then $2
         when /\b(?:en)?coding[=:]\s*([^\s;]+)/i   then $1
         else                                           return
         end

  string.sub! first_line, ''

  remove_frozen_string_literal string

  enc = Encoding.find name
  string.force_encoding enc if enc
end