Extended maintenance of Ruby versions 1.8.7 and 1.9.2 will end on July 31, 2014. Read more

In Files

  • rexml/encoding.rb
  • rexml/encodings/CP-1252.rb
  • rexml/encodings/EUC-JP.rb
  • rexml/encodings/ICONV.rb
  • rexml/encodings/ISO-8859-1.rb
  • rexml/encodings/ISO-8859-15.rb
  • rexml/encodings/SHIFT-JIS.rb
  • rexml/encodings/UNILE.rb
  • rexml/encodings/US-ASCII.rb
  • rexml/encodings/UTF-16.rb
  • rexml/encodings/UTF-8.rb

REXML::Encoding

Constants

EUCTOU8
SJISTOU8
U8TOEUC
U8TOSJIS
UNILE
UTF_16
UTF_8

Native, default format is UTF-8, so it is declared here rather than in an encodings/ definition.

Attributes

encoding[R]

ID —> Encoding name

Public Class Methods

apply(obj, enc) click to toggle source
 
               # File rexml/encoding.rb, line 7
# Runs the handler registered under +enc+ against +obj+.
#
# The entry stored for +enc+ in @encoding_methods is looked up and invoked
# with +obj+ through its #[] method; the result of that call is returned.
def self.apply(obj, enc)
  handler = @encoding_methods[enc]
  handler[obj]
end
            
encoding_method(enc) click to toggle source
 
               # File rexml/encoding.rb, line 10
# Returns whatever is stored for +enc+ in @encoding_methods without
# invoking it.
def self.encoding_method(enc)
  registry = @encoding_methods
  registry[enc]
end
            
register(enc, &block) click to toggle source
 
               # File rexml/encoding.rb, line 4
# Stores +block+ in @encoding_methods under the key +enc+, replacing any
# entry already stored for that key. Returns the stored block.
def self.register(enc, &block)
  registry = @encoding_methods
  registry[enc] = block
end
            

Public Instance Methods

check_encoding(str) click to toggle source
 
               # File rexml/encoding.rb, line 56
# Detects the encoding of the XML text held in +str+.
#
# A leading byte-order mark is recognised first: FE FF returns UTF_16 and
# FF FE returns UNILE. In both cases the two BOM bytes are removed from
# +str+ in place. Otherwise the value of the +encoding+ pseudo-attribute of
# an XML declaration is returned upcased, or UTF_8 when none is found.
#
# The BOM is compared byte-wise through String#unpack so the test gives the
# same answer whether String#[] yields an Integer or a one-character String.
# The declaration pattern carries no fixed encoding flag, so it can also be
# matched against binary strings that contain bytes above 0x7F.
def check_encoding(str)
  # We have to recognize UTF-16, LSB UTF-16, and UTF-8
  leading_bytes = str.unpack('C2')
  if leading_bytes == [0xfe, 0xff]
    str[0, 2] = ""
    return UTF_16
  elsif leading_bytes == [0xff, 0xfe]
    str[0, 2] = ""
    return UNILE
  end
  declaration = /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m.match(str)
  return declaration[3].upcase if declaration
  UTF_8
end
            
decode(str) click to toggle source
Alias for: from_iso_8859_15
decode_ascii(str) click to toggle source

Convert to UTF-8

 
               # File rexml/encodings/US-ASCII.rb, line 19
# Converts +str+ to UTF-8: every byte of the input is read as an unsigned
# 8-bit value and re-packed as the Unicode code point with that number.
def decode_ascii(str)
  byte_values = str.unpack('C*')
  byte_values.pack('U*')
end
            
decode_cp1252(str) click to toggle source

Convert to UTF-8

 
               # File rexml/encodings/CP-1252.rb, line 63
# Converts the CP-1252 encoded bytes of +str+ to a UTF-8 string.
#
# Bytes that CP-1252 assigns differently from ISO-8859-1 (within 0x80..0x9F)
# are mapped to their Unicode code points; every other byte is used as the
# code point with the same number. The trailing comment on each mapping is
# the UTF-8 byte sequence of the target character.
def decode_cp1252(str)
  array_enc = str.unpack('C*').map do |num|
    case num
      # characters that were added compared to iso-8859-1
    when 0x80 then 0x20AC # 0xe2 0x82 0xac
    when 0x82 then 0x201A # 0xe2 0x80 0x9a
    when 0x83 then 0x0192 # 0xc6 0x92
    when 0x84 then 0x201E # 0xe2 0x80 0x9e
    when 0x85 then 0x2026 # 0xe2 0x80 0xa6
    when 0x86 then 0x2020 # 0xe2 0x80 0xa0
    when 0x87 then 0x2021 # 0xe2 0x80 0xa1
    when 0x88 then 0x02C6 # 0xcb 0x86
    when 0x89 then 0x2030 # 0xe2 0x80 0xb0
    when 0x8A then 0x0160 # 0xc5 0xa0
    when 0x8B then 0x2039 # 0xe2 0x80 0xb9
    when 0x8C then 0x0152 # 0xc5 0x92
    when 0x8E then 0x017D # 0xc5 0xbd
    when 0x91 then 0x2018 # 0xe2 0x80 0x98
    when 0x92 then 0x2019 # 0xe2 0x80 0x99
    when 0x93 then 0x201C # 0xe2 0x80 0x9c
    when 0x94 then 0x201D # 0xe2 0x80 0x9d
    when 0x95 then 0x2022 # 0xe2 0x80 0xa2
    when 0x96 then 0x2013 # 0xe2 0x80 0x93
    when 0x97 then 0x2014 # 0xe2 0x80 0x94
    when 0x98 then 0x02DC # 0xcb 0x9c
    when 0x99 then 0x2122 # 0xe2 0x84 0xa2
    when 0x9A then 0x0161 # 0xc5 0xa1
    when 0x9B then 0x203A # 0xe2 0x80 0xba
      # 0x9C is the lowercase ligature U+0153, not U+0152 (which is 0x8C)
    when 0x9C then 0x0153 # 0xc5 0x93
    when 0x9E then 0x017E # 0xc5 0xbe
    when 0x9F then 0x0178 # 0xc5 0xb8
    else
      num
    end
  end
  array_enc.pack('U*')
end
            
decode_eucjp(str) click to toggle source
 
               # File rexml/encodings/EUC-JP.rb, line 6
# Passes +str+ to Uconv.euctou8 and returns that call's result.
def decode_eucjp(str)
  Uconv.euctou8(str)
end
            
decode_iconv(str) click to toggle source
 
               # File rexml/encodings/ICONV.rb, line 6
# Converts +str+ to UTF-8 through Iconv.conv, using the current value of
# @encoding as the source encoding. Returns the result of Iconv.conv.
def decode_iconv(str)
  source_encoding = @encoding
  Iconv.conv(UTF_8, source_encoding, str)
end
            
decode_sjis(content) click to toggle source
 
               # File rexml/encodings/SHIFT-JIS.rb, line 6
# Passes +content+ to Uconv.sjistou8 and returns that call's result.
def decode_sjis(content)
  Uconv.sjistou8(content)
end
            
decode_unile(str) click to toggle source
 
               # File rexml/encodings/UNILE.rb, line 18
# Converts little-endian 16-bit data in +str+ to a UTF-8 string.
#
# The input is read as bytes and consumed in pairs; each pair (low byte
# first, high byte second) is combined into one code point.
def decode_unile(str)
  code_points = []
  str.unpack('C*').each_slice(2) do |low, high|
    code_points << (low + high * 0x100)
  end
  code_points.pack('U*')
end
            
decode_utf16(str) click to toggle source
 
               # File rexml/encodings/UTF-16.rb, line 18
# Converts big-endian 16-bit data in +str+ to a UTF-8 string.
#
# A byte-order mark (FE FF) is dropped only when it forms the first two
# bytes of the input. The check is made on the unpacked bytes rather than
# with a line-anchored regexp, so FE FF following a 0x0A byte later in the
# data is kept as content. The remaining bytes are consumed in pairs (high
# byte first, low byte second), each pair giving one code point.
def decode_utf16(str)
  array_enc = str.unpack('C*')
  array_enc = array_enc[2..-1] if array_enc[0] == 0xFE && array_enc[1] == 0xFF
  array_utf8 = []
  0.step(array_enc.size - 1, 2) do |i|
    array_utf8 << (array_enc.at(i + 1) + array_enc.at(i) * 0x100)
  end
  array_utf8.pack('U*')
end
            
decode_utf8(str) click to toggle source
 
               # File rexml/encodings/UTF-8.rb, line 7
# Returns +str+ itself, unchanged; no conversion is performed for UTF-8
# input.
def decode_utf8(str)
  str
end
            
encode(content) click to toggle source
Alias for: to_iso_8859_15
encode_ascii(content) click to toggle source

Convert from UTF-8

 
               # File rexml/encodings/US-ASCII.rb, line 4
# Converts the UTF-8 string +content+ to US-ASCII bytes.
#
# Code points up to 0x7F are emitted as a single byte; every other code
# point is replaced by the bytes of its decimal numeric entity (&#nnnn;).
def encode_ascii(content)
  pieces = content.unpack('U*').map do |num|
    # Numeric entity (&#nnnn;); shared by Stefan Scholl
    num <= 0x7F ? num : "&\##{num};".unpack('C*')
  end
  pieces.flatten.pack('C*')
end
            
encode_cp1252(content) click to toggle source

Convert from UTF-8

 
               # File rexml/encodings/CP-1252.rb, line 14
# Converts the UTF-8 string +content+ to CP-1252 bytes.
#
# Code points 0..0xFF are emitted as the byte with the same value. The
# characters CP-1252 adds over ISO-8859-1 are mapped to their bytes in
# 0x80..0x9F. Any other code point is replaced by the bytes of its decimal
# numeric entity (&#nnnn;). The trailing comment on each mapping is the
# UTF-8 byte sequence of the source character.
#
# NOTE(review): code points 0x80..0x9F take the pass-through branch, while
# decode_cp1252 reads most of those bytes back as other characters; left
# unchanged here to preserve existing output.
def encode_cp1252(content)
  array_utf8 = content.unpack('U*')
  array_enc = []
  array_utf8.each do |num|
    case num
      # shortcut first bunch basic characters
    when 0..0xFF; array_enc << num
      # characters added compared to iso-8859-1
    when 0x20AC; array_enc << 0x80 # 0xe2 0x82 0xac
    when 0x201A; array_enc << 0x82 # 0xe2 0x80 0x9a
    when 0x0192; array_enc << 0x83 # 0xc6 0x92
    when 0x201E; array_enc << 0x84 # 0xe2 0x80 0x9e
    when 0x2026; array_enc << 0x85 # 0xe2 0x80 0xa6
    when 0x2020; array_enc << 0x86 # 0xe2 0x80 0xa0
    when 0x2021; array_enc << 0x87 # 0xe2 0x80 0xa1
    when 0x02C6; array_enc << 0x88 # 0xcb 0x86
    when 0x2030; array_enc << 0x89 # 0xe2 0x80 0xb0
    when 0x0160; array_enc << 0x8A # 0xc5 0xa0
    when 0x2039; array_enc << 0x8B # 0xe2 0x80 0xb9
    when 0x0152; array_enc << 0x8C # 0xc5 0x92
    when 0x017D; array_enc << 0x8E # 0xc5 0xbd
    when 0x2018; array_enc << 0x91 # 0xe2 0x80 0x98
    when 0x2019; array_enc << 0x92 # 0xe2 0x80 0x99
    when 0x201C; array_enc << 0x93 # 0xe2 0x80 0x9c
    when 0x201D; array_enc << 0x94 # 0xe2 0x80 0x9d
    when 0x2022; array_enc << 0x95 # 0xe2 0x80 0xa2
    when 0x2013; array_enc << 0x96 # 0xe2 0x80 0x93
    when 0x2014; array_enc << 0x97 # 0xe2 0x80 0x94
    when 0x02DC; array_enc << 0x98 # 0xcb 0x9c
    when 0x2122; array_enc << 0x99 # 0xe2 0x84 0xa2
    when 0x0161; array_enc << 0x9A # 0xc5 0xa1
    when 0x203A; array_enc << 0x9B # 0xe2 0x80 0xba
      # U+0153 (not a second U+0152) is the character stored at 0x9C
    when 0x0153; array_enc << 0x9C # 0xc5 0x93
    when 0x017E; array_enc << 0x9E # 0xc5 0xbe
    when 0x0178; array_enc << 0x9F # 0xc5 0xb8
    else
      # Only code points above 0xFF reach this branch.
      # Numeric entity (&#nnnn;); shared by Stefan Scholl
      array_enc.concat "&\##{num};".unpack('C*')
    end
  end
  array_enc.pack('C*')
end
            
encode_eucjp(content) click to toggle source
 
               # File rexml/encodings/EUC-JP.rb, line 10
# Passes +content+ to Uconv.u8toeuc and returns that call's result.
def encode_eucjp(content)
  Uconv.u8toeuc(content)
end
            
encode_iconv(content) click to toggle source
 
               # File rexml/encodings/ICONV.rb, line 10
# Converts the UTF-8 +content+ through Iconv.conv, using the current value
# of @encoding as the target encoding. Returns the result of Iconv.conv.
def encode_iconv(content)
  target_encoding = @encoding
  Iconv.conv(target_encoding, UTF_8, content)
end
            
encode_sjis(str) click to toggle source
 
               # File rexml/encodings/SHIFT-JIS.rb, line 10
# Passes +str+ to Uconv.u8tosjis and returns that call's result.
def encode_sjis(str)
  Uconv.u8tosjis(str)
end
            
encode_unile(content) click to toggle source
 
               # File rexml/encodings/UNILE.rb, line 3
# Converts the UTF-8 string +content+ to little-endian 16-bit code units.
#
# Each code point up to 0xFFFF is written as two bytes, low byte first.
# A code point above 0xFFFF does not fit in one unit and is replaced by a
# question mark (0x3F). The replacement is spelled as an integer because
# the character literal form is an Integer only on Ruby 1.8; on later
# versions it is a String and cannot be packed as a byte.
def encode_unile(content)
  code_units = content.unpack('U*').map do |num|
    num > 0xFFFF ? 0x3F : num
  end
  # 'v' packs each value as an unsigned 16-bit little-endian integer.
  code_units.pack('v*')
end
            
encode_utf16(content) click to toggle source
 
               # File rexml/encodings/UTF-16.rb, line 3
# Converts the UTF-8 string +content+ to big-endian 16-bit code units.
#
# Each code point up to 0xFFFF is written as two bytes, high byte first.
# A code point above 0xFFFF does not fit in one unit and is replaced by a
# question mark (0x3F). The replacement is spelled as an integer because
# the character literal form is an Integer only on Ruby 1.8; on later
# versions it is a String and cannot be packed as a byte.
def encode_utf16(content)
  code_units = content.unpack('U*').map do |num|
    num > 0xFFFF ? 0x3F : num
  end
  # 'n' packs each value as an unsigned 16-bit big-endian integer.
  code_units.pack('n*')
end
            
encode_utf8(content) click to toggle source
 
               # File rexml/encodings/UTF-8.rb, line 3
# Returns +content+ itself, unchanged; no conversion is performed for
# UTF-8 output.
def encode_utf8(content)
  content
end
            
encoding=( enc ) click to toggle source
 
               # File rexml/encoding.rb, line 22
# Switches this object to the encoding named +enc+.
#
# +enc+ is upcased first; nil is treated like UTF_8. When the requested name
# equals the current @encoding the method returns false without doing
# anything. Otherwise @encoding is updated, the matching encoding file is
# required, and Encoding.apply is called with +self+. Returns true on
# success.
#
# Raises ArgumentError when the name contains characters other than word
# characters and '-', or when the fallback encoding file cannot be loaded.
def encoding=( enc )
  # $VERBOSE is lowered for the duration of the call and restored in the
  # ensure clause below, including on the early `return false` and on raise.
  old_verbosity = $VERBOSE
  begin
    $VERBOSE = false
    enc = enc.nil? ? nil : enc.upcase
    return false if defined? @encoding and enc == @encoding
    if enc and enc != UTF_8
      # NOTE(review): @encoding is assigned before the name is validated, so
      # it keeps the rejected value when the ArgumentError below is raised.
      @encoding = enc
      raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/
      # The validated name is interpolated into a require path further down.
      @encoding.untaint 
      begin
        # First choice: the ICONV implementation, applied under the key "ICONV".
        require 'rexml/encodings/ICONV.rb'
        Encoding.apply(self, "ICONV")
      # NOTE(review): rescuing Exception means any failure while loading or
      # applying ICONV (not only LoadError) falls through to the fallback.
      rescue LoadError, Exception
        begin
          # Fallback: an encoding file named after the encoding itself.
          enc_file = File.join( "rexml", "encodings", "#@encoding.rb" )
          require enc_file
          Encoding.apply(self, @encoding)
        rescue LoadError => err
          # The LoadError message is written to standard output before the
          # failure is re-raised as ArgumentError.
          puts err.message
          raise ArgumentError, "No decoder found for encoding #@encoding.  Please install iconv."
        end
      end
    else
      # nil or UTF_8 requested.
      @encoding = UTF_8
      require 'rexml/encodings/UTF-8.rb'
      Encoding.apply(self, @encoding)
    end
  ensure
    $VERBOSE = old_verbosity
  end
  true
end
            
from_iso_8859_15(str) click to toggle source

Convert to UTF-8

 
               # File rexml/encodings/ISO-8859-15.rb, line 51
# Converts the ISO-8859-15 encoded bytes of +str+ to a UTF-8 string.
#
# The eight bytes whose meaning differs from ISO-8859-1 are translated
# through a lookup table; every other byte is used as the code point with
# the same number.
def from_iso_8859_15(str)
  # characters that differ compared to iso-8859-1
  latin9_overrides = {
    0xA4 => 0x20AC,
    0xA6 => 0x0160,
    0xA8 => 0x0161,
    0xB4 => 0x017D,
    0xB8 => 0x017E,
    0xBC => 0x0152,
    0xBD => 0x0153,
    0xBE => 0x0178
  }
  code_points = str.unpack('C*').map { |byte| latin9_overrides.fetch(byte, byte) }
  code_points.pack('U*')
end
            
Also aliased as: decode
to_iso_8859_15(content) click to toggle source

Convert from UTF-8

 
               # File rexml/encodings/ISO-8859-15.rb, line 12
# Converts the UTF-8 string +content+ to ISO-8859-15 bytes.
#
# Code points shared with ISO-8859-1 are emitted as the byte with the same
# value, and the eight characters ISO-8859-15 adds are mapped to their
# bytes. The eight ISO-8859-1 characters that ISO-8859-15 dropped, and all
# code points above 0xFF without a mapping, are replaced by the bytes of
# their decimal numeric entity (&#nnnn;).
def to_iso_8859_15(content)
  array_utf8 = content.unpack('U*')
  array_enc = []
  array_utf8.each do |num|
    case num
      # shortcut first bunch basic characters
    when 0..0xA3; array_enc << num
      # characters removed compared to iso-8859-1: append the entity as
      # bytes, since the array is packed with 'C*' and must hold only integers
    when 0xA4, 0xA6, 0xA8, 0xB4, 0xB8, 0xBC, 0xBD, 0xBE
      array_enc.concat "&\##{num};".unpack('C*')
      # characters added compared to iso-8859-1
    when 0x20AC; array_enc << 0xA4 # 0xe2 0x82 0xac
    when 0x0160; array_enc << 0xA6 # 0xc5 0xa0
    when 0x0161; array_enc << 0xA8 # 0xc5 0xa1
    when 0x017D; array_enc << 0xB4 # 0xc5 0xbd
    when 0x017E; array_enc << 0xB8 # 0xc5 0xbe
    when 0x0152; array_enc << 0xBC # 0xc5 0x92
    when 0x0153; array_enc << 0xBD # 0xc5 0x93
    when 0x0178; array_enc << 0xBE # 0xc5 0xb8
    else
      # all remaining basic characters can be used directly
      if num <= 0xFF
        array_enc << num
      else
        # Numeric entity (&#nnnn;); shared by Stefan Scholl
        array_enc.concat "&\##{num};".unpack('C*')
      end
    end
  end
  array_enc.pack('C*')
end
            
Also aliased as: encode

Commenting is here to help enhance the documentation. For example, code samples, or clarification of the documentation.

If you have questions about Ruby or the documentation, please post to one of the Ruby mailing lists. You will get better, faster help that way.

If you wish to post a correction of the docs, please do so, but also file a bug report so that it can be corrected for the next release. Thank you.

If you want to help improve the Ruby documentation, please visit Documenting-ruby.org.

blog comments powered by Disqus