class REXML::Source

A Source can be searched for patterns, and wraps buffers and other objects and provides consumption of text

Attributes

buffer[R]

The current buffer (what we're going to read next)

encoding[R]
line[R]

The line number of the last consumed text

Public Class Methods

new(arg, encoding=nil) click to toggle source

Constructor. @param arg must be a String, and should be a valid XML document. @param encoding if non-nil, sets the encoding of the source to this value, overriding all encoding detection.

# File lib/rexml/source.rb, line 43
# Builds a Source around +arg+.
#
# @param arg must be a String, and should be a valid XML document.  The
#   same object is kept both as the original text and as the current buffer.
# @param encoding when truthy, it is assigned through #encoding= and no
#   detection is attempted; otherwise #detect_encoding is run.
#
# The consumed-line counter (@line) starts at 0.
def initialize(arg, encoding=nil)
  @buffer = arg
  @orig = @buffer
  if !encoding
    detect_encoding
  else
    self.encoding = encoding
  end
  @line = 0
end

Public Instance Methods

consume( pattern ) click to toggle source
# File lib/rexml/source.rb, line 87
# Drops everything up to and including the first match of +pattern+ from
# the buffer.  The buffer is left untouched when the pattern does not match.
#
# @param pattern the matcher; it is sent #match with the current buffer.
# @return the remaining buffer after a successful match, otherwise nil.
def consume( pattern )
  found = pattern.match( @buffer )
  @buffer = found.post_match if found
end
current_line() click to toggle source

@return the current line in the source

# File lib/rexml/source.rb, line 117
# Looks up the start of the unconsumed buffer among the whitespace-separated
# tokens of the original text.
#
# @return the index of the first token that equals the first 31 characters
#   of the buffer, or nil when no token is equal to them.
#
# NOTE(review): despite the name, nothing here counts line breaks - the
# original text is split on whitespace and tokens are compared for equality.
# Confirm the intended contract before relying on the result as a line number.
def current_line
  tokens = @orig.split
  hit = tokens.grep(@buffer[0..30]).last
  tokens.index(hit) if hit
end
detect_encoding() click to toggle source
# File lib/rexml/source.rb, line 125
# Sniffs a byte-order mark at the start of @buffer, strips it, and assigns
# the matching encoding name through #encoding=.  "UTF-8" is used when no
# BOM is present.
#
# The buffer is temporarily re-tagged as ASCII-8BIT so the BOM can be
# compared and removed byte-wise; the original encoding tag is restored in
# the ensure clause.
#
# @return the detected encoding name.
def detect_encoding
  original_encoding = @buffer.encoding
  detected_encoding = "UTF-8"
  begin
    @buffer.force_encoding("ASCII-8BIT")
    # String#b pins every signature to binary.  Without it the literals take
    # this file's source encoding, and in a UTF-8 source they would never
    # compare equal to the binary buffer, silently disabling BOM detection.
    signatures = [
      ["\xfe\xff".b, "UTF-16BE"],
      ["\xff\xfe".b, "UTF-16LE"],
      ["\xef\xbb\xbf".b, "UTF-8"],
    ]
    bom, name = signatures.find { |bytes, _| @buffer.start_with?(bytes) }
    if bom
      @buffer[0, bom.bytesize] = ""
      detected_encoding = name
    end
  ensure
    @buffer.force_encoding(original_encoding)
  end
  self.encoding = detected_encoding
end
empty?() click to toggle source

@return true if the Source is exhausted

# File lib/rexml/source.rb, line 108
# Reports whether the Source has been fully consumed.
#
# @return true only when @buffer equals the empty string; any other value
#   of @buffer (nil included) yields false.
def empty?
  @buffer == ""
end
encoding=(enc) click to toggle source

Inherited from Encoding Overridden to support optimized en/decoding

Calls superclass method REXML::Encoding#encoding=
# File lib/rexml/source.rb, line 56
  # Sets the encoding by delegating to the superclass implementation, then
  # refreshes the buffer through #encoding_updated.  Nothing further happens
  # when the superclass call returns a falsy value.
  #
  # @param enc the encoding to apply; passed to the superclass unchanged.
  def encoding=(enc)
    encoding_updated if super
  end

  # Scans the buffer for +pattern+.  This is not the usual scan(): the
  # pattern has to follow a fixed shape and the source may be consumed as a
  # side effect, so the method is easy to misuse.  Patterns used to be
  # simpler because search regexps were generated on the fly, but that was
  # computationally expensive and slowed the whole REXML package down, since
  # this is by far the most frequently called method.
  #
  # @param pattern must be a Regexp of the form
  #   /^\s*(#{your pattern, with no groups})(.*)/.
  # @param cons when true and at least one match was found, the buffer is
  #   replaced by the text following the last match.
  # @return the result of String#scan on the buffer (an empty Array when
  #   nothing matched), or nil when the buffer itself is nil.
  def scan(pattern, cons=false)
    return nil if @buffer.nil?
    found = @buffer.scan(pattern)
    @buffer = Regexp.last_match.post_match if cons && !found.empty?
    found
  end

  # Does nothing and returns nil.
  # NOTE(review): presumably a hook for sources that can fetch more data -
  # confirm against subclasses.
  def read
    nil
  end

  # Drops everything up to and including the first match of +pattern+ from
  # the buffer.  The buffer is left untouched when the pattern does not match.
  #
  # @param pattern the matcher; it is sent #match with the current buffer.
  # @return the remaining buffer after a successful match, otherwise nil.
  def consume( pattern )
    found = pattern.match( @buffer )
    @buffer = found.post_match if found
  end

  # Matches +pattern+ against the buffer without consuming anything.
  #
  # @param char not used by this implementation.
  # @param pattern the matcher; it is sent #match with the current buffer.
  # @return whatever pattern.match returns (nil when there is no match).
  def match_to( char, pattern )
    pattern.match(@buffer)
  end

  # Matches +pattern+ against the buffer and, on success, consumes the text
  # up to and including the match.
  #
  # A failed match leaves the buffer as it was.  Previously the buffer was
  # overwritten with nil in that case, which threw away the remaining input
  # and disagreed with #consume and #match, both of which only touch the
  # buffer after a successful match.
  #
  # @param char not used by this implementation.
  # @param pattern the matcher; it is sent #match with the current buffer.
  # @return the MatchData, or nil when the pattern does not match.
  def match_to_consume( char, pattern )
    md = pattern.match(@buffer)
    @buffer = md.post_match if md
    md
  end

  # Matches +pattern+ against the buffer, optionally consuming the match.
  #
  # @param pattern the matcher; it is sent #match with the current buffer.
  # @param cons when truthy and the pattern matched, the buffer is replaced
  #   by the text following the match.
  # @return the MatchData, or nil when the pattern does not match.
  def match(pattern, cons=false)
    result = pattern.match(@buffer)
    @buffer = result.post_match if cons && result
    result
  end

  # Reports whether the Source has been fully consumed.
  #
  # @return true only when @buffer equals the empty string; any other value
  #   of @buffer (nil included) yields false.
  def empty?
    @buffer == ""
  end

  # Offset of the unconsumed buffer within the original text.
  #
  # The buffer is what remains after consuming from the front, so it is
  # looked up from the end of the original text.  Searching from the front
  # reported 0 for an exhausted source (the empty string is found at index
  # 0) and a too-small offset whenever the remaining text also occurred
  # earlier in the document.
  #
  # @return the character offset, or nil when the buffer is not found in
  #   the original text.
  def position
    @orig.rindex( @buffer )
  end

  # Looks up the start of the unconsumed buffer among the
  # whitespace-separated tokens of the original text.
  #
  # @return the index of the first token that equals the first 31 characters
  #   of the buffer, or nil when no token is equal to them.
  #
  # NOTE(review): despite the name, nothing here counts line breaks - the
  # original text is split on whitespace and tokens are compared for
  # equality.  Confirm the intended contract before relying on the result as
  # a line number.
  def current_line
    tokens = @orig.split
    hit = tokens.grep(@buffer[0..30]).last
    tokens.index(hit) if hit
  end

  private
  # Sniffs a byte-order mark at the start of @buffer, strips it, and assigns
  # the matching encoding name through #encoding=.  "UTF-8" is used when no
  # BOM is present.
  #
  # The buffer is temporarily re-tagged as ASCII-8BIT so the BOM can be
  # compared and removed byte-wise; the original encoding tag is restored in
  # the ensure clause.
  #
  # @return the detected encoding name.
  def detect_encoding
    original_encoding = @buffer.encoding
    detected_encoding = "UTF-8"
    begin
      @buffer.force_encoding("ASCII-8BIT")
      # String#b pins every signature to binary.  Without it the literals
      # take this file's source encoding, and in a UTF-8 source they would
      # never compare equal to the binary buffer, silently disabling BOM
      # detection.
      signatures = [
        ["\xfe\xff".b, "UTF-16BE"],
        ["\xff\xfe".b, "UTF-16LE"],
        ["\xef\xbb\xbf".b, "UTF-8"],
      ]
      bom, name = signatures.find { |bytes, _| @buffer.start_with?(bytes) }
      if bom
        @buffer[0, bom.bytesize] = ""
        detected_encoding = name
      end
    ensure
      @buffer.force_encoding(original_encoding)
    end
    self.encoding = detected_encoding
  end

  # Brings the buffer in line with the current @encoding.
  #
  # For 'UTF-8' the buffer is only re-tagged as UTF-8 and @to_utf is cleared.
  # For anything else the buffer is replaced by the result of #decode and
  # @to_utf is set.  (#decode is defined outside this block - presumably by
  # the Encoding mixin; verify.)
  def encoding_updated
    if @encoding == 'UTF-8'
      @to_utf = false
      @buffer.force_encoding(::Encoding::UTF_8)
    else
      @buffer = decode(@buffer)
      @to_utf = true
    end
  end
end
encoding_updated() click to toggle source
# File lib/rexml/source.rb, line 146
# Brings the buffer in line with the current @encoding.
#
# For 'UTF-8' the buffer is only re-tagged as UTF-8 and @to_utf is cleared.
# For anything else the buffer is replaced by the result of #decode and
# @to_utf is set.  (#decode is defined outside this block - presumably by
# the Encoding mixin; verify.)
def encoding_updated
  if @encoding == 'UTF-8'
    @to_utf = false
    @buffer.force_encoding(::Encoding::UTF_8)
  else
    @buffer = decode(@buffer)
    @to_utf = true
  end
end
match(pattern, cons=false) click to toggle source
# File lib/rexml/source.rb, line 101
# Matches +pattern+ against the buffer, optionally consuming the match.
#
# @param pattern the matcher; it is sent #match with the current buffer.
# @param cons when truthy and the pattern matched, the buffer is replaced
#   by the text following the match.
# @return the MatchData, or nil when the pattern does not match.
def match(pattern, cons=false)
  result = pattern.match(@buffer)
  @buffer = result.post_match if cons && result
  result
end
match_to( char, pattern ) click to toggle source
# File lib/rexml/source.rb, line 91
# Matches +pattern+ against the buffer without consuming anything.
#
# @param char not used by this implementation.
# @param pattern the matcher; it is sent #match with the current buffer.
# @return whatever pattern.match returns (nil when there is no match).
def match_to( char, pattern )
  pattern.match(@buffer)
end
match_to_consume( char, pattern ) click to toggle source
# File lib/rexml/source.rb, line 95
# Matches +pattern+ against the buffer and, on success, consumes the text up
# to and including the match.
#
# A failed match leaves the buffer as it was.  Previously the buffer was
# overwritten with nil in that case, which threw away the remaining input
# and disagreed with #consume and #match, both of which only touch the
# buffer after a successful match.
#
# @param char not used by this implementation.
# @param pattern the matcher; it is sent #match with the current buffer.
# @return the MatchData, or nil when the pattern does not match.
def match_to_consume( char, pattern )
  md = pattern.match(@buffer)
  @buffer = md.post_match if md
  md
end
position() click to toggle source
# File lib/rexml/source.rb, line 112
# Offset of the unconsumed buffer within the original text.
#
# The buffer is what remains after consuming from the front, so it is looked
# up from the end of the original text.  Searching from the front reported 0
# for an exhausted source (the empty string is found at index 0) and a
# too-small offset whenever the remaining text also occurred earlier in the
# document.
#
# @return the character offset, or nil when the buffer is not found in the
#   original text.
def position
  @orig.rindex( @buffer )
end
read() click to toggle source
# File lib/rexml/source.rb, line 84
# Does nothing and returns nil.
# NOTE(review): presumably a hook for sources that can fetch more data -
# confirm against subclasses.
def read
  nil
end
scan(pattern, cons=false) click to toggle source

Scans the source for a given pattern. Note that this is not your usual scan() method. For one thing, the pattern argument has some requirements; for another, the source can be consumed. You can easily confuse this method. Originally, the patterns were easier to construct and this method more robust, because this method generated search regexps on the fly; however, this was computationally expensive and slowed down the entire REXML package considerably, since this is by far the most commonly called method. @param pattern must be a Regexp, and must be in the form of /^\s*(#{your pattern, with no groups})(.*)/. The first group will be returned; the second group is used if the consume flag is set. @param cons if true, the pattern returned will be consumed, leaving everything after it in the Source. @return the pattern, if found, or nil if the Source is empty or the pattern is not found.

# File lib/rexml/source.rb, line 77
# Runs String#scan over the buffer, optionally consuming what was matched.
#
# @param pattern the pattern handed to String#scan.
# @param cons when true and at least one match was found, the buffer is
#   replaced by the text following the last match.
# @return the result of String#scan on the buffer (an empty Array when
#   nothing matched), or nil when the buffer itself is nil.
def scan(pattern, cons=false)
  return nil if @buffer.nil?
  found = @buffer.scan(pattern)
  @buffer = Regexp.last_match.post_match if cons && !found.empty?
  found
end