class Prism::Translation::Parser::Lexer

Accepts a list of prism tokens and converts them into the expected format for the parser gem.

Constants

TYPES

The direct translation of token types between the two lexers.

Attributes

lexed[R]

An array of prism tokens that we lexed.

offset_cache[R]

A hash that maps offsets in bytes to offsets in characters.

source_buffer[R]

The Parser::Source::Buffer that the tokens were lexed from.

Public Class Methods

new(source_buffer, lexed, offset_cache) click to toggle source

Initialize the lexer with the given source buffer, prism tokens, and offset cache.

# File lib/prism/translation/parser/lexer.rb, line 193
# Keep hold of the source buffer, the prism tokens, and the offset cache so
# that the readers of the same names can hand them back later.
def initialize(source_buffer, lexed, offset_cache)
  @source_buffer, @lexed, @offset_cache = source_buffer, lexed, offset_cache
end

Public Instance Methods

to_a() click to toggle source

Convert the prism tokens into the expected format for the parser gem.

# File lib/prism/translation/parser/lexer.rb, line 203
# Convert the prism tokens into the expected format for the parser gem.
#
# Walks +lexed+ front to back and returns an array of
# <tt>[type, [value, location]]</tt> entries. IGNORED_NEWLINE and EOF tokens
# are skipped, and some prism token sequences are folded into one entry:
# embdoc lines into a single comment, simple quoted strings into tSTRING, and
# a symbol start plus its name into tSYMBOL. A leading "+" on an integer and
# the flags after a regexp terminator each produce an extra entry.
#
# The prism tokens are only read: every adjusted value is a new object, so
# calling this method repeatedly yields the same result.
#
# Raises KeyError when a token type has no entry in TYPES.
def to_a
  tokens = []
  index = 0

  while index < lexed.length
    token = token_at(index)
    index += 1
    next if token.type == :IGNORED_NEWLINE || token.type == :EOF

    type = TYPES.fetch(token.type)
    value = token.value
    start_offset = token.location.start_offset
    end_offset = token.location.end_offset
    location = range(start_offset, end_offset)

    case type
    when :tCHARACTER
      value = value.delete_prefix("?")
    when :tCOMMENT
      if token.type == :EMBDOC_BEGIN
        # Fold every following token up to and including EMBDOC_END into this
        # comment. If the token list ends first, stop there instead of
        # dereferencing a missing token.
        value = value.dup
        last_token = token

        while (next_token = token_at(index))
          value << next_token.value
          last_token = next_token
          index += 1
          break if next_token.type == :EMBDOC_END
        end

        location = range(start_offset, last_token.location.end_offset)
      else
        stripped = value.chomp

        # Only shorten the range when a line ending was actually removed; a
        # comment that ends the input without a newline keeps its full range.
        location = range(start_offset, end_offset - 1) if stripped.length < value.length
        value = stripped
      end
    when :tNL, :tSPACE, :tSTRING_DVAR
      value = nil
    when :tFLOAT
      value = Float(value)
    when :tIMAGINARY
      # Pass a copy: the helper may strip the literal's suffix in place.
      value = parse_complex(value.dup)
    when :tINTEGER
      if value.start_with?("+")
        # The sign becomes its own token and the integer's range starts
        # after it.
        tokens << [:tUNARY_NUM, ["+", range(start_offset, start_offset + 1)]]
        location = range(start_offset + 1, end_offset)
      end

      value = Integer(value)
    when :tLABEL, :tLABEL_END
      value = value.chomp(":")
    when :tNTH_REF
      value = Integer(value.delete_prefix("$"))
    when :tOP_ASGN
      value = value.chomp("=")
    when :tRATIONAL
      # Pass a copy: the helper may strip the literal's suffix in place.
      value = parse_rational(value.dup)
    when :tSTRING_BEG
      plain_quote = ["\"", "'"].include?(value)
      next_token = token_at(index)
      next_next_token = token_at(index + 1)

      if plain_quote && next_token&.type == :STRING_END
        # Empty quoted string: opening and closing quote collapse into one
        # tSTRING with an empty value.
        next_location = token.location.join(next_token.location)
        type = :tSTRING
        value = ""
        location = range(next_location.start_offset, next_location.end_offset)
        index += 1
      elsif plain_quote && next_token&.type == :STRING_CONTENT && next_next_token&.type == :STRING_END
        # Quoted string with a single content token: collapse all three
        # tokens into one tSTRING carrying the content.
        next_location = token.location.join(next_next_token.location)
        type = :tSTRING
        value = next_token.value
        location = range(next_location.start_offset, next_location.end_offset)
        index += 2
      elsif value.start_with?("<<")
        # Heredoc opener: reduce it to "<<" plus its quote character, using a
        # double quote when the identifier is not quoted with ' or ".
        quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
        value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
      end
    when :tSTRING_END
      if token.type == :REGEXP_END
        # Only the first character is the terminator; the rest is emitted as
        # tREGEXP_OPT below.
        value = value[0]
        location = range(start_offset, start_offset + 1)
      end
    when :tSYMBEG
      next_token = token_at(index)

      if next_token && !%i[STRING_CONTENT EMBEXPR_BEGIN EMBVAR].include?(next_token.type)
        # The symbol start is followed directly by its name: merge the two
        # into a single tSYMBOL.
        next_location = token.location.join(next_token.location)
        type = :tSYMBOL
        value = next_token.value
        value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
        location = range(next_location.start_offset, next_location.end_offset)
        index += 1
      end
    when :tFID
      type = :tIDENTIFIER if !tokens.empty? && tokens[-1][0] == :kDEF
    end

    tokens << [type, [value, location]]

    if token.type == :REGEXP_END
      tokens << [:tREGEXP_OPT, [token.value[1..], range(start_offset + 1, end_offset)]]
    end
  end

  tokens
end

# Return the token stored at +index+ in +lexed+, or nil when there is no entry
# at that index. The entry is destructured the same way for every read, so it
# may be a bare token or an array whose first element is the token.
private def token_at(index)
  token, = lexed[index]
  token
end

# Build a Range over the source buffer from two offsets, each translated
# through the offset cache.
private def range(start_offset, end_offset)
  Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
end

Private Instance Methods

parse_complex(value) click to toggle source

Parse a complex from the string representation.

# File lib/prism/translation/parser/lexer.rb, line 310
# Parse a complex from the string representation.
#
# +value+ is the text of an imaginary literal; a trailing "i" is dropped
# before the rest is interpreted as the imaginary part. The argument itself
# is left untouched, so frozen strings are accepted.
#
# Returns a Complex whose real part is 0.
def parse_complex(value)
  digits = value.chomp("i")

  if digits.end_with?("r")
    # Rational imaginary part.
    Complex(0, parse_rational(digits))
  elsif digits.start_with?(/0[BbOoDdXx]/)
    # Radix-prefixed integers are converted through Integer().
    Complex(0, Integer(digits))
  else
    Complex(0, digits)
  end
end
parse_rational(value) click to toggle source

Parse a rational from the string representation.

# File lib/prism/translation/parser/lexer.rb, line 323
# Parse a rational from the string representation.
#
# +value+ is the text of a rational literal; a trailing "r" is dropped before
# the rest is converted. The argument itself is left untouched, so frozen
# strings are accepted.
#
# Returns a Rational.
def parse_rational(value)
  digits = value.chomp("r")

  if digits.start_with?(/0[BbOoDdXx]/)
    # Radix-prefixed integers are converted through Integer() first.
    Rational(Integer(digits))
  else
    Rational(digits)
  end
end