# File irb/ruby-lex.rb, line 40
# Builds a lexer: installs the operator rules, selects STDIN as the input
# source, and resets every piece of scanner state to its starting value.
def initialize
  lex_init
  set_input(STDIN)

  # Position bookkeeping.
  @seek = 0
  @exp_line_no = @line_no = 1
  @base_char_no = @char_no = 0

  # Character buffers: pending input, consumed input, and input consumed
  # while a here-document header is active.
  @rests = []
  @readed = []
  @here_readed = []

  # Nesting and lexer state.
  @indent = 0
  @indent_stack = []
  @lex_state = EXPR_BEG
  @space_seen = @here_header = @continue = false
  @line = ""

  # Behaviour switches and the optional prompt callback.
  @skip_space = @readed_auto_clean_up = false
  @exception_on_syntax_error = true
  @prompt = nil
end
# File irb/ruby-lex.rb, line 227
# Reads the input statement by statement and yields each accumulated
# statement together with the line number on which it started.
#
# Text returned by +lex+ is appended to @line until no literal is open
# (@ltype), no continuation is pending (@continue) and @indent is zero.
# A statement consisting of a lone newline is not yielded. Iteration ends
# when +lex+ returns nil. A TerminateLineInput raised along the way
# discards the partial statement and restarts with fresh input state.
def each_top_level_statement
  initialize_input
  catch(:TERM_INPUT) do
    loop do
      begin
        @continue = false
        prompt
        chunk = lex
        if chunk
          @line.concat chunk
          # Statement still open: keep reading before yielding anything.
          next if @ltype || @continue || @indent > 0
        elsif @line == ''
          # End of input with nothing buffered.
          throw :TERM_INPUT
        end
        yield @line, @exp_line_no unless @line == "\n"
        break unless chunk
        @line = ''
        @exp_line_no = @line_no
        @indent = 0
        @indent_stack = []
        prompt
      rescue TerminateLineInput
        initialize_input
        prompt
        get_readed
      end
    end
  end
end
# File irb/ruby-lex.rb, line 89
# Returns everything consumed since the previous call as one string and
# empties the consumed-character buffer.
#
# @base_char_no is updated first: if the buffer contains a newline it
# becomes the number of entries after the last newline, otherwise the
# buffer's length is added to it.
def get_readed
  last_newline = @readed.rindex("\n")
  if last_newline
    @base_char_no = @readed.size - last_newline - 1
  else
    @base_char_no += @readed.size
  end
  consumed = @readed.join("")
  @readed = []
  consumed
end
# File irb/ruby-lex.rb, line 101
# Consumes and returns the next character, or nil once +buf_input+ can
# supply no more input.
#
# The character is recorded in @here_readed while a here-document header
# is active and in @readed otherwise. @seek always advances; a newline
# bumps @line_no and resets @char_no, any other value bumps @char_no.
def getc
  while @rests.empty?
    # No more input: queue a nil so the caller sees end of input.
    @rests.push(nil) unless buf_input
  end
  ch = @rests.shift
  (@here_header ? @here_readed : @readed).push(ch)
  @seek += 1
  newline = (ch == "\n")
  @line_no += 1 if newline
  @char_no = newline ? 0 : @char_no + 1
  ch
end
# File irb/ruby-lex.rb, line 136
# Like +getc+, but only takes a character that is already buffered in
# @rests; returns nil when the buffer is empty.
def getc_of_rests
  getc unless @rests.empty?
end
# File irb/ruby-lex.rb, line 122
# Reads characters up to and including the next newline and returns them
# as a string. A final line without a newline is returned as is; nil is
# returned only when nothing could be read at all.
def gets
  line = ""
  ch = getc
  while ch
    line.concat(ch)
    break if ch == "\n"
    ch = getc
  end
  return nil if ch.nil? && line == ""
  line
end
# File irb/ruby-lex.rb, line 1085
# Skips a "#" comment and returns a TkCOMMENT token.
#
# @ltype is "#" while scanning. When the terminating newline is found it
# is pushed back for the newline rule and @ltype is cleared; if the input
# ends first, @ltype is left as "#".
def identify_comment
  @ltype = "#"
  ch = getc
  ch = getc while ch && ch != "\n"
  if ch == "\n"
    @ltype = nil
    ungetc
  end
  Token(TkCOMMENT)
end
# File irb/ruby-lex.rb, line 746
# Lexes what follows a "$" and returns the matching token; @lex_state
# becomes EXPR_END.
#
# * a punctuation name such as "$~" or "$_"  -> TkGVAR
# * "$-" plus one option character           -> TkGVAR
# * "$&", "$`", "$'", "$+"                   -> TkBACK_REF
# * "$1", "$23", ...                         -> TkNTH_REF
# * a word character: the character and the "$" are both pushed back and
#   identify_identifier scans the whole name
# * anything else is pushed back and a bare "$" token is returned
def identify_gvar
  @lex_state = EXPR_END
  case ch = getc
  when /[~_*$?!@\/\;,=:<>".]/
    Token(TkGVAR, "$" + ch)
  when "-"
    # getc returns nil at end of input; interpolating (rather than using
    # String#+) avoids a TypeError for an input that ends right after "$-".
    Token(TkGVAR, "$-#{getc}")
  when "&", "`", "'", "+"
    Token(TkBACK_REF, "$" + ch)
  when /[1-9]/
    # Swallow the remaining digits, then give back the first non-digit.
    while getc =~ /[0-9]/; end
    ungetc
    Token(TkNTH_REF)
  when /\w/
    ungetc
    ungetc
    identify_identifier
  else
    ungetc
    Token("$")
  end
end
# File irb/ruby-lex.rb, line 879
# Lexes a here-document whose "<<" has just been consumed and returns the
# string token for its quote type.
#
# Steps:
# 1. Read the optional "-" flag and the terminator, either quoted with
#    ', " or ` or a bare word (treated as double-quoted).
# 2. Set aside the remainder of the header line in +reserve+.
# 3. Consume body lines until one equals the terminator (compared after
#    stripping surrounding whitespace when "-" was given).
# 4. Push the reserved header remainder back so the rest of the header
#    line is lexed next.
#
# @ltype holds the quote type while scanning and is restored afterwards;
# @lex_state becomes EXPR_END.
def identify_here_document
  indent = false
  ch = getc
  if ch == "-"
    ch = getc
    indent = true
  end

  if /['"`]/ =~ ch
    lt = ch
    quoted = ""
    while (c = getc) && c != lt
      quoted.concat c
    end
  else
    lt = '"'
    quoted = ch.dup
    while (c = getc) && c =~ /\w/
      quoted.concat c
    end
    ungetc
  end

  ltback, @ltype = @ltype, lt

  # Collect the rest of the header line; a backslash keeps the character
  # that follows it from being treated as the end of the line.
  reserve = []
  while ch = getc
    reserve.push ch
    if ch == "\\"
      reserve.push ch = getc
    elsif ch == "\n"
      break
    end
  end

  # Body characters are recorded in @readed, not @here_readed.
  @here_header = false
  while l = gets
    # Strip the line terminator (LF or CRLF) only. The previous pattern,
    # /(:?\r)?\n\z/, was a typo for a non-capturing group and also removed
    # a ":" preceding "\r\n", so a line such as "EOS:\r\n" was mistaken
    # for the terminator "EOS".
    l = l.sub(/\r?\n\z/, '')
    break if (indent ? l.strip : l) == quoted
  end

  @here_header = true
  @here_readed.concat reserve
  while ch = reserve.pop
    ungetc ch
  end

  @ltype = ltback
  @lex_state = EXPR_END
  Token(Ltype2Token[lt])
end
# File irb/ruby-lex.rb, line 770
# Scans an identifier, a reserved word, or a $/@/@@-prefixed variable name
# starting at the current input position and returns the token for it.
# Along the way it updates @lex_state and, for reserved words listed in
# ENINDENT_CLAUSE / DEINDENT_CLAUSE, @indent and @indent_stack.
def identify_identifier
token = ""
# Optional sigil: "$", "@" or "@@".
if peek(0) =~ /[$@]/
token.concat(c = getc)
if c == "@" and peek(0) == "@"
token.concat getc
end
end
# Body of the name: word characters up to the first non-word character,
# which is pushed back afterwards.
while (ch = getc) =~ /\w|_/
print ":", ch, ":" if RubyLex.debug?
token.concat ch
end
ungetc
# A trailing "!" or "?" belongs to the name unless it is followed by "="
# (so that "a!=b" is not read as the name "a!").
if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "="
token.concat getc
end
# The token text is now complete; variables are classified by their sigil.
case token
when /^\$/
return Token(TkGVAR, token)
when /^\@\@/
@lex_state = EXPR_END
# p Token(TkCVAR, token)
return Token(TkCVAR, token)
when /^\@/
@lex_state = EXPR_END
return Token(TkIVAR, token)
end
# After a "." (EXPR_DOT) the word is never treated as a reserved word.
if @lex_state != EXPR_DOT
print token, "\n" if RubyLex.debug?
# TkReading2Token maps the word to its token class plus extra entries;
# trans[0] is used below as the next lexer state and trans[1], when
# present, as the key of an alternate token class in TkSymbol2Token.
token_c, *trans = TkReading2Token[token]
if token_c
# reserved word?
if (@lex_state != EXPR_BEG &&
@lex_state != EXPR_FNAME &&
trans[1])
# modifiers
token_c = TkSymbol2Token[trans[1]]
@lex_state = trans[0]
else
if @lex_state != EXPR_FNAME
if ENINDENT_CLAUSE.include?(token)
# check for ``class = val'' etc.
# The word only opens a new nesting level when what follows it looks
# like a real use of the keyword rather than an operator or assignment.
valid = true
case token
when "class"
valid = false unless peek_match?(/^\s*(<<|\w|::)/)
when "def"
valid = false if peek_match?(/^\s*(([+-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/)
when "do"
valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&)/)
when *ENINDENT_CLAUSE
valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&|\|)/)
else
# do nothing
end
if valid
if token == "do"
# A "do" directly inside for/while/until does not add a level of its own.
if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
@indent += 1
@indent_stack.push token_c
end
else
@indent += 1
@indent_stack.push token_c
end
# p @indent_stack
end
elsif DEINDENT_CLAUSE.include?(token)
@indent -= 1
@indent_stack.pop
end
@lex_state = trans[0]
else
# Reserved word in method-name position (EXPR_FNAME).
@lex_state = EXPR_END
end
end
return Token(token_c, token)
end
end
# Not a reserved word: choose the next lexer state from the current one.
if @lex_state == EXPR_FNAME
@lex_state = EXPR_END
# In method-name position a directly following "=" is part of the name.
if peek(0) == '='
token.concat getc
end
elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
@lex_state = EXPR_ARG
else
@lex_state = EXPR_END
end
# Classify: leading capital -> constant, trailing "!"/"?" -> TkFID,
# anything else -> plain identifier.
if token[0, 1] =~ /[A-Z]/
return Token(TkCONSTANT, token)
elsif token[token.size - 1, 1] =~ /[!?]/
return Token(TkFID, token)
else
return Token(TkIDENTIFIER, token)
end
end
# File irb/ruby-lex.rb, line 949
# Scans a numeric literal and returns a TkINTEGER or TkFLOAT token;
# @lex_state becomes EXPR_END. Malformed literals are reported through
# RubyLex.fail with SyntaxError.
def identify_number
@lex_state = EXPR_END
# Literals starting with "0" that are not "0.", "0e" or "0E": a radix
# prefix, a bare octal literal, or plain zero.
if peek(0) == "0" && peek(1) !~ /[.eE]/
getc
# +match+ is the set of characters allowed in the digits part; the prefix
# letter, when there is one, is consumed here.
case peek(0)
when /[xX]/
ch = getc
match = /[0-9a-fA-F_]/
when /[bB]/
ch = getc
match = /[01_]/
when /[oO]/
ch = getc
match = /[0-7_]/
when /[dD]/
ch = getc
match = /[0-9_]/
when /[0-7]/
match = /[0-7_]/
when /[89]/
RubyLex.fail SyntaxError, "Illegal octal digit"
else
# A lone "0".
return Token(TkINTEGER)
end
# len0 stays true until a real digit is seen; non_digit holds a pending
# "_" that still needs a digit after it.
len0 = true
non_digit = false
while ch = getc
if match =~ ch
if ch == "_"
if non_digit
# Two underscores in a row.
RubyLex.fail SyntaxError, "trailing `#{ch}' in number"
else
non_digit = ch
end
else
non_digit = false
len0 = false
end
else
# First character that is not part of the literal: push it back and
# validate what was collected.
ungetc
if len0
RubyLex.fail SyntaxError, "numeric literal without digits"
end
if non_digit
RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
end
break
end
end
return Token(TkINTEGER)
end
# Decimal integer or float.
type = TkINTEGER
allow_point = true
allow_e = true
non_digit = false
while ch = getc
case ch
when /[0-9]/
non_digit = false
when "_"
non_digit = ch
# When allow_point is false the when-value is false, which never matches
# a character, so a second "." falls through to the else branch.
when allow_point && "."
if non_digit
RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
end
type = TkFLOAT
# A "." not followed by a digit is not part of the number: push it back
# and finish as an integer.
if peek(0) !~ /[0-9]/
type = TkINTEGER
ungetc
break
end
allow_point = false
# Exponent marker, accepted once; an optional sign is consumed with it.
when allow_e && "e", allow_e && "E"
if non_digit
RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
end
type = TkFLOAT
if peek(0) =~ /[+-]/
getc
end
allow_e = false
allow_point = false
# Marks the exponent as still needing a digit.
non_digit = ch
else
if non_digit
RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
end
ungetc
break
end
end
Token(type)
end
# File irb/ruby-lex.rb, line 931
# Lexes a %-literal whose "%" has just been consumed.
#
# A type character listed in PERCENT_LTYPE selects the literal type and is
# followed by the delimiter; a non-word character directly after "%" is
# itself the delimiter of a double-quoted string. Anything else is
# reported as "unknown type of %string". An opening bracket listed in
# PERCENT_PAREN is closed by its partner, any other delimiter by itself.
def identify_quotation
  ch = getc
  lt = PERCENT_LTYPE[ch]
  if lt
    ch = getc
  elsif ch =~ /\W/
    lt = "\""
  else
    RubyLex.fail SyntaxError, "unknown type of %string"
  end
  @quoted = PERCENT_PAREN[ch] || ch
  identify_string(lt, @quoted)
end
# File irb/ruby-lex.rb, line 1046
# Consumes a string-like literal up to its closing delimiter and returns
# the token for it.
#
# ltype::  literal type character; stored in @ltype while scanning and
#          used as the key into Ltype2Token / DLtype2Token.
# quoted:: closing delimiter (defaults to +ltype+).
#
# A "#" seen inside a literal whose type is not "'", "]" or ":" selects
# the DLtype2Token variant. When the delimiter is a closing bracket from
# PERCENT_PAREN, nested bracket pairs are balanced. A single regexp option
# letter is consumed after a "/" literal. @ltype and @quoted are always
# cleared on the way out and @lex_state becomes EXPR_END, even when an
# exception is raised.
def identify_string(ltype, quoted = ltype)
  @ltype = ltype
  @quoted = quoted
  subtype = nil
  nest = 0
  # Loop-invariant: whether the delimiter is a closing bracket that nests.
  paired = PERCENT_PAREN.values.include?(@quoted)

  while ch = getc
    if @quoted == ch and nest == 0
      break
    elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#"
      subtype = true
    elsif ch == "\\"
      # Skip the escape sequence so that an escaped delimiter does not end
      # the literal. (The comparison used to be against the garbled
      # literal '\' #', which never equals a single character.)
      read_escape
    end
    if paired
      if PERCENT_PAREN[ch] == @quoted
        nest += 1
      elsif ch == @quoted
        nest -= 1
      end
    end
  end

  if @ltype == "/"
    if peek(0) =~ /i|m|x|o|e|s|u|n/
      getc
    end
  end

  if subtype
    Token(DLtype2Token[ltype])
  else
    Token(Ltype2Token[ltype])
  end
ensure
  @ltype = nil
  @quoted = nil
  @lex_state = EXPR_END
end
# File irb/ruby-lex.rb, line 211
# Resets the per-statement lexer state, issues a prompt, and then starts a
# new, empty statement buffer beginning at the current line number.
def initialize_input
  @ltype = @quoted = nil
  @indent = 0
  @indent_stack = []
  @lex_state = EXPR_BEG
  @space_seen = @here_header = @continue = false
  # The prompt is issued before the statement buffer is replaced.
  prompt
  @line = ""
  @exp_line_no = @line_no
end
# File irb/ruby-lex.rb, line 262
# Reads tokens until a line is complete and returns the text consumed.
#
# Scanning stops at a nil token, or at a TkNL / TkEND_OF_SCRIPT token
# while @continue is false. Returns nil when nothing was consumed and the
# last token was nil or TkEND_OF_SCRIPT.
def lex
  tk = token
  until tk.nil? || (!@continue && (tk.kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)))
    tk = token
  end
  line = get_readed
  at_end = tk.nil? || tk.kind_of?(TkEND_OF_SCRIPT)
  line == "" && at_end ? nil : line
end
# File irb/ruby-lex.rb, line 339
# Creates the IRB::SLex rule table (@OP) and registers the first half of
# the operator rules; lex_int2 registers the rest.
#
# Each rule block receives the matched operator text and the input object
# and returns the token for it, updating lexer state (@lex_state,
# @space_seen, @continue, @indent_stack, ...) as a side effect.
#
# The control-character and backslash literals below were garbled in the
# extracted listing ("\00"", "\0004", "\0032", "\113"", '\'), which left
# the method unparseable. They are restored to "\0", "\004", "\032",
# "\13" (the same class the whitespace regexp uses) and '\\'.
def lex_init()
  @OP = IRB::SLex.new

  # NUL, ^D and ^Z all yield an end-of-script token.
  @OP.def_rules("\0", "\004", "\032") do |op, io|
    Token(TkEND_OF_SCRIPT)
  end

  # A run of blanks collapses into a single TkSPACE.
  @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io|
    @space_seen = true
    while getc =~ /[ \t\f\r\13]/; end
    ungetc
    Token(TkSPACE)
  end

  @OP.def_rule("#") do |op, io|
    identify_comment
  end

  # "=begin" at the start of a line opens an embedded document, which runs
  # up to and including the line that starts with "=end".
  @OP.def_rule("=begin",
               proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do |op, io|
    @ltype = "="
    until getc == "\n"; end
    until peek_equal?("=end") && peek(4) =~ /\s/
      until getc == "\n"; end
    end
    gets
    @ltype = nil
    Token(TkRD_COMMENT)
  end

  @OP.def_rule("\n") do |op, io|
    print "\\n\n" if RubyLex.debug?
    case @lex_state
    when EXPR_BEG, EXPR_FNAME, EXPR_DOT
      # An operand is still expected: the statement continues.
      @continue = true
    else
      @continue = false
      @lex_state = EXPR_BEG
      # Drop stack entries down to the innermost open bracket.
      until (@indent_stack.empty? ||
             [TkLPAREN, TkLBRACK, TkLBRACE,
              TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
        @indent_stack.pop
      end
    end
    @here_header = false
    @here_readed = []
    Token(TkNL)
  end

  @OP.def_rules("*", "**",
                "=", "==", "===",
                "=~", "<=>",
                "<", "<=",
                ">", ">=", ">>") do |op, io|
    case @lex_state
    when EXPR_FNAME, EXPR_DOT
      @lex_state = EXPR_ARG
    else
      @lex_state = EXPR_BEG
    end
    Token(op)
  end

  @OP.def_rules("!", "!=", "!~") do |op, io|
    @lex_state = EXPR_BEG
    Token(op)
  end

  # "<<" is either a here-document introducer or the shift/append operator.
  @OP.def_rules("<<") do |op, io|
    tk = nil
    if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
       (@lex_state != EXPR_ARG || @space_seen)
      c = peek(0)
      if /\S/ =~ c && (/["'`]/ =~ c || /[\w_]/ =~ c || c == "-")
        tk = identify_here_document
      end
    end
    unless tk
      tk = Token(op)
      case @lex_state
      when EXPR_FNAME, EXPR_DOT
        @lex_state = EXPR_ARG
      else
        @lex_state = EXPR_BEG
      end
    end
    tk
  end

  @OP.def_rules("'", '"') do |op, io|
    identify_string(op)
  end

  @OP.def_rules("`") do |op, io|
    if @lex_state == EXPR_FNAME
      @lex_state = EXPR_END
      Token(op)
    else
      identify_string(op)
    end
  end

  # "?" is the ternary operator after an expression, otherwise a
  # character literal.
  @OP.def_rules('?') do |op, io|
    if @lex_state == EXPR_END
      @lex_state = EXPR_BEG
      Token(TkQUESTION)
    else
      ch = getc
      if @lex_state == EXPR_ARG && ch =~ /\s/
        ungetc
        @lex_state = EXPR_BEG
        Token(TkQUESTION)
      else
        if ch == '\\'
          read_escape
        end
        @lex_state = EXPR_END
        Token(TkINTEGER)
      end
    end
  end

  @OP.def_rules("&", "&&", "|", "||") do |op, io|
    @lex_state = EXPR_BEG
    Token(op)
  end

  # Operator assignments: the token carries the operator without its "=".
  @OP.def_rules("+=", "-=", "*=", "**=",
                "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do |op, io|
    @lex_state = EXPR_BEG
    op =~ /^(.*)=$/
    Token(TkOPASGN, $1)
  end

  @OP.def_rule("+@", proc{|op, io| @lex_state == EXPR_FNAME}) do |op, io|
    @lex_state = EXPR_ARG
    Token(op)
  end

  @OP.def_rule("-@", proc{|op, io| @lex_state == EXPR_FNAME}) do |op, io|
    @lex_state = EXPR_ARG
    Token(op)
  end

  # "+" / "-" start a signed number when a digit follows in operand
  # position; otherwise they are operators.
  @OP.def_rules("+", "-") do |op, io|
    catch(:RET) do
      if @lex_state == EXPR_ARG
        if @space_seen and peek(0) =~ /[0-9]/
          throw :RET, identify_number
        else
          @lex_state = EXPR_BEG
        end
      elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
        throw :RET, identify_number
      else
        @lex_state = EXPR_BEG
      end
      Token(op)
    end
  end

  @OP.def_rule(".") do |op, io|
    @lex_state = EXPR_BEG
    if peek(0) =~ /[0-9]/
      ungetc
      identify_number
    else
      # for "obj.if" etc.
      @lex_state = EXPR_DOT
      Token(TkDOT)
    end
  end

  @OP.def_rules("..", "...") do |op, io|
    @lex_state = EXPR_BEG
    Token(op)
  end

  lex_int2
end
# File irb/ruby-lex.rb, line 533
# Registers the second half of the operator rules on @OP: brackets,
# colons, "/", "%", sigils, the line-continuation backslash, and the
# catch-all rule for numbers and identifiers.
#
# Two defects of the extracted listing are corrected here:
# * the backslash rule key was garbled to '\' (an unterminated literal
#   that made the method unparseable); it is restored to '\\';
# * "%=" now sets @lex_state to EXPR_BEG, as "/=" and the other
#   operator-assignment rules do.
def lex_int2
  # Closing brackets end an expression and close one nesting level.
  @OP.def_rules("]", "}", ")") do |op, io|
    @lex_state = EXPR_END
    @indent -= 1
    @indent_stack.pop
    Token(op)
  end

  @OP.def_rule(":") do |op, io|
    if @lex_state == EXPR_END || peek(0) =~ /\s/
      @lex_state = EXPR_BEG
      Token(TkCOLON)
    else
      @lex_state = EXPR_FNAME
      Token(TkSYMBEG)
    end
  end

  @OP.def_rule("::") do |op, io|
    # p @lex_state.id2name, @space_seen
    if @lex_state == EXPR_BEG || (@lex_state == EXPR_ARG && @space_seen)
      @lex_state = EXPR_BEG
      Token(TkCOLON3)
    else
      @lex_state = EXPR_DOT
      Token(TkCOLON2)
    end
  end

  # "/" opens a regexp in operand position, otherwise it is division or
  # the "/=" assignment.
  @OP.def_rule("/") do |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_string(op)
    elsif peek(0) == '='
      getc
      @lex_state = EXPR_BEG
      Token(TkOPASGN, "/")
    elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
      identify_string(op)
    else
      @lex_state = EXPR_BEG
      Token("/")
    end
  end

  @OP.def_rules("^") do |op, io|
    @lex_state = EXPR_BEG
    Token("^")
  end

  #       @OP.def_rules("^=") do
  #       	@lex_state = EXPR_BEG
  #       	Token(OP_ASGN, :^)
  #       end

  @OP.def_rules(",") do |op, io|
    @lex_state = EXPR_BEG
    Token(op)
  end

  @OP.def_rules(";") do |op, io|
    @lex_state = EXPR_BEG
    # Drop stack entries down to the innermost open bracket.
    until (@indent_stack.empty? ||
           [TkLPAREN, TkLBRACK, TkLBRACE,
            TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
      @indent_stack.pop
    end
    Token(op)
  end

  @OP.def_rule("~") do |op, io|
    @lex_state = EXPR_BEG
    Token("~")
  end

  @OP.def_rule("~@", proc{|op, io| @lex_state == EXPR_FNAME}) do |op, io|
    @lex_state = EXPR_BEG
    Token("~")
  end

  @OP.def_rule("(") do |op, io|
    @indent += 1
    tk_c = if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
             TkfLPAREN
           else
             TkLPAREN
           end
    @lex_state = EXPR_BEG
    @indent_stack.push tk_c
    Token(tk_c)
  end

  @OP.def_rule("[]", proc{|op, io| @lex_state == EXPR_FNAME}) do |op, io|
    @lex_state = EXPR_ARG
    Token("[]")
  end

  @OP.def_rule("[]=", proc{|op, io| @lex_state == EXPR_FNAME}) do |op, io|
    @lex_state = EXPR_ARG
    Token("[]=")
  end

  @OP.def_rule("[") do |op, io|
    @indent += 1
    if @lex_state == EXPR_FNAME
      tk_c = TkfLBRACK
    else
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
        tk_c = TkLBRACK
      elsif @lex_state == EXPR_ARG && @space_seen
        tk_c = TkLBRACK
      else
        tk_c = TkfLBRACK
      end
      @lex_state = EXPR_BEG
    end
    @indent_stack.push tk_c
    Token(tk_c)
  end

  @OP.def_rule("{") do |op, io|
    @indent += 1
    if @lex_state != EXPR_END && @lex_state != EXPR_ARG
      tk_c = TkLBRACE
    else
      tk_c = TkfLBRACE
    end
    @lex_state = EXPR_BEG
    @indent_stack.push tk_c
    Token(tk_c)
  end

  # Backslash-newline is a line continuation and lexes as whitespace.
  @OP.def_rule('\\') do |op, io|
    if getc == "\n"
      @space_seen = true
      @continue = true
      Token(TkSPACE)
    else
      ungetc
      Token("\\")
    end
  end

  # "%" opens a %-literal in operand position, otherwise it is modulo or
  # the "%=" assignment.
  @OP.def_rule('%') do |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_quotation
    elsif peek(0) == '='
      getc
      # An operand follows an operator assignment.
      @lex_state = EXPR_BEG
      Token(TkOPASGN, :%)
    elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
      identify_quotation
    else
      @lex_state = EXPR_BEG
      Token("%")
    end
  end

  @OP.def_rule('$') do |op, io|
    identify_gvar
  end

  @OP.def_rule('@') do |op, io|
    if peek(0) =~ /[\w_@]/
      ungetc
      identify_identifier
    else
      Token("@")
    end
  end

  #       @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do
  #       	|op, io|
  #       	@indent += 1
  #       	@lex_state = EXPR_FNAME
  #       #	@lex_state = EXPR_END
  #       #	until @rests[0] == "\n" or @rests[0] == ";"
  #       #	  rests.shift
  #       #	end
  #       end

  # Catch-all: numbers and identifiers. Returns nil when neither starts
  # at the current position.
  @OP.def_rule("") do |op, io|
    printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
    if peek(0) =~ /[0-9]/
      t = identify_number
    elsif peek(0) =~ /[\w_]/
      t = identify_identifier
    end
    printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
    t
  end

  p @OP if RubyLex.debug?
end
# File irb/ruby-lex.rb, line 180
# Returns the character +i+ positions ahead without consuming it, asking
# +buf_input+ for more input as needed; nil when the input runs out first.
def peek(i = 0)
  until @rests.size > i
    return nil unless buf_input
  end
  @rests[i]
end
# File irb/ruby-lex.rb, line 165
# True when the upcoming input starts with +str+. More input is requested
# from +buf_input+ until enough characters are buffered; false when the
# input ends first.
def peek_equal?(str)
  wanted = str.split(//)
  while @rests.size < wanted.size
    return false unless buf_input
  end
  @rests.first(wanted.size) == wanted
end
# File irb/ruby-lex.rb, line 173
# Matches +regexp+ against all currently buffered input and returns the
# match position (or nil). When the buffer is empty, input is requested
# first; false is returned if none is available.
def peek_match?(regexp)
  until !@rests.empty?
    return false unless buf_input
  end
  buffered = @rests.join("")
  regexp =~ buffered
end
# File irb/ruby-lex.rb, line 205
# Invokes the prompt callback, if one is set, with the open literal type,
# the indent level, the continuation flag and the current line number.
def prompt
  @prompt.call(@ltype, @indent, @continue, @line_no) if @prompt
end
# File irb/ruby-lex.rb, line 1101
# Consumes the remainder of a backslash escape; the backslash itself has
# already been read. Handles single-character escapes, up to three octal
# digits, "x" plus up to two hex digits, "M-" and "C-"/"c" control forms
# (which may themselves be followed by another escape). Any other
# character is simply consumed.
def read_escape
  case ch = getc
  when "\n", "\r", "\f"
    # escaped line break or form feed: nothing more to read
  when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s"
    # single-character escape: nothing more to read
  when /[0-7]/
    # Put the first digit back, then read at most three octal digits.
    ungetc ch
    3.times do
      ch = getc
      break if ch.nil?
      next if /[0-7]/ === ch
      ungetc
      break
    end
  when "x"
    # At most two hex digits.
    2.times do
      ch = getc
      break if ch.nil?
      next if /[0-9a-fA-F]/ === ch
      ungetc
      break
    end
  when "M"
    if getc != '-'
      ungetc
    elsif getc == "\\"
      read_escape
    end
  when "C", "c" #, "^"
    # "C" must be followed by "-"; "c" takes the next character directly.
    if ch == "C" && getc != "-"
      ungetc
    elsif getc == "\\"
      read_escape
    end
  else
    # other characters
  end
end
# I/O functions
# File irb/ruby-lex.rb, line 78
# Selects where the lexer reads from. +io+ is remembered in @io; the line
# reader stored in @input is, in order of preference, +p+ when it is
# callable, the given block, or a proc that calls @io.gets.
def set_input(io, p = nil, &block)
  @io = io
  @input =
    if p.respond_to?(:call)
      p
    elsif block
      block
    else
      Proc.new { @io.gets }
    end
end
# File irb/ruby-lex.rb, line 196
# Installs the prompt callback. A block takes precedence over +p+. A
# callable is stored as is; any other value is wrapped in a proc that
# prints it.
def set_prompt(p = nil, &block)
  p = block if block_given?
  @prompt = p.respond_to?(:call) ? p : Proc.new { print p }
end
# File irb/ruby-lex.rb, line 279
# Returns the next token from the rule table.
#
# The position before the token is saved in @prev_seek, @prev_line_no and
# @prev_char_no. TkSPACE tokens are skipped while @skip_space is set. A
# SyntaxError is re-raised when @exception_on_syntax_error is set and
# otherwise turned into a TkError carrying the current position. When
# @readed_auto_clean_up is set the consumed-text buffer is flushed
# afterwards.
def token
  # require "tracer"
  # Tracer.on
  @prev_seek = @seek
  @prev_line_no = @line_no
  @prev_char_no = @char_no

  tk = nil
  first_pass = true
  while first_pass || (@skip_space && tk.kind_of?(TkSPACE))
    first_pass = false
    begin
      tk = @OP.match(self)
      @space_seen = tk.kind_of?(TkSPACE)
    rescue SyntaxError
      raise if @exception_on_syntax_error
      tk = TkError.new(@seek, @line_no, @char_no)
    end
  end

  get_readed if @readed_auto_clean_up
  # Tracer.off
  tk
end
# File irb/ruby-lex.rb, line 144
# Pushes a character back onto the front of the pending input.
#
# The most recently consumed character is removed from @here_readed (when
# it is non-empty) or from @readed. That character is pushed back unless
# an explicit +c+ is given, in which case +c+ is pushed back instead.
# @seek, @line_no and @char_no are rewound to match.
def ungetc(c = nil)
  c2 = @here_readed.empty? ? @readed.pop : @here_readed.pop
  c = c2 unless c
  @rests.unshift c
  @seek -= 1
  if c == "\n"
    @line_no -= 1
    # Back at the end of the previous line: the column is the number of
    # characters consumed since the last newline. This used to be computed
    # as @readed.size - idx (the index just past the last newline), which
    # disagreed with how get_readed derives @base_char_no.
    last_newline = @readed.rindex("\n")
    if last_newline
      @char_no = @readed.size - last_newline - 1
    else
      @char_no = @base_char_no + @readed.size
    end
  else
    @char_no -= 1
  end
end
Commenting is here to help enhance the documentation. For example, code samples, or clarification of the documentation.
If you have questions about Ruby or the documentation, please post to one of the Ruby mailing lists. You will get better, faster help that way.
If you wish to post a correction of the docs, please do so, but also file a bug report so that it can be corrected for the next release. Thank you.
If you want to help improve the Ruby documentation, please see Improve the docs, or visit Documenting-ruby.org.