Lexical analyzer for Ruby source
# File rdoc/parsers/parse_rb.rb, line 443
def RubyLex.debug?
false
end
# File rdoc/parsers/parse_rb.rb, line 447
def initialize(content)
lex_init
@reader = BufferedReader.new(content)
@exp_line_no = @line_no = 1
@base_char_no = 0
@indent = 0
@ltype = nil
@quoted = nil
@lex_state = EXPR_BEG
@space_seen = false
@continue = false
@line = ""
@skip_space = false
@read_auto_clean_up = false
@exception_on_syntax_error = true
end
# File rdoc/parsers/parse_rb.rb, line 480
def char_no
@reader.column
end
# File rdoc/parsers/parse_rb.rb, line 484
def get_read
@reader.get_read
end
# File rdoc/parsers/parse_rb.rb, line 492
def getc_of_rests
@reader.getc_already_read
end
# File rdoc/parsers/parse_rb.rb, line 496
def gets
c = getc or return
l = ""
begin
l.concat c unless c == "\r"
break if c == "\n"
end while c = getc
l
end
# File rdoc/parsers/parse_rb.rb, line 1269
def identify_comment
@ltype = "#"
comment = "#"
while ch = getc
if ch == "\\"
ch = getc
if ch == "\n"
ch = " "
else
comment << "\\"
end
else
if ch == "\n"
@ltype = nil
ungetc
break
end
end
comment << ch
end
return Token(TkCOMMENT).set_text(comment)
end
# File rdoc/parsers/parse_rb.rb, line 964
def identify_gvar
@lex_state = EXPR_END
str = "$"
tk = case ch = getc
when /[~_*$?!@\/\;,=:<>".]/ #"
str << ch
Token(TkGVAR, str)
when "-"
str << "-" << getc
Token(TkGVAR, str)
when "&", "`", "'", "+"
str << ch
Token(TkBACK_REF, str)
when /[1-9]/
str << ch
while (ch = getc) =~ /[0-9]/
str << ch
end
ungetc
Token(TkNTH_REF)
when /\w/
ungetc
ungetc
return identify_identifier
else
ungetc
Token("$")
end
tk.set_text(str)
end
# File rdoc/parsers/parse_rb.rb, line 1074
def identify_here_document
ch = getc
if ch == "-"
ch = getc
indent = true
end
if /['"`]/ =~ ch # '
lt = ch
quoted = ""
while (c = getc) && c != lt
quoted.concat c
end
else
lt = '"'
quoted = ch.dup
while (c = getc) && c =~ /\w/
quoted.concat c
end
ungetc
end
ltback, @ltype = @ltype, lt
reserve = ""
while ch = getc
reserve << ch
if ch == "\\" #"
ch = getc
reserve << ch
elsif ch == "\n"
break
end
end
str = ""
while (l = gets)
l.chomp!
l.strip! if indent
break if l == quoted
str << l.chomp << "\n"
end
@reader.divert_read_from(reserve)
@ltype = ltback
@lex_state = EXPR_END
Token(Ltype2Token[lt], str).set_text(str.dump)
end
# File rdoc/parsers/parse_rb.rb, line 999
def identify_identifier
token = ""
token.concat getc if peek(0) =~ /[$@]/
token.concat getc if peek(0) == "@"
while (ch = getc) =~ /\w|_/
print ":", ch, ":" if RubyLex.debug?
token.concat ch
end
ungetc
if ch == "!" or ch == "?"
token.concat getc
end
# fix token
# $stderr.puts "identifier - #{token}, state = #@lex_state"
case token
when /^\$/
return Token(TkGVAR, token).set_text(token)
when /^\@/
@lex_state = EXPR_END
return Token(TkIVAR, token).set_text(token)
end
if @lex_state != EXPR_DOT
print token, "\n" if RubyLex.debug?
token_c, *trans = TkReading2Token[token]
if token_c
# reserved word?
if (@lex_state != EXPR_BEG &&
@lex_state != EXPR_FNAME &&
trans[1])
# modifiers
token_c = TkSymbol2Token[trans[1]]
@lex_state = trans[0]
else
if @lex_state != EXPR_FNAME
if ENINDENT_CLAUSE.include?(token)
@indent += 1
elsif DEINDENT_CLAUSE.include?(token)
@indent -= 1
end
@lex_state = trans[0]
else
@lex_state = EXPR_END
end
end
return Token(token_c, token).set_text(token)
end
end
if @lex_state == EXPR_FNAME
@lex_state = EXPR_END
if peek(0) == '='
token.concat getc
end
elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
@lex_state = EXPR_ARG
else
@lex_state = EXPR_END
end
if token[0, 1] =~ /[A-Z]/
return Token(TkCONSTANT, token).set_text(token)
elsif token[token.size - 1, 1] =~ /[!?]/
return Token(TkFID, token).set_text(token)
else
return Token(TkIDENTIFIER, token).set_text(token)
end
end
# File rdoc/parsers/parse_rb.rb, line 1142
def identify_number(start)
str = start.dup
if start == "+" or start == "-" or start == ""
start = getc
str << start
end
@lex_state = EXPR_END
if start == "0"
if peek(0) == "x"
ch = getc
str << ch
match = /[0-9a-f_]/
else
match = /[0-7_]/
end
while ch = getc
if ch !~ match
ungetc
break
else
str << ch
end
end
return Token(TkINTEGER).set_text(str)
end
type = TkINTEGER
allow_point = TRUE
allow_e = TRUE
while ch = getc
case ch
when /[0-9_]/
str << ch
when allow_point && "."
type = TkFLOAT
if peek(0) !~ /[0-9]/
ungetc
break
end
str << ch
allow_point = false
when allow_e && "e", allow_e && "E"
str << ch
type = TkFLOAT
if peek(0) =~ /[+-]/
str << getc
end
allow_e = false
allow_point = false
else
ungetc
break
end
end
Token(type).set_text(str)
end
# File rdoc/parsers/parse_rb.rb, line 1123
def identify_quotation(initial_char)
ch = getc
if lt = PERCENT_LTYPE[ch]
initial_char += ch
ch = getc
elsif ch =~ /\W/
lt = "\""
else
RubyLex.fail SyntaxError, "unknown type of %string ('#{ch}')"
end
# if ch !~ /\W/
# ungetc
# next
# end
#@ltype = lt
@quoted = ch unless @quoted = PERCENT_PAREN[ch]
identify_string(lt, @quoted, ch, initial_char)
end
# File rdoc/parsers/parse_rb.rb, line 1204
def identify_string(ltype, quoted = ltype, opener=nil, initial_char = nil)
@ltype = ltype
@quoted = quoted
subtype = nil
str = ""
str << initial_char if initial_char
str << (opener||quoted)
nest = 0
begin
while ch = getc
str << ch
if @quoted == ch
if nest == 0
break
else
nest -= 1
end
elsif opener == ch
nest += 1
elsif @ltype != "'" && @ltype != "]" and ch == "#"
ch = getc
if ch == "{"
subtype = true
str << ch << skip_inner_expression
else
ungetc(ch)
end
elsif ch == '\' #'
str << read_escape
end
end
if @ltype == "/"
if peek(0) =~ /i|o|n|e|s/
str << getc
end
end
if subtype
Token(DLtype2Token[ltype], str)
else
Token(Ltype2Token[ltype], str)
end.set_text(str)
ensure
@ltype = nil
@quoted = nil
@lex_state = EXPR_END
end
end
# File rdoc/parsers/parse_rb.rb, line 519
def lex
until (((tk = token).kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)) &&
!@continue or
tk.nil?)
end
line = get_read
if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
nil
else
line
end
end
# File rdoc/parsers/parse_rb.rb, line 589
def lex_init()
@OP = SLex.new
@OP.def_rules("\00"", "\0004", "\0032") do |chars, io|
Token(TkEND_OF_SCRIPT).set_text(chars)
end
@OP.def_rules(" ", "\t", "\f", "\r", "\113"") do |chars, io|
@space_seen = TRUE
while (ch = getc) =~ /[ \t\f\r\13]/
chars << ch
end
ungetc
Token(TkSPACE).set_text(chars)
end
@OP.def_rule("#") do
|op, io|
identify_comment
end
@OP.def_rule("=begin", proc{@prev_char_no == 0 && peek(0) =~ /\s/}) do
|op, io|
str = op
@ltype = "="
begin
line = ""
begin
ch = getc
line << ch
end until ch == "\n"
str << line
end until line =~ /^=end/
ungetc
@ltype = nil
if str =~ /\A=begin\s+rdoc/
str.sub!(/\A=begin.*\n/, '')
str.sub!(/^=end.*/, '')
Token(TkCOMMENT).set_text(str)
else
Token(TkRD_COMMENT)#.set_text(str)
end
end
@OP.def_rule("\n") do
print "\\n\n" if RubyLex.debug?
case @lex_state
when EXPR_BEG, EXPR_FNAME, EXPR_DOT
@continue = TRUE
else
@continue = FALSE
@lex_state = EXPR_BEG
end
Token(TkNL).set_text("\n")
end
@OP.def_rules("*", "**",
"!", "!=", "!~",
"=", "==", "===",
"=~", "<=>",
"<", "<=",
">", ">=", ">>") do
|op, io|
@lex_state = EXPR_BEG
Token(op).set_text(op)
end
@OP.def_rules("<<") do
|op, io|
tk = nil
if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
(@lex_state != EXPR_ARG || @space_seen)
c = peek(0)
if /[-\w_\"\\`]/ =~ c
tk = identify_here_document
end
end
if !tk
@lex_state = EXPR_BEG
tk = Token(op).set_text(op)
end
tk
end
@OP.def_rules("'", '"') do
|op, io|
identify_string(op)
end
@OP.def_rules("`") do
|op, io|
if @lex_state == EXPR_FNAME
Token(op).set_text(op)
else
identify_string(op)
end
end
@OP.def_rules('?') do
|op, io|
if @lex_state == EXPR_END
@lex_state = EXPR_BEG
Token(TkQUESTION).set_text(op)
else
ch = getc
if @lex_state == EXPR_ARG && ch !~ /\s/
ungetc
@lex_state = EXPR_BEG;
Token(TkQUESTION).set_text(op)
else
str = op
str << ch
if (ch == '\') #'
str << read_escape
end
@lex_state = EXPR_END
Token(TkINTEGER).set_text(str)
end
end
end
@OP.def_rules("&", "&&", "|", "||") do
|op, io|
@lex_state = EXPR_BEG
Token(op).set_text(op)
end
@OP.def_rules("+=", "-=", "*=", "**=",
"&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
|op, io|
@lex_state = EXPR_BEG
op =~ /^(.*)=$/
Token(TkOPASGN, $1).set_text(op)
end
@OP.def_rule("+@", proc{@lex_state == EXPR_FNAME}) do |op, io|
Token(TkUPLUS).set_text(op)
end
@OP.def_rule("-@", proc{@lex_state == EXPR_FNAME}) do |op, io|
Token(TkUMINUS).set_text(op)
end
@OP.def_rules("+", "-") do
|op, io|
catch(:RET) do
if @lex_state == EXPR_ARG
if @space_seen and peek(0) =~ /[0-9]/
throw :RET, identify_number(op)
else
@lex_state = EXPR_BEG
end
elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
throw :RET, identify_number(op)
else
@lex_state = EXPR_BEG
end
Token(op).set_text(op)
end
end
@OP.def_rule(".") do
@lex_state = EXPR_BEG
if peek(0) =~ /[0-9]/
ungetc
identify_number("")
else
# for obj.if
@lex_state = EXPR_DOT
Token(TkDOT).set_text(".")
end
end
@OP.def_rules("..", "...") do
|op, io|
@lex_state = EXPR_BEG
Token(op).set_text(op)
end
lex_int2
end
# File rdoc/parsers/parse_rb.rb, line 775
def lex_int2
@OP.def_rules("]", "}", ")") do
|op, io|
@lex_state = EXPR_END
@indent -= 1
Token(op).set_text(op)
end
@OP.def_rule(":") do
if @lex_state == EXPR_END || peek(0) =~ /\s/
@lex_state = EXPR_BEG
tk = Token(TkCOLON)
else
@lex_state = EXPR_FNAME;
tk = Token(TkSYMBEG)
end
tk.set_text(":")
end
@OP.def_rule("::") do
# p @lex_state.id2name, @space_seen
if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
@lex_state = EXPR_BEG
tk = Token(TkCOLON3)
else
@lex_state = EXPR_DOT
tk = Token(TkCOLON2)
end
tk.set_text("::")
end
@OP.def_rule("/") do
|op, io|
if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
identify_string(op)
elsif peek(0) == '='
getc
@lex_state = EXPR_BEG
Token(TkOPASGN, :/).set_text("/=") #")
elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
identify_string(op)
else
@lex_state = EXPR_BEG
Token("/").set_text(op)
end
end
@OP.def_rules("^") do
@lex_state = EXPR_BEG
Token("^").set_text("^")
end
# @OP.def_rules("^=") do
# @lex_state = EXPR_BEG
# Token(TkOPASGN, :^)
# end
@OP.def_rules(",", ";") do
|op, io|
@lex_state = EXPR_BEG
Token(op).set_text(op)
end
@OP.def_rule("~") do
@lex_state = EXPR_BEG
Token("~").set_text("~")
end
@OP.def_rule("~@", proc{@lex_state = EXPR_FNAME}) do
@lex_state = EXPR_BEG
Token("~").set_text("~@")
end
@OP.def_rule("(") do
@indent += 1
if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
@lex_state = EXPR_BEG
tk = Token(TkfLPAREN)
else
@lex_state = EXPR_BEG
tk = Token(TkLPAREN)
end
tk.set_text("(")
end
@OP.def_rule("[]", proc{@lex_state == EXPR_FNAME}) do
Token("[]").set_text("[]")
end
@OP.def_rule("[]=", proc{@lex_state == EXPR_FNAME}) do
Token("[]=").set_text("[]=")
end
@OP.def_rule("[") do
@indent += 1
if @lex_state == EXPR_FNAME
t = Token(TkfLBRACK)
else
if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
t = Token(TkLBRACK)
elsif @lex_state == EXPR_ARG && @space_seen
t = Token(TkLBRACK)
else
t = Token(TkfLBRACK)
end
@lex_state = EXPR_BEG
end
t.set_text("[")
end
@OP.def_rule("{") do
@indent += 1
if @lex_state != EXPR_END && @lex_state != EXPR_ARG
t = Token(TkLBRACE)
else
t = Token(TkfLBRACE)
end
@lex_state = EXPR_BEG
t.set_text("{")
end
@OP.def_rule('\') do #'
if getc == "\n"
@space_seen = true
@continue = true
Token(TkSPACE).set_text("\\\n")
else
ungetc
Token("\\").set_text("\\") #"
end
end
@OP.def_rule('%') do
|op, io|
if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
identify_quotation('%')
elsif peek(0) == '='
getc
Token(TkOPASGN, "%").set_text("%=")
elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
identify_quotation('%')
else
@lex_state = EXPR_BEG
Token("%").set_text("%")
end
end
@OP.def_rule('$') do #'
identify_gvar
end
@OP.def_rule('@') do
if peek(0) =~ /[@\w_]/
ungetc
identify_identifier
else
Token("@").set_text("@")
end
end
# @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do
# |op, io|
# @indent += 1
# @lex_state = EXPR_FNAME
# # @lex_state = EXPR_END
# # until @rests[0] == "\n" or @rests[0] == ";"
# # rests.shift
# # end
# end
@OP.def_rule("__END__", proc{@prev_char_no == 0 && peek(0) =~ /[\r\n]/}) do
throw :eof
end
@OP.def_rule("") do
|op, io|
printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
if peek(0) =~ /[0-9]/
t = identify_number("")
elsif peek(0) =~ /[\w_]/
t = identify_identifier
end
printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
t
end
p @OP if RubyLex.debug?
end
io functions
# File rdoc/parsers/parse_rb.rb, line 476
def line_no
@reader.line_num
end
# File rdoc/parsers/parse_rb.rb, line 515
def peek(i = 0)
@reader.peek(i)
end
# File rdoc/parsers/parse_rb.rb, line 511
def peek_equal?(str)
@reader.peek_equal(str)
end
# File rdoc/parsers/parse_rb.rb, line 1292
def read_escape
res = ""
case ch = getc
when /[0-7]/
ungetc ch
3.times do
case ch = getc
when /[0-7]/
when nil
break
else
ungetc
break
end
res << ch
end
when "x"
res << ch
2.times do
case ch = getc
when /[0-9a-fA-F]/
when nil
break
else
ungetc
break
end
res << ch
end
when "M"
res << ch
if (ch = getc) != '-'
ungetc
else
res << ch
if (ch = getc) == "\\" #"
res << ch
res << read_escape
else
res << ch
end
end
when "C", "c" #, "^"
res << ch
if ch == "C" and (ch = getc) != "-"
ungetc
else
res << ch
if (ch = getc) == "\\" #"
res << ch
res << read_escape
else
res << ch
end
end
else
res << ch
end
res
end
# File rdoc/parsers/parse_rb.rb, line 1254
def skip_inner_expression
res = ""
nest = 0
while (ch = getc)
res << ch
if ch == '}'
break if nest.zero?
nest -= 1
elsif ch == '{'
nest += 1
end
end
res
end
# File rdoc/parsers/parse_rb.rb, line 533
def token
set_token_position(line_no, char_no)
begin
begin
tk = @OP.match(self)
@space_seen = tk.kind_of?(TkSPACE)
rescue SyntaxError
abort if @exception_on_syntax_error
tk = TkError.new(line_no, char_no)
end
end while @skip_space and tk.kind_of?(TkSPACE)
if @read_auto_clean_up
get_read
end
# throw :eof unless tk
p tk if $DEBUG
tk
end
Commenting is here to help enhance the documentation. For example, code samples, or clarification of the documentation.
If you have questions about Ruby or the documentation, please post to one of the Ruby mailing lists. You will get better, faster, help that way.
If you wish to post a correction of the docs, please do so, but also file bug report so that it can be corrected for the next release. Thank you.
If you want to help improve the Ruby documentation, please see Improve the docs, or visit Documenting-ruby.org.