Lexical analyzer for Ruby source
# File rdoc/parser/ruby.rb, line 450
# Sets up a lexer over +content+.
#
# content:: source text, passed unchanged to BufferedReader.new
# options:: kept in @options and also handed to BufferedReader.new
def initialize(content, options)
  # Register the operator rules (this assigns @OP) before any other state.
  lex_init

  @options = options
  @reader  = BufferedReader.new(content, @options)

  # Position bookkeeping.
  @line_no      = 1
  @exp_line_no  = @line_no
  @base_char_no = 0
  @indent       = 0

  # No string-like literal is being read yet.
  @ltype  = nil
  @quoted = nil

  # Lexer state and behaviour flags.
  @lex_state  = EXPR_BEG
  @space_seen = false
  @continue   = false
  @line       = ""
  @skip_space = false
  @read_auto_clean_up = false
  @exception_on_syntax_error = true
end
# File rdoc/parser/ruby.rb, line 484
# Returns the column reported by the underlying reader (@reader.column).
def char_no
@reader.column
end
# File rdoc/parser/ruby.rb, line 488
# Delegates to the reader's get_read and returns its result.
# lex uses the value as the text of the line it has just tokenized.
def get_read
@reader.get_read
end
# File rdoc/parser/ruby.rb, line 496
# Delegates to the reader's getc_already_read and returns its result.
def getc_of_rests
@reader.getc_already_read
end
# File rdoc/parser/ruby.rb, line 500
# Reads one line through getc.
#
# Carriage returns are dropped; the terminating "\n" (if any) is kept.
# Returns nil when no character is available at all.
def gets
  ch = getc
  return unless ch

  line = ""
  loop do
    line.concat ch unless ch == "\r"
    break if ch == "\n"
    ch = getc
    break unless ch
  end
  line
end
# File rdoc/parser/ruby.rb, line 1256
# Reads the rest of a "#" comment and returns it as a TkCOMMENT token.
#
# The token text starts with "#" and holds every character up to, but not
# including, the terminating newline; that newline is pushed back with ungetc
# so the newline rule still sees it. A backslash has no special meaning inside
# a comment: in particular a trailing backslash does not continue the comment
# onto the following line.
#
# @ltype is "#" while the comment is being read and is reset to nil once the
# terminating newline is found.
def identify_comment
  @ltype = "#"
  comment = "#"

  while (ch = getc)
    if ch == "\n"
      @ltype = nil
      ungetc
      break
    end
    comment << ch
  end

  Token(TkCOMMENT).set_text(comment)
end
# File rdoc/parser/ruby.rb, line 951
# Builds the token for a "$"-prefixed name; the "$" itself has already been
# consumed. Sets @lex_state to EXPR_END.
#
# Depending on the character after "$" the result is a TkGVAR (punctuation
# names and "$-x"), a TkBACK_REF ("$&", "$`", "$'", "$+"), a TkNTH_REF
# ("$1", "$23", ...), whatever identify_identifier returns (word characters),
# or a bare "$" token when nothing else applies.
def identify_gvar
  @lex_state = EXPR_END
  name = "$"

  case ch = getc
  when /[~_*$?!@\/\;,=:<>".]/ #"
    name << ch
    Token(TkGVAR, name).set_text(name)
  when "-"
    name << "-" << getc
    Token(TkGVAR, name).set_text(name)
  when "&", "`", "'", "+"
    name << ch
    Token(TkBACK_REF, name).set_text(name)
  when /[1-9]/
    name << ch
    loop do
      ch = getc
      break unless ch =~ /[0-9]/
      name << ch
    end
    ungetc
    Token(TkNTH_REF).set_text(name)
  when /\w/
    # Rewind over both the character and the "$" so that
    # identify_identifier reads the whole name itself.
    ungetc
    ungetc
    identify_identifier
  else
    ungetc
    Token("$").set_text(name)
  end
end
# File rdoc/parser/ruby.rb, line 1061
# Reads a here-document whose "<<" has already been consumed and returns a
# token whose value is the document body and whose text is body.dump.
#
# Steps:
# 1. Parse the terminator: an optional "-" (terminator may be indented), then
#    either a quoted terminator ('..', "..", `..`) or a bare run of word
#    characters. The quote character selects the token class via Ltype2Token.
# 2. Save the rest of the introducing line (up to and including its newline)
#    in +reserve+; a backslash keeps the following character with it.
# 3. Collect body lines through gets until the terminator line. For "<<-" the
#    comparison ignores surrounding whitespace, but body lines are stored with
#    their indentation intact.
# 4. Hand +reserve+ back to the reader with divert_read_from.
#
# @ltype is set to the quote type while reading and restored afterwards;
# @lex_state ends as EXPR_END.
def identify_here_document
  ch = getc
  if ch == "-"
    ch = getc
    indent = true
  end

  if /['"`]/ =~ ch # '
    lt = ch
    quoted = ""
    while (c = getc) && c != lt
      quoted.concat c
    end
  else
    lt = '"'
    quoted = ch ? ch.dup : ""
    while (c = getc) && c =~ /\w/
      quoted.concat c
    end
    ungetc
  end

  ltback, @ltype = @ltype, lt

  reserve = ""
  while (ch = getc)
    reserve << ch
    if ch == "\\" #"
      ch = getc
      reserve << ch if ch
    elsif ch == "\n"
      break
    end
  end

  str = ""
  while (l = gets)
    l.chomp!
    # Strip only for the comparison so the body keeps its leading whitespace.
    break if (indent ? l.strip : l) == quoted
    str << l << "\n"
  end

  @reader.divert_read_from(reserve)
  @ltype = ltback
  @lex_state = EXPR_END
  Token(Ltype2Token[lt], str).set_text(str.dump)
end
# File rdoc/parser/ruby.rb, line 986
# Reads an identifier starting at the current position and returns the token
# for it. The name may carry a "$", "@" or "@@" prefix and a trailing "!" or
# "?". Updates @lex_state (and @indent for the keywords listed in
# ENINDENT_CLAUSE / DEINDENT_CLAUSE) as a side effect.
def identify_identifier
token = ""
# Optional sigil: "$" or "@", then a second "@" for "@@name".
token.concat getc if peek(0) =~ /[$@]/
token.concat getc if peek(0) == "@"
while (ch = getc) =~ /\w|_/
print ":", ch, ":" if RDoc::RubyLex.debug?
token.concat ch
end
# Push back the character that ended the loop; if it is "!" or "?" it is
# read again below and becomes part of the name.
ungetc
if ch == "!" or ch == "?"
token.concat getc
end
# fix token
# $stderr.puts "identifier - #{token}, state = #@lex_state"
# Names with a sigil are returned right away.
case token
when /^\$/
return Token(TkGVAR, token).set_text(token)
when /^\@/
@lex_state = EXPR_END
return Token(TkIVAR, token).set_text(token)
end
# After a "." (EXPR_DOT) the keyword table is not consulted.
if @lex_state != EXPR_DOT
print token, "\n" if RDoc::RubyLex.debug?
# First element of the table entry is the token class; the rest is kept in
# trans, where trans[0] is used as the next lexer state and trans[1], when
# present, is looked up in TkSymbol2Token for the modifier form.
token_c, *trans = TkReading2Token[token]
if token_c
# reserved word?
if (@lex_state != EXPR_BEG &&
@lex_state != EXPR_FNAME &&
trans[1])
# modifiers
token_c = TkSymbol2Token[trans[1]]
@lex_state = trans[0]
else
if @lex_state != EXPR_FNAME
# Block-opening keywords raise @indent, "end" lowers it.
if ENINDENT_CLAUSE.include?(token)
@indent += 1
elsif DEINDENT_CLAUSE.include?(token)
@indent -= 1
end
@lex_state = trans[0]
else
@lex_state = EXPR_END
end
end
return Token(token_c, token).set_text(token)
end
end
# Not a keyword (or keyword lookup skipped): pick the follow-up state.
if @lex_state == EXPR_FNAME
@lex_state = EXPR_END
# In EXPR_FNAME a directly following "=" is taken into the name.
if peek(0) == '='
token.concat getc
end
elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
@lex_state = EXPR_ARG
else
@lex_state = EXPR_END
end
# Classify by spelling: leading capital => constant, trailing "!"/"?" => TkFID.
if token[0, 1] =~ /[A-Z]/
return Token(TkCONSTANT, token).set_text(token)
elsif token[token.size - 1, 1] =~ /[!?]/
return Token(TkFID, token).set_text(token)
else
return Token(TkIDENTIFIER, token).set_text(token)
end
end
# File rdoc/parser/ruby.rb, line 1129
# Reads a numeric literal and returns a TkINTEGER or TkFLOAT token whose text
# is the literal as written. Sets @lex_state to EXPR_END.
#
# start:: text already consumed for this literal: "+", "-" or "" (in which
#         case the first character is read here), or the first digit itself.
#
# A leading "0" selects hexadecimal ("0x"/"0X", digits in either case) or
# octal, unless it is followed by ".", "e" or "E", in which case the literal
# is read as a decimal such as 0.5. For decimals, a "." only belongs to the
# number when a digit follows it; otherwise it is pushed back and the literal
# stays an integer (e.g. "1.foo").
def identify_number(start)
  str = start.dup
  if start == "+" || start == "-" || start == ""
    start = getc
    str << start
  end
  @lex_state = EXPR_END

  if start == "0" && peek(0) !~ /[.eE]/
    if peek(0) =~ /[xX]/
      str << getc
      digits = /[0-9a-fA-F_]/
    else
      digits = /[0-7_]/
    end
    while (ch = getc)
      if ch !~ digits
        ungetc
        break
      end
      str << ch
    end
    return Token(TkINTEGER).set_text(str)
  end

  type = TkINTEGER
  allow_point = true
  allow_e = true
  while (ch = getc)
    if ch =~ /[0-9_]/
      str << ch
    elsif allow_point && ch == "."
      # Only a "." followed by a digit makes this a float.
      unless peek(0) =~ /[0-9]/
        ungetc
        break
      end
      type = TkFLOAT
      str << ch
      allow_point = false
    elsif allow_e && (ch == "e" || ch == "E")
      str << ch
      type = TkFLOAT
      str << getc if peek(0) =~ /[+-]/
      # Neither a second exponent nor a fraction may follow the exponent.
      allow_e = false
      allow_point = false
    else
      ungetc
      break
    end
  end
  Token(type).set_text(str)
end
# File rdoc/parser/ruby.rb, line 1110
# Handles a "%" literal whose "%" has already been consumed and delegates the
# actual reading to identify_string.
#
# initial_char:: text consumed so far (the caller passes "%"); the type
#                letter, when present, is appended to a copy of it.
#
# The character after "%" is either a type letter found in PERCENT_LTYPE
# (the delimiter then follows it) or a non-word character that serves as the
# delimiter of a double-quoted string. Anything else raises SyntaxError.
# @quoted is set to the closing delimiter: the PERCENT_PAREN partner for an
# opening bracket, otherwise the delimiter itself.
def identify_quotation(initial_char)
  ch = getc
  lt = PERCENT_LTYPE[ch]

  if lt
    initial_char += ch
    ch = getc
  elsif ch =~ /\W/
    lt = "\""
  else
    fail SyntaxError, "unknown type of %string ('#{ch}')"
  end

  @quoted = PERCENT_PAREN[ch] || ch
  identify_string(lt, @quoted, ch, initial_char)
end
# File rdoc/parser/ruby.rb, line 1191
# Reads a string-like literal (string, backtick command, regexp, %-literal)
# up to its closing delimiter and returns the token for it. The token text is
# the literal as written, including delimiters.
#
# ltype::        literal type key used with Ltype2Token / DLtype2Token
# quoted::       closing delimiter (defaults to ltype)
# opener::       opening delimiter when it differs from the closing one, so
#                that nested pairs can be counted; nil otherwise
# initial_char:: text that preceded the opener (e.g. "%q"), or nil
#
# While reading:
# * a backslash takes the following escape (via read_escape) with it, so an
#   escaped delimiter does not end the literal;
# * "#{" starts an interpolation (skipped with skip_inner_expression) unless
#   the type is "'" or "]"; the literal then maps through DLtype2Token;
# * for regexps, all trailing option letters are appended.
#
# @ltype and @quoted are set during the read and always cleared afterwards;
# @lex_state always ends as EXPR_END.
def identify_string(ltype, quoted = ltype, opener=nil, initial_char = nil)
  @ltype = ltype
  @quoted = quoted
  subtype = nil

  str = ""
  str << initial_char if initial_char
  str << (opener||quoted)

  nest = 0
  while (ch = getc)
    str << ch
    if @quoted == ch
      break if nest == 0
      nest -= 1
    elsif opener == ch
      nest += 1
    elsif @ltype != "'" && @ltype != "]" && ch == "#"
      ch = getc
      if ch == "{"
        subtype = true
        str << ch << skip_inner_expression
      else
        # Not an interpolation: let the main loop see this character again.
        ungetc(ch)
      end
    elsif ch == "\\"
      str << read_escape
    end
  end

  if @ltype == "/"
    str << getc while peek(0) =~ /[imxouesn]/
  end

  token_class = subtype ? DLtype2Token[ltype] : Ltype2Token[ltype]
  Token(token_class, str).set_text(str)
ensure
  @ltype = nil
  @quoted = nil
  @lex_state = EXPR_END
end
# File rdoc/parser/ruby.rb, line 523
# Consumes tokens up to the end of a logical line and returns the text that
# get_read reports for it.
#
# Tokens are read until token returns nil, or until a TkNL or
# TkEND_OF_SCRIPT token arrives while @continue is not set. The result is nil
# when the last token was nil, or when it was TkEND_OF_SCRIPT and the text
# read is empty; otherwise the text itself.
def lex
  tk = nil
  loop do
    tk = token
    break if tk.nil?
    line_end = TkNL === tk || TkEND_OF_SCRIPT === tk
    break if line_end && !@continue
  end

  line = get_read
  return nil if tk.nil?
  return nil if line == "" && TkEND_OF_SCRIPT === tk
  line
end
# Returns the next token matched by @OP.
#
# The token position is recorded first. When @skip_space is set, TkSPACE
# tokens are skipped. A SyntaxError raised while matching is re-raised as
# RDoc::Error if @exception_on_syntax_error is set; otherwise a TkError
# carrying the current line and column stands in for the token. When
# @read_auto_clean_up is set, get_read is called before returning.
def token
  set_token_position(line_no, char_no)

  tk = nil
  loop do
    begin
      tk = @OP.match(self)
      @space_seen = TkSPACE === tk
    rescue SyntaxError => e
      raise RDoc::Error, "syntax error: #{e.message}" if @exception_on_syntax_error
      tk = TkError.new(line_no, char_no)
    end
    break unless @skip_space && TkSPACE === tk
  end

  get_read if @read_auto_clean_up
  # throw :eof unless tk
  tk
end
# Keywords for which identify_identifier increments @indent
# ("when" is not part of the list).
ENINDENT_CLAUSE = %w[
  case class def do for if
  module unless until while begin
]

# Keywords for which identify_identifier decrements @indent
# ("when" is not part of the list).
DEINDENT_CLAUSE = %w[end]

# Type letter after "%" => literal type key.
PERCENT_LTYPE = {
  "q" => "'",
  "Q" => '"',
  "x" => "`",
  "r" => "/",
  "w" => "]"
}

# Opening bracket => matching closing bracket for %-literals.
PERCENT_PAREN = {
  "{" => "}",
  "[" => "]",
  "<" => ">",
  "(" => ")"
}

# Literal type key => token class for literals without interpolation.
# Unknown keys fall back to TkSTRING.
Ltype2Token = {
  "'" => TkSTRING,
  '"' => TkSTRING,
  "`" => TkXSTRING,
  "/" => TkREGEXP,
  "]" => TkDSTRING
}
Ltype2Token.default = TkSTRING

# Literal type key => token class for literals containing "#{...}".
DLtype2Token = {
  '"' => TkDSTRING,
  "`" => TkDXSTRING,
  "/" => TkDREGEXP
}
# Creates the operator table (@OP, an IRB::SLex) and registers the first half
# of the lexer rules on it; lex_int2 registers the rest.
#
# Each rule pairs literal text (optionally with a guard proc) with a block
# whose value is the token built for that text. The blocks read further input
# with getc/peek/ungetc and update @lex_state, @space_seen and @continue.
def lex_init()
  @OP = IRB::SLex.new

  # NUL, ^D and ^Z mark the end of the script.
  @OP.def_rules("\0", "\004", "\032") do |chars, io|
    Token(TkEND_OF_SCRIPT).set_text(chars)
  end

  # A run of blanks (space, tab, form feed, CR, vertical tab) is one TkSPACE.
  @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |chars, io|
    @space_seen = true
    while (ch = getc) =~ /[ \t\f\r\13]/
      chars << ch
    end
    ungetc
    Token(TkSPACE).set_text(chars)
  end

  @OP.def_rule("#") do |op, io|
    identify_comment
  end

  # =begin ... =end block, only when "=begin" starts in column 0.
  @OP.def_rule("=begin", proc{@prev_char_no == 0 && peek(0) =~ /\s/}) do |op, io|
    str = op
    @ltype = "="
    ch = nil
    loop do
      line = ""
      while (ch = getc)
        line << ch
        break if ch == "\n"
      end
      str << line
      # Stop at the "=end" line, or at end of input for an unterminated block.
      break if ch.nil? || line =~ /^=end/
    end
    # Give the newline that ended the "=end" line back to the reader.
    ungetc if ch
    @ltype = nil

    if str =~ /\A=begin\s+rdoc/
      # "=begin rdoc" blocks are documentation: keep the body as a comment.
      str.sub!(/\A=begin.*\n/, '')
      str.sub!(/^=end.*/, '')
      Token(TkCOMMENT).set_text(str)
    else
      Token(TkRD_COMMENT)#.set_text(str)
    end
  end

  @OP.def_rule("\n") do
    print "\\n\n" if RDoc::RubyLex.debug?
    case @lex_state
    when EXPR_BEG, EXPR_FNAME, EXPR_DOT
      # The expression is not finished yet: the logical line continues.
      @continue = true
    else
      @continue = false
      @lex_state = EXPR_BEG
    end
    Token(TkNL).set_text("\n")
  end

  @OP.def_rules("*", "**",
                "!", "!=", "!~",
                "=", "==", "===",
                "=~", "<=>",
                "<", "<=",
                ">", ">=", ">>") do |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  # "<<" is either a here-document introducer or the shift/append operator.
  @OP.def_rules("<<") do |op, io|
    tk = nil
    if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
       (@lex_state != EXPR_ARG || @space_seen)
      c = peek(0)
      # "-", a word character or one of the quotes identify_here_document
      # accepts (' " `) starts a here-document.
      if /[-\w_\"\'\`]/ =~ c
        tk = identify_here_document
      end
    end
    if !tk
      @lex_state = EXPR_BEG
      tk = Token(op).set_text(op)
    end
    tk
  end

  @OP.def_rules("'", '"') do |op, io|
    identify_string(op)
  end

  @OP.def_rules("`") do |op, io|
    if @lex_state == EXPR_FNAME
      # Backtick used as a method name.
      Token(op).set_text(op)
    else
      identify_string(op)
    end
  end

  # "?" is the ternary operator or the start of a character literal (?a, ?\n).
  @OP.def_rules('?') do |op, io|
    if @lex_state == EXPR_END
      @lex_state = EXPR_BEG
      Token(TkQUESTION).set_text(op)
    else
      ch = getc
      if @lex_state == EXPR_ARG && ch !~ /\s/
        ungetc
        @lex_state = EXPR_BEG
        Token(TkQUESTION).set_text(op)
      else
        str = op
        str << ch
        if ch == '\\'
          str << read_escape
        end
        @lex_state = EXPR_END
        Token(TkINTEGER).set_text(str)
      end
    end
  end

  @OP.def_rules("&", "&&", "|", "||") do |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  # Operator assignments: the token value is the operator without the "=".
  @OP.def_rules("+=", "-=", "*=", "**=",
                "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do |op, io|
    @lex_state = EXPR_BEG
    op =~ /^(.*)=$/
    Token(TkOPASGN, $1).set_text(op)
  end

  # Unary operator method names, valid only where a method name is expected.
  @OP.def_rule("+@", proc{@lex_state == EXPR_FNAME}) do |op, io|
    Token(TkUPLUS).set_text(op)
  end

  @OP.def_rule("-@", proc{@lex_state == EXPR_FNAME}) do |op, io|
    Token(TkUMINUS).set_text(op)
  end

  # "+"/"-" directly before a digit may be the sign of a numeric literal.
  @OP.def_rules("+", "-") do |op, io|
    catch(:RET) do
      if @lex_state == EXPR_ARG
        if @space_seen and peek(0) =~ /[0-9]/
          throw :RET, identify_number(op)
        else
          @lex_state = EXPR_BEG
        end
      elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
        throw :RET, identify_number(op)
      else
        @lex_state = EXPR_BEG
      end
      Token(op).set_text(op)
    end
  end

  @OP.def_rule(".") do
    @lex_state = EXPR_BEG
    if peek(0) =~ /[0-9]/
      # ".5": push the dot back and let identify_number read it.
      ungetc
      identify_number("")
    else
      # for obj.if
      @lex_state = EXPR_DOT
      Token(TkDOT).set_text(".")
    end
  end

  @OP.def_rules("..", "...") do |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  lex_int2
end
# Registers the second half of the lexer rules on @OP: closing and opening
# brackets, ":" / "::", "/", "^", "," / ";", "~", "\", "%", "$", "@",
# "__END__" and the catch-all rule for numbers and identifiers.
#
# As in lex_init, each block's value is the token for the matched text and
# the blocks update @lex_state and @indent as they go.
def lex_int2
  @OP.def_rules("]", "}", ")") do |op, io|
    @lex_state = EXPR_END
    @indent -= 1
    Token(op).set_text(op)
  end

  # ":" alone (ternary/hash separator) versus the start of a symbol.
  @OP.def_rule(":") do
    if @lex_state == EXPR_END || peek(0) =~ /\s/
      @lex_state = EXPR_BEG
      tk = Token(TkCOLON)
    else
      @lex_state = EXPR_FNAME
      tk = Token(TkSYMBEG)
    end
    tk.set_text(":")
  end

  # Leading "::" (TkCOLON3) versus scope resolution after a name (TkCOLON2).
  @OP.def_rule("::") do
    if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
      @lex_state = EXPR_BEG
      tk = Token(TkCOLON3)
    else
      @lex_state = EXPR_DOT
      tk = Token(TkCOLON2)
    end
    tk.set_text("::")
  end

  # "/" starts a regexp where an operand is expected, otherwise it divides.
  @OP.def_rule("/") do |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_string(op)
    elsif peek(0) == '='
      getc
      @lex_state = EXPR_BEG
      Token(TkOPASGN, :/).set_text("/=")
    elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
      identify_string(op)
    else
      @lex_state = EXPR_BEG
      Token("/").set_text(op)
    end
  end

  @OP.def_rules("^") do
    @lex_state = EXPR_BEG
    Token("^").set_text("^")
  end

  @OP.def_rules(",", ";") do |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  @OP.def_rule("~") do
    @lex_state = EXPR_BEG
    Token("~").set_text("~")
  end

  # "~@" is only a token where a method name is expected. The guard must
  # compare, not assign, so that it leaves @lex_state untouched.
  @OP.def_rule("~@", proc{@lex_state == EXPR_FNAME}) do
    @lex_state = EXPR_BEG
    Token("~").set_text("~@")
  end

  @OP.def_rule("(") do
    @indent += 1
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      @lex_state = EXPR_BEG
      tk = Token(TkfLPAREN)
    else
      @lex_state = EXPR_BEG
      tk = Token(TkLPAREN)
    end
    tk.set_text("(")
  end

  # "[]" and "[]=" as method names.
  @OP.def_rule("[]", proc{@lex_state == EXPR_FNAME}) do
    Token("[]").set_text("[]")
  end

  @OP.def_rule("[]=", proc{@lex_state == EXPR_FNAME}) do
    Token("[]=").set_text("[]=")
  end

  @OP.def_rule("[") do
    @indent += 1
    if @lex_state == EXPR_FNAME
      t = Token(TkfLBRACK)
    else
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
        t = Token(TkLBRACK)
      elsif @lex_state == EXPR_ARG && @space_seen
        t = Token(TkLBRACK)
      else
        t = Token(TkfLBRACK)
      end
      @lex_state = EXPR_BEG
    end
    t.set_text("[")
  end

  @OP.def_rule("{") do
    @indent += 1
    if @lex_state != EXPR_END && @lex_state != EXPR_ARG
      t = Token(TkLBRACE)
    else
      t = Token(TkfLBRACE)
    end
    @lex_state = EXPR_BEG
    t.set_text("{")
  end

  # Backslash-newline is a line continuation and is reported as white space.
  @OP.def_rule('\\') do
    if getc == "\n"
      @space_seen = true
      @continue = true
      Token(TkSPACE).set_text("\\\n")
    else
      ungetc
      Token("\\").set_text("\\")
    end
  end

  # "%" starts a %-literal where an operand is expected, otherwise it is the
  # modulo operator (or "%=").
  @OP.def_rule('%') do |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_quotation('%')
    elsif peek(0) == '='
      getc
      # Same follow-up state as the other operator assignments.
      @lex_state = EXPR_BEG
      Token(TkOPASGN, "%").set_text("%=")
    elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
      identify_quotation('%')
    else
      @lex_state = EXPR_BEG
      Token("%").set_text("%")
    end
  end

  @OP.def_rule('$') do
    identify_gvar
  end

  @OP.def_rule('@') do
    if peek(0) =~ /[@\w_]/
      # Push the "@" back so identify_identifier reads the full name.
      ungetc
      identify_identifier
    else
      Token("@").set_text("@")
    end
  end

  # "__END__" alone at the start of a line stops lexing.
  @OP.def_rule("__END__", proc{@prev_char_no == 0 && peek(0) =~ /[\r\n]/}) do
    throw :eof
  end

  # Catch-all: numbers and identifiers. Yields nil when neither applies.
  @OP.def_rule("") do |op, io|
    printf "MATCH: start %s: %s\n", op, io.inspect if RDoc::RubyLex.debug?
    if peek(0) =~ /[0-9]/
      t = identify_number("")
    elsif peek(0) =~ /[\w_]/
      t = identify_identifier
    end
    printf "MATCH: end %s: %s\n", op, io.inspect if RDoc::RubyLex.debug?
    t
  end
end
# Builds the token for a "$"-prefixed name; the "$" itself has already been
# consumed. Sets @lex_state to EXPR_END.
#
# Depending on the character after "$" the result is a TkGVAR (punctuation
# names and "$-x"), a TkBACK_REF ("$&", "$`", "$'", "$+"), a TkNTH_REF
# ("$1", "$23", ...), whatever identify_identifier returns (word characters),
# or a bare "$" token when nothing else applies.
def identify_gvar
  @lex_state = EXPR_END
  name = "$"

  case ch = getc
  when /[~_*$?!@\/\;,=:<>".]/ #"
    name << ch
    Token(TkGVAR, name).set_text(name)
  when "-"
    name << "-" << getc
    Token(TkGVAR, name).set_text(name)
  when "&", "`", "'", "+"
    name << ch
    Token(TkBACK_REF, name).set_text(name)
  when /[1-9]/
    name << ch
    loop do
      ch = getc
      break unless ch =~ /[0-9]/
      name << ch
    end
    ungetc
    Token(TkNTH_REF).set_text(name)
  when /\w/
    # Rewind over both the character and the "$" so that
    # identify_identifier reads the whole name itself.
    ungetc
    ungetc
    identify_identifier
  else
    ungetc
    Token("$").set_text(name)
  end
end
# Reads an identifier starting at the current position and returns the token
# for it. The name may carry a "$", "@" or "@@" prefix and a trailing "!" or
# "?". Updates @lex_state (and @indent for the keywords listed in
# ENINDENT_CLAUSE / DEINDENT_CLAUSE) as a side effect.
def identify_identifier
token = ""
# Optional sigil: "$" or "@", then a second "@" for "@@name".
token.concat getc if peek(0) =~ /[$@]/
token.concat getc if peek(0) == "@"
while (ch = getc) =~ /\w|_/
print ":", ch, ":" if RDoc::RubyLex.debug?
token.concat ch
end
# Push back the character that ended the loop; if it is "!" or "?" it is
# read again below and becomes part of the name.
ungetc
if ch == "!" or ch == "?"
token.concat getc
end
# fix token
# $stderr.puts "identifier - #{token}, state = #@lex_state"
# Names with a sigil are returned right away.
case token
when /^\$/
return Token(TkGVAR, token).set_text(token)
when /^\@/
@lex_state = EXPR_END
return Token(TkIVAR, token).set_text(token)
end
# After a "." (EXPR_DOT) the keyword table is not consulted.
if @lex_state != EXPR_DOT
print token, "\n" if RDoc::RubyLex.debug?
# First element of the table entry is the token class; the rest is kept in
# trans, where trans[0] is used as the next lexer state and trans[1], when
# present, is looked up in TkSymbol2Token for the modifier form.
token_c, *trans = TkReading2Token[token]
if token_c
# reserved word?
if (@lex_state != EXPR_BEG &&
@lex_state != EXPR_FNAME &&
trans[1])
# modifiers
token_c = TkSymbol2Token[trans[1]]
@lex_state = trans[0]
else
if @lex_state != EXPR_FNAME
# Block-opening keywords raise @indent, "end" lowers it.
if ENINDENT_CLAUSE.include?(token)
@indent += 1
elsif DEINDENT_CLAUSE.include?(token)
@indent -= 1
end
@lex_state = trans[0]
else
@lex_state = EXPR_END
end
end
return Token(token_c, token).set_text(token)
end
end
# Not a keyword (or keyword lookup skipped): pick the follow-up state.
if @lex_state == EXPR_FNAME
@lex_state = EXPR_END
# In EXPR_FNAME a directly following "=" is taken into the name.
if peek(0) == '='
token.concat getc
end
elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
@lex_state = EXPR_ARG
else
@lex_state = EXPR_END
end
# Classify by spelling: leading capital => constant, trailing "!"/"?" => TkFID.
if token[0, 1] =~ /[A-Z]/
return Token(TkCONSTANT, token).set_text(token)
elsif token[token.size - 1, 1] =~ /[!?]/
return Token(TkFID, token).set_text(token)
else
return Token(TkIDENTIFIER, token).set_text(token)
end
end
# Reads a here-document whose "<<" has already been consumed and returns a
# token whose value is the document body and whose text is body.dump.
#
# Steps:
# 1. Parse the terminator: an optional "-" (terminator may be indented), then
#    either a quoted terminator ('..', "..", `..`) or a bare run of word
#    characters. The quote character selects the token class via Ltype2Token.
# 2. Save the rest of the introducing line (up to and including its newline)
#    in +reserve+; a backslash keeps the following character with it.
# 3. Collect body lines through gets until the terminator line. For "<<-" the
#    comparison ignores surrounding whitespace, but body lines are stored with
#    their indentation intact.
# 4. Hand +reserve+ back to the reader with divert_read_from.
#
# @ltype is set to the quote type while reading and restored afterwards;
# @lex_state ends as EXPR_END.
def identify_here_document
  ch = getc
  if ch == "-"
    ch = getc
    indent = true
  end

  if /['"`]/ =~ ch # '
    lt = ch
    quoted = ""
    while (c = getc) && c != lt
      quoted.concat c
    end
  else
    lt = '"'
    quoted = ch ? ch.dup : ""
    while (c = getc) && c =~ /\w/
      quoted.concat c
    end
    ungetc
  end

  ltback, @ltype = @ltype, lt

  reserve = ""
  while (ch = getc)
    reserve << ch
    if ch == "\\" #"
      ch = getc
      reserve << ch if ch
    elsif ch == "\n"
      break
    end
  end

  str = ""
  while (l = gets)
    l.chomp!
    # Strip only for the comparison so the body keeps its leading whitespace.
    break if (indent ? l.strip : l) == quoted
    str << l << "\n"
  end

  @reader.divert_read_from(reserve)
  @ltype = ltback
  @lex_state = EXPR_END
  Token(Ltype2Token[lt], str).set_text(str.dump)
end
# Handles a "%" literal whose "%" has already been consumed and delegates the
# actual reading to identify_string.
#
# initial_char:: text consumed so far (the caller passes "%"); the type
#                letter, when present, is appended to a copy of it.
#
# The character after "%" is either a type letter found in PERCENT_LTYPE
# (the delimiter then follows it) or a non-word character that serves as the
# delimiter of a double-quoted string. Anything else raises SyntaxError.
# @quoted is set to the closing delimiter: the PERCENT_PAREN partner for an
# opening bracket, otherwise the delimiter itself.
def identify_quotation(initial_char)
  ch = getc
  lt = PERCENT_LTYPE[ch]

  if lt
    initial_char += ch
    ch = getc
  elsif ch =~ /\W/
    lt = "\""
  else
    fail SyntaxError, "unknown type of %string ('#{ch}')"
  end

  @quoted = PERCENT_PAREN[ch] || ch
  identify_string(lt, @quoted, ch, initial_char)
end
# Reads a numeric literal and returns a TkINTEGER or TkFLOAT token whose text
# is the literal as written. Sets @lex_state to EXPR_END.
#
# start:: text already consumed for this literal: "+", "-" or "" (in which
#         case the first character is read here), or the first digit itself.
#
# A leading "0" selects hexadecimal ("0x"/"0X", digits in either case) or
# octal, unless it is followed by ".", "e" or "E", in which case the literal
# is read as a decimal such as 0.5. For decimals, a "." only belongs to the
# number when a digit follows it; otherwise it is pushed back and the literal
# stays an integer (e.g. "1.foo").
def identify_number(start)
  str = start.dup
  if start == "+" || start == "-" || start == ""
    start = getc
    str << start
  end
  @lex_state = EXPR_END

  if start == "0" && peek(0) !~ /[.eE]/
    if peek(0) =~ /[xX]/
      str << getc
      digits = /[0-9a-fA-F_]/
    else
      digits = /[0-7_]/
    end
    while (ch = getc)
      if ch !~ digits
        ungetc
        break
      end
      str << ch
    end
    return Token(TkINTEGER).set_text(str)
  end

  type = TkINTEGER
  allow_point = true
  allow_e = true
  while (ch = getc)
    if ch =~ /[0-9_]/
      str << ch
    elsif allow_point && ch == "."
      # Only a "." followed by a digit makes this a float.
      unless peek(0) =~ /[0-9]/
        ungetc
        break
      end
      type = TkFLOAT
      str << ch
      allow_point = false
    elsif allow_e && (ch == "e" || ch == "E")
      str << ch
      type = TkFLOAT
      str << getc if peek(0) =~ /[+-]/
      # Neither a second exponent nor a fraction may follow the exponent.
      allow_e = false
      allow_point = false
    else
      ungetc
      break
    end
  end
  Token(type).set_text(str)
end
# Reads a string-like literal (string, backtick command, regexp, %-literal)
# up to its closing delimiter and returns the token for it. The token text is
# the literal as written, including delimiters.
#
# ltype::        literal type key used with Ltype2Token / DLtype2Token
# quoted::       closing delimiter (defaults to ltype)
# opener::       opening delimiter when it differs from the closing one, so
#                that nested pairs can be counted; nil otherwise
# initial_char:: text that preceded the opener (e.g. "%q"), or nil
#
# While reading:
# * a backslash takes the following escape (via read_escape) with it, so an
#   escaped delimiter does not end the literal;
# * "#{" starts an interpolation (skipped with skip_inner_expression) unless
#   the type is "'" or "]"; the literal then maps through DLtype2Token;
# * for regexps, all trailing option letters are appended.
#
# @ltype and @quoted are set during the read and always cleared afterwards;
# @lex_state always ends as EXPR_END.
def identify_string(ltype, quoted = ltype, opener=nil, initial_char = nil)
  @ltype = ltype
  @quoted = quoted
  subtype = nil

  str = ""
  str << initial_char if initial_char
  str << (opener||quoted)

  nest = 0
  while (ch = getc)
    str << ch
    if @quoted == ch
      break if nest == 0
      nest -= 1
    elsif opener == ch
      nest += 1
    elsif @ltype != "'" && @ltype != "]" && ch == "#"
      ch = getc
      if ch == "{"
        subtype = true
        str << ch << skip_inner_expression
      else
        # Not an interpolation: let the main loop see this character again.
        ungetc(ch)
      end
    elsif ch == "\\"
      str << read_escape
    end
  end

  if @ltype == "/"
    str << getc while peek(0) =~ /[imxouesn]/
  end

  token_class = subtype ? DLtype2Token[ltype] : Ltype2Token[ltype]
  Token(token_class, str).set_text(str)
ensure
  @ltype = nil
  @quoted = nil
  @lex_state = EXPR_END
end
# Reads the body of a "#{...}" interpolation whose "#{" has already been
# consumed. Returns everything up to and including the matching "}", counting
# nested braces; at end of input it returns what was read so far.
def skip_inner_expression
  depth = 0
  collected = ""

  while (c = getc)
    collected << c
    case c
    when '{'
      depth += 1
    when '}'
      break if depth.zero?
      depth -= 1
    end
  end

  collected
end
# Reads the rest of a "#" comment and returns it as a TkCOMMENT token.
#
# The token text starts with "#" and holds every character up to, but not
# including, the terminating newline; that newline is pushed back with ungetc
# so the newline rule still sees it. A backslash has no special meaning inside
# a comment: in particular a trailing backslash does not continue the comment
# onto the following line.
#
# @ltype is "#" while the comment is being read and is reset to nil once the
# terminating newline is found.
def identify_comment
  @ltype = "#"
  comment = "#"

  while (ch = getc)
    if ch == "\n"
      @ltype = nil
      ungetc
      break
    end
    comment << ch
  end

  Token(TkCOMMENT).set_text(comment)
end
# Reads the part of a backslash escape that follows the backslash and returns
# it as written in the source, so callers can append it to the token text.
#
# Handled forms: up to three octal digits, "x" plus up to two hex digits,
# "M-x", "C-x" and "cx" (where x may itself be a backslash escape), and any
# other single character. At end of input the text read so far is returned.
def read_escape
  res = ""
  case ch = getc
  when /[0-7]/
    # Push the first digit back and read up to three octal digits.
    ungetc ch
    3.times do
      case ch = getc
      when /[0-7]/
      when nil
        break
      else
        ungetc
        break
      end
      res << ch
    end
  when "x"
    res << ch
    2.times do
      case ch = getc
      when /[0-9a-fA-F]/
      when nil
        break
      else
        ungetc
        break
      end
      res << ch
    end
  when "M"
    res << ch
    if (ch = getc) != '-'
      ungetc
    else
      res << ch
      if (ch = getc) == "\\" #"
        res << ch << read_escape
      elsif ch
        res << ch
      end
    end
  when "C", "c" #, "^"
    res << ch
    if ch == "C"
      # "\C" must be followed by "-"; otherwise only "C" is taken.
      if (ch = getc) != "-"
        ungetc
        return res
      end
      res << ch
    end
    # The controlled character, which may be another escape.
    if (ch = getc) == "\\" #"
      res << ch << read_escape
    elsif ch
      res << ch
    end
  when nil
    # End of input directly after the backslash: nothing to add.
  else
    res << ch
  end
  res
end
end
# File rdoc/parser/ruby.rb, line 594
# Creates the operator table (@OP, an IRB::SLex) and registers the first half
# of the lexer rules on it; lex_int2 registers the rest.
#
# Each rule pairs literal text (optionally with a guard proc) with a block
# whose value is the token built for that text. The blocks read further input
# with getc/peek/ungetc and update @lex_state, @space_seen and @continue.
def lex_init()
  @OP = IRB::SLex.new

  # NUL, ^D and ^Z mark the end of the script.
  @OP.def_rules("\0", "\004", "\032") do |chars, io|
    Token(TkEND_OF_SCRIPT).set_text(chars)
  end

  # A run of blanks (space, tab, form feed, CR, vertical tab) is one TkSPACE.
  @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |chars, io|
    @space_seen = true
    while (ch = getc) =~ /[ \t\f\r\13]/
      chars << ch
    end
    ungetc
    Token(TkSPACE).set_text(chars)
  end

  @OP.def_rule("#") do |op, io|
    identify_comment
  end

  # =begin ... =end block, only when "=begin" starts in column 0.
  @OP.def_rule("=begin", proc{@prev_char_no == 0 && peek(0) =~ /\s/}) do |op, io|
    str = op
    @ltype = "="
    ch = nil
    loop do
      line = ""
      while (ch = getc)
        line << ch
        break if ch == "\n"
      end
      str << line
      # Stop at the "=end" line, or at end of input for an unterminated block.
      break if ch.nil? || line =~ /^=end/
    end
    # Give the newline that ended the "=end" line back to the reader.
    ungetc if ch
    @ltype = nil

    if str =~ /\A=begin\s+rdoc/
      # "=begin rdoc" blocks are documentation: keep the body as a comment.
      str.sub!(/\A=begin.*\n/, '')
      str.sub!(/^=end.*/, '')
      Token(TkCOMMENT).set_text(str)
    else
      Token(TkRD_COMMENT)#.set_text(str)
    end
  end

  @OP.def_rule("\n") do
    print "\\n\n" if RDoc::RubyLex.debug?
    case @lex_state
    when EXPR_BEG, EXPR_FNAME, EXPR_DOT
      # The expression is not finished yet: the logical line continues.
      @continue = true
    else
      @continue = false
      @lex_state = EXPR_BEG
    end
    Token(TkNL).set_text("\n")
  end

  @OP.def_rules("*", "**",
                "!", "!=", "!~",
                "=", "==", "===",
                "=~", "<=>",
                "<", "<=",
                ">", ">=", ">>") do |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  # "<<" is either a here-document introducer or the shift/append operator.
  @OP.def_rules("<<") do |op, io|
    tk = nil
    if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
       (@lex_state != EXPR_ARG || @space_seen)
      c = peek(0)
      # "-", a word character or one of the quotes identify_here_document
      # accepts (' " `) starts a here-document.
      if /[-\w_\"\'\`]/ =~ c
        tk = identify_here_document
      end
    end
    if !tk
      @lex_state = EXPR_BEG
      tk = Token(op).set_text(op)
    end
    tk
  end

  @OP.def_rules("'", '"') do |op, io|
    identify_string(op)
  end

  @OP.def_rules("`") do |op, io|
    if @lex_state == EXPR_FNAME
      # Backtick used as a method name.
      Token(op).set_text(op)
    else
      identify_string(op)
    end
  end

  # "?" is the ternary operator or the start of a character literal (?a, ?\n).
  @OP.def_rules('?') do |op, io|
    if @lex_state == EXPR_END
      @lex_state = EXPR_BEG
      Token(TkQUESTION).set_text(op)
    else
      ch = getc
      if @lex_state == EXPR_ARG && ch !~ /\s/
        ungetc
        @lex_state = EXPR_BEG
        Token(TkQUESTION).set_text(op)
      else
        str = op
        str << ch
        if ch == '\\'
          str << read_escape
        end
        @lex_state = EXPR_END
        Token(TkINTEGER).set_text(str)
      end
    end
  end

  @OP.def_rules("&", "&&", "|", "||") do |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  # Operator assignments: the token value is the operator without the "=".
  @OP.def_rules("+=", "-=", "*=", "**=",
                "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do |op, io|
    @lex_state = EXPR_BEG
    op =~ /^(.*)=$/
    Token(TkOPASGN, $1).set_text(op)
  end

  # Unary operator method names, valid only where a method name is expected.
  @OP.def_rule("+@", proc{@lex_state == EXPR_FNAME}) do |op, io|
    Token(TkUPLUS).set_text(op)
  end

  @OP.def_rule("-@", proc{@lex_state == EXPR_FNAME}) do |op, io|
    Token(TkUMINUS).set_text(op)
  end

  # "+"/"-" directly before a digit may be the sign of a numeric literal.
  @OP.def_rules("+", "-") do |op, io|
    catch(:RET) do
      if @lex_state == EXPR_ARG
        if @space_seen and peek(0) =~ /[0-9]/
          throw :RET, identify_number(op)
        else
          @lex_state = EXPR_BEG
        end
      elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
        throw :RET, identify_number(op)
      else
        @lex_state = EXPR_BEG
      end
      Token(op).set_text(op)
    end
  end

  @OP.def_rule(".") do
    @lex_state = EXPR_BEG
    if peek(0) =~ /[0-9]/
      # ".5": push the dot back and let identify_number read it.
      ungetc
      identify_number("")
    else
      # for obj.if
      @lex_state = EXPR_DOT
      Token(TkDOT).set_text(".")
    end
  end

  @OP.def_rules("..", "...") do |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  lex_int2
end
# File rdoc/parser/ruby.rb, line 780
# Registers the second half of the lexer rules on @OP: closing and opening
# brackets, ":" / "::", "/", "^", "," / ";", "~", "\", "%", "$", "@",
# "__END__" and the catch-all rule for numbers and identifiers.
#
# As in lex_init, each block's value is the token for the matched text and
# the blocks update @lex_state and @indent as they go.
def lex_int2
  @OP.def_rules("]", "}", ")") do |op, io|
    @lex_state = EXPR_END
    @indent -= 1
    Token(op).set_text(op)
  end

  # ":" alone (ternary/hash separator) versus the start of a symbol.
  @OP.def_rule(":") do
    if @lex_state == EXPR_END || peek(0) =~ /\s/
      @lex_state = EXPR_BEG
      tk = Token(TkCOLON)
    else
      @lex_state = EXPR_FNAME
      tk = Token(TkSYMBEG)
    end
    tk.set_text(":")
  end

  # Leading "::" (TkCOLON3) versus scope resolution after a name (TkCOLON2).
  @OP.def_rule("::") do
    if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
      @lex_state = EXPR_BEG
      tk = Token(TkCOLON3)
    else
      @lex_state = EXPR_DOT
      tk = Token(TkCOLON2)
    end
    tk.set_text("::")
  end

  # "/" starts a regexp where an operand is expected, otherwise it divides.
  @OP.def_rule("/") do |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_string(op)
    elsif peek(0) == '='
      getc
      @lex_state = EXPR_BEG
      Token(TkOPASGN, :/).set_text("/=")
    elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
      identify_string(op)
    else
      @lex_state = EXPR_BEG
      Token("/").set_text(op)
    end
  end

  @OP.def_rules("^") do
    @lex_state = EXPR_BEG
    Token("^").set_text("^")
  end

  @OP.def_rules(",", ";") do |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  @OP.def_rule("~") do
    @lex_state = EXPR_BEG
    Token("~").set_text("~")
  end

  # "~@" is only a token where a method name is expected. The guard must
  # compare, not assign, so that it leaves @lex_state untouched.
  @OP.def_rule("~@", proc{@lex_state == EXPR_FNAME}) do
    @lex_state = EXPR_BEG
    Token("~").set_text("~@")
  end

  @OP.def_rule("(") do
    @indent += 1
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      @lex_state = EXPR_BEG
      tk = Token(TkfLPAREN)
    else
      @lex_state = EXPR_BEG
      tk = Token(TkLPAREN)
    end
    tk.set_text("(")
  end

  # "[]" and "[]=" as method names.
  @OP.def_rule("[]", proc{@lex_state == EXPR_FNAME}) do
    Token("[]").set_text("[]")
  end

  @OP.def_rule("[]=", proc{@lex_state == EXPR_FNAME}) do
    Token("[]=").set_text("[]=")
  end

  @OP.def_rule("[") do
    @indent += 1
    if @lex_state == EXPR_FNAME
      t = Token(TkfLBRACK)
    else
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
        t = Token(TkLBRACK)
      elsif @lex_state == EXPR_ARG && @space_seen
        t = Token(TkLBRACK)
      else
        t = Token(TkfLBRACK)
      end
      @lex_state = EXPR_BEG
    end
    t.set_text("[")
  end

  @OP.def_rule("{") do
    @indent += 1
    if @lex_state != EXPR_END && @lex_state != EXPR_ARG
      t = Token(TkLBRACE)
    else
      t = Token(TkfLBRACE)
    end
    @lex_state = EXPR_BEG
    t.set_text("{")
  end

  # Backslash-newline is a line continuation and is reported as white space.
  @OP.def_rule('\\') do
    if getc == "\n"
      @space_seen = true
      @continue = true
      Token(TkSPACE).set_text("\\\n")
    else
      ungetc
      Token("\\").set_text("\\")
    end
  end

  # "%" starts a %-literal where an operand is expected, otherwise it is the
  # modulo operator (or "%=").
  @OP.def_rule('%') do |op, io|
    if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
      identify_quotation('%')
    elsif peek(0) == '='
      getc
      # Same follow-up state as the other operator assignments.
      @lex_state = EXPR_BEG
      Token(TkOPASGN, "%").set_text("%=")
    elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
      identify_quotation('%')
    else
      @lex_state = EXPR_BEG
      Token("%").set_text("%")
    end
  end

  @OP.def_rule('$') do
    identify_gvar
  end

  @OP.def_rule('@') do
    if peek(0) =~ /[@\w_]/
      # Push the "@" back so identify_identifier reads the full name.
      ungetc
      identify_identifier
    else
      Token("@").set_text("@")
    end
  end

  # "__END__" alone at the start of a line stops lexing.
  @OP.def_rule("__END__", proc{@prev_char_no == 0 && peek(0) =~ /[\r\n]/}) do
    throw :eof
  end

  # Catch-all: numbers and identifiers. Yields nil when neither applies.
  @OP.def_rule("") do |op, io|
    printf "MATCH: start %s: %s\n", op, io.inspect if RDoc::RubyLex.debug?
    if peek(0) =~ /[0-9]/
      t = identify_number("")
    elsif peek(0) =~ /[\w_]/
      t = identify_identifier
    end
    printf "MATCH: end %s: %s\n", op, io.inspect if RDoc::RubyLex.debug?
    t
  end
end
io functions
# File rdoc/parser/ruby.rb, line 480
# Returns the line number reported by the underlying reader (@reader.line_num).
def line_no
@reader.line_num
end
# File rdoc/parser/ruby.rb, line 519
# Delegates to the reader's peek with offset +i+ (default 0) and returns its
# result. Callers in this file compare the result with single characters
# without calling getc for them.
def peek(i = 0)
@reader.peek(i)
end
# File rdoc/parser/ruby.rb, line 515
# Delegates to the reader's peek_equal with +str+ and returns its result.
# NOTE(review): presumably true when the upcoming input equals +str+ —
# confirm against the reader implementation.
def peek_equal?(str)
@reader.peek_equal(str)
end
# File rdoc/parser/ruby.rb, line 1279
# Reads the part of a backslash escape that follows the backslash and returns
# it as written in the source, so callers can append it to the token text.
#
# Handled forms: up to three octal digits, "x" plus up to two hex digits,
# "M-x", "C-x" and "cx" (where x may itself be a backslash escape), and any
# other single character. At end of input the text read so far is returned.
def read_escape
  res = ""
  case ch = getc
  when /[0-7]/
    # Push the first digit back and read up to three octal digits.
    ungetc ch
    3.times do
      case ch = getc
      when /[0-7]/
      when nil
        break
      else
        ungetc
        break
      end
      res << ch
    end
  when "x"
    res << ch
    2.times do
      case ch = getc
      when /[0-9a-fA-F]/
      when nil
        break
      else
        ungetc
        break
      end
      res << ch
    end
  when "M"
    res << ch
    if (ch = getc) != '-'
      ungetc
    else
      res << ch
      if (ch = getc) == "\\" #"
        res << ch << read_escape
      elsif ch
        res << ch
      end
    end
  when "C", "c" #, "^"
    res << ch
    if ch == "C"
      # "\C" must be followed by "-"; otherwise only "C" is taken.
      if (ch = getc) != "-"
        ungetc
        return res
      end
      res << ch
    end
    # The controlled character, which may be another escape.
    if (ch = getc) == "\\" #"
      res << ch << read_escape
    elsif ch
      res << ch
    end
  when nil
    # End of input directly after the backslash: nothing to add.
  else
    res << ch
  end
  res
end
Commenting is here to help enhance the documentation. For example, code samples, or clarification of the documentation.
If you have questions about Ruby or the documentation, please post to one of the Ruby mailing lists. You will get better and faster help that way.
If you wish to post a correction to the docs, please do so, but also file a bug report so that it can be corrected for the next release. Thank you.
If you want to help improve the Ruby documentation, please see Improve the docs, or visit Documenting-ruby.org.