In Files

  • rdoc/parsers/parse_rb.rb

Files

Class/Module Index [+]

Quicksearch

RubyLex

Lexical analyzer for Ruby source

Attributes

continue[R]
exception_on_syntax_error[RW]
indent[R]
lex_state[R]
read_auto_clean_up[RW]
skip_space[RW]

Public Class Methods

debug?() click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 443
# Reports whether lexer debug tracing is switched on. It always returns
# false, so the `if RubyLex.debug?` trace statements in the lexer stay silent.
def RubyLex.debug?
  false
end
            
new(content) click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 447
# Creates a lexer over +content+.
#
# The operator rules are installed first (lex_init), then +content+ is
# wrapped in a BufferedReader and all bookkeeping is reset to its
# start-of-input values.
def initialize(content)
  lex_init
  @reader = BufferedReader.new(content)

  # Position bookkeeping.
  @line_no      = 1
  @exp_line_no  = 1
  @base_char_no = 0
  @indent       = 0

  # Lexer state: no literal open, at the start of an expression.
  @ltype      = nil
  @quoted     = nil
  @lex_state  = EXPR_BEG
  @space_seen = false
  @continue   = false
  @line       = ""

  # Behaviour flags exposed through the read/write attributes.
  @skip_space                = false
  @read_auto_clean_up        = false
  @exception_on_syntax_error = true
end
            

Public Instance Methods

char_no() click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 480
# Returns the current column as reported by the underlying reader.
def char_no
  @reader.column
end
            
get_read() click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 484
# Delegates to the reader's get_read. #lex treats the result as the text of
# the line just lexed and compares it against "".
# NOTE(review): presumably this returns (and resets) the text consumed since
# the previous call -- confirm against BufferedReader.
def get_read
  @reader.get_read
end
            
getc() click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 488
# Reads the next character from the reader. Callers loop on the result and
# treat a nil/false return as end of input.
def getc
  @reader.getc
end
            
getc_of_rests() click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 492
# Delegates to the reader's getc_already_read.
# NOTE(review): the exact semantics live in BufferedReader, which is not
# visible here -- confirm before relying on this method.
def getc_of_rests
  @reader.getc_already_read
end
            
gets() click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 496
# Reads one line through #getc.
#
# Carriage returns are dropped; the terminating "\n" (if any) is kept.
# Returns nil when no character is left to read.
def gets
  ch = getc
  return unless ch

  line = ""
  while ch
    line.concat(ch) unless ch == "\r"
    break if ch == "\n"
    ch = getc
  end
  line
end
            
identify_comment() click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 1269
# Lexes a "#" comment whose leading "#" has already been consumed.
#
# A backslash followed by a newline is folded into a single space and the
# comment continues on the next line; any other backslash is kept verbatim.
# The newline that ends the comment is pushed back for the caller.
# Returns a TkCOMMENT token whose text is the collected comment.
def identify_comment
  @ltype = "#"
  text = "#"
  while (c = getc)
    case c
    when "\\"
      c = getc
      if c == "\n"
        c = " "
      else
        text << "\\"
      end
    when "\n"
      @ltype = nil
      ungetc
      break
    end
    text << c
  end
  Token(TkCOMMENT).set_text(text)
end
            
identify_gvar() click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 964
# Lexes what follows a "$" that has already been consumed.
#
# Depending on the next character this yields a TkGVAR (punctuation globals
# and "$-x"), a TkBACK_REF ($& $` $' $+), a TkNTH_REF ($1, $23, ...), hands
# over to identify_identifier for word-like names, or falls back to a bare
# "$" token. The state is set to EXPR_END before anything is read.
def identify_gvar
  @lex_state = EXPR_END
  name = "$"
  first = getc

  if /[~_*$?!@\/\;,=:<>".]/ =~ first
    name << first
    tk = Token(TkGVAR, name)
  elsif first == "-"
    name << "-" << getc
    tk = Token(TkGVAR, name)
  elsif ["&", "`", "'", "+"].include?(first)
    name << first
    tk = Token(TkBACK_REF, name)
  elsif /[1-9]/ =~ first
    name << first
    while (digit = getc) =~ /[0-9]/
      name << digit
    end
    # The character that ended the digit run belongs to the next token.
    ungetc
    tk = Token(TkNTH_REF)
  elsif /\w/ =~ first
    # Rewind over both the word character and the "$" itself so that
    # identify_identifier sees the whole name.
    ungetc
    ungetc
    return identify_identifier
  else
    ungetc
    tk = Token("$")
  end

  tk.set_text(name)
end
            
identify_here_document() click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 1074
# Lexes a here document; the introducing "<<" has already been consumed.
#
# Reads the optional "-" flag and the (possibly quoted) terminator, sets the
# rest of the introducing line aside, collects the body lines up to the
# terminator, and finally hands the set-aside text back to the reader via
# divert_read_from so that it is lexed next.
#
# With the "-" flag only the terminator comparison ignores surrounding
# whitespace; the body lines themselves are kept exactly as written.
#
# Returns a token of kind Ltype2Token[quote] whose value is the body and
# whose text is the dumped (escaped) body.
def identify_here_document
  ch = getc
  allow_indent = false
  if ch == "-"
    ch = getc
    allow_indent = true
  end

  if /['"`]/ =~ ch
    # Quoted terminator: everything up to the matching quote.
    lt = ch
    quoted = ""
    while (c = getc) && c != lt
      quoted.concat c
    end
  else
    # Bare terminator: a run of word characters, treated as double-quoted.
    lt = '"'
    quoted = ch.to_s.dup
    while (c = getc) && c =~ /\w/
      quoted.concat c
    end
    ungetc
  end

  ltback, @ltype = @ltype, lt

  # Set aside the remainder of the introducing line (honouring backslash
  # escapes) so it can be re-read after the body has been consumed.
  reserve = ""
  while ch = getc
    reserve << ch
    if ch == "\\"
      ch = getc
      reserve << ch if ch
    elsif ch == "\n"
      break
    end
  end

  str = ""
  while (l = gets)
    l = l.chomp
    break if (allow_indent ? l.strip : l) == quoted
    str << l << "\n"
  end

  @reader.divert_read_from(reserve)

  @ltype = ltback
  @lex_state = EXPR_END
  Token(Ltype2Token[lt], str).set_text(str.dump)
end
            
identify_identifier() click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 999
# Lexes an identifier-like token starting at the current read position:
# global/instance/class variables, reserved words, constants, method names
# ending in "!" or "?", and plain identifiers.
#
# Updates @lex_state (and @indent for block-opening/closing reserved words)
# and returns the matching token with its text set to the lexeme.
def identify_identifier
  token = ""
  token.concat getc if peek(0) =~ /[$@]/
  token.concat getc if peek(0) == "@"

  while (ch = getc) =~ /\w|_/
    print ":", ch, ":" if RubyLex.debug?
    token.concat ch
  end
  ungetc

  # A trailing "!" or "?" belongs to the name, unless it is the start of an
  # operator such as "!=" (so that "a!=b" lexes as a, !=, b). After the
  # ungetc above, peek(0) is the "!"/"?" itself and peek(1) the character
  # that follows it.
  if (ch == "!" or ch == "?") and peek(1) != "="
    token.concat getc
  end

  # $stderr.puts "identifier - #{token}, state = #@lex_state"

  case token
  when /^\$/
    return Token(TkGVAR, token).set_text(token)
  when /^\@/
    @lex_state = EXPR_END
    return Token(TkIVAR, token).set_text(token)
  end

  # After a "." every word is a method name, never a reserved word.
  if @lex_state != EXPR_DOT
    print token, "\n" if RubyLex.debug?

    # trans[0] is the lex state that follows the word; trans[1], when
    # present, names the modifier form of the word.
    token_c, *trans = TkReading2Token[token]
    if token_c
      # reserved word?

      if (@lex_state != EXPR_BEG &&
          @lex_state != EXPR_FNAME &&
          trans[1])
        # modifiers
        token_c = TkSymbol2Token[trans[1]]
        @lex_state = trans[0]
      else
        if @lex_state != EXPR_FNAME
          if ENINDENT_CLAUSE.include?(token)
            @indent += 1
          elsif DEINDENT_CLAUSE.include?(token)
            @indent -= 1
          end
          @lex_state = trans[0]
        else
          # A reserved word used as a method name (e.g. after "def").
          @lex_state = EXPR_END
        end
      end
      return Token(token_c, token).set_text(token)
    end
  end

  if @lex_state == EXPR_FNAME
    @lex_state = EXPR_END
    # Setter method names ("name=") keep their trailing "=".
    if peek(0) == '='
      token.concat getc
    end
  elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
    @lex_state = EXPR_ARG
  else
    @lex_state = EXPR_END
  end

  if token[0, 1] =~ /[A-Z]/
    return Token(TkCONSTANT, token).set_text(token)
  elsif token[token.size - 1, 1] =~ /[!?]/
    return Token(TkFID, token).set_text(token)
  else
    return Token(TkIDENTIFIER, token).set_text(token)
  end
end
            
identify_number(start) click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 1142
# Lexes a numeric literal.
#
# +start+ is what the caller has already consumed: "+" or "-" for a signed
# number, or "" when the first character (a digit, or a leading ".") is
# still unread.
#
# Handles hexadecimal ("0x"/"0X") and octal ("0" followed by an octal digit)
# integers, decimal integers, and floats with an optional fraction and
# exponent. A "." is only taken as a decimal point when a digit follows, so
# "1.upto" yields the integer "1" and leaves ".upto" unread.
#
# Returns a TkINTEGER or TkFLOAT token whose text is the literal.
def identify_number(start)
  str = start.dup

  if start == "+" or start == "-" or start == ""
    start = getc
    str << start if start
  end

  @lex_state = EXPR_END

  if start == "0"
    digits = nil
    if peek(0) =~ /[xX]/
      str << getc
      digits = /[0-9a-fA-F_]/
    elsif peek(0) =~ /[0-7_]/
      digits = /[0-7_]/
    end

    # A lone "0", "0.5" or "0e3" is not prefixed; it falls through to the
    # decimal scanner below.
    if digits
      while ch = getc
        if ch !~ digits
          ungetc
          break
        end
        str << ch
      end
      return Token(TkINTEGER).set_text(str)
    end
  end

  type = TkINTEGER
  allow_point = true
  allow_e = true

  # A literal that starts with "." is already inside its fraction.
  if start == "."
    type = TkFLOAT
    allow_point = false
  end

  while ch = getc
    case ch
    when /[0-9_]/
      str << ch

    when allow_point && "."
      # Not a decimal point unless a digit follows (e.g. a method call).
      if peek(0) !~ /[0-9]/
        ungetc
        break
      end
      type = TkFLOAT
      str << ch
      allow_point = false

    when allow_e && "e", allow_e && "E"
      str << ch
      type = TkFLOAT
      if peek(0) =~ /[+-]/
        str << getc
      end
      allow_e = false
      allow_point = false
    else
      ungetc
      break
    end
  end
  Token(type).set_text(str)
end
            
identify_quotation(initial_char) click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 1123
  # Lexes a "%" literal; +initial_char+ is the text consumed so far ("%").
  #
  # The next character is either a type letter listed in PERCENT_LTYPE
  # (followed by the delimiter) or a non-word character used directly as the
  # delimiter of a double-quoted string. Anything else is reported through
  # RubyLex.fail as a SyntaxError. The closing delimiter is looked up in
  # PERCENT_PAREN and defaults to the opening one; the literal body is then
  # read by identify_string.
  def identify_quotation(initial_char)
    delim = getc
    ltype = PERCENT_LTYPE[delim]
    if ltype
      initial_char += delim
      delim = getc
    elsif delim =~ /\W/
      ltype = "\""
    else
      RubyLex.fail SyntaxError, "unknown type of %string ('#{delim}')"
    end

    @quoted = PERCENT_PAREN[delim] || delim
    identify_string(ltype, @quoted, delim, initial_char)
  end
            
identify_string(ltype, quoted = ltype, opener=nil, initial_char = nil) click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 1204
# Lexes the body of a string-like literal whose opening delimiter has
# already been consumed.
#
# ltype::        literal type marker (the key used in Ltype2Token)
# quoted::       closing delimiter
# opener::       opening delimiter when it differs from the closing one, so
#                that nested pairs such as "(...(...)...)" are balanced
# initial_char:: text that preceded the delimiter (e.g. "%w"), kept only so
#                that the token text reproduces the source
#
# Backslash escapes are skipped via read_escape, so an escaped delimiter
# does not end the literal. "#{...}" interpolation (not recognised for
# ltype "'" or "]") is skipped via skip_inner_expression and switches the
# token kind to the DLtype2Token variant. For regexps every trailing option
# letter is consumed.
#
# Always resets @ltype/@quoted and leaves the lexer in EXPR_END, even when
# an exception propagates.
def identify_string(ltype, quoted = ltype, opener=nil, initial_char = nil)
  @ltype = ltype
  @quoted = quoted
  subtype = nil

  str = ""
  str << initial_char if initial_char
  str << (opener||quoted)

  nest = 0
  begin
    while ch = getc
      str << ch
      if @quoted == ch
        if nest == 0
          break
        else
          nest -= 1
        end
      elsif opener == ch
        nest += 1
      elsif @ltype != "'" && @ltype != "]" and ch == "#"
        ch = getc
        if ch == "{"
          subtype = true
          str << ch << skip_inner_expression
        else
          ungetc(ch)
        end
      elsif ch == "\\"
        str << read_escape
      end
    end

    if @ltype == "/"
      # Regexp options may be combined (e.g. /x/mi).
      str << getc while peek(0) =~ /[imxoesun]/
    end

    kinds = subtype ? DLtype2Token : Ltype2Token
    Token(kinds[ltype], str).set_text(str)
  ensure
    @ltype = nil
    @quoted = nil
    @lex_state = EXPR_END
  end
end
            
lex() click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 519
# Lexes one logical line and returns its source text.
#
# Tokens are consumed until a newline or end-of-script token arrives while
# no continuation is pending, or until #token returns nil. The text read so
# far is then fetched with get_read. Returns nil instead of an empty string
# once the input is exhausted.
def lex
  tk = nil
  while true
    tk = token
    break if tk.nil?
    line_ended = tk.kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)
    break if line_ended && !@continue
  end

  line = get_read
  return nil if line == "" && (tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?)
  line
end
            
lex_init() click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 589
# Builds the operator table (@OP, an SLex instance) that drives the lexer.
#
# Every def_rule/def_rules call registers one or more literal prefixes, an
# optional guard proc, and a block that produces the token for that prefix
# while updating @lex_state, @space_seen and @continue as needed. The
# remaining rules are registered by lex_int2, which is called last.
def lex_init()
  @OP = SLex.new
  @OP.def_rules("\0", "\004", "\032") do |chars, io|
    Token(TkEND_OF_SCRIPT).set_text(chars)
  end

  # A run of horizontal whitespace becomes a single TkSPACE token.
  @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |chars, io|
    @space_seen = true
    while (ch = getc) =~ /[ \t\f\r\13]/
      chars << ch
    end
    ungetc
    Token(TkSPACE).set_text(chars)
  end

  @OP.def_rule("#") do
    |op, io|
    identify_comment
  end

  # Embedded documentation: everything up to and including the "=end" line.
  # An "=begin rdoc" block is returned as a comment (without its =begin and
  # =end lines); any other block becomes a text-less TkRD_COMMENT.
  @OP.def_rule("=begin", proc{@prev_char_no == 0 && peek(0) =~ /\s/}) do
    |op, io|
    str = op
    @ltype = "="


    begin
      line = ""
      begin
        ch = getc
        line << ch
      end until ch == "\n"
      str << line
    end until line =~ /^=end/

    ungetc

    @ltype = nil

    if str =~ /\A=begin\s+rdoc/i
      str.sub!(/\A=begin.*\n/, '')
      str.sub!(/^=end.*/m, '')
      Token(TkCOMMENT).set_text(str)
    else
      Token(TkRD_COMMENT)#.set_text(str)
    end
  end

  # A newline only ends the statement when an expression is complete;
  # otherwise the statement continues on the next line.
  @OP.def_rule("\n") do
    print "\\n\n" if RubyLex.debug?
    case @lex_state
    when EXPR_BEG, EXPR_FNAME, EXPR_DOT
      @continue = true
    else
      @continue = false
      @lex_state = EXPR_BEG
    end
    Token(TkNL).set_text("\n")
  end

  @OP.def_rules("*", "**",
                "!", "!=", "!~",
                "=", "==", "===",
                "=~", "<=>",
                "<", "<=",
                ">", ">=", ">>") do
    |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  # "<<" starts a here document when an operand is expected and a
  # terminator (optionally "-"-prefixed or quoted) follows; otherwise it is
  # the plain operator.
  @OP.def_rules("<<") do
    |op, io|
    tk = nil
    if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
        (@lex_state != EXPR_ARG || @space_seen)
      c = peek(0)
      if /[-\w_"'`]/ =~ c
        tk = identify_here_document
      end
    end
    if !tk
      @lex_state = EXPR_BEG
      tk = Token(op).set_text(op)
    end
    tk
  end

  @OP.def_rules("'", '"') do
    |op, io|
    identify_string(op)
  end

  @OP.def_rules("`") do
    |op, io|
    if @lex_state == EXPR_FNAME
      Token(op).set_text(op)
    else
      identify_string(op)
    end
  end

  # "?" is either the ternary operator or the start of a character literal
  # such as ?a or ?\n (returned as TkINTEGER).
  @OP.def_rules('?') do
    |op, io|
    if @lex_state == EXPR_END
      @lex_state = EXPR_BEG
      Token(TkQUESTION).set_text(op)
    else
      ch = getc
      if @lex_state == EXPR_ARG && ch !~ /\s/
        ungetc
        @lex_state = EXPR_BEG;
        Token(TkQUESTION).set_text(op)
      else
        str = op
        str << ch
        if ch == "\\"
          str << read_escape
        end
        @lex_state = EXPR_END
        Token(TkINTEGER).set_text(str)
      end
    end
  end

  @OP.def_rules("&", "&&", "|", "||") do
    |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  # Compound assignment: the token value is the operator without its "=".
  @OP.def_rules("+=", "-=", "*=", "**=",
                "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
    |op, io|
    @lex_state = EXPR_BEG
    op =~ /^(.*)=$/
    Token(TkOPASGN, $1).set_text(op)
  end

  @OP.def_rule("+@", proc{@lex_state == EXPR_FNAME}) do |op, io|
    Token(TkUPLUS).set_text(op)
  end

  @OP.def_rule("-@", proc{@lex_state == EXPR_FNAME}) do |op, io|
    Token(TkUMINUS).set_text(op)
  end

  # A sign directly followed by a digit is part of a numeric literal when
  # an operand is expected; otherwise it is a binary operator.
  @OP.def_rules("+", "-") do
    |op, io|
    catch(:RET) do
      if @lex_state == EXPR_ARG
        if @space_seen and peek(0) =~ /[0-9]/
          throw :RET, identify_number(op)
        else
          @lex_state = EXPR_BEG
        end
      elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
        throw :RET, identify_number(op)
      else
        @lex_state = EXPR_BEG
      end
      Token(op).set_text(op)
    end
  end

  @OP.def_rule(".") do
    @lex_state = EXPR_BEG
    if peek(0) =~ /[0-9]/
      # ".5": push the point back so identify_number reads the whole literal.
      ungetc
      identify_number("")
    else
      # for obj.if
      @lex_state = EXPR_DOT
      Token(TkDOT).set_text(".")
    end
  end

  @OP.def_rules("..", "...") do
    |op, io|
    @lex_state = EXPR_BEG
    Token(op).set_text(op)
  end

  lex_int2
end
            
lex_int2() click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 775
  def lex_int2
    @OP.def_rules("]", "}", ")") do
      |op, io|
      @lex_state = EXPR_END
      @indent -= 1
      Token(op).set_text(op)
    end

    @OP.def_rule(":") do
      if @lex_state == EXPR_END || peek(0) =~ /\s/
        @lex_state = EXPR_BEG
        tk = Token(TkCOLON)
      else
        @lex_state = EXPR_FNAME;
        tk = Token(TkSYMBEG)
      end
      tk.set_text(":")
    end

    @OP.def_rule("::") do
#      p @lex_state.id2name, @space_seen
      if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
        @lex_state = EXPR_BEG
        tk = Token(TkCOLON3)
      else
        @lex_state = EXPR_DOT
        tk = Token(TkCOLON2)
      end
      tk.set_text("::")
    end

    @OP.def_rule("/") do
      |op, io|
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
        identify_string(op)
      elsif peek(0) == '='
        getc
        @lex_state = EXPR_BEG
        Token(TkOPASGN, :/).set_text("/=") #")
      elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
        identify_string(op)
      else 
        @lex_state = EXPR_BEG
        Token("/").set_text(op)
      end
    end

    @OP.def_rules("^") do
      @lex_state = EXPR_BEG
      Token("^").set_text("^")
    end

    #       @OP.def_rules("^=") do
    #   @lex_state = EXPR_BEG
    #   Token(TkOPASGN, :^)
    #       end
    
    @OP.def_rules(",", ";") do
      |op, io|
      @lex_state = EXPR_BEG
      Token(op).set_text(op)
    end

    @OP.def_rule("~") do
      @lex_state = EXPR_BEG
      Token("~").set_text("~")
    end

    @OP.def_rule("~@", proc{@lex_state = EXPR_FNAME}) do
      @lex_state = EXPR_BEG
      Token("~").set_text("~@")
    end
    
    @OP.def_rule("(") do
      @indent += 1
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
        @lex_state = EXPR_BEG
        tk = Token(TkfLPAREN)
      else
        @lex_state = EXPR_BEG
        tk = Token(TkLPAREN)
      end
      tk.set_text("(")
    end

    @OP.def_rule("[]", proc{@lex_state == EXPR_FNAME}) do
      Token("[]").set_text("[]")
    end

    @OP.def_rule("[]=", proc{@lex_state == EXPR_FNAME}) do
      Token("[]=").set_text("[]=")
    end

    @OP.def_rule("[") do
      @indent += 1
      if @lex_state == EXPR_FNAME
        t = Token(TkfLBRACK)
      else
        if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
          t = Token(TkLBRACK)
        elsif @lex_state == EXPR_ARG && @space_seen
          t = Token(TkLBRACK)
        else
          t = Token(TkfLBRACK)
        end
        @lex_state = EXPR_BEG
      end
      t.set_text("[")
    end

    @OP.def_rule("{") do
      @indent += 1
      if @lex_state != EXPR_END && @lex_state != EXPR_ARG
        t = Token(TkLBRACE)
      else
        t = Token(TkfLBRACE)
      end
      @lex_state = EXPR_BEG
      t.set_text("{")
    end

    @OP.def_rule('\') do   #'
      if getc == "\n" 
        @space_seen = true
        @continue = true
        Token(TkSPACE).set_text("\\\n")
      else 
        ungetc
        Token("\\").set_text("\\")  #"
      end 
    end 

    @OP.def_rule('%') do
      |op, io|
      if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
        identify_quotation('%')
      elsif peek(0) == '='
        getc
        Token(TkOPASGN, "%").set_text("%=")
      elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
        identify_quotation('%')
      else
        @lex_state = EXPR_BEG
        Token("%").set_text("%")
      end
    end

    @OP.def_rule('$') do  #'
      identify_gvar
    end

    @OP.def_rule('@') do
      if peek(0) =~ /[@\w_]/
        ungetc
        identify_identifier
      else
        Token("@").set_text("@")
      end
    end

    #       @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do 
    #   |op, io|
    #   @indent += 1
    #   @lex_state = EXPR_FNAME
    # # @lex_state = EXPR_END
    # # until @rests[0] == "\n" or @rests[0] == ";"
    # #   rests.shift
    # # end
    #       end

    @OP.def_rule("__END__", proc{@prev_char_no == 0 && peek(0) =~ /[\r\n]/}) do
      throw :eof
    end

    @OP.def_rule("") do
      |op, io|
      printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
      if peek(0) =~ /[0-9]/
        t = identify_number("")
      elsif peek(0) =~ /[\w_]/
        t = identify_identifier
      end
      printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
      t
    end
    
    p @OP if RubyLex.debug?
  end
            
line_no() click to toggle source

io functions

 
               # File rdoc/parsers/parse_rb.rb, line 476
# Returns the current line number as reported by the underlying reader.
def line_no
  @reader.line_num
end
            
peek(i = 0) click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 515
# Looks ahead in the reader at offset +i+ (default 0). Throughout the lexer
# peek(0) is used to inspect the character that the next #getc would return.
# NOTE(review): presumably nothing is consumed -- confirm against
# BufferedReader#peek.
def peek(i = 0)
  @reader.peek(i)
end
            
peek_equal?(str) click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 511
# Delegates to the reader's peek_equal with +str+.
# NOTE(review): presumably true when the upcoming input starts with +str+;
# the implementation is not visible here -- confirm against BufferedReader.
def peek_equal?(str)
  @reader.peek_equal(str)
end
            
read_escape() click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 1292
# Reads the remainder of a backslash escape (the backslash itself has
# already been consumed) and returns the characters that belong to it:
#
# * up to three octal digits
# * "x" followed by up to two hex digits
# * "M-x", "C-x" and "cx", where x may itself be a backslash escape
# * any other single character
#
# A character that turns out not to belong to the escape is pushed back.
# Reaching the end of input simply ends the escape.
def read_escape
  res = ""
  case ch = getc
  when /[0-7]/
    # Push the first digit back so the loop below reads all of them.
    ungetc ch
    3.times do
      case ch = getc
      when /[0-7]/
      when nil
        break
      else
        ungetc
        break
      end
      res << ch
    end

  when "x"
    res << ch
    2.times do
      case ch = getc
      when /[0-9a-fA-F]/
      when nil
        break
      else
        ungetc
        break
      end
      res << ch
    end

  when "M"
    res << ch
    if (ch = getc) != '-'
      ungetc
    else
      res << ch
      if (ch = getc) == "\\"
        res << ch
        res << read_escape
      elsif ch
        res << ch
      end
    end

  when "C", "c" #, "^"
    res << ch
    complete = true
    if ch == "C"
      # "\C" must be followed by "-"; "\c" takes its operand directly.
      dash = getc
      if dash == "-"
        res << dash
      else
        ungetc
        complete = false
      end
    end
    if complete
      if (ch = getc) == "\\"
        res << ch
        res << read_escape
      elsif ch
        res << ch
      end
    end

  when nil
    # End of input right after the backslash: nothing to add.
  else
    res << ch
  end
  res
end
            
skip_inner_expression() click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 1254
# Consumes the body of a "#{...}" interpolation whose opening brace has
# already been read, balancing nested braces, and returns everything read
# up to and including the closing "}" (or up to the end of input).
def skip_inner_expression
  collected = ""
  depth = 0
  while (c = getc)
    collected << c
    case c
    when '{'
      depth += 1
    when '}'
      break if depth.zero?
      depth -= 1
    end
  end
  collected
end
            
token() click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 533
  # Returns the next token from the input.
  #
  # The token position is recorded first. Tokens are then matched through
  # @OP, updating @space_seen after each match; whitespace tokens are
  # skipped while @skip_space is set. A SyntaxError raised during matching
  # aborts the program when @exception_on_syntax_error is set and is
  # otherwise turned into a TkError token. With @read_auto_clean_up the
  # reader's get_read is called before returning.
  def token
    set_token_position(line_no, char_no)

    tk = nil
    while true
      begin
        tk = @OP.match(self)
        @space_seen = tk.kind_of?(TkSPACE)
      rescue SyntaxError
        abort if @exception_on_syntax_error
        tk = TkError.new(line_no, char_no)
      end
      break unless @skip_space && tk.kind_of?(TkSPACE)
    end

    get_read if @read_auto_clean_up
    p tk if $DEBUG
    tk
  end
            
ungetc(c = nil) click to toggle source
 
               # File rdoc/parsers/parse_rb.rb, line 507
# Pushes input back by delegating to the reader's ungetc, forwarding the
# optional +c+ unchanged. The lexer calls this after reading one character
# beyond the end of a token.
def ungetc(c = nil)
  @reader.ungetc(c)
end
            

Commenting is here to help enhance the documentation. For example, code samples, or clarification of the documentation.

If you have questions about Ruby or the documentation, please post to one of the Ruby mailing lists. You will get better, faster help that way.

If you wish to post a correction of the docs, please do so, but also file a bug report so that it can be corrected for the next release. Thank you.

If you want to help improve the Ruby documentation, please visit Documenting-ruby.org.

blog comments powered by Disqus