Wednesday, July 7, 2010

Writing parsers with m4o

This is a short example of my latest addition to the m4o toolchain. With m4o_lexer you can quite easily write simple LL(1) recursive descent parsers for domain-specific languages in PL/SQL. The lexer is implemented with regexp_substr and regexp_instr, so performance is probably not great, but for small inputs that is not an issue. The sample package below parses a tiny language made of assignments, if/then/else statements and begin/end blocks.

@package-begin sample_parser
  @plsql
    c_equal constant m4o_lexer.token_t := 1;
    c_plus  constant m4o_lexer.token_t := 2;
    c_begin constant m4o_lexer.token_t := 3;
    c_end   constant m4o_lexer.token_t := 4;
    c_if    constant m4o_lexer.token_t := 5;
    c_then  constant m4o_lexer.token_t := 6;
    c_else  constant m4o_lexer.token_t := 7;
    c_ident constant m4o_lexer.token_t := 99;
    c_num   constant m4o_lexer.token_t := 100;
    procedure parse_stmtseq;
  @end
--------------------------------------------------------------------------------
  @procedure parse_assign
  @declare
    v_var   varchar2(30);
    v_value varchar2(30);
  @begin
    -- [ident] = [num]
    v_var   := m4o_lexer.text;
    m4o_lexer.eat(c_ident);
    m4o_lexer.eat(c_equal);
    v_value := m4o_lexer.text;
    m4o_lexer.eat(c_num);
    dbms_output.put_line('assigning '
                       ||v_value
                       ||' to '
                       ||v_var);
  @end
--------------------------------------------------------------------------------
  @procedure parse_if
  @begin
    m4o_lexer.eat(c_if);
    m4o_lexer.eat(c_ident);
    m4o_lexer.eat(c_equal);
    m4o_lexer.eat(c_num);
    m4o_lexer.eat(c_then);
    parse_stmtseq;
    if m4o_lexer.cur = c_else then
      m4o_lexer.eat;
      parse_stmtseq;
    end if;
    m4o_lexer.eat(c_end);
  @end
--------------------------------------------------------------------------------
  @procedure parse_stmtseq
  @begin
    loop
      case m4o_lexer.cur
      when c_if then
        parse_if;
      when c_ident then
        parse_assign;
      when c_begin then
        m4o_lexer.eat;
        parse_stmtseq;
        m4o_lexer.eat(c_end);
      else
        exit;
      end case;
    end loop;
  @end
--------------------------------------------------------------------------------
  @procedure parse*
    i_code in varchar2
  @begin
    m4o_lexer.begin_define_tokens;
    m4o_lexer.set_whitespace('[[:space:]]+');
    m4o_lexer.set_token(c_equal,'=');
    m4o_lexer.set_token(c_plus ,'\\+');
    m4o_lexer.set_token(c_begin,'begin');
    m4o_lexer.set_token(c_end  ,'end');
    m4o_lexer.set_token(c_if   ,'if');
    m4o_lexer.set_token(c_then ,'then');
    m4o_lexer.set_token(c_else ,'else');
    m4o_lexer.set_token(c_ident,'[a-z][a-z0-9_#$]*');
    m4o_lexer.set_token(c_num  ,'[0-9]+');

    m4o_lexer.begin_reading(i_code);

    m4o_lexer.eat(c_begin);
    parse_stmtseq;
    m4o_lexer.eat(c_end);

    m4o_lexer.end_reading;
  @end
--------------------------------------------------------------------------------
  @procedure main*
  @begin
    parse(
      'begin
        i = 3
        IF FOO = 7 THEN
          I = 5
          bar = 10
        else begin b=0 end end
      end');
  @end
@package-end

No comments: