This is a short example of my latest addition to the m4o toolchain. With m4o_lexer you can quite easily write simple LL(1) recursive-descent parsers for domain-specific languages in PL/SQL. The lexer is implemented with regexp_substr and regexp_instr, which probably means that performance is not great, but for small inputs that is not an issue.
@package-begin sample_parser
@plsql
-- Token ids handed to m4o_lexer. parse() binds each id to a regular
-- expression via m4o_lexer.set_token; the parse_* procedures compare the ids
-- with m4o_lexer.cur and pass them to m4o_lexer.eat.
c_equal constant m4o_lexer.token_t := 1;
-- c_plus is registered in parse() but no parse_* procedure in this sample
-- consumes it.
c_plus constant m4o_lexer.token_t := 2;
c_begin constant m4o_lexer.token_t := 3;
c_end constant m4o_lexer.token_t := 4;
c_if constant m4o_lexer.token_t := 5;
c_then constant m4o_lexer.token_t := 6;
c_else constant m4o_lexer.token_t := 7;
-- Identifier and number tokens (patterns are set in parse()).
c_ident constant m4o_lexer.token_t := 99;
c_num constant m4o_lexer.token_t := 100;
-- Forward declaration: parse_if calls parse_stmtseq before the definition of
-- parse_stmtseq appears.
procedure parse_stmtseq;
@end
--------------------------------------------------------------------------------
@procedure parse_assign
@declare
v_var varchar2(30);
v_value varchar2(30);
@begin
-- [ident] = [num]
v_var := m4o_lexer.text;
m4o_lexer.eat(c_ident);
m4o_lexer.eat(c_equal);
v_value := m4o_lexer.text;
m4o_lexer.eat(c_num);
dbms_output.put_line('assigning '
||v_value
||' to '
||v_var);
@plsql
-- NOTE(review): this section repeats the declarations that already appear
-- directly after @package-begin -- confirm whether the repetition is intended.
-- Token ids handed to m4o_lexer; parse() binds each id to a regular
-- expression via m4o_lexer.set_token.
c_equal constant m4o_lexer.token_t := 1;
c_plus constant m4o_lexer.token_t := 2;
c_begin constant m4o_lexer.token_t := 3;
c_end constant m4o_lexer.token_t := 4;
c_if constant m4o_lexer.token_t := 5;
c_then constant m4o_lexer.token_t := 6;
c_else constant m4o_lexer.token_t := 7;
c_ident constant m4o_lexer.token_t := 99;
c_num constant m4o_lexer.token_t := 100;
-- Forward declaration: parse_if calls parse_stmtseq before the definition of
-- parse_stmtseq appears.
procedure parse_stmtseq;
@end
--------------------------------------------------------------------------------
@procedure parse_assign
@declare
  -- Sized to the PL/SQL VARCHAR2 maximum: the ident and num patterns
  -- registered in parse() put no upper bound on token length, so a
  -- 30-character buffer would raise VALUE_ERROR on a longer token.
  v_var varchar2(32767);
  v_value varchar2(32767);
@begin
  -- Grammar: [ident] = [num]
  -- The token text is read before the matching eat() call; presumably eat()
  -- moves the lexer on to the next token -- confirm against m4o_lexer.
  v_var := m4o_lexer.text;
  m4o_lexer.eat(c_ident);
  m4o_lexer.eat(c_equal);
  v_value := m4o_lexer.text;
  m4o_lexer.eat(c_num);
  -- This sample only reports the assignment; nothing is stored.
  dbms_output.put_line('assigning ' || v_value || ' to ' || v_var);
@end
--------------------------------------------------------------------------------
@procedure parse_if
@begin
--------------------------------------------------------------------------------
@procedure parse_if
@begin
  -- Grammar: if [ident] = [num] then <stmtseq> [ else <stmtseq> ] end
  -- The condition is only consumed token by token; it is not evaluated.
  m4o_lexer.eat(c_if);
  m4o_lexer.eat(c_ident);
  m4o_lexer.eat(c_equal);
  m4o_lexer.eat(c_num);
  m4o_lexer.eat(c_then);
  parse_stmtseq;
  -- Optional else branch: taken only when the lookahead token is c_else.
  case m4o_lexer.cur
    when c_else then
      m4o_lexer.eat;
      parse_stmtseq;
    else
      null;
  end case;
  m4o_lexer.eat(c_end);
@end
--------------------------------------------------------------------------------
@procedure parse_stmtseq
@declare
  -- Lookahead token, fetched once per loop iteration.
  v_next m4o_lexer.token_t;
@begin
  -- Grammar: { <if> | <assign> | begin <stmtseq> end }
  -- Consumes statements until the lookahead token cannot start one; that
  -- token is left unconsumed for the caller.
  loop
    v_next := m4o_lexer.cur;
    if v_next = c_if then
      parse_if;
    elsif v_next = c_ident then
      parse_assign;
    elsif v_next = c_begin then
      -- Nested block: begin <stmtseq> end
      m4o_lexer.eat;
      parse_stmtseq;
      m4o_lexer.eat(c_end);
    else
      exit;
    end if;
  end loop;
@end
--------------------------------------------------------------------------------
@procedure parse*
i_code in varchar2
@begin
-- Entry point of the sample parser: registers the whitespace and token
-- patterns with m4o_lexer, then parses i_code as  begin <stmtseq> end.
-- NOTE(review): the trailing * on the procedure name presumably marks the
-- procedure as exported from the package -- confirm against the m4o docs.
m4o_lexer.begin_define_tokens;
m4o_lexer.set_whitespace('[[:space:]]+');
m4o_lexer.set_token(c_equal,'=');
-- NOTE(review): PL/SQL string literals have no backslash escapes, so unless
-- m4o itself unescapes this literal the pattern is "one or more literal
-- backslashes" rather than a literal plus sign -- confirm. c_plus is not
-- consumed by any parse_* procedure in this sample.
m4o_lexer.set_token(c_plus ,'\\+');
m4o_lexer.set_token(c_begin,'begin');
m4o_lexer.set_token(c_end ,'end');
m4o_lexer.set_token(c_if ,'if');
m4o_lexer.set_token(c_then ,'then');
m4o_lexer.set_token(c_else ,'else');
-- NOTE(review): every keyword above also matches the c_ident pattern, so this
-- relies on m4o_lexer preferring the keyword tokens (presumably through
-- registration order) -- confirm.
-- NOTE(review): main() passes upper-case input ('IF FOO = 7 THEN') although
-- the patterns are lower-case; presumably m4o_lexer matches
-- case-insensitively -- confirm.
m4o_lexer.set_token(c_ident,'[a-z][a-z0-9_#$]*');
m4o_lexer.set_token(c_num ,'[0-9]+');
m4o_lexer.begin_reading(i_code);
-- Top-level program: begin <stmtseq> end
m4o_lexer.eat(c_begin);
parse_stmtseq;
m4o_lexer.eat(c_end);
m4o_lexer.end_reading;
@end
--------------------------------------------------------------------------------
@procedure main*
@begin
parse(
'begin
m4o_lexer.eat(c_ident);
m4o_lexer.eat(c_equal);
m4o_lexer.eat(c_num);
m4o_lexer.eat(c_then);
parse_stmtseq;
if m4o_lexer.cur = c_else then
m4o_lexer.eat;
parse_stmtseq;
end if;
m4o_lexer.eat(c_end);
@end
--------------------------------------------------------------------------------
@procedure parse_stmtseq
@begin
-- NOTE(review): a parse_stmtseq definition also appears earlier in this
-- file -- confirm whether this repeated copy is intended.
-- Grammar: { <if> | <assign> | begin <stmtseq> end }
-- Consumes statements until the lookahead token cannot start one; that token
-- is left unconsumed for the caller.
loop
case m4o_lexer.cur
when c_if then
parse_if;
when c_ident then
parse_assign;
when c_begin then
-- Nested block: begin <stmtseq> end
m4o_lexer.eat;
parse_stmtseq;
m4o_lexer.eat(c_end);
else
-- Any other token ends the statement sequence.
exit;
end case;
end loop;
@end
--------------------------------------------------------------------------------
@procedure parse*
i_code in varchar2
@begin
-- NOTE(review): a parse definition also appears earlier in this file --
-- confirm whether this repeated copy is intended.
-- Registers the whitespace and token patterns with m4o_lexer, then parses
-- i_code as  begin <stmtseq> end.
m4o_lexer.begin_define_tokens;
m4o_lexer.set_whitespace('[[:space:]]+');
m4o_lexer.set_token(c_equal,'=');
-- NOTE(review): PL/SQL string literals have no backslash escapes, so unless
-- m4o itself unescapes this literal the pattern is "one or more literal
-- backslashes" rather than a literal plus sign -- confirm.
m4o_lexer.set_token(c_plus ,'\\+');
m4o_lexer.set_token(c_begin,'begin');
m4o_lexer.set_token(c_end ,'end');
m4o_lexer.set_token(c_if ,'if');
m4o_lexer.set_token(c_then ,'then');
m4o_lexer.set_token(c_else ,'else');
-- NOTE(review): every keyword above also matches the c_ident pattern, so this
-- relies on m4o_lexer preferring the keyword tokens -- confirm.
m4o_lexer.set_token(c_ident,'[a-z][a-z0-9_#$]*');
m4o_lexer.set_token(c_num ,'[0-9]+');
m4o_lexer.begin_reading(i_code);
-- Top-level program: begin <stmtseq> end
m4o_lexer.eat(c_begin);
parse_stmtseq;
m4o_lexer.eat(c_end);
m4o_lexer.end_reading;
@end
--------------------------------------------------------------------------------
@procedure main*
@declare
  -- Sample program for the parser: a top-level block holding an assignment
  -- and an if/else whose else branch is a nested begin ... end block.
  -- The literal's continuation lines stay unindented so that the text handed
  -- to parse() is unchanged.
  c_sample constant varchar2(32767) := 'begin
i = 3
IF FOO = 7 THEN
I = 5
bar = 10
else begin b=0 end end
end';
@begin
  -- Demo driver: run the parser over the sample program.
  parse(c_sample);
@end
@package-end
IF FOO = 7 THEN
I = 5
bar = 10
else begin b=0 end end
end');
@end
@package-end
No comments:
Post a Comment