diff --git a/alpaga/grammar.ml b/alpaga/grammar.ml index d3a95dd5c5e2e22287287207577dd2645abe50c8..e747d158cea5ae006756492cabe3f4a2a1bccdc1 100644 --- a/alpaga/grammar.ml +++ b/alpaga/grammar.ml @@ -11,7 +11,7 @@ type rule = { rule_nt: nonterm; } type grammar = { tokens: (tokent * string option) list; - nonterms: nonterm list; + nonterms: (nonterm * string option) list; rules: rule list; mlcode: string option; axiom: nonterm option diff --git a/alpaga/grammar_lexer.mll b/alpaga/grammar_lexer.mll index e2802373e089f6071d553ed23d371275404d9e67..1ea896e2d4dee9c578c3416336f40caff0ecc7ff 100644 --- a/alpaga/grammar_lexer.mll +++ b/alpaga/grammar_lexer.mll @@ -13,9 +13,8 @@ rule token = parse | "/*" { comment_multiline lexbuf } | '\n' { Lexing.new_line lexbuf; EOL } | '{' { action 0 "" lexbuf } + | '<' { ttype "" lexbuf } | "->" { ARROW } - | ">" { GT } - | "<" { LT } | "axiom" { AXIOM } | "tokens" { TOK } | "non-terminals" { NT } @@ -29,8 +28,12 @@ rule token = parse and action level s = parse | '}' { if level = 0 then CODE s else action (level-1) (s ^ "}") lexbuf } | '{' { action (level + 1) (s ^ "{") lexbuf } - | _ as x { if x == '\n' then Lexing.new_line lexbuf; + | _ as x { if x = '\n' then Lexing.new_line lexbuf; action level (Printf.sprintf "%s%c" s x) lexbuf } +and ttype s = parse + | '>' { TTYPE s } + | _ as x { if x = '\n' then Lexing.new_line lexbuf; + ttype (Printf.sprintf "%s%c" s x) lexbuf } and comment = parse | '\n' { Lexing.new_line lexbuf; token lexbuf } | _ { comment lexbuf } diff --git a/alpaga/grammar_parser.ml b/alpaga/grammar_parser.ml index 05925c8e5de928e207b53393f5b1949dd30dbb21..e3a504d228c098bfad32c79c30652307ab5fe5b3 100644 --- a/alpaga/grammar_parser.ml +++ b/alpaga/grammar_parser.ml @@ -26,7 +26,7 @@ let parse_grammar file : grammar * nonterm = List.fold_left (fun (undef, used) prod -> let undef = - if not (List.mem prod (List.map fst gram.tokens) || List.mem prod gram.nonterms || List.mem prod undef) + if not (List.mem_assoc prod gram.tokens || List.mem_assoc prod gram.nonterms || List.mem prod undef) then prod::undef else undef in let used = @@ -46,7 +46,7 @@ let parse_grammar file : grammar * nonterm = | Some axiom -> (* Warn if some non terminals are never seen on the right hand side of a rule. *) - let unused_nts = List.filter (fun nt -> not (List.mem nt used_strings) && Some nt <> gram.axiom) gram.nonterms in + let unused_nts = List.filter (fun nt -> not (List.mem nt used_strings) && Some nt <> gram.axiom) (List.map fst gram.nonterms) in if unused_nts <> [] then Printf.printf "The following non-terminals are declared but never appear on the right hand-side of a rule:\n%a\n" print_list unused_nts; (* Warn if some tokens are never seen on the right hand side of a rule. *) @@ -59,5 +59,5 @@ let parse_grammar file : grammar * nonterm = match Hashtbl.find_opt h r.rule_nt with | None -> Hashtbl.add h r.rule_nt [r] | Some lp -> Hashtbl.replace h r.rule_nt (lp@[r]) ) (gram.rules); - let rules = List.concat (List.map (fun n -> hashget_def h n []) gram.nonterms) in + let rules = List.concat (List.map (fun n -> hashget_def h n []) (List.map fst gram.nonterms)) in { gram with rules = rules }, axiom diff --git a/alpaga/grammar_parser_yacc.mly b/alpaga/grammar_parser_yacc.mly index 7455252bb75b5ef94033daa3640daae70c85ab93..c0161c25433b57a2da218c72c4f75ab2d98c5f3d 100644 --- a/alpaga/grammar_parser_yacc.mly +++ b/alpaga/grammar_parser_yacc.mly @@ -4,9 +4,10 @@ %} -%token EOF EOL TOK NT RULES ARROW AXIOM LT GT +%token EOF EOL TOK NT RULES ARROW AXIOM //LT GT %token<string> IDENTIFIER %token<string> CODE +%token<string> TTYPE %start main %type <Grammar.grammar> main @@ -16,7 +17,7 @@ main: | AXIOM IDENTIFIER EOL main { let r = $4 in {r with axiom = Some $2 }} | TOK list_tokens EOL main { let r = $4 in {r with tokens = r.tokens @ $2} } - | NT list_ident EOL main { let r = $4 in {r with nonterms = r.nonterms @ $2} } + | NT list_nts EOL main { let r = $4 in {r with nonterms = r.nonterms @ $2} } | CODE main { let r = $2 in { r with mlcode = Some ($1) }} | RULES EOL rules EOF { { tokens = []; nonterms = []; axiom = None; rules = $3; mlcode = None } } @@ -24,7 +25,7 @@ ; typed_tokens: - | IDENTIFIER LT IDENTIFIER GT { ($1, Some $3) } + | IDENTIFIER TTYPE { ($1, Some $2) } | IDENTIFIER { ($1, None)} ; @@ -35,6 +36,20 @@ ; + typed_nts: + | IDENTIFIER TTYPE { ($1, Some $2) } + | IDENTIFIER { ($1, None)} + ; + + + list_nts: + | typed_nts list_nts { $1 :: $2} + | { [] } + ; + + + + list_ident: | IDENTIFIER list_ident { $1 :: $2} | { [] } diff --git a/alpaga/ml_parser_generator.ml b/alpaga/ml_parser_generator.ml index 4b0c6d8b92ffd987ed756f488dc5a4de6ab4414d..df837abdf4154dca69f510f41e4be85659604fce 100644 --- a/alpaga/ml_parser_generator.ml +++ b/alpaga/ml_parser_generator.ml @@ -25,8 +25,11 @@ let default_action (pl: string list) : string = let resolve_vars s = Str.global_replace (Str.regexp "\\$\\([0-9]+\\)") "p\\1" s -let make_nt (table: string*string -> lltype list) (toks,nts,rules) oc n () = - Printf.fprintf oc "and parse_%s tokens () =\n" n; +let make_nt (table: string*string -> lltype list) (toks,nts,rules) oc (n,ot) () = + let t = match ot with | None -> "_" | Some t -> t in + let type_annot = Printf.sprintf ": ((%s) * ((token*Lexing.position option) list)) res" t + in + Printf.fprintf oc "and parse_%s (tokens: ((token*Lexing.position option) list)) () %s=\n" n type_annot; Printf.fprintf oc " begin match tokens with\n"; List.iteri (fun i t -> @@ -41,48 +44,37 @@ let make_nt (table: string*string -> lltype list) (toks,nts,rules) oc n () = then Printf.fprintf oc " eat_%s tokens >>= fun (p%d, tokens) ->\n" t (i + 1) else Printf.fprintf oc " parse_%s tokens () >>= fun (p%d, tokens) ->\n" t (i+1)) pl; - Printf.fprintf oc "\n" ; - Printf.fprintf oc " let res =\n" ; - (match act with - | Some act -> Printf.fprintf oc " %s\n" (resolve_vars act) - | _ -> - Printf.fprintf oc " %s\n" (resolve_vars (default_action pl)) - ); - Printf.fprintf oc " in OK (res, tokens)\n" ; - Printf.fprintf oc "end\n"; + let act = match act with Some act -> act | _ -> default_action pl in + Printf.fprintf oc " let res = %s in\n" (resolve_vars act); + Printf.fprintf oc " OK (res, tokens)\n" ; + Printf.fprintf oc " end\n"; ) toks; let expected = List.filter (fun t -> List.length (table (n,t)) > 0) toks in - Printf.fprintf oc " | tokens -> \n"; - Printf.fprintf oc " let got,lexpos = match tokens with [] -> \"EOF\",None | (symbol, lexpos) :: _ -> (string_of_symbol symbol, lexpos) in Error (\n"; - Printf.fprintf oc " (match lexpos with \n"; - Printf.fprintf oc " | Some lexpos -> Printf.sprintf \"At %%s, error while parsing %s\\n\" (string_of_position lexpos) \n" n; + Printf.fprintf oc " | tokens ->\n"; + Printf.fprintf oc " let got,lexpos =\n match tokens with\n [] -> \"EOF\",None\n | (symbol, lexpos) :: _ -> (string_of_symbol symbol, lexpos)\n in Error (\n"; + Printf.fprintf oc " (match lexpos with\n"; + Printf.fprintf oc " | Some lexpos -> Printf.sprintf \"At %%s, error while parsing %s\\n\" (string_of_position lexpos)\n" n; Printf.fprintf oc " | None -> Printf.sprintf \"Error while parsing %s\\n\" )^\n" n; - Printf.fprintf oc " Printf.sprintf \"Expected one of \"^\n"; - begin - match expected with - [] -> Printf.fprintf oc "Printf.sprintf \"{}\" ^\n" - | a::r -> - List.iteri (fun i t -> - Printf.fprintf oc "Printf.sprintf \"%s %%s\" (string_of_symbol default_%s)^\n" (if i = 0 then "{" else ",") t; - ) (a::r); - Printf.fprintf oc "Printf.sprintf \"}\" ^ \n" - end; - Printf.fprintf oc " Printf.sprintf \" but got '%%s' instead.\\n\" got\n ^ "; - Printf.fprintf oc " Printf.sprintf \" '%%s' \\n\" (String.concat \",\" (List.map (fun (x, _) -> string_of_symbol x) (List.take 10 tokens)))\n"; + Printf.fprintf oc " Printf.sprintf \"Expected one of {%%s}\"\n"; + Printf.fprintf oc " (String.concat \", \" (List.map string_of_symbol [%s])) ^\n" + (String.concat ";" (List.map (fun s -> "default_"^s) expected)) ; + Printf.fprintf oc " Printf.sprintf \" but got '%%s' instead.\\n\" got\n ^ "; + Printf.fprintf oc " Printf.sprintf \" '%%s' \\n\" (String.concat \",\" (List.map (fun (x, _) -> string_of_symbol x) (List.take 10 tokens)))\n"; Printf.fprintf oc " )"; Printf.fprintf oc "\n end\n\n" let make_parser (table: string*string -> lltype list) (toks,nts,rules,mlcode) (typ: (tokent * string) list) + (nttyp: (nonterm * string) list) oc () = Stdlib.Option.iter (fun mlcode -> Printf.fprintf oc "\n\n%s\n\n" mlcode) mlcode; List.iter (fun t -> begin match List.assoc_opt t typ with | Some ty -> begin - Printf.fprintf oc "let is_%s = function \n" t; + Printf.fprintf oc "let is_%s = function\n" t; Printf.fprintf oc " | %s _ -> true\n" t; Printf.fprintf oc " | _ -> false\n"; @@ -96,7 +88,7 @@ let make_parser (table: string*string -> lltype list) ) end | None -> begin - Printf.fprintf oc "let is_%s = function \n" t; + Printf.fprintf oc "let is_%s = function\n" t; Printf.fprintf oc " | %s -> true\n" t; Printf.fprintf oc " | _ -> false\n"; Printf.fprintf oc "let default_%s = %s\n" t t @@ -104,23 +96,23 @@ let make_parser (table: string*string -> lltype list) end; ) toks; List.iter (fun t -> - Printf.fprintf oc "let eat_%s = function \n" t; + Printf.fprintf oc "let eat_%s = function\n" t; begin match List.assoc_opt t typ with | Some _ -> Printf.fprintf oc "| (%s(x),_) :: rtokens -> OK (x, rtokens)\n" t | None -> Printf.fprintf oc "| (%s,_) :: rtokens -> OK ((), rtokens)\n" t end; - Printf.fprintf oc "| (x,Some pos) :: _ -> Error (Printf.sprintf \"At position %%s, expected %%s, got %%s.\\n\""; - Printf.fprintf oc " (string_of_position pos)"; - Printf.fprintf oc " (string_of_symbol default_%s)" t; - Printf.fprintf oc " (string_of_symbol x))"; - Printf.fprintf oc " | (x,None) :: _ -> Error (Printf.sprintf \"Expected %%s, got %%s.\\n\""; - Printf.fprintf oc " (string_of_symbol default_%s)" t; - Printf.fprintf oc " (string_of_symbol x))"; + Printf.fprintf oc "| (x,Some pos) :: _ -> Error (Printf.sprintf \"At position %%s, expected %%s, got %%s.\\n\"\n"; + Printf.fprintf oc " (string_of_position pos)\n"; + Printf.fprintf oc " (string_of_symbol default_%s)\n" t; + Printf.fprintf oc " (string_of_symbol x))\n"; + Printf.fprintf oc " | (x,None) :: _ -> Error (Printf.sprintf \"Expected %%s, got %%s.\\n\"\n"; + Printf.fprintf oc " (string_of_symbol default_%s)\n" t; + Printf.fprintf oc " (string_of_symbol x))\n"; Printf.fprintf oc " | _ -> Error (Printf.sprintf \"Expected %%s, got EOF.\\n\" (string_of_symbol default_%s))\n" t; ) toks; - Printf.fprintf oc "let rec ____unused = () \n"; - List.iter (fun n -> make_nt table (toks,nts,rules) oc n ()) nts + Printf.fprintf oc "let rec ____unused = ()\n"; + List.iter (fun n -> make_nt table (toks,nts,rules) oc (n, List.assoc_opt n nttyp) ()) nts let nts_ordered start (toks,nts,rules) = let nts = @@ -185,6 +177,11 @@ let _ = | None -> None | Some typ -> Some (t,typ) ) gram.tokens) + (List.filter_map (fun (t,o) -> + match o with + | None -> None + | Some typ -> Some (t,typ) + ) gram.nonterms) oc (); close_out oc ) diff --git a/expr_grammar_action.g b/expr_grammar_action.g index 52bbaf13f443e3a3a0b189366991a599eadd4159..9ab7ecd32a798aa929d04416aa9706f289d1ce20 100644 --- a/expr_grammar_action.g +++ b/expr_grammar_action.g @@ -2,7 +2,7 @@ tokens SYM_EOF SYM_IDENTIFIER<string> SYM_INTEGER<int> SYM_PLUS SYM_MINUS SYM_AS tokens SYM_LPARENTHESIS SYM_RPARENTHESIS SYM_LBRACE SYM_RBRACE tokens SYM_ASSIGN SYM_SEMICOLON SYM_RETURN SYM_IF SYM_WHILE SYM_ELSE SYM_COMMA SYM_PRINT tokens SYM_EQUALITY SYM_NOTEQ SYM_LT SYM_LEQ SYM_GT SYM_GEQ -non-terminals S INSTR INSTRS LINSTRS ELSE EXPR FACTOR +non-terminals S INSTR<tree> INSTRS<tree list> LINSTRS ELSE EXPR FACTOR non-terminals LPARAMS REST_PARAMS non-terminals IDENTIFIER INTEGER non-terminals FUNDEF FUNDEFS diff --git a/src/test_lexer.ml b/src/test_lexer.ml index 6e3adc92fd7602379603b4289325afde92c331f8..72ffda45d229eb9ab198c2de6c5e57ffd349eb2b 100644 --- a/src/test_lexer.ml +++ b/src/test_lexer.ml @@ -4,6 +4,33 @@ open Batteries open Utils open Symbols +let nfa_accepts (n: nfa) (w: char list) : bool = + let rec trav vis s = + if Set.mem s vis then vis + else let en = List.filter_map (fun (oa, n) -> if oa = None then Some n else None) (n.nfa_step s) in + List.fold_left trav (Set.add s vis) en in + let ec s = trav Set.empty s in + let ecs ls = Set.fold (fun q -> Set.union (ec q)) ls Set.empty in + + let rec walk (q: int set) (w: char list) = + let q = ecs q in + match w with + | [] -> Set.exists (fun q -> List.mem q (List.map fst n.nfa_final)) q + | c::w -> + let q' = + Set.fold Set.union (Set.map (fun q -> + (List.filter_map + (fun (cso,q') -> + match cso with + | None -> None + | Some cs -> if Set.mem c cs then Some q' else None + ) + (n.nfa_step q)) |> Set.of_list + ) q) Set.empty + + in walk q' w in + walk (Set.of_list n.nfa_initial) w + let () = let regexp_list = [ (keyword_regexp "while", fun s -> Some (SYM_WHILE)); @@ -110,5 +137,54 @@ let () = let table = Hashtbl.create 10 in build_dfa_table table n (dfa_initial_state n); - expect_set_set "dfa states" (Hashtbl.keys table |> Set.of_enum) (Set.of_list [Set.of_list [1;2;3]; Set.of_list [2;4]; Set.of_list [2]]) + expect_set_set "dfa states" (Hashtbl.keys table |> Set.of_enum) (Set.of_list [Set.of_list [1;2;3]; Set.of_list [2;4]; Set.of_list [2]]); + + let expect_nfa_accepts n s b = + let r = nfa_accepts n (char_list_of_string s) in + if r = b + then Printf.printf "[OK] nfa_accepts %s = %b\n" s r + else Printf.printf "[KO] nfa_accepts %s = %b\n" s r + in + Printf.printf "*** NFA n1 : 'hello'\n"; + let n1, f1 = nfa_of_regexp (keyword_regexp "hello") 1 (fun _ -> None) in + expect_nfa_accepts n1 "hello" true; + expect_nfa_accepts n1 "bonjour" false; + + Printf.printf "*** NFA n2 : 'bonjour'\n"; + let n2, f2 = nfa_of_regexp (keyword_regexp "bonjour") f1 (fun _ -> None) in + expect_nfa_accepts n2 "hello" false; + expect_nfa_accepts n2 "bonjour" true; + + Printf.printf "*** NFA n3 : n1 | n2\n"; + let n3 = alt_nfa n1 n2 in + expect_nfa_accepts n3 "hello" true; + expect_nfa_accepts n3 "bonjour" true; + expect_nfa_accepts n2 "buongiorno" false; + + Printf.printf "*** NFA n4 : n1 . n2 \n"; + let n4 = cat_nfa n1 n2 in + expect_nfa_accepts n4 "hello" false; + expect_nfa_accepts n4 "bonjour" false; + expect_nfa_accepts n4 "hellobonjour" true; + expect_nfa_accepts n4 "bonjourhello" false; + + Printf.printf "*** NFA n5 : n1* \n"; + let n5 = star_nfa n1 (fun _ -> None) in + expect_nfa_accepts n5 "" true; + expect_nfa_accepts n5 "hello" true; + expect_nfa_accepts n5 "hellohello" true; + expect_nfa_accepts n5 "hellobonjour" false; + + Printf.printf "*** NFA n6 : n3* \n"; + let n6 = star_nfa n3 (fun _ -> None) in + expect_nfa_accepts n6 "" true; + expect_nfa_accepts n6 "hello" true; + expect_nfa_accepts n6 "hellohello" true; + expect_nfa_accepts n6 "hellobonjour" true; + expect_nfa_accepts n6 "hellobonjourhello" true; + expect_nfa_accepts n6 "bonjourbonjourbonjourhello" true; + expect_nfa_accepts n6 "bonjlo" false; + + + ignore f2 diff --git a/tests/test.py b/tests/test.py index 91bf090cca70a4e8209ab6691bd7356f71552d46..6c0c90d0ba24dde6ce60a71ecb8498b21a164f2d 100755 --- a/tests/test.py +++ b/tests/test.py @@ -188,7 +188,10 @@ class CommandExecutor(Thread): icon_ko, self.filename, r['compstep'], err) - + elif r["compstep"] == "Parsing": + compstep_td = """ + <td class="good">{}</td> + """.format(icon_ok) elif r["compstep"] == "Lexing": expect_lex_file_name = self.filename + ".expect_lexer" out_lex_file_name = self.filename[:-2] + ".lex" @@ -321,7 +324,8 @@ def main(): exec_thread = CommandExecutor(fname, cmd, args.args, args.make_expect, # 1 colonne pour le lexer - len(args.passes) + 1) + # 1 colonne pour le parser + len(args.passes) + 2) threads.append(exec_thread) exec_thread.start() @@ -400,7 +404,7 @@ def main(): res_html.write(""" <table class="w3-table w3-striped w3-responsive"> <tr><th>File</th>""") - for pass_name in ["Lexer"] + args.passes: + for pass_name in ["Lexer","Parser"] + args.passes: res_html.write("<th style='transform: rotate(180deg); writing-mode: vertical-rl;'>{}</th>\n".format(pass_name)) res_html.write(""" </tr>