The algorithm to generate matchers took a long time to be discovered and refined to its present version. The rest of mgen is mostly boring engineering. Extensive fuzzing ensures that the two core components of mgen (tables and matchers generation) are correct on specific problem instances.
214 lines
5.6 KiB
OCaml
214 lines
5.6 KiB
OCaml
open Cgen
|
|
open Match
|
|
|
|
let mgen ~verbose ~fuzz path lofs input oc =
|
|
let info ?(level = 1) fmt =
|
|
if level <= verbose then
|
|
Printf.eprintf fmt
|
|
else
|
|
Printf.ifprintf stdout fmt
|
|
in
|
|
|
|
let rules =
|
|
match Sexp.(run_parser ppats) input with
|
|
| `Error (ps, err, loc) ->
|
|
Printf.eprintf "%s:%d:%d %s\n"
|
|
path (lofs + ps.Sexp.line) ps.Sexp.coln err;
|
|
Printf.eprintf "%s" loc;
|
|
exit 1
|
|
| `Ok rules -> rules
|
|
in
|
|
|
|
info "adding ac variants...%!";
|
|
let nparsed =
|
|
List.fold_left
|
|
(fun npats (_, _, ps) ->
|
|
npats + List.length ps)
|
|
0 rules
|
|
in
|
|
let varsmap = Hashtbl.create 10 in
|
|
let rules =
|
|
List.concat_map (fun (name, vars, patterns) ->
|
|
(try assert (Hashtbl.find varsmap name = vars)
|
|
with Not_found -> ());
|
|
Hashtbl.replace varsmap name vars;
|
|
List.map
|
|
(fun pattern -> {name; vars; pattern})
|
|
(List.concat_map ac_equiv patterns)
|
|
) rules
|
|
in
|
|
info " %d -> %d patterns\n"
|
|
nparsed (List.length rules);
|
|
|
|
let rnames =
|
|
setify (List.map (fun r -> r.name) rules) in
|
|
|
|
info "generating match tables...%!";
|
|
let sa, am, sm = generate_table rules in
|
|
let numbr = make_numberer sa am sm in
|
|
info " %d states, %d rules\n"
|
|
(Array.length sa) (StateMap.cardinal sm);
|
|
if verbose >= 2 then begin
|
|
info "-------------\nstates:\n";
|
|
Array.iteri (fun i s ->
|
|
info " state %d: %s\n"
|
|
i (show_pattern s.seen)) sa;
|
|
info "-------------\nstatemap:\n";
|
|
Test.print_sm stderr sm;
|
|
info "-------------\n";
|
|
end;
|
|
|
|
info "generating matchers...\n";
|
|
let matchers =
|
|
List.map (fun rname ->
|
|
info "+ %s...%!" rname;
|
|
let m = lr_matcher sm sa rules rname in
|
|
let vars = Hashtbl.find varsmap rname in
|
|
info " %d nodes\n" (Action.size m);
|
|
info ~level:2 " -------------\n";
|
|
info ~level:2 " automaton:\n";
|
|
info ~level:2 "%s\n"
|
|
(Format.asprintf " @[%a@]" Action.pp m);
|
|
info ~level:2 " ----------\n";
|
|
(vars, rname, m)
|
|
) rnames
|
|
in
|
|
|
|
if fuzz then begin
|
|
info ~level:0 "fuzzing statemap...\n";
|
|
let tp = Fuzz.fuzz_numberer rules numbr in
|
|
info ~level:0 "testing %d patterns...\n"
|
|
(List.length rules);
|
|
Fuzz.test_matchers tp numbr rules
|
|
end;
|
|
|
|
info "emitting C...\n";
|
|
flush stderr;
|
|
|
|
let cgopts =
|
|
{ pfx = ""; static = true; oc = oc } in
|
|
emit_c cgopts numbr;
|
|
emit_matchers cgopts matchers;
|
|
|
|
()
|
|
|
|
let read_all ic =
|
|
let bufsz = 4096 in
|
|
let buf = Bytes.create bufsz in
|
|
let data = Buffer.create bufsz in
|
|
let read = ref 0 in
|
|
while
|
|
read := input ic buf 0 bufsz;
|
|
!read <> 0
|
|
do
|
|
Buffer.add_subbytes data buf 0 !read
|
|
done;
|
|
Buffer.contents data
|
|
|
|
let split_c src =
|
|
let begin_re, eoc_re, end_re =
|
|
let re = Str.regexp in
|
|
( re "mgen generated code"
|
|
, re "\\*/"
|
|
, re "end of generated code" )
|
|
in
|
|
let str_match regexp str =
|
|
try
|
|
let _: int =
|
|
Str.search_forward regexp str 0
|
|
in true
|
|
with Not_found -> false
|
|
in
|
|
|
|
let rec go st lofs pfx rules lines =
|
|
let line, lines =
|
|
match lines with
|
|
| [] ->
|
|
failwith (
|
|
match st with
|
|
| `Prefix -> "could not find mgen section"
|
|
| `Rules -> "mgen rules not terminated"
|
|
| `Skip -> "mgen section not terminated"
|
|
)
|
|
| l :: ls -> (l, ls)
|
|
in
|
|
match st with
|
|
| `Prefix ->
|
|
let pfx = line :: pfx in
|
|
if str_match begin_re line
|
|
then
|
|
let lofs = List.length pfx in
|
|
go `Rules lofs pfx rules lines
|
|
else go `Prefix 0 pfx rules lines
|
|
| `Rules ->
|
|
let pfx = line :: pfx in
|
|
if str_match eoc_re line
|
|
then go `Skip lofs pfx rules lines
|
|
else go `Rules lofs pfx (line :: rules) lines
|
|
| `Skip ->
|
|
if str_match end_re line then
|
|
let join = String.concat "\n" in
|
|
let pfx = join (List.rev pfx) ^ "\n\n"
|
|
and rules = join (List.rev rules)
|
|
and sfx = join (line :: lines)
|
|
in (lofs, pfx, rules, sfx)
|
|
else go `Skip lofs pfx rules lines
|
|
in
|
|
|
|
let lines = String.split_on_char '\n' src in
|
|
go `Prefix 0 [] [] lines
|
|
|
|
let () =
|
|
let usage_msg =
|
|
"mgen [--fuzz] [--verbose <N>] <file>" in
|
|
|
|
let fuzz_arg = ref false in
|
|
let verbose_arg = ref 0 in
|
|
let input_paths = ref [] in
|
|
|
|
let anon_fun filename =
|
|
input_paths := filename :: !input_paths in
|
|
|
|
let speclist =
|
|
[ ( "--fuzz", Arg.Set fuzz_arg
|
|
, " Fuzz tables and matchers" )
|
|
; ( "--verbose", Arg.Set_int verbose_arg
|
|
, "<N> Set verbosity level" )
|
|
; ( "--", Arg.Rest_all (List.iter anon_fun)
|
|
, " Stop argument parsing" ) ]
|
|
in
|
|
Arg.parse speclist anon_fun usage_msg;
|
|
|
|
let input_paths = !input_paths in
|
|
let verbose = !verbose_arg in
|
|
let fuzz = !fuzz_arg in
|
|
let input_path, input =
|
|
match input_paths with
|
|
| ["-"] -> ("-", read_all stdin)
|
|
| [path] -> (path, read_all (open_in path))
|
|
| _ ->
|
|
Printf.eprintf
|
|
"%s: single input file expected\n"
|
|
Sys.argv.(0);
|
|
Arg.usage speclist usage_msg; exit 1
|
|
in
|
|
let mgen = mgen ~verbose ~fuzz in
|
|
|
|
if Str.last_chars input_path 2 <> ".c"
|
|
then mgen input_path 0 input stdout
|
|
else
|
|
let tmp_path = input_path ^ ".tmp" in
|
|
Fun.protect
|
|
~finally:(fun () ->
|
|
try Sys.remove tmp_path with _ -> ())
|
|
(fun () ->
|
|
let lofs, pfx, rules, sfx = split_c input in
|
|
let oc = open_out tmp_path in
|
|
output_string oc pfx;
|
|
mgen input_path lofs rules oc;
|
|
output_string oc sfx;
|
|
close_out oc;
|
|
Sys.rename tmp_path input_path;
|
|
());
|
|
|
|
()
|