libqbe/tools/mgen/main.ml
Quentin Carbonneaux a609527752 mgen: match automatons and C generation
The algorithm to generate matchers
took a long time to be discovered
and refined to its present version.
The rest of mgen is mostly boring
engineering.

Extensive fuzzing ensures that the
two core components of mgen (tables
and matchers generation) are correct
on specific problem instances.
2024-04-09 21:45:42 +02:00

214 lines
5.6 KiB
OCaml

open Cgen
open Match
let mgen ~verbose ~fuzz path lofs input oc =
let info ?(level = 1) fmt =
if level <= verbose then
Printf.eprintf fmt
else
Printf.ifprintf stdout fmt
in
let rules =
match Sexp.(run_parser ppats) input with
| `Error (ps, err, loc) ->
Printf.eprintf "%s:%d:%d %s\n"
path (lofs + ps.Sexp.line) ps.Sexp.coln err;
Printf.eprintf "%s" loc;
exit 1
| `Ok rules -> rules
in
info "adding ac variants...%!";
let nparsed =
List.fold_left
(fun npats (_, _, ps) ->
npats + List.length ps)
0 rules
in
let varsmap = Hashtbl.create 10 in
let rules =
List.concat_map (fun (name, vars, patterns) ->
(try assert (Hashtbl.find varsmap name = vars)
with Not_found -> ());
Hashtbl.replace varsmap name vars;
List.map
(fun pattern -> {name; vars; pattern})
(List.concat_map ac_equiv patterns)
) rules
in
info " %d -> %d patterns\n"
nparsed (List.length rules);
let rnames =
setify (List.map (fun r -> r.name) rules) in
info "generating match tables...%!";
let sa, am, sm = generate_table rules in
let numbr = make_numberer sa am sm in
info " %d states, %d rules\n"
(Array.length sa) (StateMap.cardinal sm);
if verbose >= 2 then begin
info "-------------\nstates:\n";
Array.iteri (fun i s ->
info " state %d: %s\n"
i (show_pattern s.seen)) sa;
info "-------------\nstatemap:\n";
Test.print_sm stderr sm;
info "-------------\n";
end;
info "generating matchers...\n";
let matchers =
List.map (fun rname ->
info "+ %s...%!" rname;
let m = lr_matcher sm sa rules rname in
let vars = Hashtbl.find varsmap rname in
info " %d nodes\n" (Action.size m);
info ~level:2 " -------------\n";
info ~level:2 " automaton:\n";
info ~level:2 "%s\n"
(Format.asprintf " @[%a@]" Action.pp m);
info ~level:2 " ----------\n";
(vars, rname, m)
) rnames
in
if fuzz then begin
info ~level:0 "fuzzing statemap...\n";
let tp = Fuzz.fuzz_numberer rules numbr in
info ~level:0 "testing %d patterns...\n"
(List.length rules);
Fuzz.test_matchers tp numbr rules
end;
info "emitting C...\n";
flush stderr;
let cgopts =
{ pfx = ""; static = true; oc = oc } in
emit_c cgopts numbr;
emit_matchers cgopts matchers;
()
let read_all ic =
let bufsz = 4096 in
let buf = Bytes.create bufsz in
let data = Buffer.create bufsz in
let read = ref 0 in
while
read := input ic buf 0 bufsz;
!read <> 0
do
Buffer.add_subbytes data buf 0 !read
done;
Buffer.contents data
let split_c src =
let begin_re, eoc_re, end_re =
let re = Str.regexp in
( re "mgen generated code"
, re "\\*/"
, re "end of generated code" )
in
let str_match regexp str =
try
let _: int =
Str.search_forward regexp str 0
in true
with Not_found -> false
in
let rec go st lofs pfx rules lines =
let line, lines =
match lines with
| [] ->
failwith (
match st with
| `Prefix -> "could not find mgen section"
| `Rules -> "mgen rules not terminated"
| `Skip -> "mgen section not terminated"
)
| l :: ls -> (l, ls)
in
match st with
| `Prefix ->
let pfx = line :: pfx in
if str_match begin_re line
then
let lofs = List.length pfx in
go `Rules lofs pfx rules lines
else go `Prefix 0 pfx rules lines
| `Rules ->
let pfx = line :: pfx in
if str_match eoc_re line
then go `Skip lofs pfx rules lines
else go `Rules lofs pfx (line :: rules) lines
| `Skip ->
if str_match end_re line then
let join = String.concat "\n" in
let pfx = join (List.rev pfx) ^ "\n\n"
and rules = join (List.rev rules)
and sfx = join (line :: lines)
in (lofs, pfx, rules, sfx)
else go `Skip lofs pfx rules lines
in
let lines = String.split_on_char '\n' src in
go `Prefix 0 [] [] lines
let () =
let usage_msg =
"mgen [--fuzz] [--verbose <N>] <file>" in
let fuzz_arg = ref false in
let verbose_arg = ref 0 in
let input_paths = ref [] in
let anon_fun filename =
input_paths := filename :: !input_paths in
let speclist =
[ ( "--fuzz", Arg.Set fuzz_arg
, " Fuzz tables and matchers" )
; ( "--verbose", Arg.Set_int verbose_arg
, "<N> Set verbosity level" )
; ( "--", Arg.Rest_all (List.iter anon_fun)
, " Stop argument parsing" ) ]
in
Arg.parse speclist anon_fun usage_msg;
let input_paths = !input_paths in
let verbose = !verbose_arg in
let fuzz = !fuzz_arg in
let input_path, input =
match input_paths with
| ["-"] -> ("-", read_all stdin)
| [path] -> (path, read_all (open_in path))
| _ ->
Printf.eprintf
"%s: single input file expected\n"
Sys.argv.(0);
Arg.usage speclist usage_msg; exit 1
in
let mgen = mgen ~verbose ~fuzz in
if Str.last_chars input_path 2 <> ".c"
then mgen input_path 0 input stdout
else
let tmp_path = input_path ^ ".tmp" in
Fun.protect
~finally:(fun () ->
try Sys.remove tmp_path with _ -> ())
(fun () ->
let lofs, pfx, rules, sfx = split_c input in
let oc = open_out tmp_path in
output_string oc pfx;
mgen input_path lofs rules oc;
output_string oc sfx;
close_out oc;
Sys.rename tmp_path input_path;
());
()