libqbe/tools/mgen/sexp.ml

293 lines
7.1 KiB
OCaml
Raw Normal View History

type pstate =
{ data: string
; line: int
; coln: int
; indx: int }
type perror =
{ error: string
; ps: pstate }
exception ParseError of perror
type 'a parser =
{ fn: 'r. pstate -> ('a -> pstate -> 'r) -> 'r }
let update_pos ps beg fin =
let l, c = (ref ps.line, ref ps.coln) in
for i = beg to fin - 1 do
if ps.data.[i] = '\n' then
(incr l; c := 0)
else
incr c
done;
{ ps with line = !l; coln = !c }
let pret (type a) (x: a): a parser =
let fn ps k = k x ps in { fn }
let pfail error: 'a parser =
let fn ps _ = raise (ParseError {error; ps})
in { fn }
let por: 'a parser -> 'a parser -> 'a parser =
fun p1 p2 ->
let fn ps k =
try p1.fn ps k with ParseError e1 ->
try p2.fn ps k with ParseError e2 ->
if e1.ps.indx > e2.ps.indx then
raise (ParseError e1)
else
raise (ParseError e2)
in { fn }
let pbind: 'a parser -> ('a -> 'b parser) -> 'b parser =
fun p1 p2 ->
let fn ps k =
p1.fn ps (fun x ps -> (p2 x).fn ps k)
in { fn }
(* handy for recursive rules *)
let papp p x = pbind (pret x) p
let psnd: 'a parser -> 'b parser -> 'b parser =
fun p1 p2 -> pbind p1 (fun _x -> p2)
let pfst: 'a parser -> 'b parser -> 'a parser =
fun p1 p2 -> pbind p1 (fun x -> psnd p2 (pret x))
module Infix = struct
let ( let* ) = pbind
let ( ||| ) = por
let ( |<< ) = pfst
let ( |>> ) = psnd
end
open Infix
let pre: ?what:string -> string -> string parser =
fun ?what re ->
let what =
match what with
| None -> Printf.sprintf "%S" re
| Some what -> what
and re = Str.regexp re in
let fn ps k =
if not (Str.string_match re ps.data ps.indx) then
(let error =
Printf.sprintf "expected to match %s" what in
raise (ParseError {error; ps}));
let ps =
let indx = Str.match_end () in
{ (update_pos ps ps.indx indx) with indx }
in
k (Str.matched_string ps.data) ps
in { fn }
let peoi: unit parser =
let fn ps k =
if ps.indx <> String.length ps.data then
raise (ParseError
{ error = "expected end of input"; ps });
k () ps
in { fn }
let pws = pre "[ \r\n\t*]*"
let pws1 = pre "[ \r\n\t*]+"
let pthen p1 p2 =
let* x1 = p1 in
let* x2 = p2 in
pret (x1, x2)
let rec plist_tail: 'a parser -> ('a list) parser =
fun pitem ->
(pws |>> pre ")" |>> pret []) |||
(let* itm = pitem in
let* itms = plist_tail pitem in
pret (itm :: itms))
let plist pitem =
pws |>> pre ~what:"a list" "("
|>> plist_tail pitem
let plist1p p1 pitem =
pws |>> pre ~what:"a list" "("
|>> pthen p1 (plist_tail pitem)
let ppair p1 p2 =
pws |>> pre ~what:"a pair" "("
|>> pthen p1 p2 |<< pws |<< pre ")"
let run_parser p s =
let ps =
{data = s; line = 1; coln = 0; indx = 0} in
try `Ok (p.fn ps (fun res _ps -> res))
with ParseError e ->
let rec bol i =
if i = 0 then i else
if i < String.length s && s.[i] = '\n'
then i+1 (* XXX BUG *)
else bol (i-1)
in
let rec eol i =
if i = String.length s then i else
if s.[i] = '\n' then i else
eol (i+1)
in
let bol = bol e.ps.indx in
let eol = eol e.ps.indx in
(*
Printf.eprintf "bol:%d eol:%d indx:%d len:%d\n"
bol eol e.ps.indx (String.length s); (* XXX debug *)
*)
let lines =
String.split_on_char '\n'
(String.sub s bol (eol - bol))
in
let nl = List.length lines in
let caret = ref (e.ps.indx - bol) in
let msg = ref [] in
let pfx = " > " in
lines |> List.iteri (fun ln l ->
if ln <> nl - 1 || l <> "" then begin
let ll = String.length l + 1 in
msg := (pfx ^ l ^ "\n") :: !msg;
if !caret <= ll then begin
let pad = String.make !caret ' ' in
msg := (pfx ^ pad ^ "^\n") :: !msg;
end;
caret := !caret - ll;
end;
);
`Error
( e.ps, e.error
, String.concat "" (List.rev !msg) )
(* ---------------------------------------- *)
(* pattern parsing *)
(* ---------------------------------------- *)
(* Example syntax:
(with-vars (a b c d)
(patterns
(ob (add (tmp a) (con d)))
(bsm (add (tmp b) (mul (tmp m) (con 2 4 8)))) ))
*)
open Match
let pint64 =
let* s = pre "[-]?[0-9_]+" in
pret (Int64.of_string s)
let pid =
pre ~what:"an identifer"
"[a-zA-Z][a-zA-Z0-9_]*"
let pop_base =
let sob, obs = show_op_base, op_bases in
let* s = pre ~what:"an operator"
(String.concat "\\|" (List.map sob obs))
in pret (List.find (fun o -> s = sob o) obs)
let pop = let* ob = pop_base in pret (Kl, ob)
let rec ppat vs =
let pcons_tail =
let* cs = plist_tail (pws1 |>> pint64) in
match cs with
| [] -> pret [AnyCon]
| _ -> pret (List.map (fun c -> Con c) cs)
in
let pvar =
let* id = pid in
if not (List.mem id vs) then
pfail ("unbound variable: " ^ id)
else
pret id
in
pws |>> (
( let* c = pint64 in pret [Atm (Con c)] )
|||
( pre "(con)" |>> pret [Atm AnyCon] ) |||
( let* cs = pre "(con" |>> pcons_tail in
pret (List.map (fun c -> Atm c) cs) ) |||
( let* v = pre "(con" |>> pws1 |>> pvar in
let* cs = pcons_tail in
pret (List.map (fun c -> Var (v, c)) cs) )
|||
( pre "(tmp)" |>> pret [Atm Tmp] ) |||
( let* v = pre "(tmp" |>> pws1 |>> pvar in
pws |>> pre ")" |>> pret [Var (v, Tmp)] )
|||
( let* (op, rands) =
plist1p (pws |>> pop) (papp ppat vs) in
let nrands = List.length rands in
if nrands < 2 then
pfail ( "binary op requires at least"
^ " two arguments" )
else
let mk x y = Bnr (op, x, y) in
pret
(products rands []
(fun rands pats ->
(* construct a left-heavy tree *)
let r0 = List.hd rands in
let rs = List.tl rands in
List.fold_left mk r0 rs :: pats)) )
)
let pwith_vars ?(vs = []) p =
( let* vs =
pws |>> pre "(with-vars" |>> pws |>>
plist (pws |>> pid)
in pws |>> p vs |<< pws |<< pre ")" )
||| p vs
let ppats =
pwith_vars @@ fun vs ->
pre "(patterns" |>> plist_tail
(pwith_vars ~vs @@ fun vs ->
let* n, ps = ppair pid (ppat vs) in
pret (n, vs, ps))
(* ---------------------------------------- *)
(* tests *)
(* ---------------------------------------- *)
let () =
if false then
let show_patterns ps =
"[" ^ String.concat "; "
(List.map show_pattern ps) ^ "]"
in
let pat s =
Printf.printf "parse %s = " s;
let vars =
[ "foobar"; "a"; "b"; "d"
; "m"; "s"; "x" ]
in
match run_parser (ppat vars) s with
| `Ok p ->
Printf.printf "%s\n" (show_patterns p)
| `Error (_, e, _) ->
Printf.printf "ERROR: %s\n" e
in
pat "42";
pat "(tmp)";
pat "(tmp foobar)";
pat "(con)";
pat "(con 1 2 3)";
pat "(con x 1 2 3)";
pat "(add 1 2)";
pat "(add 1 2 3 4)";
pat "(sub 1 2)";
pat "(sub 1 2 3)";
pat "(tmp unbound_var)";
pat "(add 0)";
pat "(add 1 (add 2 3))";
pat "(add (tmp a) (con d))";
pat "(add (tmp b) (mul (tmp m) (con s 2 4 8)))";
pat "(add (con 1 2) (con 3 4))";
()