2020-02-20 12:43:52 +03:00
|
|
|
-- Ostap.
|
|
|
|
|
-- (C) Dmitry Boulytchev, JetBrains Research, St. Petersburg State University, 2020
|
|
|
|
|
--
|
|
|
|
|
-- This unit provides an implementation of monadic parser combinators in CPS with
|
|
|
|
|
-- memoization.
|
|
|
|
|
|
2020-01-20 03:38:43 +03:00
|
|
|
import List;
|
|
|
|
|
import Collection;
|
|
|
|
|
import Ref;
|
|
|
|
|
import Fun;
|
2020-01-21 22:03:11 +03:00
|
|
|
import Matcher;
|
2022-01-31 23:46:18 +03:00
|
|
|
import Data;
|
2020-01-20 03:38:43 +03:00
|
|
|
|
2021-01-31 22:25:31 +03:00
|
|
|
var tab, hct, restab, log = false;
|
2020-01-30 23:36:15 +03:00
|
|
|
|
|
|
|
|
public fun logOn () {
|
|
|
|
|
log := true
|
|
|
|
|
}
|
2020-01-21 22:03:11 +03:00
|
|
|
|
|
|
|
|
public fun initOstap () {
|
2020-08-06 14:56:41 +03:00
|
|
|
tab := ref (emptyHashTab (1024, hash, compare));
|
2021-01-31 22:57:12 +03:00
|
|
|
restab := emptyCustomMemo (fun (x) {case x of #str -> true | _ -> false esac}, compare);
|
2020-01-31 01:30:03 +03:00
|
|
|
hct := emptyMemo ()
|
2020-01-21 22:03:11 +03:00
|
|
|
}
|
|
|
|
|
|
2020-01-22 22:30:34 +03:00
|
|
|
public fun memo (f) {
|
2022-01-31 23:46:18 +03:00
|
|
|
-- f := lookupMemo (hct, f);
|
2020-01-21 22:03:11 +03:00
|
|
|
|
2020-03-13 19:41:14 +03:00
|
|
|
if log then printf ("Memoizing %x=%s\n", f, f.string) fi;
|
2020-01-21 22:03:11 +03:00
|
|
|
|
|
|
|
|
case findHashTab (deref (tab), f) of
|
2020-01-31 01:30:03 +03:00
|
|
|
None -> if log then printf ("new table...\n") fi;
|
2020-08-06 14:56:41 +03:00
|
|
|
tab ::= addHashTab (deref (tab), f, ref (emptyMap (compare)))
|
2020-03-13 19:41:14 +03:00
|
|
|
|
2020-01-31 01:30:03 +03:00
|
|
|
| Some (tt) -> skip
|
2020-01-21 22:03:11 +03:00
|
|
|
esac;
|
|
|
|
|
|
2020-01-20 03:38:43 +03:00
|
|
|
fun (k) {
|
|
|
|
|
fun (s) {
|
2021-01-31 22:25:31 +03:00
|
|
|
var t =
|
2020-03-13 19:41:14 +03:00
|
|
|
case findHashTab (deref (tab), f) of
|
|
|
|
|
Some (t) -> t
|
|
|
|
|
esac;
|
2020-01-30 23:36:15 +03:00
|
|
|
if log then printf ("Applying memoized parser to %s\n", s.string) fi;
|
2020-01-21 22:03:11 +03:00
|
|
|
case findMap (deref (t), s) of
|
|
|
|
|
None ->
|
2020-09-01 06:20:39 +03:00
|
|
|
t ::= addMap (deref (t), s, [addSet (emptySet (compare), k), emptySet (fun (r1, r2) {
|
|
|
|
|
case [r1, r2] of
|
|
|
|
|
[Fail (_, _, _), Fail (_, _, _)] -> 0
|
|
|
|
|
| _ -> compare (r1, r2)
|
|
|
|
|
esac
|
|
|
|
|
})]);
|
2020-01-21 22:03:11 +03:00
|
|
|
f (fun (r) {
|
2020-01-31 01:30:03 +03:00
|
|
|
r := lookupMemo (restab, r);
|
2020-01-30 23:36:15 +03:00
|
|
|
if log then printf ("Running continuation with result %s\n", r.string) fi;
|
2020-01-21 22:03:11 +03:00
|
|
|
case findMap (deref (t), s) of
|
2020-08-27 07:24:46 +03:00
|
|
|
Some ([ks, rs]) ->
|
2020-01-21 22:03:11 +03:00
|
|
|
if memSet (rs, r)
|
|
|
|
|
then skip
|
|
|
|
|
else
|
|
|
|
|
t ::= addMap (deref (t), s, [ks, addSet (rs, r)]);
|
|
|
|
|
iterSet (fun (k) {k (r)}, ks)
|
|
|
|
|
fi
|
|
|
|
|
esac
|
|
|
|
|
}
|
|
|
|
|
)
|
|
|
|
|
(s)
|
|
|
|
|
| Some ([ks, rs]) ->
|
|
|
|
|
t ::= addMap (deref (t), s, [addSet (ks, k), rs]);
|
|
|
|
|
iterSet (k, rs)
|
2020-01-20 03:38:43 +03:00
|
|
|
esac
|
|
|
|
|
}
|
2020-01-21 22:03:11 +03:00
|
|
|
}
|
2020-01-20 03:38:43 +03:00
|
|
|
}
|
|
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
public fun token (x) {
|
2020-03-08 00:57:25 +03:00
|
|
|
case x of
|
2021-01-31 22:57:12 +03:00
|
|
|
#str -> memo $ fun (k) {fun (s) {k $ matchString (s, x)}}
|
|
|
|
|
| _ -> memo $ fun (k) {fun (s) {k $ matchRegexp (s, x)}}
|
2020-03-08 00:57:25 +03:00
|
|
|
esac
|
2020-01-20 03:38:43 +03:00
|
|
|
}
|
|
|
|
|
|
2020-04-11 21:09:51 +03:00
|
|
|
public fun loc (k) {
|
|
|
|
|
fun (s) {
|
|
|
|
|
k $ Succ ([s.getLine, s.getCol], s)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
public fun eof (k) {
|
2020-01-20 03:38:43 +03:00
|
|
|
fun (s) {
|
2020-03-13 19:41:14 +03:00
|
|
|
k (endOfMatcher (s))
|
2020-01-20 03:38:43 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
public fun empty (k) {
|
2020-01-20 03:38:43 +03:00
|
|
|
fun (s) {k (Succ ({}, s))}
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
public fun alt (a, b) {
|
2020-03-13 19:41:14 +03:00
|
|
|
memo $
|
2020-01-20 03:38:43 +03:00
|
|
|
fun (k) {
|
|
|
|
|
fun (s) {
|
2020-01-30 23:36:15 +03:00
|
|
|
if log then printf ("Running alt at %s\n", s.string) fi;
|
2020-01-20 03:38:43 +03:00
|
|
|
a (k) (s);
|
|
|
|
|
b (k) (s)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
public fun seq (a, b) {
|
2020-03-13 19:41:14 +03:00
|
|
|
memo $
|
2020-01-20 03:38:43 +03:00
|
|
|
fun (k) {
|
|
|
|
|
fun (s) {
|
2020-01-30 23:36:15 +03:00
|
|
|
if log then printf ("Running seq at %s\n", s.string) fi;
|
2020-01-21 22:03:11 +03:00
|
|
|
a (fun (ar) {
|
2020-01-30 23:36:15 +03:00
|
|
|
case ar of
|
|
|
|
|
Succ (x, s) -> b (x) (k) (s)
|
|
|
|
|
| _ -> k (ar)
|
|
|
|
|
esac
|
2020-01-21 22:03:11 +03:00
|
|
|
}) (s)
|
2020-01-20 03:38:43 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
public infixr | before !! (a, b) {alt (a, b)}
|
|
|
|
|
public infixr |> after | (a, b) {seq (a, b)}
|
|
|
|
|
|
|
|
|
|
public infix @ at * (a, f) {
|
2020-01-20 03:38:43 +03:00
|
|
|
fun (k) {
|
|
|
|
|
fun (s) {
|
2020-01-21 22:03:11 +03:00
|
|
|
a (fun (x) {k (case x of
|
|
|
|
|
Succ (x, s) -> Succ (f (x), s)
|
|
|
|
|
| _ -> x
|
|
|
|
|
esac)}) (s)
|
2020-01-20 03:38:43 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-01-31 23:46:18 +03:00
|
|
|
public infix @@ at * ([name, a], f) {
|
|
|
|
|
fun (k) {
|
|
|
|
|
fun (s) {
|
|
|
|
|
var aa =
|
|
|
|
|
a (fun (x) {k (case x of
|
|
|
|
|
Succ (x, s) -> Succ (f (x), s)
|
|
|
|
|
| _ -> x
|
|
|
|
|
esac)});
|
|
|
|
|
-- printf ("aa=%s\n", aa.string);
|
|
|
|
|
-- printf ("@@: %s\n", name);
|
|
|
|
|
aa (s)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
public fun lift (f) {
|
|
|
|
|
fun (x) {f}
|
2020-01-20 03:38:43 +03:00
|
|
|
}
|
|
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
public fun bypass (f) {
|
|
|
|
|
fun (x) {f @ lift (x)}
|
|
|
|
|
}
|
2020-01-20 03:38:43 +03:00
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
public fun opt (a) {empty @ lift (None) | a @ fun (x) {Some (x)}}
|
2020-01-20 03:38:43 +03:00
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
public fun rep0 (a) {
|
2020-03-13 19:41:14 +03:00
|
|
|
empty @ lift({}) | a |> fun (x) {rep0 (a) @ fun (as) {x : as}}
|
2020-01-21 22:03:11 +03:00
|
|
|
}
|
2020-01-20 03:38:43 +03:00
|
|
|
|
2020-03-13 19:41:14 +03:00
|
|
|
public fun rep (a) {
|
|
|
|
|
a |> (fun (x) {rep0 (a) @ fun (as) {x : as}})
|
2020-01-20 03:38:43 +03:00
|
|
|
}
|
|
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
public fun listBy (item, sep) {
|
|
|
|
|
item |> fun (i) {rep0 (sep |> lift (item)) @ fun (is) {i : is}}
|
|
|
|
|
}
|
2020-01-20 03:38:43 +03:00
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
public fun list0By (item, sep) {
|
|
|
|
|
empty @ lift ({}) | listBy (item, sep)
|
|
|
|
|
}
|
2020-01-20 03:38:43 +03:00
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
public fun list (item) {
|
|
|
|
|
listBy (item, token (","))
|
2020-01-20 03:38:43 +03:00
|
|
|
}
|
|
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
public fun list0 (item) {
|
|
|
|
|
list0By (item, token (","))
|
2020-01-20 03:38:43 +03:00
|
|
|
}
|
|
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
public fun observe (name, f) {
|
2020-01-20 03:38:43 +03:00
|
|
|
fun (k) {
|
|
|
|
|
fun (s) {
|
|
|
|
|
printf ("%s at %s\n", name, s.string);
|
|
|
|
|
f (k)(s)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-04 02:45:34 +03:00
|
|
|
public fun showStream (name) {
|
|
|
|
|
fun (k) {
|
|
|
|
|
fun (s) {
|
|
|
|
|
printf ("%s: %s\n", name, showMatcher (s));
|
|
|
|
|
k (Succ ({}, s))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-22 22:30:34 +03:00
|
|
|
fun createResult () {
|
2021-01-31 22:25:31 +03:00
|
|
|
var errors = ref ({}),
|
2020-01-21 22:03:11 +03:00
|
|
|
line = ref (0),
|
|
|
|
|
col = ref (0),
|
|
|
|
|
value = ref ({}),
|
|
|
|
|
hasError = ref (false),
|
|
|
|
|
hasValue = ref (false);
|
|
|
|
|
|
|
|
|
|
fun k (x) {
|
2020-05-04 02:45:34 +03:00
|
|
|
if log then printf ("Result: %s\n", x.string) fi;
|
2020-01-21 22:03:11 +03:00
|
|
|
case x of
|
2021-01-31 21:07:17 +03:00
|
|
|
Succ (v, s) ->
|
2020-05-04 02:45:34 +03:00
|
|
|
if log then printf ("Result stream: %s\n", showMatcher (s)) fi;
|
2020-01-21 22:03:11 +03:00
|
|
|
if deref (hasValue)
|
2021-01-31 21:07:17 +03:00
|
|
|
then failure (sprintf ("Ostap: ambiguous parsing (%s vs. %s)", deref (value).string, v.string))
|
2020-01-21 22:03:11 +03:00
|
|
|
else
|
|
|
|
|
hasValue ::= true;
|
2021-01-31 21:07:17 +03:00
|
|
|
value ::= v
|
2020-01-21 22:03:11 +03:00
|
|
|
fi
|
|
|
|
|
| Fail (err, l, c) ->
|
|
|
|
|
if deref (hasError)
|
|
|
|
|
then
|
2021-01-31 22:25:31 +03:00
|
|
|
var c = compare ([line, col], [l, c]);
|
2020-01-21 22:03:11 +03:00
|
|
|
|
|
|
|
|
if c == 0 then errors ::= err : deref (errors)
|
2021-01-31 22:25:31 +03:00
|
|
|
elif c < 0 then errors ::= {err}; line ::= l; col ::= c
|
2020-01-21 22:03:11 +03:00
|
|
|
fi
|
|
|
|
|
else
|
|
|
|
|
hasError ::= true;
|
2021-01-31 22:25:31 +03:00
|
|
|
errors ::= {err};
|
2020-01-21 22:03:11 +03:00
|
|
|
line ::= l;
|
|
|
|
|
col ::= c
|
|
|
|
|
fi
|
|
|
|
|
esac
|
|
|
|
|
}
|
2020-01-20 03:38:43 +03:00
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
fun get () {
|
|
|
|
|
if deref (hasValue) then Succ (deref (value))
|
|
|
|
|
elif deref (hasError) then Fail (deref (errors), deref (line), deref (col))
|
2020-01-30 23:36:15 +03:00
|
|
|
else failure ("Ostap::createAcceptor::get: nothing to return\n")
|
2020-01-21 22:03:11 +03:00
|
|
|
fi
|
2020-01-20 03:38:43 +03:00
|
|
|
}
|
|
|
|
|
|
2020-01-21 22:03:11 +03:00
|
|
|
[k, get]
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-22 22:30:34 +03:00
|
|
|
fun k (acc) {
|
2020-01-21 22:03:11 +03:00
|
|
|
acc [0]
|
|
|
|
|
}
|
|
|
|
|
|
2020-01-22 22:30:34 +03:00
|
|
|
fun result (acc) {
|
2020-01-21 22:03:11 +03:00
|
|
|
acc [1] ()
|
|
|
|
|
}
|
2020-01-22 22:30:34 +03:00
|
|
|
|
|
|
|
|
public fun parse (p, m) {
|
2021-01-31 22:25:31 +03:00
|
|
|
var acc = createResult ();
|
2020-01-22 22:30:34 +03:00
|
|
|
|
|
|
|
|
p (acc.k) (m);
|
|
|
|
|
|
|
|
|
|
acc.result
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public fun parseString (p, s) {
|
2021-01-31 22:25:31 +03:00
|
|
|
var acc = createResult ();
|
2020-01-22 22:30:34 +03:00
|
|
|
|
|
|
|
|
p (acc.k) (initMatcher (s));
|
|
|
|
|
|
|
|
|
|
acc.result
|
2020-01-26 06:06:14 +03:00
|
|
|
}
|
|
|
|
|
|
2020-04-11 21:09:51 +03:00
|
|
|
public fun left (op, f) {
|
2020-02-20 12:43:52 +03:00
|
|
|
fun (c, x) {
|
|
|
|
|
fun (y) {
|
2020-04-11 21:09:51 +03:00
|
|
|
f (c (x), op, y)
|
2020-02-20 12:43:52 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-04-11 21:09:51 +03:00
|
|
|
public fun right (op, f) {
|
2020-02-20 12:43:52 +03:00
|
|
|
fun (c, x) {
|
|
|
|
|
fun (y) {
|
2020-04-11 21:09:51 +03:00
|
|
|
c (f (x, op, y))
|
2020-02-20 12:43:52 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
--- ops -> fun (x, y) {x `op` y}
|
|
|
|
|
fun altl (level) {
|
|
|
|
|
case level of
|
|
|
|
|
[assoc, ps] ->
|
2021-01-31 22:25:31 +03:00
|
|
|
var assfun = case assoc of Left -> left | Right -> right | Nona -> left esac;
|
2020-02-20 12:43:52 +03:00
|
|
|
case map (fun (p) {
|
|
|
|
|
case p of
|
2020-04-11 21:09:51 +03:00
|
|
|
[op, sema] -> op @ fun (op) {assfun (op, sema)}
|
2020-02-20 12:43:52 +03:00
|
|
|
esac
|
|
|
|
|
}, ps) of
|
|
|
|
|
p : ps -> foldl (infix |, p, ps)
|
|
|
|
|
esac
|
|
|
|
|
esac
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public fun expr (ops, opnd) {
|
|
|
|
|
fun inner (ops) {
|
|
|
|
|
case ops of
|
|
|
|
|
{} -> fun (c) {opnd @ c}
|
|
|
|
|
| level : tl ->
|
2021-01-31 22:25:31 +03:00
|
|
|
var lops = altl (level),
|
2022-01-31 23:46:18 +03:00
|
|
|
next = inner (tl);
|
2020-02-20 12:43:52 +03:00
|
|
|
|
|
|
|
|
case level.fst of
|
|
|
|
|
Nona ->
|
|
|
|
|
fun this (c) {
|
|
|
|
|
next (id) |> fun (l) {lops |> fun (op) {next (id) @ fun (r) {c (op)(id, l)(r)}}}
|
|
|
|
|
| next (id) @ c
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
this
|
|
|
|
|
|
|
|
|
|
| _ ->
|
|
|
|
|
fun this (c) {
|
|
|
|
|
next (id) |> fun (l) {lops |> fun (op) {this (op (c, l))}}
|
|
|
|
|
| next (id) @ c
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
this
|
|
|
|
|
esac
|
|
|
|
|
esac
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-04 17:58:26 +03:00
|
|
|
inner (ops) (id)
|
2020-02-20 12:43:52 +03:00
|
|
|
}
|
|
|
|
|
|
2020-01-30 23:36:15 +03:00
|
|
|
initOstap ()
|