mirror of
https://github.com/ProgramSnail/Lama.git
synced 2025-12-06 06:48:48 +00:00
297 lines
6.1 KiB
Text
297 lines
6.1 KiB
Text
-- Ostap.
|
|
-- (C) Dmitry Boulytchev, JetBrains Research, St. Petersburg State University, 2020
|
|
--
|
|
-- This unit provides an implementation of monadic parser combinators in CPS with
|
|
-- memoization.
|
|
|
|
import List;
|
|
import Collection;
|
|
import Ref;
|
|
import Fun;
|
|
import Matcher;
|
|
|
|
local tab, hct, restab, log = false;
|
|
|
|
public fun logOn () {
|
|
log := true
|
|
}
|
|
|
|
public fun initOstap () {
|
|
tab := ref (emptyHashTab ());
|
|
restab := emptyMemo ();
|
|
hct := emptyMemo ()
|
|
}
|
|
|
|
public fun memo (f) {
|
|
f := lookupMemo (hct, f);
|
|
|
|
if log then printf ("Memoizing: %x=%s\n", f, f.string) fi;
|
|
|
|
case findHashTab (deref (tab), f) of
|
|
None -> if log then printf ("new table...\n") fi;
|
|
tab ::= addHashTab (deref (tab), f, ref (emptyMap ()))
|
|
| Some (tt) -> skip
|
|
esac;
|
|
|
|
fun (k) {
|
|
fun (s) {
|
|
local t = case findHashTab (deref (tab), f) of Some (t) -> t esac;
|
|
if log then printf ("Applying memoized parser to %s\n", s.string) fi;
|
|
case findMap (deref (t), s) of
|
|
None ->
|
|
t ::= addMap (deref (t), s, [addSet (emptySet (), k), emptySet ()]);
|
|
f (fun (r) {
|
|
r := lookupMemo (restab, r);
|
|
if log then printf ("Running continuation with result %s\n", r.string) fi;
|
|
case findMap (deref (t), s) of
|
|
Some ([ks, rs]) ->
|
|
if memSet (rs, r)
|
|
then skip
|
|
else
|
|
t ::= addMap (deref (t), s, [ks, addSet (rs, r)]);
|
|
iterSet (fun (k) {k (r)}, ks)
|
|
fi
|
|
esac
|
|
}
|
|
)
|
|
(s)
|
|
| Some ([ks, rs]) ->
|
|
t ::= addMap (deref (t), s, [addSet (ks, k), rs]);
|
|
iterSet (k, rs)
|
|
esac
|
|
}
|
|
}
|
|
}
|
|
|
|
public fun token (x) {
|
|
fun (k) {
|
|
fun (s) {
|
|
k $ matchString (s, x)
|
|
}
|
|
}
|
|
}
|
|
|
|
public fun eof (k) {
|
|
fun (s) {
|
|
k (endOf (s))
|
|
}
|
|
}
|
|
|
|
public fun empty (k) {
|
|
fun (s) {k (Succ ({}, s))}
|
|
}
|
|
|
|
public fun alt (a, b) {
|
|
fun (k) {
|
|
fun (s) {
|
|
if log then printf ("Running alt at %s\n", s.string) fi;
|
|
a (k) (s);
|
|
b (k) (s)
|
|
}
|
|
}
|
|
}
|
|
|
|
public fun seq (a, b) {
|
|
fun (k) {
|
|
fun (s) {
|
|
if log then printf ("Running seq at %s\n", s.string) fi;
|
|
a (fun (ar) {
|
|
case ar of
|
|
Succ (x, s) -> b (x) (k) (s)
|
|
| _ -> k (ar)
|
|
esac
|
|
}) (s)
|
|
}
|
|
}
|
|
}
|
|
|
|
public infixr | before !! (a, b) {alt (a, b)}
|
|
public infixr |> after | (a, b) {seq (a, b)}
|
|
|
|
public infix @ at * (a, f) {
|
|
fun (k) {
|
|
fun (s) {
|
|
a (fun (x) {k (case x of
|
|
Succ (x, s) -> Succ (f (x), s)
|
|
| _ -> x
|
|
esac)}) (s)
|
|
}
|
|
}
|
|
}
|
|
|
|
public fun lift (f) {
|
|
fun (x) {f}
|
|
}
|
|
|
|
public fun bypass (f) {
|
|
fun (x) {f @ lift (x)}
|
|
}
|
|
|
|
public fun opt (a) {empty @ lift (None) | a @ fun (x) {Some (x)}}
|
|
|
|
public fun rep0 (a) {
|
|
--memo
|
|
(empty @ lift({}) | a |> fun (x) {rep0 (a) @ fun (as) {x : as}})
|
|
}
|
|
|
|
public fun rep (a) {
|
|
--memo
|
|
(a |> (fun (x) {rep0 (a) @ fun (as) {x : as}}))
|
|
}
|
|
|
|
public fun listBy (item, sep) {
|
|
item |> fun (i) {rep0 (sep |> lift (item)) @ fun (is) {i : is}}
|
|
}
|
|
|
|
public fun list0By (item, sep) {
|
|
empty @ lift ({}) | listBy (item, sep)
|
|
}
|
|
|
|
public fun list (item) {
|
|
listBy (item, token (","))
|
|
}
|
|
|
|
public fun list0 (item) {
|
|
list0By (item, token (","))
|
|
}
|
|
|
|
public fun observe (name, f) {
|
|
fun (k) {
|
|
fun (s) {
|
|
printf ("%s at %s\n", name, s.string);
|
|
f (k)(s)
|
|
}
|
|
}
|
|
}
|
|
|
|
fun createResult () {
|
|
local errors = ref ({}),
|
|
line = ref (0),
|
|
col = ref (0),
|
|
value = ref ({}),
|
|
hasError = ref (false),
|
|
hasValue = ref (false);
|
|
|
|
fun k (x) {
|
|
case x of
|
|
Succ (val, _) ->
|
|
if deref (hasValue)
|
|
then failure (sprintf ("Ostap: ambiguous parsing (%s vs. %s)", deref (value).string, val.string))
|
|
else
|
|
hasValue ::= true;
|
|
value ::= val
|
|
fi
|
|
| Fail (err, l, c) ->
|
|
if deref (hasError)
|
|
then
|
|
local c = compare ([line, col], [l, c]);
|
|
|
|
if c == 0 then errors ::= err : deref (errors)
|
|
elif c < 0 then errors ::= singleton (err); line ::= l; col ::= c
|
|
fi
|
|
else
|
|
hasError ::= true;
|
|
errors ::= singleton (err);
|
|
line ::= l;
|
|
col ::= c
|
|
fi
|
|
esac
|
|
}
|
|
|
|
fun get () {
|
|
if deref (hasValue) then Succ (deref (value))
|
|
elif deref (hasError) then Fail (deref (errors), deref (line), deref (col))
|
|
else failure ("Ostap::createAcceptor::get: nothing to return\n")
|
|
fi
|
|
}
|
|
|
|
[k, get]
|
|
}
|
|
|
|
fun k (acc) {
|
|
acc [0]
|
|
}
|
|
|
|
fun result (acc) {
|
|
acc [1] ()
|
|
}
|
|
|
|
public fun parse (p, m) {
|
|
local acc = createResult ();
|
|
|
|
p (acc.k) (m);
|
|
|
|
acc.result
|
|
}
|
|
|
|
public fun parseString (p, s) {
|
|
local acc = createResult ();
|
|
|
|
p (acc.k) (initMatcher (s));
|
|
|
|
acc.result
|
|
}
|
|
|
|
public fun left (f) {
|
|
fun (c, x) {
|
|
fun (y) {
|
|
f (c (x), y)
|
|
}
|
|
}
|
|
}
|
|
|
|
public fun right (f) {
|
|
fun (c, x) {
|
|
fun (y) {
|
|
c (f (x, y))
|
|
}
|
|
}
|
|
}
|
|
|
|
--- ops -> fun (x, y) {x `op` y}
|
|
fun altl (level) {
|
|
case level of
|
|
[assoc, ps] ->
|
|
local assfun = case assoc of Left -> left | Right -> right | Nona -> left esac;
|
|
case map (fun (p) {
|
|
case p of
|
|
[op, sema] -> op @ lift(assfun (sema))
|
|
esac
|
|
}, ps) of
|
|
p : ps -> foldl (infix |, p, ps)
|
|
esac
|
|
esac
|
|
}
|
|
|
|
public fun expr (ops, opnd) {
|
|
fun inner (ops) {
|
|
case ops of
|
|
{} -> fun (c) {opnd @ c}
|
|
| level : tl ->
|
|
local lops = altl (level),
|
|
next = inner (tl);
|
|
|
|
case level.fst of
|
|
Nona ->
|
|
fun this (c) {
|
|
next (id) |> fun (l) {lops |> fun (op) {next (id) @ fun (r) {c (op)(id, l)(r)}}}
|
|
| next (id) @ c
|
|
}
|
|
|
|
this
|
|
|
|
| _ ->
|
|
fun this (c) {
|
|
next (id) |> fun (l) {lops |> fun (op) {this (op (c, l))}}
|
|
| next (id) @ c
|
|
}
|
|
|
|
this
|
|
esac
|
|
esac
|
|
}
|
|
|
|
inner (ops)
|
|
}
|
|
|
|
initOstap ()
|