lama_byterun/stdlib/Ostap.lama

339 lines
7.5 KiB
Text
Raw Normal View History

2020-02-20 12:43:52 +03:00
-- Ostap.
-- (C) Dmitry Boulytchev, JetBrains Research, St. Petersburg State University, 2020
--
-- This unit provides an implementation of monadic parser combinators in CPS with
-- memoization.
import List;
import Collection;
import Ref;
import Fun;
2020-01-21 22:03:11 +03:00
import Matcher;
2022-01-31 23:46:18 +03:00
import Data;
var tab, hct, restab, log = false;
2020-01-30 23:36:15 +03:00
public fun logOn () {
log := true
}
2020-01-21 22:03:11 +03:00
public fun initOstap () {
2020-08-06 14:56:41 +03:00
tab := ref (emptyHashTab (1024, hash, compare));
2021-01-31 22:57:12 +03:00
restab := emptyCustomMemo (fun (x) {case x of #str -> true | _ -> false esac}, compare);
2020-01-31 01:30:03 +03:00
hct := emptyMemo ()
2020-01-21 22:03:11 +03:00
}
2020-01-22 22:30:34 +03:00
public fun memo (f) {
2022-01-31 23:46:18 +03:00
-- f := lookupMemo (hct, f);
2020-01-21 22:03:11 +03:00
if log then printf ("Memoizing %x=%s\n", f, f.string) fi;
2020-01-21 22:03:11 +03:00
case findHashTab (deref (tab), f) of
2020-01-31 01:30:03 +03:00
None -> if log then printf ("new table...\n") fi;
2020-08-06 14:56:41 +03:00
tab ::= addHashTab (deref (tab), f, ref (emptyMap (compare)))
2020-01-31 01:30:03 +03:00
| Some (tt) -> skip
2020-01-21 22:03:11 +03:00
esac;
fun (k) {
fun (s) {
var t =
case findHashTab (deref (tab), f) of
Some (t) -> t
esac;
2020-01-30 23:36:15 +03:00
if log then printf ("Applying memoized parser to %s\n", s.string) fi;
2020-01-21 22:03:11 +03:00
case findMap (deref (t), s) of
None ->
2020-09-01 06:20:39 +03:00
t ::= addMap (deref (t), s, [addSet (emptySet (compare), k), emptySet (fun (r1, r2) {
case [r1, r2] of
[Fail (_, _, _), Fail (_, _, _)] -> 0
| _ -> compare (r1, r2)
esac
})]);
2020-01-21 22:03:11 +03:00
f (fun (r) {
2020-01-31 01:30:03 +03:00
r := lookupMemo (restab, r);
2020-01-30 23:36:15 +03:00
if log then printf ("Running continuation with result %s\n", r.string) fi;
2020-01-21 22:03:11 +03:00
case findMap (deref (t), s) of
2020-08-27 07:24:46 +03:00
Some ([ks, rs]) ->
2020-01-21 22:03:11 +03:00
if memSet (rs, r)
then skip
else
t ::= addMap (deref (t), s, [ks, addSet (rs, r)]);
iterSet (fun (k) {k (r)}, ks)
fi
esac
}
)
(s)
| Some ([ks, rs]) ->
t ::= addMap (deref (t), s, [addSet (ks, k), rs]);
iterSet (k, rs)
esac
}
2020-01-21 22:03:11 +03:00
}
}
2020-01-21 22:03:11 +03:00
public fun token (x) {
2020-03-08 00:57:25 +03:00
case x of
2021-01-31 22:57:12 +03:00
#str -> memo $ fun (k) {fun (s) {k $ matchString (s, x)}}
| _ -> memo $ fun (k) {fun (s) {k $ matchRegexp (s, x)}}
2020-03-08 00:57:25 +03:00
esac
}
2020-04-11 21:09:51 +03:00
public fun loc (k) {
fun (s) {
k $ Succ ([s.getLine, s.getCol], s)
}
}
2020-01-21 22:03:11 +03:00
public fun eof (k) {
fun (s) {
k (endOfMatcher (s))
}
}
2020-01-21 22:03:11 +03:00
public fun empty (k) {
fun (s) {k (Succ ({}, s))}
}
2020-01-21 22:03:11 +03:00
public fun alt (a, b) {
memo $
fun (k) {
fun (s) {
2020-01-30 23:36:15 +03:00
if log then printf ("Running alt at %s\n", s.string) fi;
a (k) (s);
b (k) (s)
}
}
}
2020-01-21 22:03:11 +03:00
public fun seq (a, b) {
memo $
fun (k) {
fun (s) {
2020-01-30 23:36:15 +03:00
if log then printf ("Running seq at %s\n", s.string) fi;
2020-01-21 22:03:11 +03:00
a (fun (ar) {
2020-01-30 23:36:15 +03:00
case ar of
Succ (x, s) -> b (x) (k) (s)
| _ -> k (ar)
esac
2020-01-21 22:03:11 +03:00
}) (s)
}
}
}
2020-01-21 22:03:11 +03:00
public infixr | before !! (a, b) {alt (a, b)}
public infixr |> after | (a, b) {seq (a, b)}
public infix @ at * (a, f) {
fun (k) {
fun (s) {
2020-01-21 22:03:11 +03:00
a (fun (x) {k (case x of
Succ (x, s) -> Succ (f (x), s)
| _ -> x
esac)}) (s)
}
}
}
2022-01-31 23:46:18 +03:00
public infix @@ at * ([name, a], f) {
fun (k) {
fun (s) {
var aa =
a (fun (x) {k (case x of
Succ (x, s) -> Succ (f (x), s)
| _ -> x
esac)});
-- printf ("aa=%s\n", aa.string);
-- printf ("@@: %s\n", name);
aa (s)
}
}
}
2020-01-21 22:03:11 +03:00
public fun lift (f) {
fun (x) {f}
}
2020-01-21 22:03:11 +03:00
public fun bypass (f) {
fun (x) {f @ lift (x)}
}
2020-01-21 22:03:11 +03:00
public fun opt (a) {empty @ lift (None) | a @ fun (x) {Some (x)}}
2020-01-21 22:03:11 +03:00
public fun rep0 (a) {
empty @ lift({}) | a |> fun (x) {rep0 (a) @ fun (as) {x : as}}
2020-01-21 22:03:11 +03:00
}
public fun rep (a) {
a |> (fun (x) {rep0 (a) @ fun (as) {x : as}})
}
2020-01-21 22:03:11 +03:00
public fun listBy (item, sep) {
item |> fun (i) {rep0 (sep |> lift (item)) @ fun (is) {i : is}}
}
2020-01-21 22:03:11 +03:00
public fun list0By (item, sep) {
empty @ lift ({}) | listBy (item, sep)
}
2020-01-21 22:03:11 +03:00
public fun list (item) {
listBy (item, token (","))
}
2020-01-21 22:03:11 +03:00
public fun list0 (item) {
list0By (item, token (","))
}
2020-01-21 22:03:11 +03:00
public fun observe (name, f) {
fun (k) {
fun (s) {
printf ("%s at %s\n", name, s.string);
f (k)(s)
}
}
}
2020-05-04 02:45:34 +03:00
public fun showStream (name) {
fun (k) {
fun (s) {
printf ("%s: %s\n", name, showMatcher (s));
k (Succ ({}, s))
}
}
}
2020-01-22 22:30:34 +03:00
fun createResult () {
var errors = ref ({}),
2020-01-21 22:03:11 +03:00
line = ref (0),
col = ref (0),
value = ref ({}),
hasError = ref (false),
hasValue = ref (false);
fun k (x) {
2020-05-04 02:45:34 +03:00
if log then printf ("Result: %s\n", x.string) fi;
2020-01-21 22:03:11 +03:00
case x of
2021-01-31 21:07:17 +03:00
Succ (v, s) ->
2020-05-04 02:45:34 +03:00
if log then printf ("Result stream: %s\n", showMatcher (s)) fi;
2020-01-21 22:03:11 +03:00
if deref (hasValue)
2021-01-31 21:07:17 +03:00
then failure (sprintf ("Ostap: ambiguous parsing (%s vs. %s)", deref (value).string, v.string))
2020-01-21 22:03:11 +03:00
else
hasValue ::= true;
2021-01-31 21:07:17 +03:00
value ::= v
2020-01-21 22:03:11 +03:00
fi
| Fail (err, l, c) ->
if deref (hasError)
then
var c = compare ([line, col], [l, c]);
2020-01-21 22:03:11 +03:00
if c == 0 then errors ::= err : deref (errors)
elif c < 0 then errors ::= {err}; line ::= l; col ::= c
2020-01-21 22:03:11 +03:00
fi
else
hasError ::= true;
errors ::= {err};
2020-01-21 22:03:11 +03:00
line ::= l;
col ::= c
fi
esac
}
2020-01-21 22:03:11 +03:00
fun get () {
if deref (hasValue) then Succ (deref (value))
elif deref (hasError) then Fail (deref (errors), deref (line), deref (col))
2020-01-30 23:36:15 +03:00
else failure ("Ostap::createAcceptor::get: nothing to return\n")
2020-01-21 22:03:11 +03:00
fi
}
2020-01-21 22:03:11 +03:00
[k, get]
}
2020-01-22 22:30:34 +03:00
fun k (acc) {
2020-01-21 22:03:11 +03:00
acc [0]
}
2020-01-22 22:30:34 +03:00
fun result (acc) {
2020-01-21 22:03:11 +03:00
acc [1] ()
}
2020-01-22 22:30:34 +03:00
public fun parse (p, m) {
var acc = createResult ();
2020-01-22 22:30:34 +03:00
p (acc.k) (m);
acc.result
}
public fun parseString (p, s) {
var acc = createResult ();
2020-01-22 22:30:34 +03:00
p (acc.k) (initMatcher (s));
acc.result
}
2020-04-11 21:09:51 +03:00
public fun left (op, f) {
2020-02-20 12:43:52 +03:00
fun (c, x) {
fun (y) {
2020-04-11 21:09:51 +03:00
f (c (x), op, y)
2020-02-20 12:43:52 +03:00
}
}
}
2020-04-11 21:09:51 +03:00
public fun right (op, f) {
2020-02-20 12:43:52 +03:00
fun (c, x) {
fun (y) {
2020-04-11 21:09:51 +03:00
c (f (x, op, y))
2020-02-20 12:43:52 +03:00
}
}
}
--- ops -> fun (x, y) {x `op` y}
fun altl (level) {
case level of
[assoc, ps] ->
var assfun = case assoc of Left -> left | Right -> right | Nona -> left esac;
2020-02-20 12:43:52 +03:00
case map (fun (p) {
case p of
2020-04-11 21:09:51 +03:00
[op, sema] -> op @ fun (op) {assfun (op, sema)}
2020-02-20 12:43:52 +03:00
esac
}, ps) of
p : ps -> foldl (infix |, p, ps)
esac
esac
}
public fun expr (ops, opnd) {
fun inner (ops) {
case ops of
{} -> fun (c) {opnd @ c}
| level : tl ->
var lops = altl (level),
2022-01-31 23:46:18 +03:00
next = inner (tl);
2020-02-20 12:43:52 +03:00
case level.fst of
Nona ->
fun this (c) {
next (id) |> fun (l) {lops |> fun (op) {next (id) @ fun (r) {c (op)(id, l)(r)}}}
| next (id) @ c
}
this
| _ ->
fun this (c) {
next (id) |> fun (l) {lops |> fun (op) {this (op (c, l))}}
| next (id) @ c
}
this
esac
esac
}
inner (ops) (id)
2020-02-20 12:43:52 +03:00
}
2020-01-30 23:36:15 +03:00
initOstap ()