More ostap

This commit is contained in:
Dmitry Boulytchev 2020-01-21 22:03:11 +03:00
parent f1f3c8aff0
commit 9163747ff3
7 changed files with 175 additions and 145 deletions

View file

@ -535,7 +535,7 @@ int inner_hash (int depth, unsigned acc, void *p) {
case STRING_TAG: {
char *p = a->contents;
while (p) {
while (*p) {
acc = HASH_APPEND(acc, (int) *p++);
}
@ -1164,7 +1164,7 @@ extern void __gc_root_scan_stack ();
/* ======================================== */
// static size_t SPACE_SIZE = 32;
static size_t SPACE_SIZE = 32 * 1024;
static size_t SPACE_SIZE = 32 * 1024 * 100;
// static size_t SPACE_SIZE = 128;
// static size_t SPACE_SIZE = 1024 * 1024;

View file

@ -4,21 +4,20 @@ public fun id (x) {
x
}
public infixl $ after * (f, x) {
public infixl $ after := (f, x) {
f (x)
}
public infix # at $ (f, g) {
public infix # after * (f, g) {
fun (x) {
f (g (x))
}
}
public fun fix (f) {
local knot = ref (0);
knot ::= fun () {fun (x) {f (deref (knot) ()) (x)}};
local knot = ref ({});
knot ::= fun (x) {f (deref (knot)) (x)};
deref (knot) ()
deref (knot)
}

View file

@ -1,4 +1,4 @@
public fun list (x) {
public fun singleton (x) {
x : {}
}

View file

@ -18,7 +18,7 @@ public fun createRegexp (r, name) {
-- line, col --- line and column numbers
-- This function is internal, do not use it directly.
-- To initially create a matcher use initMatcher function (see below).
fun matcherCreate (buf, pos, line, col) {
fun createMatcher (buf, pos, line, col) {
-- Shows a matcher in a readable form
fun show () {
sprintf ("buf : %-40s\npos : %d\nline: %d\ncol : %d\n", buf, pos, line, col)
@ -41,14 +41,14 @@ fun matcherCreate (buf, pos, line, col) {
esac
od;
matcherCreate (buf, pos + n, l, c)
createMatcher (buf, pos + n, l, c)
}
fun matchString (s) {
if s.length > rest ()
then Fail (sprintf ("""%s"" expected at %d:%d", s, line, col))
elif matchSubString (buf, s, pos) then Succ (shift (s.length), s)
else Fail (sprintf ("""%s"" expected at %d:%d", s, line, col))
then Fail (sprintf ("""%s"" expected", s), line, col)
elif matchSubString (buf, s, pos) then Succ (s, shift (s.length))
else Fail (sprintf ("""%s"" expected at", s), line, col)
fi
}
@ -56,13 +56,16 @@ fun matcherCreate (buf, pos, line, col) {
local n;
if (n := regexpMatch (r[0], buf, pos)) > 0
then Succ (shift (n), substring (buf, pos, n))
else Fail (sprintf ("%s expected at %d:%d", r[1], line, col))
then Succ (substring (buf, pos, n), shift (n))
else Fail (sprintf ("%s expected", r[1]), line, col)
fi
}
fun eof () {
rest () == 0
if rest () == 0
then Succ ("", shift (0))
else Fail ("EOF expected", line, col)
fi
}
[show,
@ -89,6 +92,6 @@ public fun matchRegexp (m, r) {
}
-- Creates a fresh matcher from a string buffer
public fun matcherInit (buf) {
matcherCreate (buf, 0, 1, 1)
public fun initMatcher (buf) {
createMatcher (buf, 0, 1, 1)
}

View file

@ -2,125 +2,24 @@ import List;
import Collection;
import Ref;
import Fun;
import Matcher;
fun token_k (x) {
fun (k) {
fun (s) {
case s of
h : t -> if compare (h, x) == 0 then k (Succ (x, t)) else k (Fail ("expected " ++ x.string)) fi
| _ -> k (Fail ("expected " ++ x.string))
esac
}
}
local tab, hct;
public fun initOstap () {
tab := ref (emptyHashTab ());
hct := emptyMemo ()
}
fun token (x) {
fun (s) {
case s of
h : t -> if compare (h, x) == 0 then return Succ (list ([x, t]), {}) fi
| _ -> skip
esac;
Fail (list ("expected " ++ x.string))
}
}
fun eof_k (k) {
fun (s) {
case s of
{} -> k (Succ ({}, {}))
| _ -> k (Fail ("expected <EOF>"))
esac
}
}
fun eof (s) {
case s of
{} -> Succ (list ([{}, {}]), {})
| _ -> Fail (list ("expected <EOF>"))
esac
}
fun empty_k (k) {
fun (s) {k (Succ ({}, s))}
}
fun empty (s) {
Succ ({[{}, s]}, {})
}
infix ** at * (a, b) {
case [a, b] of
[Succ (r1, e1), Succ (r2, e2)] -> Succ (r1 +++ r2, e1 +++ e2)
| [Fail (e1) , Fail (e2) ] -> Fail (e1 +++ e2)
| [Succ (r1, e1), Fail (e2) ] -> Succ (r1, e1 +++ e2)
| [Fail (e1) , Succ (r2, e2)] -> Succ (r2, e1 +++ e2)
esac
}
fun alt_k (a, b) {
fun (k) {
fun (s) {
a (k) (s);
b (k) (s)
}
}
}
fun alt (a, b) {
fun (s) {a (s) ** b (s)}
}
fun seq_nm (a, b) {
fun (k) {
fun (s) {
a (fun (ar) {case ar of Fail (_) -> k (ar) | Succ (x, s) -> b (k) (s) esac}) (s)
}
}
}
fun seq_k (a, b) {
fun (k) {
fun (s) {
a (fun (ar) {case ar of Fail (_) -> k (ar) | Succ (x, s) -> b (x) (k) (s) esac}) (s)
}
}
}
fun seq (a, b) {
fun (s) {
case a (s) of
r@(Fail (_)) -> r
| Succ (r, e) ->
foldl (fun (r, p) {r ** b (p.fst) (p.snd)}, Fail (e), r)
esac
}
}
--infixr | before !! (a, b) {alt (a, b)}
--infixr |> after | (a, b) {seq (a, b)}
infixr || before !! (a, b) {alt_k (a, b)}
infixr ||> after || (a, b) {seq_k (a, b)}
infix @ at * (a, f) {
fun (k) {
fun (s) {
a (fun (x) {k (case x of Fail (_) -> x | Succ (x, s) -> Succ (f (x), s) esac)}) (s)
}
}
}
local tab = emptyMemo (), memo_tab = ref (emptyHashTab ());
fun memo (f) {
local t;
--f := lookupMemo (tab, f);
printf ("Memoizing: f=%x\n", f);
case findHashTab (deref (memo_tab), f) of
None -> t := ref (emptyMap ()); memo_tab ::= addHashTab (deref (memo_tab), f, t)
f := lookupMemo (hct, f);
--printf ("Memoizing: %x=%s\n", f, f.string);
case findHashTab (deref (tab), f) of
None -> t := ref (emptyMap ()); tab ::= addHashTab (deref (tab), f, t)
| Some (tt) -> t := tt
esac;
@ -152,11 +51,95 @@ fun memo (f) {
}
}
fun lift (f) {
public fun token (x) {
fun (k) {
fun (s) {
k $ matchString (s, x)
}
}
}
public fun eof (k) {
fun (s) {
k (endOf (s))
}
}
public fun empty (k) {
fun (s) {k (Succ ({}, s))}
}
public fun alt (a, b) {
fun (k) {
fun (s) {
a (k) (s);
b (k) (s)
}
}
}
public fun seq (a, b) {
fun (k) {
fun (s) {
a (fun (ar) {
case ar of
Succ (x, s) -> b (x) (k) (s)
| _ -> k (ar)
esac
}) (s)
}
}
}
public infixr | before !! (a, b) {alt (a, b)}
public infixr |> after | (a, b) {seq (a, b)}
public infix @ at * (a, f) {
fun (k) {
fun (s) {
a (fun (x) {k (case x of
Succ (x, s) -> Succ (f (x), s)
| _ -> x
esac)}) (s)
}
}
}
public fun lift (f) {
fun (x) {f}
}
fun observe (name, f) {
public fun bypass (f) {
fun (x) {f @ lift (x)}
}
public fun opt (a) {empty @ lift (None) | a @ fun (x) {Some (x)}}
public fun rep0 (a) {
memo (empty @ lift({}) | a |> fun (x) {rep0 (a) @ fun (as) {x : as}})
}
public fun rep (a) {
memo (a |> (fun (x) {rep0 (a) @ fun (as) {x : as}}))
}
public fun listBy (item, sep) {
item |> fun (i) {rep0 (sep |> lift (item)) @ fun (is) {i : is}}
}
public fun list0By (item, sep) {
empty @ lift ({}) | listBy (item, sep)
}
public fun list (item) {
listBy (item, token (","))
}
public fun list0 (item) {
list0By (item, token (","))
}
public fun observe (name, f) {
fun (k) {
fun (s) {
printf ("%s at %s\n", name, s.string);
@ -165,14 +148,54 @@ fun observe (name, f) {
}
}
fun print (x) {printf ("k: %s\n", x.string)}
public fun createResult () {
local errors = ref ({}),
line = ref (0),
col = ref (0),
value = ref ({}),
hasError = ref (false),
hasValue = ref (false);
local a = token_k ("a");
fun k (x) {
case x of
Succ (val, _) ->
if deref (hasValue)
then failure (sprintf ("Ostap: ambiguous parsing (%s vs. %s)", deref (value).string, val.string))
else
hasValue ::= true;
value ::= val
fi
| Fail (err, l, c) ->
if deref (hasError)
then
local c = compare ([line, col], [l, c]);
local as = memo (
fun (k) {
(empty_k @ fun (x) {""} || as ||> fun(as) {a @ fun (a) {as ++ a}}) $ k
if c == 0 then errors ::= err : deref (errors)
elif c < 0 then errors ::= singleton (err); line ::= l; col ::= c
fi
else
hasError ::= true;
errors ::= singleton (err);
line ::= l;
col ::= c
fi
esac
}
);
(as ||> fun (as) {eof_k @ lift (as)}) (print) ({"a", "a"})
fun get () {
if deref (hasValue) then Succ (deref (value))
elif deref (hasError) then Fail (deref (errors), deref (line), deref (col))
else failure ("Ostap::createAcceptor::get: nothing to return")
fi
}
[k, get]
}
public fun k (acc) {
acc [0]
}
public fun result (acc) {
acc [1] ()
}

View file

@ -0,0 +1,5 @@
Parsing a*| against "aa"... Succ ({"a", "a"})
Parsing a+| against "aa"... Succ ({"a", "a"})
Parsing list(a)| against "a"... Succ ({"a"})
Parsing list(a)| against "a,a"... Succ ({"a", "a"})
Parsing list0(a)| against ""... Succ (0)

View file

@ -1,6 +1,6 @@
import Matcher;
local m = matcherInit (" -- asdasdakm ,m.,msd .,m.,asd\n \n\n abc");
local m = initMatcher (" -- asdasdakm ,m.,msd .,m.,asd\n \n\n abc");
local
lident = createRegexp ("[a-z][a-zA-Z_]*", "lowercase identifier"),
@ -29,7 +29,7 @@ fun const (m) {
infixl @ before * (p, f) {
fun (m) {
case p (m) of
Succ (m, x) -> Succ (m, f (x))
Succ (x, m) -> Succ (f (x), m)
| err -> err
esac
}
@ -38,7 +38,7 @@ infixl @ before * (p, f) {
infixr |> after !! (l, r) {
fun (m) {
case l (m) of
Succ (m, s) -> r (s) (m)
Succ (s, m) -> r (s) (m)
| err -> err
esac
}
@ -56,4 +56,4 @@ infixr || after |> (l, r) {
local expr = lid @ fun (s) {Lid (s)} || const @ fun (s) {Dec (s)},
assn = lid |> fun (id) {token (":=") |> fun (s) {expr @ fun (e) {Assn (id, e)}}};
printf ("%s\n", case assn (matcherInit ("x:=3")) of Fail (err) -> err | Succ (_, s) -> s.string esac)
printf ("%s\n", case assn (initMatcher ("x:=3")) of Fail (err) -> err | Succ (s, _) -> s.string esac)