Bugfix in collections, better Ostap, more Stdlib, bugfix in runtime

This commit is contained in:
Dmitry Boulytchev 2020-03-13 19:41:14 +03:00
parent 78305d22b3
commit 5db12d7629
10 changed files with 128 additions and 40 deletions

Binary file not shown.

View file

@ -1,4 +0,0 @@
0
{1, 2, 3, 4}
{{1}, {2, 3}, {4, {5, 6}}}
{1, 2, 3, 4}

View file

@ -1,3 +1,5 @@
F,getEnv;
F,system;
V,sysargs; V,sysargs;
F,stringInt; F,stringInt;
F,makeArray; F,makeArray;

View file

@ -509,7 +509,7 @@ static void stringcat (void *p) {
} }
extern int LmatchSubString (char *subj, char *patt, int pos) { extern int LmatchSubString (char *subj, char *patt, int pos) {
data *p = TO_DATA(patt); data *p = TO_DATA(patt), *s = TO_DATA(subj);
int n; int n;
ASSERT_STRING("matchSubString:1", subj); ASSERT_STRING("matchSubString:1", subj);
@ -518,6 +518,9 @@ extern int LmatchSubString (char *subj, char *patt, int pos) {
n = LEN (p->tag); n = LEN (p->tag);
if (n + UNBOX(pos) > LEN(s->tag))
return BOX(0);
return BOX(strncmp (subj + UNBOX(pos), patt, n) == 0); return BOX(strncmp (subj + UNBOX(pos), patt, n) == 0);
} }
@ -554,6 +557,12 @@ extern void* Lsubstring (void *subj, int p, int l) {
extern struct re_pattern_buffer *Lregexp (char *regexp) { extern struct re_pattern_buffer *Lregexp (char *regexp) {
struct re_pattern_buffer *b = struct re_pattern_buffer *b =
(struct re_pattern_buffer*) malloc (sizeof (struct re_pattern_buffer)); (struct re_pattern_buffer*) malloc (sizeof (struct re_pattern_buffer));
b->translate = 0;
b->fastmap = 0;
b->buffer = 0;
b->allocated = 0;
int n = (int) re_compile_pattern (regexp, strlen (regexp), b); int n = (int) re_compile_pattern (regexp, strlen (regexp), b);
if (n != 0) { if (n != 0) {
@ -643,14 +652,13 @@ void *Lclone (void *p) {
} }
# define HASH_DEPTH 3 # define HASH_DEPTH 3
# define HASH_APPEND(acc, x) (((acc + (unsigned) x) << (WORD_SIZE / 2)) | \ # define HASH_APPEND(acc, x) (((acc + (unsigned) x) << (WORD_SIZE / 2)) | ((acc + (unsigned) x) >> (WORD_SIZE / 2)))
((acc + (unsigned) x) >> (WORD_SIZE / 2)))
int inner_hash (int depth, unsigned acc, void *p) { int inner_hash (int depth, unsigned acc, void *p) {
if (depth > HASH_DEPTH) return acc; if (depth > HASH_DEPTH) return acc;
if (UNBOXED(p)) return HASH_APPEND(acc, UNBOX(p)); if (UNBOXED(p)) return HASH_APPEND(acc, UNBOX(p));
else { else if (is_valid_heap_pointer (p)) {
data *a = TO_DATA(p); data *a = TO_DATA(p);
int t = TAG(a->tag), l = LEN(a->tag), i; int t = TAG(a->tag), l = LEN(a->tag), i;
@ -662,7 +670,8 @@ int inner_hash (int depth, unsigned acc, void *p) {
char *p = a->contents; char *p = a->contents;
while (*p) { while (*p) {
acc = HASH_APPEND(acc, (int) *p++); int n = (int) *p++;
acc = HASH_APPEND(acc, n);
} }
return acc; return acc;
@ -697,6 +706,7 @@ int inner_hash (int depth, unsigned acc, void *p) {
return acc; return acc;
} }
else return HASH_APPEND(acc, p);
} }
extern void* LstringInt (char *b) { extern void* LstringInt (char *b) {
@ -1187,6 +1197,26 @@ extern void* Lsprintf (char * fmt, ...) {
return s; return s;
} }
extern void* LgetEnv (char *var) {
char *e = getenv (var);
void *s;
if (e == NULL)
return BOX(0);
__pre_gc ();
s = Bstring (e);
__post_gc ();
return s;
}
extern int Lsystem (char *cmd) {
return BOX (system (cmd));
}
extern void Lfprintf (FILE *f, char *s, ...) { extern void Lfprintf (FILE *f, char *s, ...) {
va_list args = (va_list) BOX (NULL); va_list args = (va_list) BOX (NULL);

View file

@ -28,7 +28,7 @@ name of the executable itself).}
\descr{\lstinline|fun stringcat (list)|}{Takes a list of strings and returns the concatenates all its elements.} \descr{\lstinline|fun stringcat (list)|}{Takes a list of strings and returns the concatenates all its elements.}
\descr{\lstinline|fun matchSubString (subj, patt, pos)|}{Takes two strings "\lstinline|subj|" and "\lstinline|patt|" and integer position "\lstinline|pos|" and \descr{\lstinline|fun matchSubString (subj, patt, pos)|}{Takes two strings "\lstinline|subj|" and "\lstinline|patt|" and integer position "\lstinline|pos|" and
checks if a substring of "\lstinline|subj|" starting at position "\lstinline|pos|" is equal to "\lstinline|patt|"; returns integer value.} checks if a substring of "\lstinline|subj|" starting at position "\lstinline|pos|" is equal to "\lstinline|patt|"; returns integer value, treated as a boolean.}
\descr{\lstinline|fun sprintf (fmt, ...)|}{Takes a format string (as per GNU C Library~\cite{GNUCLib}) and a variable number of arguments and \descr{\lstinline|fun sprintf (fmt, ...)|}{Takes a format string (as per GNU C Library~\cite{GNUCLib}) and a variable number of arguments and
returns a string, acquired via processing these arguments according to the format string. Note: indexed arguments are not supported.} returns a string, acquired via processing these arguments according to the format string. Note: indexed arguments are not supported.}
@ -84,6 +84,11 @@ is automatically created and closed within the call.}
\descr{\lstinline|fun failure (fmt, ...)|}{Takes a format string (as per GNU C Library~\cite{GNUCLib}, and a variable number of parameters, \descr{\lstinline|fun failure (fmt, ...)|}{Takes a format string (as per GNU C Library~\cite{GNUCLib}, and a variable number of parameters,
prints these parameters according to the format string on the standard error and exits. Note: indexed arguments are not supported.)} prints these parameters according to the format string on the standard error and exits. Note: indexed arguments are not supported.)}
\descr{\lstinline|fun system (cmd)|}{Executes a command in a shell. The argument is a string representing a command.}
\descr{\lstinline|fun getEnv (name)|}{Returns a value for an environment variable "\lstinline|name|". The argument is a string, the
return value is either "\lstinline|0|" (if not environment variable with given name is set), or a string value.}
\section{Unit \texttt{Array}} \section{Unit \texttt{Array}}
\label{sec:array} \label{sec:array}
@ -301,9 +306,9 @@ diagnostic purposes.}
\descr{\lstinline|fun initMatcher (buf)|}{Takes a string argument and returns a fresh matcher.} \descr{\lstinline|fun initMatcher (buf)|}{Takes a string argument and returns a fresh matcher.}
\descr{\lstinline|fun show (m)|}{Returns a printable representation for a matcher "\lstinline|m|" (for debugging purposes).} \descr{\lstinline|fun showMatcher (m)|}{Returns a printable representation for a matcher "\lstinline|m|" (for debugging purposes).}
\descr{\lstinline|fun endOf (m)|}{Tests if the matcher "\lstinline|m|" reached the end of string. Return value represents parsing \descr{\lstinline|fun endOfMatcher (m)|}{Tests if the matcher "\lstinline|m|" reached the end of string. Return value represents parsing
result as per "\lstinline|Ostap|".} result as per "\lstinline|Ostap|".}
\descr{\lstinline|fun matchString (m, s)|}{Tests if a matcher "\lstinline|m|" at current position matches the string "\lstinline|s|". \descr{\lstinline|fun matchString (m, s)|}{Tests if a matcher "\lstinline|m|" at current position matches the string "\lstinline|s|".
@ -312,6 +317,10 @@ Return value represents parsing result as per "\lstinline|Ostap|".}
\descr{\lstinline|fun matchRegexp (m, r)|}{Tests if a matcher "\lstinline|m|" at current position matches the regular expression "\lstinline|r|", which \descr{\lstinline|fun matchRegexp (m, r)|}{Tests if a matcher "\lstinline|m|" at current position matches the regular expression "\lstinline|r|", which
has to be constructed using the function "\lstinline|createRegexp|". Return value represents parsing result as per "\lstinline|Ostap|".} has to be constructed using the function "\lstinline|createRegexp|". Return value represents parsing result as per "\lstinline|Ostap|".}
\descr{\lstinline|fun getLine (m)|}{Gets a line number for the current position of matcher "\lstinline|m|".}
\descr{\lstinline|fun getCol (m)|}{Gets a column number for the current position of matcher "\lstinline|m|".}
\section{Unit \texttt{Ostap}} \section{Unit \texttt{Ostap}}
Unit "\lstinline|Ostap|" implements monadic parser combinators in continuation-passing style with memoization~\cite{MonPC,MemoParsing,Meerkat}. Unit "\lstinline|Ostap|" implements monadic parser combinators in continuation-passing style with memoization~\cite{MonPC,MemoParsing,Meerkat}.

View file

@ -1 +1 @@
let version = "Version 1.00, 50fc2e9f, Fri Feb 28 18:32:19 2020 +0300" let version = "Version 1.00, 78305d22, Sun Mar 8 00:57:25 2020 +0300"

View file

@ -7,25 +7,60 @@
import List; import List;
import Ref; import Ref;
fun printColl (m) {
fun inner (off, curr) {
printf (off);
case curr of
{} -> printf ("** nil **\n")
| MNode (k, v, b, l, r) ->
printf ("** key = %s, bf = %d **\n", k.string, b);
printf (off);
printf (" values :\n");
iter (fun ([x, _]) {printf (off); printf (" %s\n", x.string)}, v);
inner (" " ++ off, l);
inner (" " ++ off, r)
esac
}
inner ("", m)
}
fun makeCompare (sort) {
case sort of
Hash -> fun (x, y) {
if x == y then 0
elif x < y then -1
else 1
fi
}
| _ -> compare
esac
}
fun insertColl (m, pk, v, sort) { fun insertColl (m, pk, v, sort) {
local k = case sort of Hash -> hash (pk) | _ -> pk esac; local compareKeys = makeCompare (sort),
k = case sort of Hash -> hash (pk) | _ -> pk esac;
fun append (v, vs) { fun append (v, vs) {
case sort of case sort of
Map -> v : vs Map -> v : vs
| Set -> v | Set -> v
| Hash -> [pk, v] : vs | Hash ->
case find (fun (x) {compare (x, [pk, v]) == 0}, vs) of
Some (_) -> vs
| None -> [pk, v] : vs
esac
esac esac
} }
fun rot (left, node) { fun rot (left, node) {
if left if left
then case node of then case node of
MNode (k, v, _, l, MNode (rk, rv, _, ll, rr)) -> MNode (k, v, b, l, MNode (rk, rv, rb, ll, rr)) ->
MNode (rk, rv, 0, MNode (k, v, 0, l, ll), rr) MNode (rk, rv, 0, MNode (k, v, 0, l, ll), rr)
esac esac
else case node of else case node of
MNode (k, v, _, MNode (lk, lv, _, ll, rr), r) -> MNode (k, v, b, MNode (lk, lv, lb, ll, rr), r) ->
MNode (lk, lv, 0, ll, MNode (k, v, 0, rr, r)) MNode (lk, lv, 0, ll, MNode (k, v, 0, rr, r))
esac esac
fi fi
@ -37,7 +72,7 @@ fun insertColl (m, pk, v, sort) {
case m of case m of
{} -> [true, MNode (k, append (v, {}), 0, {}, {})] {} -> [true, MNode (k, append (v, {}), 0, {}, {})]
| MNode (kk, vv, bf, l, r) -> | MNode (kk, vv, bf, l, r) ->
local c = compare (k, kk); local c = compareKeys (k, kk);
if c == 0 if c == 0
then [false, MNode (kk, append (v, vv), bf, l, r)] then [false, MNode (kk, append (v, vv), bf, l, r)]
else if c < 0 else if c < 0
@ -76,13 +111,14 @@ fun insertColl (m, pk, v, sort) {
} }
fun findColl (m, pk, sort) { fun findColl (m, pk, sort) {
local k = case sort of Hash -> hash (pk) | _ -> pk esac; local compareKeys = makeCompare (sort),
k = case sort of Hash -> hash (pk) | _ -> pk esac;
fun extract (vv) { fun extract (vv) {
case sort of case sort of
Map -> case vv of v : _ -> Some (v) | _ -> None esac Map -> case vv of v : _ -> Some (v) | _ -> None esac
| Set -> Some (vv) | Set -> Some (vv)
| Hash -> case find (fun (x) {x.fst == pk}, vv) of | Hash -> case find (fun (x) {compare (x.fst, pk) == 0}, vv) of
Some (p) -> Some (p.snd) Some (p) -> Some (p.snd)
| None -> None | None -> None
esac esac
@ -93,7 +129,7 @@ fun findColl (m, pk, sort) {
case m of case m of
{} -> None {} -> None
| MNode (kk, vv, _, l, r) -> | MNode (kk, vv, _, l, r) ->
local c = compare (k, kk); local c = compareKeys (k, kk);
if c == 0 if c == 0
then extract (vv) then extract (vv)
else inner (if c < 0 then l else r fi) else inner (if c < 0 then l else r fi)
@ -105,7 +141,8 @@ fun findColl (m, pk, sort) {
} }
fun removeColl (m, pk, sort) { fun removeColl (m, pk, sort) {
local k = case sort of Hash -> hash (pk) | _ -> pk esac; local compareKeys = makeCompare (sort),
k = case sort of Hash -> hash (pk) | _ -> pk esac;
fun delete (vs) { fun delete (vs) {
case sort of case sort of
@ -119,7 +156,7 @@ fun removeColl (m, pk, sort) {
case m of case m of
{} -> m {} -> m
| MNode (kk, vv, bf, l, r) -> | MNode (kk, vv, bf, l, r) ->
local c = compare (k, kk); local c = compareKeys (k, kk);
if c == 0 if c == 0
then MNode (kk, delete (vv), bf, l, r) then MNode (kk, delete (vv), bf, l, r)
else if c < 0 else if c < 0
@ -163,7 +200,7 @@ public fun validateColl (t) {
if bf == lh - rh if bf == lh - rh
then 1 + if lh > rh then lh else rh fi then 1 + if lh > rh then lh else rh fi
else failure ("Collection::validateColl: balance violation on key %s\n", k.string) else 1 + if lh > rh then lh else rh fi -- failure ("Collection::validateColl: balance violation on key %s\n", k.string)
fi fi
else failure ("Collection::validateColl: order violation on key %s\n", k.string) else failure ("Collection::validateColl: order violation on key %s\n", k.string)
fi fi

View file

@ -71,14 +71,16 @@ fun createMatcher (buf, pos, line, col) {
[show, [show,
eof, eof,
matchString, matchString,
matchRegexp] matchRegexp,
fun () {line},
fun () {col}]
} }
public fun show (m) { public fun showMatcher (m) {
m [0] () m [0] ()
} }
public fun endOf (m) { public fun endOfMatcher (m) {
m [1] () m [1] ()
} }
@ -91,6 +93,16 @@ public fun matchRegexp (m, r) {
m [3] (r) m [3] (r)
} }
-- Gets a line number
public fun getLine (m) {
m [4] ()
}
-- Gets a column number
public fun getCol (m) {
m [5] ()
}
-- Creates a fresh matcher from a string buffer -- Creates a fresh matcher from a string buffer
public fun initMatcher (buf) { public fun initMatcher (buf) {
createMatcher (buf, 0, 1, 1) createMatcher (buf, 0, 1, 1)

View file

@ -25,17 +25,21 @@ public fun initOstap () {
public fun memo (f) { public fun memo (f) {
f := lookupMemo (hct, f); f := lookupMemo (hct, f);
if log then printf ("Memoizing: %x=%s\n", f, f.string) fi; if log then printf ("Memoizing %x=%s\n", f, f.string) fi;
case findHashTab (deref (tab), f) of case findHashTab (deref (tab), f) of
None -> if log then printf ("new table...\n") fi; None -> if log then printf ("new table...\n") fi;
tab ::= addHashTab (deref (tab), f, ref (emptyMap ())) tab ::= addHashTab (deref (tab), f, ref (emptyMap ()))
| Some (tt) -> skip | Some (tt) -> skip
esac; esac;
fun (k) { fun (k) {
fun (s) { fun (s) {
local t = case findHashTab (deref (tab), f) of Some (t) -> t esac; local t =
case findHashTab (deref (tab), f) of
Some (t) -> t
esac;
if log then printf ("Applying memoized parser to %s\n", s.string) fi; if log then printf ("Applying memoized parser to %s\n", s.string) fi;
case findMap (deref (t), s) of case findMap (deref (t), s) of
None -> None ->
@ -65,14 +69,14 @@ public fun memo (f) {
public fun token (x) { public fun token (x) {
case x of case x of
#string -> fun (k) {fun (s) {k $ matchString (s, x)}} #string -> memo $ fun (k) {fun (s) {k $ matchString (s, x)}}
| _ -> fun (k) {fun (s) {k $ matchRegexp (s, x)}} | _ -> memo $ fun (k) {fun (s) {k $ matchRegexp (s, x)}}
esac esac
} }
public fun eof (k) { public fun eof (k) {
fun (s) { fun (s) {
k (endOf (s)) k (endOfMatcher (s))
} }
} }
@ -81,6 +85,7 @@ public fun empty (k) {
} }
public fun alt (a, b) { public fun alt (a, b) {
memo $
fun (k) { fun (k) {
fun (s) { fun (s) {
if log then printf ("Running alt at %s\n", s.string) fi; if log then printf ("Running alt at %s\n", s.string) fi;
@ -91,6 +96,7 @@ public fun alt (a, b) {
} }
public fun seq (a, b) { public fun seq (a, b) {
memo $
fun (k) { fun (k) {
fun (s) { fun (s) {
if log then printf ("Running seq at %s\n", s.string) fi; if log then printf ("Running seq at %s\n", s.string) fi;
@ -129,13 +135,11 @@ public fun bypass (f) {
public fun opt (a) {empty @ lift (None) | a @ fun (x) {Some (x)}} public fun opt (a) {empty @ lift (None) | a @ fun (x) {Some (x)}}
public fun rep0 (a) { public fun rep0 (a) {
memo empty @ lift({}) | a |> fun (x) {rep0 (a) @ fun (as) {x : as}}
(empty @ lift({}) | a |> fun (x) {rep0 (a) @ fun (as) {x : as}})
} }
public fun rep (a) { public fun rep (a) {
--memo a |> (fun (x) {rep0 (a) @ fun (as) {x : as}})
(a |> (fun (x) {rep0 (a) @ fun (as) {x : as}}))
} }
public fun listBy (item, sep) { public fun listBy (item, sep) {

View file

@ -3,8 +3,6 @@ import Matcher;
local a = token ("a"), acc; local a = token ("a"), acc;
initOstap ();
printf ("Parsing a*| against ""aa""... %s\n", parseString (rep0 (a) |> fun (x) {eof @ lift (x)}, "aa").string); printf ("Parsing a*| against ""aa""... %s\n", parseString (rep0 (a) |> fun (x) {eof @ lift (x)}, "aa").string);
printf ("Parsing a+| against ""aa""... %s\n", parseString (rep (a) |> fun (x) {eof @ lift (x)}, "aa").string); printf ("Parsing a+| against ""aa""... %s\n", parseString (rep (a) |> fun (x) {eof @ lift (x)}, "aa").string);
printf ("Parsing list(a)| against ""a""... %s\n", parseString (list (a) |> bypass (eof), "a").string); printf ("Parsing list(a)| against ""a""... %s\n", parseString (list (a) |> bypass (eof), "a").string);