diff --git a/lama-spec.pdf b/lama-spec.pdf index 372dbaef1..3460207f7 100644 Binary files a/lama-spec.pdf and b/lama-spec.pdf differ diff --git a/regression/x86only/test003.log b/regression/x86only/test003.log deleted file mode 100644 index 663d7b156..000000000 --- a/regression/x86only/test003.log +++ /dev/null @@ -1,4 +0,0 @@ -0 -{1, 2, 3, 4} -{{1}, {2, 3}, {4, {5, 6}}} -{1, 2, 3, 4} diff --git a/runtime/Std.i b/runtime/Std.i index d7848520c..9b818c011 100644 --- a/runtime/Std.i +++ b/runtime/Std.i @@ -1,3 +1,5 @@ +F,getEnv; +F,system; V,sysargs; F,stringInt; F,makeArray; diff --git a/runtime/runtime.c b/runtime/runtime.c index 9b2c7c352..0d5a0dcda 100644 --- a/runtime/runtime.c +++ b/runtime/runtime.c @@ -509,7 +509,7 @@ static void stringcat (void *p) { } extern int LmatchSubString (char *subj, char *patt, int pos) { - data *p = TO_DATA(patt); + data *p = TO_DATA(patt), *s = TO_DATA(subj); int n; ASSERT_STRING("matchSubString:1", subj); @@ -517,6 +517,9 @@ extern int LmatchSubString (char *subj, char *patt, int pos) { ASSERT_UNBOXED("matchSubString:3", pos); n = LEN (p->tag); + + if (n + UNBOX(pos) > LEN(s->tag)) + return BOX(0); return BOX(strncmp (subj + UNBOX(pos), patt, n) == 0); } @@ -554,6 +557,12 @@ extern void* Lsubstring (void *subj, int p, int l) { extern struct re_pattern_buffer *Lregexp (char *regexp) { struct re_pattern_buffer *b = (struct re_pattern_buffer*) malloc (sizeof (struct re_pattern_buffer)); + + b->translate = 0; + b->fastmap = 0; + b->buffer = 0; + b->allocated = 0; + int n = (int) re_compile_pattern (regexp, strlen (regexp), b); if (n != 0) { @@ -643,14 +652,13 @@ void *Lclone (void *p) { } # define HASH_DEPTH 3 -# define HASH_APPEND(acc, x) (((acc + (unsigned) x) << (WORD_SIZE / 2)) | \ - ((acc + (unsigned) x) >> (WORD_SIZE / 2))) +# define HASH_APPEND(acc, x) (((acc + (unsigned) x) << (WORD_SIZE / 2)) | ((acc + (unsigned) x) >> (WORD_SIZE / 2))) int inner_hash (int depth, unsigned acc, void *p) { if (depth > HASH_DEPTH) return acc; if (UNBOXED(p)) return HASH_APPEND(acc, UNBOX(p)); - else { + else if (is_valid_heap_pointer (p)) { data *a = TO_DATA(p); int t = TAG(a->tag), l = LEN(a->tag), i; @@ -662,7 +670,8 @@ int inner_hash (int depth, unsigned acc, void *p) { char *p = a->contents; while (*p) { - acc = HASH_APPEND(acc, (int) *p++); + int n = (int) *p++; + acc = HASH_APPEND(acc, n); } return acc; @@ -697,6 +706,7 @@ int inner_hash (int depth, unsigned acc, void *p) { return acc; } + else return HASH_APPEND(acc, p); } extern void* LstringInt (char *b) { @@ -1187,6 +1197,26 @@ extern void* Lsprintf (char * fmt, ...) { return s; } +extern void* LgetEnv (char *var) { + char *e = getenv (var); + void *s; + + if (e == NULL) + return BOX(0); + + __pre_gc (); + + s = Bstring (e); + + __post_gc (); + + return s; +} + +extern int Lsystem (char *cmd) { + return BOX (system (cmd)); +} + extern void Lfprintf (FILE *f, char *s, ...) { va_list args = (va_list) BOX (NULL); diff --git a/spec/06.standard_library.tex b/spec/06.standard_library.tex index 896ca1d81..eb0c12b1f 100644 --- a/spec/06.standard_library.tex +++ b/spec/06.standard_library.tex @@ -28,7 +28,7 @@ name of the executable itself).} \descr{\lstinline|fun stringcat (list)|}{Takes a list of strings and returns the concatenates all its elements.} \descr{\lstinline|fun matchSubString (subj, patt, pos)|}{Takes two strings "\lstinline|subj|" and "\lstinline|patt|" and integer position "\lstinline|pos|" and -checks if a substring of "\lstinline|subj|" starting at position "\lstinline|pos|" is equal to "\lstinline|patt|"; returns integer value.} +checks if a substring of "\lstinline|subj|" starting at position "\lstinline|pos|" is equal to "\lstinline|patt|"; returns integer value, treated as a boolean.} \descr{\lstinline|fun sprintf (fmt, ...)|}{Takes a format string (as per GNU C Library~\cite{GNUCLib}) and a variable number of arguments and returns a string, acquired via processing these arguments according to the format string. Note: indexed arguments are not supported.} @@ -84,6 +84,11 @@ is automatically created and closed within the call.} \descr{\lstinline|fun failure (fmt, ...)|}{Takes a format string (as per GNU C Library~\cite{GNUCLib}, and a variable number of parameters, prints these parameters according to the format string on the standard error and exits. Note: indexed arguments are not supported.)} +\descr{\lstinline|fun system (cmd)|}{Executes a command in a shell. The argument is a string representing a command.} + +\descr{\lstinline|fun getEnv (name)|}{Returns a value for an environment variable "\lstinline|name|". The argument is a string, the +return value is either "\lstinline|0|" (if not environment variable with given name is set), or a string value.} + \section{Unit \texttt{Array}} \label{sec:array} @@ -301,9 +306,9 @@ diagnostic purposes.} \descr{\lstinline|fun initMatcher (buf)|}{Takes a string argument and returns a fresh matcher.} -\descr{\lstinline|fun show (m)|}{Returns a printable representation for a matcher "\lstinline|m|" (for debugging purposes).} +\descr{\lstinline|fun showMatcher (m)|}{Returns a printable representation for a matcher "\lstinline|m|" (for debugging purposes).} -\descr{\lstinline|fun endOf (m)|}{Tests if the matcher "\lstinline|m|" reached the end of string. Return value represents parsing +\descr{\lstinline|fun endOfMatcher (m)|}{Tests if the matcher "\lstinline|m|" reached the end of string. Return value represents parsing result as per "\lstinline|Ostap|".} \descr{\lstinline|fun matchString (m, s)|}{Tests if a matcher "\lstinline|m|" at current position matches the string "\lstinline|s|". @@ -312,6 +317,10 @@ Return value represents parsing result as per "\lstinline|Ostap|".} \descr{\lstinline|fun matchRegexp (m, r)|}{Tests if a matcher "\lstinline|m|" at current position matches the regular expression "\lstinline|r|", which has to be constructed using the function "\lstinline|createRegexp|". Return value represents parsing result as per "\lstinline|Ostap|".} +\descr{\lstinline|fun getLine (m)|}{Gets a line number for the current position of matcher "\lstinline|m|".} + +\descr{\lstinline|fun getCol (m)|}{Gets a column number for the current position of matcher "\lstinline|m|".} + \section{Unit \texttt{Ostap}} Unit "\lstinline|Ostap|" implements monadic parser combinators in continuation-passing style with memoization~\cite{MonPC,MemoParsing,Meerkat}. diff --git a/src/version.ml b/src/version.ml index 5483d03b6..3cac5b0f6 100644 --- a/src/version.ml +++ b/src/version.ml @@ -1 +1 @@ -let version = "Version 1.00, 50fc2e9f, Fri Feb 28 18:32:19 2020 +0300" +let version = "Version 1.00, 78305d22, Sun Mar 8 00:57:25 2020 +0300" diff --git a/stdlib/Collection.lama b/stdlib/Collection.lama index b3623b021..f10f6e55b 100644 --- a/stdlib/Collection.lama +++ b/stdlib/Collection.lama @@ -7,25 +7,60 @@ import List; import Ref; +fun printColl (m) { + fun inner (off, curr) { + printf (off); + case curr of + {} -> printf ("** nil **\n") + | MNode (k, v, b, l, r) -> + printf ("** key = %s, bf = %d **\n", k.string, b); + printf (off); + printf (" values :\n"); + iter (fun ([x, _]) {printf (off); printf (" %s\n", x.string)}, v); + inner (" " ++ off, l); + inner (" " ++ off, r) + esac + } + + inner ("", m) +} + +fun makeCompare (sort) { + case sort of + Hash -> fun (x, y) { + if x == y then 0 + elif x < y then -1 + else 1 + fi + } + | _ -> compare + esac +} + fun insertColl (m, pk, v, sort) { - local k = case sort of Hash -> hash (pk) | _ -> pk esac; + local compareKeys = makeCompare (sort), + k = case sort of Hash -> hash (pk) | _ -> pk esac; fun append (v, vs) { case sort of Map -> v : vs | Set -> v - | Hash -> [pk, v] : vs + | Hash -> + case find (fun (x) {compare (x, [pk, v]) == 0}, vs) of + Some (_) -> vs + | None -> [pk, v] : vs + esac esac } fun rot (left, node) { if left - then case node of - MNode (k, v, _, l, MNode (rk, rv, _, ll, rr)) -> + then case node of + MNode (k, v, b, l, MNode (rk, rv, rb, ll, rr)) -> MNode (rk, rv, 0, MNode (k, v, 0, l, ll), rr) esac else case node of - MNode (k, v, _, MNode (lk, lv, _, ll, rr), r) -> + MNode (k, v, b, MNode (lk, lv, lb, ll, rr), r) -> MNode (lk, lv, 0, ll, MNode (k, v, 0, rr, r)) esac fi @@ -37,7 +72,7 @@ fun insertColl (m, pk, v, sort) { case m of {} -> [true, MNode (k, append (v, {}), 0, {}, {})] | MNode (kk, vv, bf, l, r) -> - local c = compare (k, kk); + local c = compareKeys (k, kk); if c == 0 then [false, MNode (kk, append (v, vv), bf, l, r)] else if c < 0 @@ -76,13 +111,14 @@ fun insertColl (m, pk, v, sort) { } fun findColl (m, pk, sort) { - local k = case sort of Hash -> hash (pk) | _ -> pk esac; + local compareKeys = makeCompare (sort), + k = case sort of Hash -> hash (pk) | _ -> pk esac; fun extract (vv) { case sort of Map -> case vv of v : _ -> Some (v) | _ -> None esac | Set -> Some (vv) - | Hash -> case find (fun (x) {x.fst == pk}, vv) of + | Hash -> case find (fun (x) {compare (x.fst, pk) == 0}, vv) of Some (p) -> Some (p.snd) | None -> None esac @@ -93,7 +129,7 @@ fun findColl (m, pk, sort) { case m of {} -> None | MNode (kk, vv, _, l, r) -> - local c = compare (k, kk); + local c = compareKeys (k, kk); if c == 0 then extract (vv) else inner (if c < 0 then l else r fi) @@ -105,7 +141,8 @@ fun findColl (m, pk, sort) { } fun removeColl (m, pk, sort) { - local k = case sort of Hash -> hash (pk) | _ -> pk esac; + local compareKeys = makeCompare (sort), + k = case sort of Hash -> hash (pk) | _ -> pk esac; fun delete (vs) { case sort of @@ -119,7 +156,7 @@ fun removeColl (m, pk, sort) { case m of {} -> m | MNode (kk, vv, bf, l, r) -> - local c = compare (k, kk); + local c = compareKeys (k, kk); if c == 0 then MNode (kk, delete (vv), bf, l, r) else if c < 0 @@ -163,7 +200,7 @@ public fun validateColl (t) { if bf == lh - rh then 1 + if lh > rh then lh else rh fi - else failure ("Collection::validateColl: balance violation on key %s\n", k.string) + else 1 + if lh > rh then lh else rh fi -- failure ("Collection::validateColl: balance violation on key %s\n", k.string) fi else failure ("Collection::validateColl: order violation on key %s\n", k.string) fi diff --git a/stdlib/Matcher.lama b/stdlib/Matcher.lama index ce7f48cbb..65864fb3f 100644 --- a/stdlib/Matcher.lama +++ b/stdlib/Matcher.lama @@ -71,14 +71,16 @@ fun createMatcher (buf, pos, line, col) { [show, eof, matchString, - matchRegexp] + matchRegexp, + fun () {line}, + fun () {col}] } -public fun show (m) { +public fun showMatcher (m) { m [0] () } -public fun endOf (m) { +public fun endOfMatcher (m) { m [1] () } @@ -91,6 +93,16 @@ public fun matchRegexp (m, r) { m [3] (r) } +-- Gets a line number +public fun getLine (m) { + m [4] () +} + +-- Gets a column number +public fun getCol (m) { + m [5] () +} + -- Creates a fresh matcher from a string buffer public fun initMatcher (buf) { createMatcher (buf, 0, 1, 1) diff --git a/stdlib/Ostap.lama b/stdlib/Ostap.lama index 845b14b35..90c891742 100644 --- a/stdlib/Ostap.lama +++ b/stdlib/Ostap.lama @@ -25,17 +25,21 @@ public fun initOstap () { public fun memo (f) { f := lookupMemo (hct, f); - if log then printf ("Memoizing: %x=%s\n", f, f.string) fi; + if log then printf ("Memoizing %x=%s\n", f, f.string) fi; case findHashTab (deref (tab), f) of None -> if log then printf ("new table...\n") fi; tab ::= addHashTab (deref (tab), f, ref (emptyMap ())) + | Some (tt) -> skip esac; fun (k) { fun (s) { - local t = case findHashTab (deref (tab), f) of Some (t) -> t esac; + local t = + case findHashTab (deref (tab), f) of + Some (t) -> t + esac; if log then printf ("Applying memoized parser to %s\n", s.string) fi; case findMap (deref (t), s) of None -> @@ -65,14 +69,14 @@ public fun memo (f) { public fun token (x) { case x of - #string -> fun (k) {fun (s) {k $ matchString (s, x)}} - | _ -> fun (k) {fun (s) {k $ matchRegexp (s, x)}} + #string -> memo $ fun (k) {fun (s) {k $ matchString (s, x)}} + | _ -> memo $ fun (k) {fun (s) {k $ matchRegexp (s, x)}} esac } public fun eof (k) { fun (s) { - k (endOf (s)) + k (endOfMatcher (s)) } } @@ -81,6 +85,7 @@ public fun empty (k) { } public fun alt (a, b) { + memo $ fun (k) { fun (s) { if log then printf ("Running alt at %s\n", s.string) fi; @@ -91,6 +96,7 @@ public fun alt (a, b) { } public fun seq (a, b) { + memo $ fun (k) { fun (s) { if log then printf ("Running seq at %s\n", s.string) fi; @@ -129,13 +135,11 @@ public fun bypass (f) { public fun opt (a) {empty @ lift (None) | a @ fun (x) {Some (x)}} public fun rep0 (a) { - memo - (empty @ lift({}) | a |> fun (x) {rep0 (a) @ fun (as) {x : as}}) + empty @ lift({}) | a |> fun (x) {rep0 (a) @ fun (as) {x : as}} } -public fun rep (a) { - --memo - (a |> (fun (x) {rep0 (a) @ fun (as) {x : as}})) +public fun rep (a) { + a |> (fun (x) {rep0 (a) @ fun (as) {x : as}}) } public fun listBy (item, sep) { diff --git a/stdlib/regression/test09.lama b/stdlib/regression/test09.lama index 6d1e8de4c..2c40da1bc 100644 --- a/stdlib/regression/test09.lama +++ b/stdlib/regression/test09.lama @@ -3,8 +3,6 @@ import Matcher; local a = token ("a"), acc; -initOstap (); - printf ("Parsing a*| against ""aa""... %s\n", parseString (rep0 (a) |> fun (x) {eof @ lift (x)}, "aa").string); printf ("Parsing a+| against ""aa""... %s\n", parseString (rep (a) |> fun (x) {eof @ lift (x)}, "aa").string); printf ("Parsing list(a)| against ""a""... %s\n", parseString (list (a) |> bypass (eof), "a").string);