Better chars

This commit is contained in:
Dmitry Boulytchev 2020-01-03 01:38:49 +03:00
parent cf2b696803
commit 27c091129a
4 changed files with 114 additions and 27 deletions

View file

@ -5,6 +5,9 @@ F,tl;
F,readLine;
F,stringcat;
F,matchSubString;
F,substring;
F,regexp;
F,regexpMatch;
F,sprintf;
F,makeString;
F,printf;

View file

@ -8,6 +8,7 @@
# include <sys/mman.h>
# include <assert.h>
# include <errno.h>
# include <regex.h>
# define __ENABLE_GC__
# ifndef __ENABLE_GC__
@ -295,6 +296,44 @@ extern int LmatchSubString (char *subj, char *patt, int pos) {
return BOX(strncmp (subj + UNBOX(pos), patt, n) == 0);
}
/* Builds a new string holding `l` characters of `subj` starting at offset `p`.
 *
 *   subj --- pointer to the contents of a runtime string
 *   p, l --- boxed start position and boxed length
 *
 * Returns a pointer to the contents of the freshly allocated string.
 * Reports a failure when the requested range does not lie inside the subject
 * (including negative position or length).
 */
extern void* Lsubstring (void *subj, int p, int l) {
  data *d   = TO_DATA(subj);
  int   pp  = UNBOX (p), ll = UNBOX (l);
  int   len = LEN(d->tag);

  /* Written as `ll <= len - pp` so that the check cannot overflow and so that
     negative positions/lengths are rejected instead of reading out of bounds. */
  if (pp >= 0 && ll >= 0 && ll <= len - pp) {
    data *r;

    __pre_gc ();

    /* NOTE(review): if alloc can trigger a collection that relocates `subj`,
       `subj` has to be registered as a root around this call --- confirm. */
    r = (data*) alloc (ll + 1 + sizeof (int));
    r->tag = STRING_TAG | (ll << 3);

    strncpy (r->contents, (char*) subj + pp, ll);
    /* strncpy does not terminate the destination when the source is longer
       than ll, so the terminator is written explicitly. */
    r->contents[ll] = 0;

    __post_gc ();

    return r->contents;
  }

  failure ("substring: index out of bounds (position=%d, length=%d, subject length=%d)", pp, ll, len);

  /* Not expected to be reached; keeps the non-void function well-formed. */
  return NULL;
}
/* Compiles a regular expression given as a NUL-terminated string.
 *
 *   regexp --- textual representation of the expression (GNU regex syntax)
 *
 * Returns a heap-allocated compiled pattern buffer.
 * Reports a failure when memory cannot be obtained or the pattern does not compile.
 */
extern struct re_pattern_buffer *Lregexp (char *regexp) {
  struct re_pattern_buffer *b = (struct re_pattern_buffer*) malloc (sizeof (struct re_pattern_buffer));
  const char               *err;

  if (b == NULL) {
    failure ("regexp: out of memory");
    return NULL;
  }

  /* re_compile_pattern expects buffer/allocated/fastmap/translate to be
     initialised by the caller; zeroing the whole structure does that. */
  memset (b, 0, sizeof (struct re_pattern_buffer));

  /* The result is NULL on success and a pointer to an error message
     otherwise --- it is not an errno value. */
  err = re_compile_pattern (regexp, strlen (regexp), b);

  if (err != NULL) {
    failure ("%s", err);
  }

  return b;
}
/* Matches the compiled pattern `b` against the runtime string `s`, starting
 * at the boxed offset `pos`.
 *
 * Returns the boxed result of re_match.
 */
extern int LregexpMatch (struct re_pattern_buffer *b, char *s, int pos) {
  int subjectLength = LEN(TO_DATA(s)->tag);
  int startOffset   = UNBOX(pos);
  int matched       = re_match (b, s, subjectLength, startOffset, 0);

  return BOX (matched);
}
extern int Lcompare (void *p, void *q) {
# define COMPARE_AND_RETURN(x,y) do if (x != y) return BOX(x - y); while (0)
if (UNBOXED(p)) {
@ -630,7 +669,7 @@ extern void Bmatch_failure (void *v, char *fname, int line, int col) {
failure ("match failure at %s:%d:%d, value '%s'\n", fname, line, col, stringBuf.contents);
}
extern void* /*Lstrcat*/ i__Infix_4343 (void *a, void *b) {
extern void* /*Lstrcat*/ Li__Infix_4343 (void *a, void *b) {
data *da = (data*) BOX (NULL);
data *db = (data*) BOX (NULL);
data *d = (data*) BOX (NULL);
@ -642,10 +681,12 @@ extern void* /*Lstrcat*/ i__Infix_4343 (void *a, void *b) {
d = (data *) alloc (sizeof(int) + LEN(da->tag) + LEN(db->tag) + 1);
d->tag = LEN(da->tag) + LEN(db->tag);
d->tag = STRING_TAG | ((LEN(da->tag) + LEN(db->tag)) << 3);
strcpy (d->contents, da->contents);
strcat (d->contents, db->contents);
strncpy (d->contents , da->contents, LEN(da->tag));
strncpy (d->contents + LEN(da->tag), db->contents, LEN(db->tag));
d->contents[LEN(da->tag) + LEN(db->tag)] = 0;
__post_gc();

View file

@ -589,8 +589,12 @@ class env prg =
let rec iterate i =
if i < n
then (
if x.[i] = '"' then Buffer.add_string buf "\\\""
else Buffer.add_char buf x.[i];
(match x.[i] with
| '"' -> Buffer.add_string buf "\\\""
| '\n' -> Buffer.add_string buf "\n"
| '\t' -> Buffer.add_string buf "\t"
| c -> Buffer.add_char buf c
);
iterate (i+1)
)
in

View file

@ -1,24 +1,56 @@
-- Matcher library for Ostap
-- (C) Dmitry Boulytchev, St. Petersburg State University, JetBrains Research, 2020
-- Matcher: simple string matching library.
fun matcherCreate (pos, buf, line, col) {
-- Builds the representation of a regular expression: a two-element array
-- holding the compiled expression and its descriptive name.
-- Arguments:
--    r    --- the expression in textual form (GNU regexp syntax)
--    name --- a free-form description of what the expression denotes
--             (e.g. "identifier", "string constant", etc.); it is used
--             when reporting errors
fun createRegexp (r, name) {
  local compiled = regexp (r);
  return [compiled, name]
}
-- Create an immutable matcher.
-- Arguments:
-- buf --- a string to match in
-- pos --- an integer beginning position to match from
-- line, col --- line and column numbers
-- This function is internal, do not use it directly.
-- To initially create a matcher use initMatcher function (see below).
fun matcherCreate (buf, pos, line, col) {
-- Shows a matcher in a readable form
fun show () {
return sprintf ("buf : %-40s\npos : %d\nline: %d\ncol : %d\n", buf, pos, line, col)
}
-- Calculates the number of remaining unmatched characters in the buffer
fun rest () {
return buf.length - pos
}
-- Moves the position pointer forward by the given number of characters within one line (i.e. the line number is kept and the column advances by the same amount).
fun shift (n) {
return matcherCreate (pos + n, buf, line, col + n)
return matcherCreate (buf, pos + n, line, col + n)
}
fun matchString (s) {
return
if s.length > rest ()
then None
elif matchSubString (buf, s, pos) then Some (shift (s.length))
else None
then Fail (sprintf ("""%s"" expected at %d:%d", s, line, col))
elif matchSubString (buf, s, pos) then Succ (shift (s.length), s)
else Fail (sprintf ("""%s"" expected at %d:%d", s, line, col))
fi
}
fun matchRegexp (r) {
local n;
return
if (n := regexpMatch (r[0], buf, pos)) > 0
then Succ (shift (n), substring (buf, pos, n))
else Fail (sprintf ("%s expected at %d:%d", r[1], line, col))
fi
}
@ -29,32 +61,39 @@ fun matcherCreate (pos, buf, line, col) {
return [
show,
eof,
matchString
matchString,
matchRegexp
]
}
fun show (m) {
return m [0]
return m [0] ()
}
fun eof (m) {
return m [1]
return m [1] ()
}
-- Matches the string s using matcher m: calls the function stored
-- at index 2 of the matcher with s as its argument.
fun matchString (m, s) {
  local matchFn = m [2];
  return matchFn (s)
}
fun matcherInit (buf) {
return matcherCreate (0, buf, 1, 1)
fun matchRegexp (m, r) {
return m [3] (r)
}
fun matcherInit (buf) {
return matcherCreate (buf, 0, 1, 1)
}
--fun parse (a) {
--}
local m = matcherInit ("abc");
printf ("%s", m.show ());
printf ("eof: %s\n", m.eof ().string);
printf ("matchString(""u""): %s\n", case m.matchString ("u") of Some (m) -> m.show () | _ -> "None" esac);
printf ("matchString(""a""): %s\n", case m.matchString ("a") of Some (m) -> m.show () | _ -> "None" esac);
printf ("matchString(""ab""): %s\n", case m.matchString ("ab") of Some (m) -> m.show () | _ -> "None" esac);
printf ("matchString(""abc""): %s\n", case m.matchString ("abc") of Some (m) -> m.show () | _ -> "None" esac);
printf ("matchString(""abcd""): %s\n", case m.matchString ("abcd") of Some (m) -> m.show () | _ -> "None" esac)
local ident = createRegexp ("[a-z][a-zA-Z_]*", "identifier");
local ws = createRegexp ("[ \n\t]+", "whitespace");
local str = createRegexp ("""\([^""]\|""""\)*""", "string literal");
printf ("ident: %s\n", case m.matchRegexp (ident) of Succ (m, s) -> "(" ++ m.show ++ ", " ++ s ++ ")" | Fail (err) -> err.string esac)