lama_byterun/stdlib/Matcher.expr

177 lines
4.3 KiB
Text
Raw Normal View History

2020-01-03 01:38:49 +03:00
-- (C) Dmitry Boulytchev, St. Petersburg State University, JetBrains Research, 2020
-- Matcher: simple string matching library.
2019-12-31 00:59:28 +03:00
2020-01-03 01:38:49 +03:00
-- Builds a regular expression descriptor.
-- Arguments:
--    r    --- the textual form of the regular expression; it is passed
--             to regexp as is
--    name --- a free-form description of what the expression denotes
--             (e.g. "identifier", "string constant", etc.); matchers put
--             it into their error messages
-- Returns a two-element array: the value produced by regexp at index 0
-- and the description at index 1.
fun createRegexp (r, name) {
  local compiled = regexp (r);

  return [compiled, name]
}
-- Create an immutable matcher.
-- Arguments:
--    buf       --- a string to match in
--    pos       --- an integer beginning position to match from
--    line, col --- line and column numbers corresponding to pos
-- Returns an array of closures [show, eof, matchString, matchRegexp] sharing
-- the captured state; the top-level wrappers below call them by index.
-- This function is internal, do not use it directly.
-- To initially create a matcher use the matcherInit function (see below).
fun matcherCreate (buf, pos, line, col) {
  -- Shows a matcher in a readable form
  fun show () {
    return sprintf ("buf : %-40s\npos : %d\nline: %d\ncol : %d\n", buf, pos, line, col)
  }

  -- Calculates the number of remaining unmatched characters in the buffer
  fun rest () {
    return buf.length - pos
  }

  -- Moves the position pointer forward by the given number of characters.
  -- Returns a new matcher; the current one is left untouched. Line and
  -- column numbers are recalculated by scanning the n skipped characters,
  -- i.e. those at indices pos .. pos+n-1.
  fun shift (n) {
    local i, l = line, c = col;

    -- i is an absolute index in buf, so the upper bound is pos+n, not n;
    -- with the bound n the scan was cut short (or skipped) whenever pos > 0
    for i := pos, i < pos + n, i := i+1 do
      case buf [i] of
        '\n' -> l := l + 1; c := 1
      | '\t' -> c := c + 8   -- a tab advances the column by a fixed 8
      | _    -> c := c + 1
      esac
    od;

    return matcherCreate (buf, pos + n, l, c)
  }

  -- Matches the literal string s at the current position.
  -- Returns Succ (m, s), where m is the matcher shifted past s, or
  -- Fail (msg) with the expected string and the current line:col.
  fun matchString (s) {
    return
      -- the length check comes first so that matchSubString is only
      -- called when s fits into the rest of the buffer
      if s.length > rest ()
      then Fail (sprintf ("""%s"" expected at %d:%d", s, line, col))
      elif matchSubString (buf, s, pos) then Succ (shift (s.length), s)
      else Fail (sprintf ("""%s"" expected at %d:%d", s, line, col))
      fi
  }

  -- Matches the regular expression r (as built by createRegexp) at the
  -- current position.
  -- Returns Succ (m, str), where str is the matched substring and m is the
  -- matcher shifted past it, or Fail (msg) built from the description r[1].
  -- NOTE(review): a result of 0 from regexpMatch is reported as a failure,
  -- so an expression that can match the empty string (e.g. one ending
  -- in "*") fails where it matches nothing --- confirm this is intended.
  fun matchRegexp (r) {
    local n;

    return
      if (n := regexpMatch (r[0], buf, pos)) > 0
      then Succ (shift (n), substring (buf, pos, n))
      else Fail (sprintf ("%s expected at %d:%d", r[1], line, col))
      fi
  }

  -- Checks whether the whole buffer has been consumed
  fun eof () {
    return rest () == 0
  }

  -- The order of the elements is relied upon by the wrappers
  -- show, endOf, matchString and matchRegexp
  return [
    show,
    eof,
    matchString,
    matchRegexp
  ]
}
-- Renders the matcher m as a string by calling the closure stored
-- at index 0 of m
fun show (m) {
  local render = m [0];

  return render ()
}
2020-01-04 21:50:14 +03:00
-- Calls the closure stored at index 1 of the matcher m (the end-of-input
-- check in matchers built by matcherCreate) and returns its result
fun endOf (m) {
  local atEnd = m [1];

  return atEnd ()
}
-- Matches the string s using the matcher m: passes s to the closure
-- stored at index 2 of m and returns its result
fun matchString (m, s) {
  local matchLiteral = m [2];

  return matchLiteral (s)
}
2020-01-04 21:50:14 +03:00
-- Matches the regular expression r using the matcher m: passes r to the
-- closure stored at index 3 of m and returns its result
fun matchRegexp (m, r) {
  local matchPattern = m [3];

  return matchPattern (r)
}
2020-01-04 21:50:14 +03:00
-- Builds a matcher for the string buffer buf which starts at the very
-- beginning: position 0, line 1, column 1
public fun matcherInit (buf) {
  local startPos = 0, startLine = 1, startCol = 1;

  return matcherCreate (buf, startPos, startLine, startCol)
}
2020-01-03 01:38:49 +03:00
--fun parse (a) {
--}
2020-01-04 21:50:14 +03:00
-- A sample matcher over a buffer made of a line comment, blank lines and
-- an identifier
local m = matcherInit (" -- asdasdakm ,m.,msd .,m.,asd\n \n\n abc");

-- Regular expressions for the token classes; the second argument is the
-- description used in error messages
local lident  = createRegexp ("[a-z][a-zA-Z_]*", "lowercase identifier");
local uident  = createRegexp ("[A-Z][a-zA-Z_]*", "uppercase identifier");
local ws      = createRegexp ("\\([ \t\n]\\|--[^\n]*\n\\)*", "whitespace");
local str     = createRegexp ("""\([^""]\|""""\)*""", "string literal");
local decimal = createRegexp ("[0-9]+", "decimal literal");
local chr     = createRegexp ("'[^']'", "character literal");
-- Makes a parser for the fixed string s: the result is a function which
-- takes a matcher and matches s against it
fun token (s) {
  return fun (m) {
    return matchString (m, s)
  }
}
-- Parser for a lowercase identifier: matches lident against the matcher m
fun lid (m) {
  return matchRegexp (m, lident)
}
-- Parser for an uppercase identifier: matches uident against the matcher m
fun uid (m) {
  return matchRegexp (m, uident)
}
-- Parser for a decimal literal: matches decimal against the matcher m
fun const (m) {
  return matchRegexp (m, decimal)
}
-- Semantic action: p @ f is a parser which runs p and, on success, applies
-- f to the parsed value, keeping the resulting matcher; any other outcome
-- of p is returned unchanged
infixl "@" before "*" (p, f) {
  return fun (m) {
    return
      case p (m) of
        Succ (next, v) -> Succ (next, f (v))
      | other          -> other
      esac
  }
}
-- Sequencing: l |> r is a parser which runs l and, on success, passes the
-- parsed value to r to obtain the next parser, which is then run on the
-- matcher returned by l; any other outcome of l is returned unchanged
infixr "|>" after "!!" (l, r) {
  return fun (m) {
    return
      case l (m) of
        Succ (next, v) -> r (v) (next)
      | other          -> other
      esac
  }
}
-- Alternation: l || r is a parser which runs l and returns its result if
-- it is a Succ; otherwise the result of l is dropped and r is run on the
-- same matcher
infixr "||" after "|>" (l, r) {
  return fun (m) {
    return
      case l (m) of
        ok@Succ (_, _) -> ok
      | _              -> r (m)
      esac
  }
}
-- A tiny demo grammar:
--    expr --- a lowercase identifier (Lid) or a decimal literal (Dec)
--    assn --- lowercase identifier, ":=", expr; builds Assn (id, e)
local expr = lid   @ fun (name) {return Lid (name)} ||
             const @ fun (num)  {return Dec (num)};
local assn = lid |> fun (id) {
               return token (":=") |> fun (op) {
                 return expr @ fun (rhs) {return Assn (id, rhs)}
               }
             };

-- Parses a sample assignment and prints the string form of the result
printf ("%s\n", string (assn (matcherInit ("x:=3"))))
--local ident = createRegexp ("[a-z][a-zA-Z_]*", "identifier");
--local ws = createRegexp ("[ \n\t]+", "whitespace");
--local ws = createRegexp ("\\([ \t\n]\\|--[^\n]*\n\\)*", "whitespace");
--local str = createRegexp ("""\([^""]\|""""\)*""", "string literal");
--local lineComment = createRegexp ("--[^\n]*\n", "line comment");
--printf ("ws: %s\n", case m.matchRegexp (ws) of Succ (m, s) -> "(" ++ m.show ++ ", " ++ s ++ ")" | Fail (err) -> err.string esac);
2019-12-31 00:59:28 +03:00
2020-01-03 01:38:49 +03:00