2020-01-03 01:38:49 +03:00
|
|
|
-- (C) Dmitry Boulytchev, St. Petersburg State University, JetBrains Research, 2020
--
-- Matcher: simple string matching library.
|
2019-12-31 00:59:28 +03:00
|
|
|
|
2020-01-03 01:38:49 +03:00
|
|
|
-- Create a regular expression representation.
-- Arguments:
--    r    --- a string representation for regular expression (as per GNU regexp)
--    name --- a string describing the meaning of the expression in free form
--             (e.g. "identifier", "string constant", etc.), used for error
--             reporting
|
2020-01-14 17:08:35 +03:00
|
|
|
public fun createRegexp (r, name) {
  -- The result is a two-element array: the compiled expression comes first,
  -- the free-form description (used in "... expected" messages) second.
  [regexp (r), name]
}
|
|
|
|
|
|
|
|
|
|
-- Create an immutable matcher.
-- Arguments:
--    buf       --- a string to match in
--    pos       --- an integer beginning position to match from
--    line, col --- line and column numbers
-- This function is internal, do not use it directly.
-- To initially create a matcher use the initMatcher function (see below).
|
2020-01-21 22:03:11 +03:00
|
|
|
fun createMatcher (buf, pos, line, col) {
  -- A matcher is represented as an array of closures over buf, pos, line and
  -- col. Nothing is mutated: every successful match yields a *new* matcher
  -- positioned after the matched text.
  --
  -- Slots of the resulting array:
  --    0 --- show
  --    1 --- eof
  --    2 --- matchString
  --    3 --- matchRegexp
  --    4 --- current line number
  --    5 --- current column number

  -- Shows a matcher in a readable form
  fun show () {
    sprintf ("buf : %-40s\npos : %d\nline: %d\ncol : %d\n", buf, pos, line, col)
  }

  -- Calculates the number of remaining unmatched characters in the buffer
  fun rest () {
    buf.length - pos
  }

  -- Moves the position pointer on given number of characters and returns
  -- a new matcher; line/column are recalculated from the skipped characters:
  -- a newline starts a new line at column 1, a tab advances the column by 8,
  -- any other character advances it by 1.
  fun shift (n) {
    var i, l = line, c = col;

    for i := pos, i < pos+n, i := i+1 do
      case buf [i] of
        '\n' -> l := l + 1; c := 1
      | '\t' -> c := c + 8
      | _    -> c := c + 1
      esac
    od;

    createMatcher (buf, pos + n, l, c)
  }

  -- Matches the string s at the current position.
  -- Returns Succ (s, <shifted matcher>) on success and
  -- Fail (<message>, line, col) otherwise.
  fun matchString (s) {
    -- The length check comes first, so matchSubString is never asked to
    -- look past the end of the buffer.
    if s.length > rest ()
    then Fail (sprintf ("""%s"" expected", s), line, col)
    elif matchSubString (buf, s, pos) then Succ (s, shift (s.length))
    -- Same message as for the "buffer too short" case: the location is
    -- carried by the line/col components of Fail, not by the text.
    else Fail (sprintf ("""%s"" expected", s), line, col)
    fi
  }

  -- Matches a regular expression at the current position.
  -- r is a pair as built by createRegexp: r[0] is the compiled expression,
  -- r[1] is its description used in the error message.
  -- Returns Succ (<matched substring>, <shifted matcher>) on success and
  -- Fail (<message>, line, col) otherwise.
  fun matchRegexp (r) {
    var n;

    -- A non-negative result of regexpMatch is used as the length of the
    -- matched prefix; a negative one means there is no match.
    if (n := regexpMatch (r[0], buf, pos)) >= 0
    then Succ (substring (buf, pos, n), shift (n))
    else Fail (sprintf ("%s expected", r[1]), line, col)
    fi
  }

  -- Succeeds (with an empty string) only when the whole buffer is consumed
  fun eof () {
    if rest () == 0
    then Succ ("", shift (0))
    else Fail ("EOF expected", line, col)
    fi
  }

  [show,
   eof,
   matchString,
   matchRegexp,
   fun () {line},
   fun () {col}]
}
|
|
|
|
|
|
2020-03-13 19:41:14 +03:00
|
|
|
-- Shows a matcher in a readable form
public fun showMatcher (m) {
  -- Slot 0 of a matcher holds its "show" operation.
  var showOp = m [0];

  showOp ()
}
|
|
|
|
|
|
2020-03-13 19:41:14 +03:00
|
|
|
-- Checks that a matcher has reached the end of its buffer
public fun endOfMatcher (m) {
  -- Slot 1 of a matcher holds its "eof" operation.
  var eofOp = m [1];

  eofOp ()
}
|
|
|
|
|
|
2020-01-14 17:08:35 +03:00
|
|
|
-- Matches against a string
public fun matchString (m, s) {
  -- Slot 2 of a matcher holds its string-matching operation.
  var matchOp = m [2];

  matchOp (s)
}
|
|
|
|
|
|
2020-01-04 21:50:14 +03:00
|
|
|
-- Matches against a regexp
|
2020-01-14 17:08:35 +03:00
|
|
|
public fun matchRegexp (m, r) {
  -- Slot 3 of a matcher holds its regexp-matching operation.
  var matchOp = m [3];

  matchOp (r)
}
|
|
|
|
|
|
2020-03-13 19:41:14 +03:00
|
|
|
-- Gets a line number
|
|
|
|
|
public fun getLine (m) {
  -- Slot 4 of a matcher is a function returning the current line number.
  var lineOp = m [4];

  lineOp ()
}
|
|
|
|
|
|
|
|
|
|
-- Gets a column number
|
|
|
|
|
public fun getCol (m) {
  -- Slot 5 of a matcher is a function returning the current column number.
  var colOp = m [5];

  colOp ()
}
|
|
|
|
|
|
2020-01-04 21:50:14 +03:00
|
|
|
-- Creates a fresh matcher from a string buffer
|
2020-01-21 22:03:11 +03:00
|
|
|
public fun initMatcher (buf) {
  -- A fresh matcher starts at offset 0 of the buffer, at line 1, column 1.
  var startPos = 0, startLine = 1, startCol = 1;

  createMatcher (buf, startPos, startLine, startCol)
}
|