mirror of
https://github.com/ProgramSnail/Lama.git
synced 2025-12-06 14:58:50 +00:00
176 lines
4.3 KiB
Text
176 lines
4.3 KiB
Text
-- (C) Dmitry Boulytchev, St. Petersburg State University, JetBrains Research, 2020
--
-- Matcher: simple string matching library.
|
|
|
|
-- Builds a regular expression descriptor.
-- Arguments:
--    r    --- a string representation for regular expression (as per GNU regexp)
--    name --- a free-form string describing what the expression stands for
--             (e.g. "identifier", "string constant", etc.); it is used in
--             error messages
-- Returns a two-element array: the value produced by regexp (r), then name.
fun createRegexp (r, name) {
  local compiled = regexp (r);

  return [compiled, name]
}
|
|
|
|
-- Create an immutable matcher.
-- Arguments:
--    buf       --- a string to match in
--    pos       --- an integer beginning position to match from
--    line, col --- line and column numbers corresponding to pos
-- Returns an array of closures [show, eof, matchString, matchRegexp]; the
-- top-level functions show, endOf, matchString and matchRegexp index into it.
-- A matcher is never modified: a successful match yields a new matcher.
-- This function is internal, do not use it directly.
-- To initially create a matcher use matcherInit function (see below).
fun matcherCreate (buf, pos, line, col) {
  -- Shows a matcher in a readable form
  fun show () {
    return sprintf ("buf : %-40s\npos : %d\nline: %d\ncol : %d\n", buf, pos, line, col)
  }

  -- Calculates the number of remaining unmatched characters in the buffer
  fun rest () {
    return buf.length - pos
  }

  -- Moves the position pointer on given number of characters and returns
  -- a new matcher with the line/column counters updated accordingly.
  fun shift (n) {
    local i, l = line, c = col;

    -- Walk over exactly the n consumed characters, i.e. [pos, pos + n).
    -- (The bound has to be pos + n: comparing against n alone skips the
    -- line/column update for every matcher that does not start at 0.)
    for i := pos, i < pos + n, i := i+1 do
      case buf [i] of
        '\n' -> l := l + 1; c := 1
      | '\t' -> c := c + 8
      | _    -> c := c + 1
      esac
    od;

    return matcherCreate (buf, pos + n, l, c)
  }

  -- Matches the literal string s at the current position.
  -- Returns Succ (matcher after s, s) or Fail (error message).
  fun matchString (s) {
    return
      -- The length is checked first so that matchSubString is only called
      -- when s fits into the rest of the buffer.
      if s.length > rest ()
      then Fail (sprintf ("""%s"" expected at %d:%d", s, line, col))
      elif matchSubString (buf, s, pos) then Succ (shift (s.length), s)
      else Fail (sprintf ("""%s"" expected at %d:%d", s, line, col))
      fi
  }

  -- Matches a regexp descriptor r (as built by createRegexp: r[0] is the
  -- regexp, r[1] is its name) at the current position.
  -- Returns Succ (matcher after the match, matched substring) or
  -- Fail (error message).
  fun matchRegexp (r) {
    local n;

    return
      -- n is used as the number of matched characters.
      -- NOTE(review): a zero-length match is reported as a failure here
      -- (strict "> 0"); confirm this is intended for regexps that may
      -- match the empty string.
      if (n := regexpMatch (r[0], buf, pos)) > 0
      then Succ (shift (n), substring (buf, pos, n))
      else Fail (sprintf ("%s expected at %d:%d", r[1], line, col))
      fi
  }

  -- Checks that the whole buffer has been consumed
  fun eof () {
    return rest () == 0
  }

  return [
    show,
    eof,
    matchString,
    matchRegexp
  ]
}
|
|
|
|
-- Shows a matcher in a readable form: calls the closure stored
-- in slot 0 of the matcher m.
fun show (m) {
  local render = m [0];

  return render ()
}
|
|
|
|
-- End-of-input test: calls the closure stored in slot 1 of the matcher m.
fun endOf (m) {
  local atEnd = m [1];

  return atEnd ()
}
|
|
|
|
-- Matches against a literal string: passes s to the closure stored
-- in slot 2 of the matcher m.
fun matchString (m, s) {
  local matchFn = m [2];

  return matchFn (s)
}
|
|
|
|
-- Matches against a regexp: passes r to the closure stored
-- in slot 3 of the matcher m.
fun matchRegexp (m, r) {
  local matchFn = m [3];

  return matchFn (r)
}
|
|
|
|
-- Creates a fresh matcher from a string buffer.
-- The matcher starts at position 0, line 1, column 1.
public fun matcherInit (buf) {
  local startPos = 0, startLine = 1, startCol = 1;

  return matcherCreate (buf, startPos, startLine, startCol)
}
|
|
|
|
--fun parse (a) {

--}
|
|
|
|
-- Sample matcher over a buffer with a line comment and blank lines
-- (only referenced by the commented-out experiments at the end of the file).
local m = matcherInit (" -- asdasdakm ,m.,msd .,m.,asd\n \n\n abc");

-- Regular expression descriptors for the basic lexemes.
-- NOTE(review): "str" writes the group/alternation escapes with a single
-- backslash while "ws" doubles them; confirm which spelling the regexp
-- library actually needs.
local lident  = createRegexp ("[a-z][a-zA-Z_]*", "lowercase identifier");
local uident  = createRegexp ("[A-Z][a-zA-Z_]*", "uppercase identifier");
local ws      = createRegexp ("\\([ \t\n]\\|--[^\n]*\n\\)*", "whitespace");
local str     = createRegexp ("""\([^""]\|""""\)*""", "string literal");
local decimal = createRegexp ("[0-9]+", "decimal literal");
local chr     = createRegexp ("'[^']'", "character literal");
|
|
|
|
-- Builds a parser for the literal string s: the result is a function
-- which takes a matcher and tries to match s against it.
fun token (s) {
  return fun (m) {
    return matchString (m, s)
  }
}
|
|
|
|
-- Parser for a lowercase identifier: matches the "lident" regexp
-- against the matcher m.
fun lid (m) {
  return matchRegexp (m, lident)
}
|
|
|
|
-- Parser for an uppercase identifier: matches the "uident" regexp
-- against the matcher m.
fun uid (m) {
  return matchRegexp (m, uident)
}
|
|
|
|
-- Parser for a decimal literal: matches the "decimal" regexp
-- against the matcher m.
fun const (m) {
  return matchRegexp (m, decimal)
}
|
|
|
|
-- Semantic action combinator: p @ f runs the parser p and, on success,
-- replaces the parsed value x by f (x); any other result of p is
-- returned unchanged.
infixl "@" before "*" (p, f) {
  return fun (m) {
    local outcome = p (m);

    return
      case outcome of
        Succ (next, value) -> Succ (next, f (value))
      | _                  -> outcome
      esac
  }
}
|
|
|
|
-- Sequencing combinator: l |> r runs the parser l; on success the parsed
-- value is passed to r, which has to return a parser, and that parser is
-- run on the matcher left after l. Any other result of l is returned
-- unchanged.
infixr "|>" after "!!" (l, r) {
  return fun (m) {
    local outcome = l (m);

    return
      case outcome of
        Succ (next, value) -> r (value) (next)
      | _                  -> outcome
      esac
  }
}
|
|
|
|
-- Alternation combinator: l || r runs the parser l and returns its result
-- if it is a Succ; otherwise the result of l is dropped and r is run on
-- the same (original) matcher.
infixr "||" after "|>" (l, r) {
  return fun (m) {
    local outcome = l (m);

    return
      case outcome of
        Succ (_, _) -> outcome
      | _           -> r (m)
      esac
  }
}
|
|
|
|
-- Demo grammar built from the combinators above:
--    expr --- a lowercase identifier wrapped into Lid, or else a decimal
--             literal wrapped into Dec
--    assn --- a lowercase identifier, then the token ":=", then expr;
--             on success the value is Assn (identifier, expression)
local expr = (lid   @ fun (name)   {return Lid (name)}) ||
             (const @ fun (digits) {return Dec (digits)}),
      assn = lid |> fun (id) {
               return token (":=") |> fun (op) {
                 return expr @ fun (e) {return Assn (id, e)}
               }
             };

-- Run the assignment parser on a sample input and print the result.
printf ("%s\n", assn (matcherInit ("x:=3")).string)
|
|
|
|
--local ident = createRegexp ("[a-z][a-zA-Z_]*", "identifier");
--local ws = createRegexp ("[ \n\t]+", "whitespace");

--local ws = createRegexp ("\\([ \t\n]\\|--[^\n]*\n\\)*", "whitespace");

--local str = createRegexp ("""\([^""]\|""""\)*""", "string literal");
--local lineComment = createRegexp ("--[^\n]*\n", "line comment");

--printf ("ws: %s\n", case m.matchRegexp (ws) of Succ (m, s) -> "(" ++ m.show ++ ", " ++ s ++ ")" | Fail (err) -> err.string esac);
|
|
|
|
|