-- (C) Dmitry Boulytchev, St. Petersburg State University, JetBrains Research, 2020
-- Matcher: simple string matching library.

-- Create a regular expression representation.
-- Arguments:
--    r    --- a string representation for regular expression (as per GNU regexp)
--    name --- a string describing the meaning of the expression in free form
--             (e.g. "identifier", "string constant", etc.), used for error
--             reporting
-- Returns a two-element array: [compiled regexp, name].
fun createRegexp (r, name) {
  return [regexp (r), name]
}

-- Create an immutable matcher.
-- Arguments:
--    buf       --- a string to match in
--    pos       --- an integer beginning position to match from
--    line, col --- line and column numbers
-- Returns an array of closures [show, eof, matchString, matchRegexp]; the
-- top-level functions below access these closures by index.
-- This function is internal, do not use it directly.
-- To initially create a matcher use matcherInit function (see below).
fun matcherCreate (buf, pos, line, col) {
  -- Shows a matcher in a readable form
  fun show () {
    return sprintf ("buf : %-40s\npos : %d\nline: %d\ncol : %d\n", buf, pos, line, col)
  }

  -- Calculates the number of remaining unmatched characters in the buffer
  fun rest () {
    return buf.length - pos
  }

  -- Moves the position pointer on given number of characters.
  -- Returns a new matcher positioned n characters further; line and column
  -- are recomputed by scanning the n consumed characters: a newline starts
  -- a new line at column 1, a tab advances the column by 8, any other
  -- character advances it by 1.
  fun shift (n) {
    local i, l = line, c = col;

    -- The consumed characters occupy indices pos .. pos+n-1; the upper bound
    -- has to be relative to pos, otherwise nothing (or too little) is scanned
    -- as soon as pos > 0 and the reported coordinates become stale.
    for i := pos, i < pos + n, i := i+1 do
      case buf [i] of
        '\n' -> l := l + 1; c := 1
      | '\t' -> c := c + 8
      | _    -> c := c + 1
      esac
    od;

    return matcherCreate (buf, pos + n, l, c)
  }

  -- Matches the literal string s at the current position.
  -- Returns Succ (matcher shifted past s, s) on success and
  -- Fail (error message with the current line:col) otherwise.
  fun matchString (s) {
    -- The length is checked first so that matchSubString is never asked
    -- to look past the end of the buffer.
    return if s.length > rest ()
           then Fail (sprintf ("""%s"" expected at %d:%d", s, line, col))
           elif matchSubString (buf, s, pos)
           then Succ (shift (s.length), s)
           else Fail (sprintf ("""%s"" expected at %d:%d", s, line, col))
           fi
  }

  -- Matches the regular expression r (as built by createRegexp) at the
  -- current position.
  -- Returns Succ (matcher shifted past the match, matched substring) on
  -- success and Fail (error message built from the regexp's name and the
  -- current line:col) otherwise.
  -- NOTE(review): only a result strictly greater than 0 is accepted, so an
  -- empty match is reported as a failure; confirm this is intended for
  -- patterns that can match the empty string.
  fun matchRegexp (r) {
    local n;

    return if (n := regexpMatch (r[0], buf, pos)) > 0
           then Succ (shift (n), substring (buf, pos, n))
           else Fail (sprintf ("%s expected at %d:%d", r[1], line, col))
           fi
  }

  -- Checks whether the whole buffer has been consumed
  fun eof () {
    return rest () == 0
  }

  return [
    show,
    eof,
    matchString,
    matchRegexp
  ]
}

-- Shows a matcher in a readable form
fun show (m) {
  return m [0] ()
}

-- Tests whether the matcher has reached the end of its buffer
fun endOf (m) {
  return m [1] ()
}

-- Matches against a string
fun matchString (m, s) {
  return m [2] (s)
}

-- Matches against a regexp
fun matchRegexp (m, r) {
  return m [3] (r)
}

-- Creates a fresh matcher from a string buffer
public fun matcherInit (buf) {
  return matcherCreate (buf, 0, 1, 1)
}

--fun parse (a) {
--}

-- A sample matcher and a set of sample regular expressions.
local m = matcherInit (" -- asdasdakm ,m.,msd .,m.,asd\n \n\n abc");
local
  lident  = createRegexp ("[a-z][a-zA-Z_]*", "lowercase identifier"),
  uident  = createRegexp ("[A-Z][a-zA-Z_]*", "uppercase identifier"),
  ws      = createRegexp ("\\([ \t\n]\\|--[^\n]*\n\\)*", "whitespace"),
  str     = createRegexp ("""\([^""]\|""""\)*""", "string literal"),
  decimal = createRegexp ("[0-9]+", "decimal literal"),
  chr     = createRegexp ("'[^']'", "character literal");

-- Primitive parsers: each one takes a matcher and returns Succ (matcher, value)
-- or Fail (message).

-- Builds a parser for the literal string s
fun token (s) {
  return fun (m) {return m.matchString (s)}
}

-- Parses a lowercase identifier
fun lid (m) {
  return m.matchRegexp (lident)
}

-- Parses an uppercase identifier
fun uid (m) {
  return m.matchRegexp (uident)
}

-- Parses a decimal literal
fun const (m) {
  return m.matchRegexp (decimal)
}

-- Semantic action: runs parser p and, on success, applies f to its result;
-- any other outcome is passed through unchanged.
infixl "@" before "*" (p, f) {
  return fun (m) {
    return case p (m) of
             Succ (m, x) -> Succ (m, f (x))
           | err         -> err
           esac
  }
}

-- Sequencing: runs parser l and, on success, feeds its result to r to obtain
-- the parser to continue with on the advanced matcher; any other outcome of l
-- is passed through unchanged.
infixr "|>" after "!!" (l, r) {
  return fun (m) {
    return case l (m) of
             Succ (m, s) -> r (s) (m)
           | err         -> err
           esac
  }
}

-- Alternation: runs parser l and returns its result on success; otherwise
-- runs r on the same (original) matcher.
infixr "||" after "|>" (l, r) {
  return fun (m) {
    return case l (m) of
             s@Succ (_, _) -> s
           | err           -> r (m)
           esac
  }
}

-- A sample grammar: an expression is an identifier or a decimal constant,
-- an assignment is "<identifier> := <expression>".
local
  expr = lid @ fun (s) {return Lid (s)} || const @ fun (s) {return Dec (s)},
  assn = lid |> fun (id) {return token (":=") |> fun (s) {return expr @ fun (e) {return Assn (id, e)}}};

printf ("%s\n", assn (matcherInit ("x:=3")).string)

--local ident = createRegexp ("[a-z][a-zA-Z_]*", "identifier");
--local ws = createRegexp ("[ \n\t]+", "whitespace");
--local ws = createRegexp ("\\([ \t\n]\\|--[^\n]*\n\\)*", "whitespace");
--local str = createRegexp ("""\([^""]\|""""\)*""", "string literal");
--local lineComment = createRegexp ("--[^\n]*\n", "line comment");
--printf ("ws: %s\n", case m.matchRegexp (ws) of Succ (m, s) -> "(" ++ m.show ++ ", " ++ s ++ ")" | Fail (err) -> err.string esac);