mirror of
https://github.com/ProgramSnail/Lama.git
synced 2025-12-16 11:48:47 +00:00
Continue Spec
This commit is contained in:
parent
34eed3c71a
commit
a79cb93cf1
8 changed files with 314 additions and 70 deletions
110
doc/spec/03.01.lexical_structure.tex
Normal file
110
doc/spec/03.01.lexical_structure.tex
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
\section{Lexical Structure}
|
||||
|
||||
The character set for the language is \textsc{ASCII}, case-sensitive. In the following lexical description we will use
|
||||
the GNU Regexp syntax~\cite{GNULib} in lexical definitions.
|
||||
|
||||
\subsection{Whitespaces and Comments}
|
||||
|
||||
Whitespaces and comments are \textsc{ASCII} sequences which serve as delimiters for other tokens but otherwise are
|
||||
ignored.
|
||||
|
||||
The following characters are treated as whitespaces:
|
||||
|
||||
\begin{itemize}
|
||||
\item blank character "\texttt{ }";
|
||||
\item newline character "\texttt{\textbackslash n}";
|
||||
\item tabulation character "\texttt{\textbackslash t}".
|
||||
\end{itemize}
|
||||
|
||||
Additionally, two kinds of comments are recognized:
|
||||
|
||||
\begin{itemize}
|
||||
\item end-of-line comment "\texttt{--}" escapes the rest of the line, including itself;
|
||||
\item block comment "\texttt{(*} ... \texttt{*)}" escapes all the text between
|
||||
"\texttt{(*}" and "\texttt{*)}".
|
||||
\end{itemize}
|
||||
|
||||
There are a number of specific cases which have to be considered explicitly.
|
||||
|
||||
First, block comments can be properly nested. Then, the occurrences of comment symbols inside string literals (see below) are not
|
||||
considered as comments.
|
||||
|
||||
An end-of-line comment encountered \emph{outside} of a block comment escapes block comment symbols:
|
||||
|
||||
\begin{lstlisting}
|
||||
-- the following symbols are not considered as a block comment: (*
|
||||
-- same here: *)
|
||||
\end{lstlisting}
|
||||
|
||||
Similarly, an end-of-line comment encountered inside a block comment is escaped:
|
||||
|
||||
\begin{lstlisting}
|
||||
(* Block comment starts here ...
|
||||
-- and ends here: *)
|
||||
\end{lstlisting}
|
||||
|
||||
\subsection{Identifiers and Constants}
|
||||
|
||||
The language distinguishes identifiers, signed decimal literals, string and character literals (see Fig.~\ref{idents_and_consts}). There are
|
||||
two kinds of identifiers: those beginning with uppercase characters (\token{UIDENT}) and lowercase characters (\token{LIDENT}).
|
||||
|
||||
String literals cannot span multiple lines; a double quote character (") inside a string literal has to be doubled to prevent it from
|
||||
being considered as this literal's delimiter.
|
||||
|
||||
Character literals as a rule are comprised of a single \textsc{ASCII} character; if this character is a quote (') it has to be doubled. Additionally
|
||||
two-character abbreviations "\textbackslash t" and "\textbackslash n" are recognized and converted into a single-character representation.
|
||||
|
||||
\begin{figure}[t]
|
||||
\[
|
||||
\begin{array}{rcl}
|
||||
\token{UIDENT} & = &\mbox{\texttt{[A-Z][a-zA-Z\_0-9]*}}\\
|
||||
\token{LIDENT} & = &\mbox{\texttt{[a-z][a-zA-Z\_0-9]*}}\\
|
||||
\token{DECIMAL}& = &\mbox{\texttt{-?[0-9]+}}\\
|
||||
\token{STRING} & = &\mbox{\texttt{"([\^{}\textbackslash"]|"")*"}}\\
|
||||
\token{CHAR} & = &\mbox{\texttt{'([\^{}']|''|\textbackslash n|\textbackslash t)'}}
|
||||
\end{array}
|
||||
\]
|
||||
\caption{Identifiers and constants}
|
||||
\label{idents_and_consts}
|
||||
\end{figure}
|
||||
|
||||
|
||||
\subsection{Keywords}
|
||||
|
||||
The following identifiers are reserved for keywords:
|
||||
|
||||
\begin{lstlisting}
|
||||
after array at before boxed case do elif else
|
||||
esac false fi for fun if import infix infixl
|
||||
infixr length local od of public repeat return sexp
|
||||
skip string then true unboxed until when while
|
||||
\end{lstlisting}
|
||||
|
||||
\subsection{Infix Operators}
|
||||
|
||||
Infix operators are defined as follows:
|
||||
|
||||
\[
|
||||
\token{INFIX}=\mbox{\texttt{[+*/\%\$\#@!|\&\^{}\textasciitilde{}?<>:=\textbackslash-]+}}
|
||||
\]
|
||||
|
||||
There is a predefined set of built-in infix operators (see~\ref{binary_expressions}); additionally
|
||||
an end-user can define custom infix operators (see~\ref{custom_infix}). Note, sometimes
|
||||
additional whitespaces are required to disambiguate infix operator applications. For example, if a
|
||||
custom infix operator "\lstinline|+-|" is defined, then the expression "\lstinline|a +- b|" can no longer be
|
||||
considered as "\lstinline|a +(-b)|". Note also that a custom operator "\lstinline|--|" can not be
|
||||
defined due to lexical conventions.
|
||||
|
||||
\subsection{Delimiters}
|
||||
|
||||
The following symbols are treated as delimiters:
|
||||
|
||||
\begin{lstlisting}
|
||||
. , ( ) { }
|
||||
; # ->
|
||||
\end{lstlisting}
|
||||
|
||||
Although custom infix operators can coincide with delimiters "\lstinline|#|" and "\lstinline|->|" they can
|
||||
never clash as both these delimiters can not be encountered in expressions.
|
||||
|
||||
|
||||
21
doc/spec/03.02.compilation_units.tex
Normal file
21
doc/spec/03.02.compilation_units.tex
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
\begin{figure}[t]
|
||||
\[
|
||||
\begin{array}{rcl}
|
||||
\defterm{compilationUnit} & : & \nonterm{import}^\star\s\nonterm{scopeExpression}\\
|
||||
\defterm{import} & : & \term{import}\s\token{UIDENT}\s\term{;}
|
||||
\end{array}
|
||||
\]
|
||||
\caption{Compilation unit concrete syntax}
|
||||
\label{compilation_unit}
|
||||
\end{figure}
|
||||
|
||||
\section{Compilation Units}
|
||||
|
||||
A compilation unit is a minimal structure recognized by a parser. An application can contain multiple units, compiled separately.
|
||||
In order to use other units they have to be imported. In particular, the standard library is comprised of a number of precompiled units,
|
||||
which can be imported by an end-user application.
|
||||
|
||||
The concrete syntax for a compilation unit is shown in Fig.~\ref{compilation_unit}. Besides optional imports a unit must contain
|
||||
a \nonterm{scopeExpression}, which may contain some definitions and computations. Note, a unit can not be empty. The computations described in
|
||||
a unit are performed at unit initialization time (see~\ref{separate_compilation}).
|
||||
|
||||
105
doc/spec/03.03.scope_expressions.tex
Normal file
105
doc/spec/03.03.scope_expressions.tex
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
\begin{figure}[t]
|
||||
\[
|
||||
\begin{array}{rcl}
|
||||
\defterm{scopeExpression} & : & \nonterm{definition}^\star\s\nonterm{expression}\\
|
||||
\defterm{definition} & : & \nonterm{variableDefinition}\alt\nonterm{functionDefinition}\alt\nonterm{infixDefinition}\\
|
||||
\defterm{variableDefinition} & : & (\s\term{local}\alt\term{public}\s)\s\nonterm{variableDefinitionSequence}\s\term{;}\\
|
||||
\defterm{variableDefinitionSequence} & : & \nonterm{variableDefinitionSequenceItem}\s(\s\term{,}\s\nonterm{variableDefinitionSequenceItem}\s)^\star\\
|
||||
\defterm{variableDefinitionSequenceItem} & : & \token{LIDENT}\s[\s\term{=}\s\nonterm{basicExpression}\s]\\
|
||||
\defterm{functionDefinition} & : & [\s\term{public}\s]\s\term{fun}\s\token{LIDENT}\s\term{(}\s\nonterm{functionArguments}\s\term{)}\s\nonterm{functionBody}\\
|
||||
\defterm{functionArguments} & : & [\s\token{LIDENT}\s(\s\term{,}\s\token{LIDENT}\s)^\star\s]\\
|
||||
\defterm{functionBody} & : & \term{\{}\s\nonterm{scopeExpression}\s\term{\}}
|
||||
\end{array}
|
||||
\]
|
||||
\caption{Scope expression concrete syntax}
|
||||
\label{scope_expression}
|
||||
\end{figure}
|
||||
|
||||
\section{Scope Expressions}
|
||||
|
||||
Scope expressions provide a means to put expressions in a scoped context. The definitions in scope expressions comprise function definitions and
|
||||
variable definitions (see Fig.~\ref{scope_expression}). For example:
|
||||
|
||||
\begin{lstlisting}
|
||||
local x, y, z; -- variable definitions
|
||||
|
||||
fun id (x) {x} -- function definition
|
||||
\end{lstlisting}
|
||||
|
||||
As scope expressions are expressions, they can be nested:
|
||||
|
||||
\begin{lstlisting}
|
||||
local x;
|
||||
|
||||
{ -- nested scope begins here
|
||||
local y;
|
||||
skip
|
||||
} -- nested scope ends here
|
||||
\end{lstlisting}
|
||||
|
||||
The definitions on the top-level of a compilation unit can be tagged as ``\lstinline|public|'', in which case they are exported and become visible to
|
||||
other units which import the given one. Nested scopes can not contain public definitions.
|
||||
|
||||
The nesting relation has the shape of a tree, and in a concrete node of the tree all definitions in all enclosing scopes are visible:
|
||||
|
||||
\begin{lstlisting}
|
||||
local x;
|
||||
|
||||
{local y;
|
||||
{local z;
|
||||
skip -- x, y, and z are visible here
|
||||
};
|
||||
{local t;
|
||||
skip -- x, y, and t are visible here
|
||||
};
|
||||
skip -- x and y are visible here
|
||||
};
|
||||
skip -- only x is visible here
|
||||
\end{lstlisting}
|
||||
|
||||
Multiple definitions of the same name in the same scope are prohibited:
|
||||
|
||||
\begin{lstlisting}
|
||||
local x;
|
||||
fun x () {0} -- error
|
||||
\end{lstlisting}
|
||||
|
||||
However, a definition in a nested scope can override a definition in an enclosing one:
|
||||
|
||||
\begin{lstlisting}
|
||||
local x;
|
||||
|
||||
{
|
||||
fun x () {0} -- ok
|
||||
skip -- here x is associated with the function
|
||||
};
|
||||
|
||||
skip -- here x is associated with the variable
|
||||
\end{lstlisting}
|
||||
|
||||
A function can freely use all visible definitions; in particular, functions defined in the
|
||||
same scope can be mutually recursive:
|
||||
|
||||
\begin{lstlisting}
|
||||
local x;
|
||||
fun f () {0}
|
||||
|
||||
{
|
||||
fun g () {f () + h () + y} -- ok
|
||||
fun h () {g () + x} -- ok
|
||||
local y;
|
||||
skip
|
||||
};
|
||||
skip
|
||||
\end{lstlisting}
|
||||
|
||||
A variable, defined in a scope, can be attributed with an expression, calculating its initial value.
|
||||
These expressions, however, are evaluated in the order of variable declaration. Thus, while
|
||||
technically it is possible to have forward references in the initialization expression, their
|
||||
behaviour is undefined. For example:
|
||||
|
||||
\begin{lstlisting}
|
||||
local x = y + 2; -- undefined, as y is not yet initialized at this point
|
||||
local y = x + 2;
|
||||
skip
|
||||
\end{lstlisting}
|
||||
45
doc/spec/03.04.expressions.tex
Normal file
45
doc/spec/03.04.expressions.tex
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
\begin{figure}[t]
|
||||
\[
|
||||
\begin{array}{rcll}
|
||||
\defterm{expression} & : & \nonterm{basicExpression}\s[\s\term{;}\s\nonterm{expression}\s]&\\
|
||||
\defterm{basicExpression} & : & \nonterm{binaryExpression}&\\
|
||||
\defterm{binaryExpression} & : & \nonterm{binaryOperand}\s\token{INFIX}\s\nonterm{binaryOperand}&\alt\\
|
||||
& & \nonterm{binaryOperand}&\\
|
||||
\defterm{binaryOperand} & : & \nonterm{binaryExpression}&\alt\\
|
||||
& & [\s\term{-}\s]\s\nonterm{postfixExpression}&\\
|
||||
\defterm{postfixExpression} & : & \nonterm{primary}&\alt\\
|
||||
& & \nonterm{postfixExpression}\s\term{(}\s[\s\nonterm{expression}\s(\s\term{,}\s\nonterm{expression}\s)^\star\s]\s\term{)}&\alt\\
|
||||
& & \nonterm{postfixExpression}\s\term{[}\s\nonterm{expression}\s\term{]}&\alt\\
|
||||
& & \nonterm{postfixExpression}\s\term{.}\s\term{length}&\alt\\
|
||||
& & \nonterm{postfixExpression}\s\term{.}\s\term{string}&\\
|
||||
\defterm{primary} & : & \token{DECIMAL}&\alt\\
|
||||
& & \token{STRING}&\alt\\
|
||||
& & \token{CHAR}&\alt\\
|
||||
& & \token{LIDENT}&\alt\\
|
||||
& & \term{true}&\alt\\
|
||||
& & \term{false}&\alt\\
|
||||
& & \term{infix}\s\token{INFIX}&\alt\\
|
||||
& & \term{skip}&\alt\\
|
||||
& & \term{return}\s[\s\nonterm{basicExpression}\s]&\alt\\
|
||||
& & \term{fun}\s\term{(}\s\nonterm{functionArguments}\s\term{)}\s\nonterm{functionBody}&\alt\\
|
||||
& & \term{\{}\s\nonterm{scopeExpression}\s\term{\}}&\alt\\
|
||||
& & \nonterm{listExpression}&\alt\\
|
||||
& & \nonterm{arrayExpression}&\alt\\
|
||||
& & \nonterm{S-expression}&\alt\\
|
||||
& & \nonterm{ifExpression}&\alt\\
|
||||
& & \nonterm{whileExpression}&\alt\\
|
||||
& & \nonterm{repeatExpression}&\alt\\
|
||||
& & \nonterm{forExpression}&\alt\\
|
||||
& & \nonterm{caseExpression}&\alt\\
|
||||
& & \term{(}\s\nonterm{expression}\s\term{)}&
|
||||
\end{array}
|
||||
\]
|
||||
\caption{Expression concrete syntax}
|
||||
\label{expressions}
|
||||
\end{figure}
|
||||
|
||||
\section{Expressions}
|
||||
\label{sec:expressions}
|
||||
|
||||
Expressions
|
||||
|
||||
|
|
@ -1,71 +1,26 @@
|
|||
\chapter{Concrete Syntax}
|
||||
|
||||
\begin{figure}[t]
|
||||
\[
|
||||
\begin{array}{rcl}
|
||||
\defterm{compilationUnit} & : & \nonterm{import}^\star\s\nonterm{scopeExpression}\\
|
||||
\defterm{import} & : & \term{import}\s\token{UIDENT}\s\term{;}
|
||||
\end{array}
|
||||
\]
|
||||
\caption{Compilation unit concrete syntax}
|
||||
\end{figure}
|
||||
In this chapter we describe the concrete syntax of the language as it is recognized by the parser. In the
|
||||
syntactic description we will use extended Backus-Naur form with the following conventions:
|
||||
|
||||
\begin{figure}[t]
|
||||
\[
|
||||
\begin{array}{rcll}
|
||||
\defterm{expression} & : & \nonterm{basicExpression}\s(\s\term{;}\s\nonterm{expression}\s)&\\
|
||||
\defterm{basicExpression} & : & \nonterm{binaryExpression}&\\
|
||||
\defterm{binaryExpression} & : & \nonterm{binaryOperand}\s\token{INFIX}\s\nonterm{binaryOperand}&\alt\\
|
||||
& & \nonterm{binaryOperand}&\\
|
||||
\defterm{binaryOperand} & : & \nonterm{binaryExpression}&\alt\\
|
||||
& & [\s\term{-}\s]\s\nonterm{postfixExpression}&\\
|
||||
\defterm{postfixExpression} & : & \nonterm{primary}&\alt\\
|
||||
& & \nonterm{postfixExpression}\s\term{(}\s[\s\nonterm{expression}\s(\s\term{,}\s\nonterm{expression}\s)^\star\s]\s\term{)}&\alt\\
|
||||
& & \nonterm{postfixExpression}\s\term{[}\s\nonterm{expression}\s\term{]}&\alt\\
|
||||
& & \nonterm{postfixExpression}\s\term{.}\s\term{length}&\alt\\
|
||||
& & \nonterm{postfixExpression}\s\term{.}\s\term{string}&\\
|
||||
\begin{itemize}
|
||||
\item nonterminals are presented in \nonterm{italics};
|
||||
\item concrete terminals are \term{grayed out};
|
||||
\item classes of terminals are \token{CAPITALIZED};
|
||||
\item a postfix ``$^\star$'' designates zero-or-more repetitions;
|
||||
\item square brackets ``$[\dots]$'' designate zero-or-one repetition;
|
||||
\item round brackets ``$(\dots)$'' are used for grouping;
|
||||
\item alternation is denoted by ``$\alt$'', sequencing by juxtaposition;
|
||||
\item a colon ``$:$'' separates a nonterminal being defined from its definition.
|
||||
\end{itemize}
|
||||
|
||||
\defterm{primary} & : & \token{DECIMAL}&\alt\\
|
||||
& & \token{STRING}&\alt\\
|
||||
& & \token{CHAR}&\alt\\
|
||||
& & \token{LIDENT}&\alt\\
|
||||
& & \term{true}&\alt\\
|
||||
& & \term{false}&\alt\\
|
||||
& & \term{infix}\s\token{INFIX}&\alt\\
|
||||
& & \term{skip}&\alt\\
|
||||
& & \term{fun}\s\term{(}\s\nonterm{functionArguments}\s\term{)}\s\nonterm{functionBody}&\alt\\
|
||||
& & \term{\{}\s\nonterm{scopeExpression}\s\term{\}}&\alt\\
|
||||
& & \nonterm{listExpression}&\alt\\
|
||||
& & \nonterm{arrayExpression}&\alt\\
|
||||
& & \nonterm{S-expression}&\alt\\
|
||||
& & \nonterm{ifExpression}&\alt\\
|
||||
& & \nonterm{whileExpression}&\alt\\
|
||||
& & \nonterm{repeatExpression}&\alt\\
|
||||
& & \nonterm{forExpression}&\alt\\
|
||||
& & \nonterm{caseExpression}&\alt\\
|
||||
& & \term{(}\s\nonterm{expression}\s\term{)}&
|
||||
\end{array}
|
||||
\]
|
||||
\caption{Expression concrete syntax}
|
||||
\end{figure}
|
||||
|
||||
|
||||
\begin{figure}[t]
|
||||
\[
|
||||
\begin{array}{rcl}
|
||||
\defterm{scopeExpression} & : & \nonterm{definition}^\star\s\nonterm{expression}\\
|
||||
\defterm{definition} & : & \nonterm{variableDefinition}\alt\nonterm{functionDefinition}\alt\nonterm{infixDefinition}\\
|
||||
\defterm{variableDefinition} & : & (\s\term{local}\alt\term{public}\s)\s\nonterm{variableDefinitionSequence}\s\term{;}\\
|
||||
\defterm{variableDefinitionSequence} & : & \nonterm{variableDefinitionSequenceItem}\s(\s\term{,}\s\nonterm{variableDefinitionSequenceItem}\s)^\star\\
|
||||
\defterm{variableDefinitionSequenceItem} & : & \token{LIDENT}\s[\s\term{=}\s\nonterm{basicExpression}\s]\\
|
||||
\defterm{functionDefinition} & : & [\s\term{public}\s]\s\term{fun}\s\token{LIDENT}\s\term{(}\s\nonterm{functionArguments}\s\term{)}\s\nonterm{functionBody}\\
|
||||
\defterm{functionArguments} & : & [\s\token{LIDENT}\s(\s\term{,}\s\token{LIDENT}\s)^\star\s]\\
|
||||
\defterm{functionBody} & : & \term{\{}\s\nonterm{scopeExpression}\s\term{\}}
|
||||
\end{array}
|
||||
\]
|
||||
\caption{Scope expression concrete syntax}
|
||||
\end{figure}
|
||||
In the description below we will put in-line code samples in double quotes "..." which are not considered as a
|
||||
part of concrete syntax.
|
||||
|
||||
\input{03.01.lexical_structure}
|
||||
\input{03.02.compilation_units}
|
||||
\input{03.03.scope_expressions}
|
||||
\input{03.04.expressions}
|
||||
|
||||
\begin{figure}[t]
|
||||
\[
|
||||
|
|
|
|||
6
doc/spec/spec.bib
Normal file
6
doc/spec/spec.bib
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
@manual{GNULib,
|
||||
title = "{The GNU Portability Library}",
|
||||
organization = "{Free Software Foundation}",
|
||||
bibdate = "February 24, 2019",
|
||||
bibsource = "https://www.gnu.org/software/gnulib/manual"
|
||||
}
|
||||
|
|
@ -86,30 +86,32 @@
|
|||
\newcommand{\alt}{\s\mid\s}
|
||||
\newcommand{\s}{\:\:}
|
||||
|
||||
\lstdefinelanguage{lama}{
|
||||
keywords={fun, case, esac, do, od, if, then, else, elif, fi, skip, repeat, until, for, local},
|
||||
\lstdefinelanguage{alm}{
|
||||
keywords={skip,if,then,else,elif,fi,while,do,od,repeat,until,for,fun,local,public,return,import,length,
|
||||
string,case,of,esac,when,boxed,unboxed,string,sexp,array,infix,infixl,infixr,at,before,after,true,false},
|
||||
sensitive=true,
|
||||
%basicstyle=\small,
|
||||
commentstyle=\scriptsize\rmfamily,
|
||||
basicstyle=\small,
|
||||
%commentstyle=\scriptsize\rmfamily,
|
||||
keywordstyle=\ttfamily\bfseries,
|
||||
identifierstyle=\ttfamily,
|
||||
basewidth={0.5em,0.5em},
|
||||
columns=fixed,
|
||||
fontadjust=true,
|
||||
literate={->}{{$\to$}}3,
|
||||
morecomment=[s]{(*}{*)}
|
||||
morecomment=[s][\ttfamily]{(*}{*)},
|
||||
morecomment=[l][\ttfamily]{--}
|
||||
}
|
||||
|
||||
\lstset{
|
||||
mathescape=true,
|
||||
%basicstyle=\small,
|
||||
basicstyle=\small,
|
||||
identifierstyle=\ttfamily,
|
||||
keywordstyle=\bfseries,
|
||||
commentstyle=\scriptsize\rmfamily,
|
||||
basewidth={0.5em,0.5em},
|
||||
fontadjust=true,
|
||||
escapechar=!,
|
||||
language=lama
|
||||
language=alm
|
||||
}
|
||||
|
||||
\sloppy
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue