lama_byterun/doc/lectures/07.tex
2019-11-19 16:53:30 +03:00

155 lines
6.1 KiB
TeX

\section{Arrays and strings}
An array can be represented as a pair: the length of the array and a mapping from indices to elements. If we denote
$\mathscr E$ the set of elements then the set of all arrays $\mathscr A (\mathscr E)$ can be defined as follows:
\[
\mathscr A(\mathscr E) = \mathbb N \times (\mathbb N \to \mathscr E)
\]
For an array $(n, f)$ we assume $\dom{f}=[0\,..\,n-1]$. An element selection function:
\[
\begin{array}{c}
\bullet[\bullet] : \mathscr A (\mathscr E) \to \mathbb N \to \mathscr E\\[2mm]
(n, f) [i] = \left\{
\begin{array}{rcl}
f\;i &, & i < n\\
\bot&,&\;\mbox{otherwise}
\end{array}
\right.
\end{array}
\]
We represent arrays by \emph{references}. Thus, we introduce a (linearly) ordered set of locations
\[
\mathscr L = \{l_0, l_1, \dots\}
\]
Now, the set of all values the programs operate on can be described as follows:
\[
\mathscr V = \mathbb Z \uplus \mathscr L
\]
Here, every value is either an integer, or a reference (some location). The disjoint union ``$\uplus$'' makes it possible to
unambiguously discriminate between the shapes of each value. To access arrays, we introduce an abstraction of memory:
\[
\mathscr M = \mathscr L \to \mathscr A\,(\mathscr V)
\]
We now add two more components to the configurations: a memory function $\mu$ and the first free memory location $l_m$, and
define the following primitive:
\[
\primi{mem}{\inbr{s,\,\mu,\,l_m,\,i,\,o,\,v}}=\mu
\]
which gives a memory function from a configuration.
\subsection{Adding arrays on expression level}
On expression level, abstractly/concretely:
\[
\begin{array}{rclll}
\mathscr E += & &\mathscr E \mathtt{[} \mathscr E \mathtt{]} & (\llang{$a$ [$e$]}) & \mbox{taking an element}\\
& \mid & \mathtt{[} \mathscr E^* \mathtt{]} & (\llang{[$e_1$, $e_2$,.., $e_k$]}) & \mbox{creating an array}\\
& \mid & \mathscr E \mathtt{.length} & ($e$\mathtt{.length}) & \mbox{taking the length}
\end{array}
\]
%In addition, in a concrete syntax we supply two special forms for strings: \llang{'$x$'} as a denotation for integer code of ASCII symbol
%$x$, and \llang{"..."}~--- a string constant.
The semantics of enriched expressions is modified as follows. First, we add two additional premises to the rule for binary operators:
\setsubarrow{_{\mathscr E}}
\arraycolsep=10pt
\[\trule{\begin{array}{cc}
\withenv{\Phi}{\trans{c}{A}{c^\prime}} & \withenv{\Phi}{\trans{c^\prime}{B}{c^{\prime\prime}}}\\
\primi{val}{c^\prime}\in\mathbb Z & \primi{val}{c^{\prime\prime}}\in\mathbb Z
\end{array}
}
{\withenv{\Phi}{\trans{c}{A\otimes B}{\primi{ret}{c^{\prime\prime}\;(\primi{val}{c^\prime}\oplus \primi{val}{c^{\prime\prime}})}}}}
\ruleno{Binop}
\]
These two premises ensure that both operand expressions are evaluated into integer values. Second, we have to add the rules for new
kinds of expressions (see Figure~\ref{array_expressions}).
\begin{figure}
\arraycolsep=10pt
\[\trule{\begin{array}{cc}
\withenv{\Phi}{\trans{c}{e}{c^\prime}} &\withenv{\Phi}{\trans{c^\prime}{j}{c^{\prime\prime}}}\\
l=\primi{val}{c^\prime} &j=\primi{val}{c^{\prime\prime}}\\
l\in\mathscr L &j\in\mathbb N\\
(n,\,f)=\primi{mem}{l} &j<n
\end{array}}
{\withenv{\Phi}{\trans{c}{e\,\mathtt{[}j\mathtt{]}}{\primi{ret}{c^{\prime\prime}(f\;j)}}}}
\ruleno{ArrayElement}
\]
\vskip5mm
\[\trule{\begin{array}{c}
\withenv{\Phi}{\trans{c_j}{e_j}{c_{j+1}}},\,j\in [0..k]\\
\inbr{s,\,\mu,\,l_m,\,i,\,o,\,\_}=c_{k+1}
\end{array}
}
{\withenv{\Phi}{\trans{c_0}{\mathtt{[}e_0, e_1,...,e_k\mathtt{]}}{\inbr{s,\,\mu\,[l_m\gets(k+1,\,\lambda n.\primi{val}{c_n})],\,l_{m+1},\,i,\,o,\,l_m}}}}
\ruleno{Array}
\]
\vskip5mm
\[\trule{\begin{array}{c}
\withenv{\Phi}{\trans{c}{e}{c^\prime}}\\
l=\primi{val}{c^\prime}\\
l\in\mathscr L\\
(n,\,f)=(\primi{mem}{c^\prime})\;l
\end{array}
}
{\withenv{\Phi}{\trans{c}{e\mathtt{.length}}{\primi{return}{c^\prime\;n}}}}
\ruleno{ArrayLength}
\]
\caption{Big-step Operational Semantics for Array Expressions}
\label{array_expressions}
\end{figure}
\subsection{Adding arrays on statement level}
On statement level, we add the single construct:
\[
\mathscr S += \mathscr E \mathtt{[} \mathscr E \mathtt{]} \llang{:=} \mathscr E
\]
This construct is interpreted as an assignment to an element of an array. The semantics of this construct is described by the following rule:
\[
\trule{\setsubarrow{_{\mathscr E}}
\arraycolsep=10pt
\begin{array}{ccc}
\withenv{\Phi}{\trans{c}{e}{c^\prime}} & \withenv{\Phi}{\trans{c^\prime}{j}{c^{\prime\prime}}} & \withenv{\Phi}{\trans{c^{\prime\prime}}{g}{\inbr{s,\mu,l_m,i,o,v}}}\\
l=\primi{val}{c^\prime} & i=\primi{val}{c^{\prime\prime}} & \\
l\in\mathscr L & i\in\mathbb N & \\[3mm]
\multicolumn{3}{c}{(n,\,f)=\mu\;l}\\
\multicolumn{3}{c}{i<n}\\
\multicolumn{3}{c}{\setsubarrow{}\withenv{\llang{skip},\,\Phi}{\trans{\inbr{s,\,\mu\,[l\gets (n,\,f\,[i\gets x])],\,l_m,\,i,\,o,\,\hbox{---}}}{K}{\widetilde{c}}}}
\end{array}
}
{\setsubarrow{}\withenv{K,\,\Phi}{\trans{c}{e\mathtt{[}j\mathtt{]}\llang{:=}g}{\widetilde{c}}}}
\ruleno{ArrayAssign}
\]
\subsection{Strings}
With arrays in our hands, we can easily add strings as arrays of characters. In fact, on the source language the strings can be
introduced as a syntactic extension:
\begin{enumerate}
\item we add a character constants \llang{'c'} as a shortcut for their integer codes;
\item we add a string literals \llang{"abcd..."} as a shortcut for arrays \llang{['a', 'b', 'c', 'd', ...]}.
\end{enumerate}
Nothing else has to be done~--- now we have mutable reference-representable strings.