This commit is contained in:
Dmitry Boulytchev 2018-11-15 17:41:10 +03:00
parent ff53a6ba7e
commit 29efb45353

143
doc/07.tex Normal file
View file

@ -0,0 +1,143 @@
\section{Arrays and strings}
An array can be represented as a pair: the length of the array and a mapping from indices to elements. If we denote
$\mathscr E$ the set of elements then the set of all arrays $\mathscr A (\mathscr E)$ can be defined as follows:
\[
\mathscr A(\mathscr E) = \mathbb N \times (\mathbb N \to \mathscr E)
\]
For an array $(n, f)$ we assume $\dom{f}=[0\,..\,n-1]$. An element selection function:
\[
\begin{array}{c}
[\bullet] : \mathscr A (\mathscr E) \to \mathbb N \to \mathscr E\\[2mm]
(n, f) [i] = \left\{
\begin{array}{rcl}
f\;i &, & i < n\\
\bot&,&\;\mbox{otherwise}
\end{array}
\right.
\end{array}
\]
\begin{comment}
A set of (ASCII) characters:
\[
\mathscr C = [0\,..\,255]
\]
A string is an array of characters:
\[
\mathscr S = \mathscr A (\mathscr C)
\]
Adding strings and arrays to the language changes the set of values the programs operate on:
\end{comment}
We represent arrays by \emph{references}. Thus, we introduce a (linearly) ordered set of locations
\[
\mathscr L = \{l_0, l_1, \dots\}
\]
Now, the set of all values the programs operate on can be described as follows:
\[
\mathscr V = \mathbb Z \uplus \mathscr L
\]
Here, every value is either an integer, or a reference (some location). The disjoint union ``$\uplus$'' makes it possible to
unambiguously discriminate between the shapes of each value. To access arrays, we introduce an abstraction of memory:
\[
\mathscr M = \mathscr L \to \mathscr A\,(\mathscr V)
\]
We now add two more components to the states: a memory function $\mu$ and the first free memory location $l_m$.
%Enriching the value set extends the definitions of state (which now has to map variable names to values) and
%stack (similarly), but does not affect the definition of input/output streams, read and write primitives, etc.
\subsection{Adding arrays/strings on expression level}
On expression level, abstractly/concretely:
\[
\begin{array}{rclll}
\mathscr E += & &\mathscr E \mathtt{[} \mathscr E \mathtt{]} & (\llang{$a$ [$e$]}) & \mbox{taking an element}\\
& \mid & \mathtt{[} \mathscr E^* \mathtt{]} & (\llang{[$e_1$, $e_2$,.., $e_k$]}) & \mbox{creating an array}\\
& \mid & \mathscr E \mathtt{.length} & ($e$\mathtt{.length}) & \mbox{taking the length}
\end{array}
\]
In addition, in a concrete syntax we supply two special forms for strings: \llang{'$x$'} as a denotation for integer code of ASCII symbol
$x$, and \llang{"..."}~--- a string constant.
The semantics of enriched expressions is modified as follows. First, we add two additional premises to the rule for binary operators:
\setsubarrow{_{\mathscr E}}
\[\trule{\begin{array}{c}
\withenv{\Phi}{\trans{c}{A}{c^\prime=\inbr{\_,\, \_,\, \_,\, a}}}\\
\withenv{\Phi}{\trans{c^\prime}{B}{\inbr{\sigma^{\prime\prime}, i^{\prime\prime}, o^{\prime\prime}, b}}}\\
a\in\mathbb Z,\,b\in\mathbb Z
\end{array}
}
{\withenv{\Phi}{\trans{c}{A\otimes B}{\inbr{\sigma^{\prime\prime}, i^{\prime\prime}, o^{\prime\prime}, a\oplus b}}}}
\ruleno{Binop}
\]
These two premises ensure that both operand expressions are evaluated into integer values. Second, we have to add the rules for new
kinds of expressions (see Figure~\ref{array_expressions}).
\begin{figure}
\[\trule{\begin{array}{c}
\withenv{\Phi}{\trans{c}{E}{c^\prime=\inbr{\_, \_, \_, l}}}\\
\withenv{\Phi}{\trans{c^\prime}{J}{\inbr{s^{\prime\prime}=\inbr{\_,\, \_,\, \_,\, \mu,\,\_},\, i^{\prime\prime},\, o^{\prime\prime},\, j}}}\\
l\in \mathscr L,\,(n,\,f)=\mu\,\;l\,j\in\mathbb N,\,j<n
\end{array}}
{\withenv{\Phi}{\trans{c}{E\,\mathtt{[}J\mathtt{]}}{\inbr{s^{\prime\prime}, i^{\prime\prime}, o^{\prime\prime}, f\,j}}}}
\ruleno{ArrayElement}
\]
\[\trule{\withenv{\Phi}{\trans{c_j}{e_j}{c_{j+1}=\inbr{\inbr{\sigma_g^{j+1},\,S^{j+1},\,\sigma_l^{j+1},\,\mu^{j+1},\,l_{m_{j+1}}}, i_{j+1}, o_{j+1}, a_j}}},\,j\in [0..k]}
{\withenv{\Phi}{\trans{c_0}{\mathtt{[}e_0, e_1,...,e_k\mathtt{]}}{\inbr{\inbr{\sigma_g^{j+1},\,S^{j+1},\,\sigma_l^{j+1},\,\mu^{j+1}[l_{m_{k+1}}\gets (k,\,\lambda i\,.\,a_i)],\, l_{m_{k+1}+1}}, i_{k+1}, o_{k+1}, l_{m_{k+1}}}}}}
\ruleno{ArrayConstructor}
\]
\[\trule{\begin{array}{c}
\withenv{\Phi}{\trans{c}{E}{\inbr{s^{\prime\prime}=\inbr{\_,\,\_,\,\_,\,\mu,\,\_}, i^{\prime\prime}, o^{\prime\prime}, l}}}\\
l\in\mathscr L,\,(n,\,f)=\mu\;l
\end{array}
}
{\withenv{\Phi}{\trans{c}{E\mathtt{.length}}{\inbr{s^{\prime\prime}, i^{\prime\prime}, o^{\prime\prime}, n}}}}
\ruleno{ArrayLength}
\]
\caption{Big-step Operational Semantics for Array Expressions}
\label{array_expressions}
\end{figure}
\subsection{Adding arrays/strings on statement level}
On statement level, we add the single construct:
\[
\mathscr S += \mathscr E \mathtt{[} \mathscr E \mathtt{]} \llang{:=} \mathscr E
\]
This construct is interpreted as an assignment to an element of an array. The semantics of this construct is described by the following rule:
\[
\trule{\setsubarrow{_{\mathscr E}}
\begin{array}{c}
\withenv{\Phi}{\trans{c}{E}{c^\prime=\inbr{\_,\_,\_, (n,\,f)}}}\\
\withenv{\Phi}{\trans{c^\prime}{J}{c^{\prime\prime}=\inbr{\_,\_,\_,j}}}\\
\withenv{\Phi}{\trans{c^{\prime\prime}}{F}{c^{\prime\prime\prime}=\inbr{s^{\prime\prime},i^{\prime\prime},o^{\prime\prime},y}}}\\
j\in\mathbb N,\,j<n\\
\setsubarrow{}\withenv{\llang{skip},\,\Phi}{\trans{\inbr{\mathtt{update}(s^{\prime\prime}, E, (n,\,f [j\gets y])),i^{\prime\prime},o^{\prime\prime},\hbox{---}}}{K}{\widetilde{c}}}
\end{array}
}
{\setsubarrow{}\withenv{K,\,\Phi}{\trans{c}{E\mathtt{[}J\mathtt{]}\llang{:=}F}{\widetilde{c}}}}
\ruleno{ArrayAssign}
\]