Merge branch 'e2e4b6b7-1.30' into 1.30

This commit is contained in:
Dmitry Boulytchev 2024-08-28 20:53:48 +03:00
commit c8a56b714b
64 changed files with 6234 additions and 3976 deletions

View file

@ -1,12 +1,10 @@
name: Build
name: Main workflow
on:
pull_request:
push:
paths-ignore:
- 'README.md'
branches:
- '1.10'
permissions: read-all
jobs:
build:
@ -14,27 +12,25 @@ jobs:
fail-fast: false
matrix:
os:
#- macos-latest
- ubuntu-20.04
#- windows-latest
ocaml-version:
#- 4.11.0
- 4.10.1
#- 4.09.1
#- 4.08.1
- ubuntu-latest
- macos-latest
ocaml-compiler:
- 4.13.1
runs-on: ${{ matrix.os }}
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Use OCaml ${{ matrix.ocaml-version }}
uses: avsm/setup-ocaml@v1
- name: Checkout tree
uses: actions/checkout@v3
- name: Set-up OCaml ${{ matrix.ocaml-compiler }}
uses: ocaml/setup-ocaml@v2
with:
ocaml-version: ${{ matrix.ocaml-version }}
ocaml-compiler: ${{ matrix.ocaml-compiler }}
- run: opam pin add Lama.dev . --no-action
- run: opam depext Lama.dev --yes --with-test
- run: opam install . --deps-only --with-test
- run: opam exec -- make #dune build
- run: opam exec -- make regression # dune runtest
- run: eval $(opam env)
- run: opam exec -- make
- run: opam exec -- make regression

1
.gitignore vendored
View file

@ -5,3 +5,4 @@
*.o
.merlin
.vscode

View file

@ -1,18 +1,23 @@
opam-version: "2.0"
version: "1.10"
version: "1.30"
synopsis: "Lama programming system"
synopsis: "Lama programming language"
maintainer: "dboulytchev@gmail.com"
authors: "dboulytchev@gmail.com"
homepage: "https://github.com/JetBrains-Research/Lama"
bug-reports: "https://github.com/JetBrains-Research/Lama/issues"
authors: [
"Dmitry Boulytchev <dboulytchev@gmail.com>"
"Daniil Berezun <danya.berezun@gmail.com>"
"Egor Sheremetov <egor.sheremetov.dev@gmail.com>"
]
homepage: "https://github.com/PLTools/Lama"
bug-reports: "https://github.com/PLTools/Lama/issues"
depends: [
"ocaml" { >= "4.07.1" }
"ocaml" { >= "4.13.1" }
"ocamlfind" { build }
"camlp5" { >= "8.00.05" }
"ostap" { >= "0.5"}
"GT" { >= "0.5.0" }
"GT" { >= "0.5.1" }
"posix-uname" { = "2.0.2" }
]
build: [
@ -21,11 +26,7 @@ build: [
]
install: [make "install"]
depexts: [
["gcc-multilib"] {os-family = "debian"}
]
dev-repo: "git+https://github.com/JetBrains-Research/Lama.git"
dev-repo: "git+https://github.com/PLTools/Lama.git"
url {
src: "git+https://github.com/JetBrains-Research/Lama.git#1.10+ocaml4.10"
src: "git+https://github.com/PLTools/Lama.git#1.30"
}

View file

@ -1,17 +1,21 @@
EXECUTABLE = src/lamac
INSTALL ?= install -v
MKDIR ?= mkdir
BUILDDIR = build
.PHONY: all regression
all:
$(MAKE) -C src
$(MAKE) -C runtime
$(MAKE) -C byterun
$(MAKE) -C stdlib
STD_FILES=$(shell ls stdlib/*.[oi] stdlib/*.lama runtime/runtime.a runtime/Std.i)
build: all
mkdir -p $(BUILDDIR)
cp -r runtime/Std.i runtime/runtime.a stdlib/* src/lamac $(BUILDDIR)
install: all
$(INSTALL) $(EXECUTABLE) `opam var bin`
$(MKDIR) -p `opam var share`/Lama
@ -21,13 +25,24 @@ uninstall:
$(RM) -r `opam var share`/Lama
$(RM) `opam var bin`/$(EXECUTABLE)
regression-all: regression regression-expressions
regression:
$(MAKE) clean check -C regression
$(MAKE) clean check -C stdlib/regression
$(MAKE) clean check -j8 -C regression
$(MAKE) clean check -j8 -C stdlib/regression
regression-expressions:
$(MAKE) clean check -j8 -C regression/expressions
$(MAKE) clean check -j8 -C regression/deep-expressions
negative_scenarios_tests:
$(MAKE) -C runtime negative_tests
clean:
$(MAKE) clean -C src
$(MAKE) clean -C runtime
$(MAKE) clean -C stdlib
$(MAKE) clean -C regression
$(MAKE) clean -C byterun
$(MAKE) clean -C bench
rm -rf $(BUILDDIR)

View file

@ -1,11 +1,9 @@
| Lama 1.10 | Lama-devel 1.10 |
| -------------------- | -------------------------- |
| [![Lama 1.10][1]][2] | [![Lama-devel 1.10][3]][4] |
| Lama 1.3 |
|---------------------|
| [![Lama 1.3][1]][2] |
[1]: https://github.com/JetBrains-Research/Lama/workflows/Build/badge.svg?branch=1.10
[2]: https://github.com/JetBrains-Research/Lama/actions
[3]: https://github.com/JetBrains-Research/Lama-devel/workflows/Build/badge.svg?branch=1.10
[4]: https://github.com/JetBrains-Research/Lama-devel/actions
[1]: https://github.com/PLTools/Lama/Lama/workflows/Build/badge.svg?branch=1.30
[2]: https://github.com/PLTools/Lama/Lama/actions
# Lama
@ -26,13 +24,12 @@ The name ![lama](lama.svg) is an acronym for *Lambda-Algol* since the language h
The main purpose of ![lama](lama.svg) is to present a repertoire of constructs with certain runtime behavior and relevant implementation techniques.
The lack of a type system (a vital feature for a real-world language
for software engineering) is an intensional decision that allows showing the unchained diversity of runtime behaviors, including those that a typical type system is called to prevent.
for software engineering) is an intensional decision that allows showing the unchained diversity of runtime behaviors, including those that a typical type system is called to prevent.
On the other hand the language can be used in the future as a raw substrate to apply various ways of software verification (including type systems).
The current implementation contains a native code compiler for **x86-32**, written in **OCaml**, a runtime library with garbage-collection support, written in **C**, and a small standard library, written in ![lama](lama.svg) itself.
The native code compiler uses **gcc** as a toolchain.
The current implementation contains a native code compiler for **x86-64**, written in **OCaml**, a runtime library with garbage-collection support, written in **C**, and a small standard library, written in ![lama](lama.svg) itself.
In addition, a source-level reference interpreter is implemented as well as a compiler to a small stack machine.
In addition, a source-level reference interpreter is implemented as well as a compiler to a small stack machine.
The stack machine code can in turn be either interpreted on a stack machine interpreter, or used as an intermediate representation by the native code compiler.
## Language Specification
@ -41,27 +38,24 @@ The language specification can be found [here](lama-spec.pdf).
## Installation
Supported target: GNU/Linux x86_32 (x86_64 by running 32-bit mode)
***Mac*** users should use either a virtual machine or docker with a Linux distributive inside.
Supported target: GNU/Linux x86_64, MacOS x86_64 (arm using Rosetta).
***Windows*** users should get Windows Subsystem for Linux a.k.a WSL (recommended) or cygwin.
Ubuntu-based variant of WSL is recommended.
* System-wide prerequisites:
- `gcc-multilib`
For example, (for Debian-based GNU/Linux):
```bash
$ sudo apt install gcc-multilib
- Linux: `gcc`
For example, (for Debian-based Linux):
```bash
sudo apt install gcc
```
On some versions, you need to install the additional package `lib32gcc-9-dev` in case of errors like
```
/usr/bin/ld: cannot find -lgcc
/usr/bin/ld: skipping incompatible /usr/lib/gcc/x86_64-linux-gnu/9/libgcc.a when searching for -lgcc
```
- MacOS: `clang`
Should be automatically installed with developer tools.
- [opam](http://opam.ocaml.org) (>= 2.0.4)
- [OCaml](http://ocaml.org) (>= 4.10.1). *Optional* because it can be easily installed through opam.
Compiler variant with `flambda` switch is recommended.
@ -73,22 +67,18 @@ Ubuntu-based variant of WSL is recommended.
1. Install the right [switch](https://opam.ocaml.org/doc/Manual.html#Switches) for the OCaml compiler
```bash
# for fresh opam
$ opam switch create lama --packages=ocaml-variants.4.14.0+options,ocaml-option-flambda
# for old opam
$ opam switch create lama ocaml-variants.4.10.1+flambda
opam switch create lama --packages=ocaml-variants.4.14.0+options,ocaml-option-flambda
```
* In the above command:
- `opam switch create` is a subcommand to create a new switch
- `ocaml-variants.4.10.1+flambda` is the name of a standard template for the switch
- `ocaml-variants.4.14.0+options` is the name of a standard template for the switch
- `lama` is an alias for the switch being created; on success a directory `$(HOME)/.opam/lama` should be created
2. Update PATH variable for the fresh switch. (You can add these commands to your `~/.bashrc` for convenience but they should be added by `opam`)
```bash
$ export OPAMSWITCH=lama
$ eval $(opam env)
eval $(opam env --switch=lama --set-switch)
```
* Check that the OCaml compiler is now available in PATH by running `which ocamlc`; it should answer with `/home/user/.opam/lama/bin/ocamlc` (or similar) and `ocamlc -v` should answer with
@ -100,16 +90,15 @@ Ubuntu-based variant of WSL is recommended.
3. Pin Lama package using `opam` and right URL (remember of "#" being a comment character in various shells)
```bash
$ opam pin add Lama https://github.com/JetBrains-Research/Lama.git\#1.10 --no-action
opam pin add Lama https://github.com/PLTools/Lama.git\#1.30 --no-action
```
The extra '#' sign is added because in various Shells it is the start of a comment
4. Install *dep*endencies on system-wide *ext*ernal packages and `lama` itself after that.
4. Install dependencies on system-wide external packages and `lama` itself after that.
```bash
$ opam depext Lama --yes
$ opam install Lama --yes
opam install Lama --yes
```
5. Check that `lamac` executable was installed: `which lamac` should answer with
@ -120,5 +109,20 @@ Ubuntu-based variant of WSL is recommended.
### Smoke-testing (optional)
Clone the repository and run `make -C tutorial`.
Clone the repository and run `make -C tutorial`.
It should build a local compiler `src/lamac` and a few tutorial executables in `tutorial/`.
### Useful links
* [Plugin for VS Code](https://marketplace.visualstudio.com/items?itemName=mrartemsav.lama-lsp)
### Changes in Lama 1.3
* Migrated from x86-32 to x86-64 architecture.
* Added `let ... in ...` construct.
* Added `-g` mode
* Changed regex syntax ...
### Changes in Lama 1.2
* New garbage collector: single-threaded stop-the-world `LISP2` (see GC Handbook for details: [1st edition](https://www.cs.kent.ac.uk/people/staff/rej/gcbook/), [2nd edition](http://gchandbook.org/)) [mark-compact](https://www.memorymanagement.org/glossary/m.html#term-mark-compact).

View file

@ -1,8 +1,10 @@
FLAGS=-g -fstack-protector-all
all: byterun.o
$(CC) -m32 -g -o byterun byterun.o ../runtime/runtime.a
$(CC) $(FLAGS) -o byterun byterun.o ../runtime/runtime.a
byterun.o: byterun.c
$(CC) -g -fstack-protector-all -m32 -c byterun.c
$(CC) $(FLAGS) -g -c byterun.c
clean:
$(RM) *.a *.o *~
$(RM) *.a *.o *~ byterun

3
dune-project Normal file
View file

@ -0,0 +1,3 @@
(lang dune 3.3)
(cram enable)

Binary file not shown.

View file

@ -1,18 +1,24 @@
TESTS=$(sort $(basename $(wildcard test*.lama)))
DEBUG_FILES=stack-dump-before data-dump-before extra-roots-dump-before heap-dump-before stack-dump-after data-dump-after extra-roots-dump-after heap-dump-after
TESTS=$(sort $(filter-out test111, $(basename $(wildcard test*.lama))))
LAMAC=../src/lamac
.PHONY: check $(TESTS)
check: $(TESTS)
check: $(TESTS) ctest111
$(TESTS): %: %.lama
@echo $@
cat $@.input | LAMA=../runtime $(LAMAC) -i $< > $@.log && diff $@.log orig/$@.log
cat $@.input | LAMA=../runtime $(LAMAC) -ds -s $< > $@.log && diff $@.log orig/$@.log
LAMA=../runtime $(LAMAC) $< && cat $@.input | ./$@ > $@.log && diff $@.log orig/$@.log
@echo "regression/$@"
@cat $@.input | LAMA=../runtime $(LAMAC) -i $< > $@.log && diff $@.log orig/$@.log
@cat $@.input | LAMA=../runtime $(LAMAC) -ds -s $< > $@.log && diff $@.log orig/$@.log
@LAMA=../runtime $(LAMAC) $< && cat $@.input | ./$@ > $@.log && diff $@.log orig/$@.log
ctest111:
@echo "regression/test111"
@LAMA=../runtime $(LAMAC) test111.lama && cat test111.input | $(ARCH) ./test111 > test111.log && diff test111.log orig/test111.log
clean:
$(RM) test*.log *.s *~ $(TESTS) *.i
$(RM) test*.log *.s *.sm *~ $(TESTS) *.i $(DEBUG_FILES) test111
$(MAKE) clean -C expressions
$(MAKE) clean -C deep-expressions

View file

@ -7,10 +7,10 @@ LAMAC = ../../src/lamac
check: $(TESTS)
$(TESTS): %: %.lama
@echo $@
@echo "regression/deep-expressions/$@"
@LAMA=../../runtime $(LAMAC) $< && cat $@.input | ./$@ > $@.log && diff $@.log orig/$@.log
@cat $@.input | $(LAMAC) -i $< > $@.log && diff $@.log orig/$@.log
@cat $@.input | $(LAMAC) -s $< > $@.log && diff $@.log orig/$@.log
@cat $@.input | LAMA=../../runtime $(LAMAC) -i $< > $@.log && diff $@.log orig/$@.log
@cat $@.input | LAMA=../../runtime $(LAMAC) -s $< > $@.log && diff $@.log orig/$@.log
clean:
rm -f *.log *.s *~

View file

@ -7,10 +7,10 @@ RC = ../../src/lamac
check: $(TESTS)
$(TESTS): %: %.lama
@echo $@
@echo "regression/expressions/$@"
@LAMA=../../runtime $(RC) $< && cat $@.input | ./$@ > $@.log && diff $@.log orig/$@.log
@cat $@.input | $(RC) -i $< > $@.log && diff $@.log orig/$@.log
@cat $@.input | $(RC) -s $< > $@.log && diff $@.log orig/$@.log
@cat $@.input | LAMA=../../runtime $(RC) -i $< > $@.log && diff $@.log orig/$@.log
@cat $@.input | LAMA=../../runtime $(RC) -s $< > $@.log && diff $@.log orig/$@.log
clean:
rm -f *.log *.s *~

View file

View file

@ -0,0 +1,10 @@
fun f (a, b, c, d, e) {
var i = d / b;
write (a);
write (b);
write (c);
write (d);
write (e)
}
f (1, 2, 3, 4, 5)

View file

View file

@ -0,0 +1,15 @@
fun f (a, b, c, d, e, f, g, h) {
var i = 9, j = 10;
write (a);
write (b);
write (c);
write (d);
write (e);
write (f);
write (g);
write (h);
write (i);
write (j)
}
f (1, 2, 3, 4, 5, 6, 7, 8)

View file

View file

@ -0,0 +1,18 @@
fun f (a, b, c, d, e, f, g, h) {
fun g (unit) {
var tmp1 = 1 + (1 + (1 + (1 + (1 + 1 + (1 + (1 + (1 + (1 + 1 + (1 + (1 + (1 + (1 + 1))))))))))));
var tmp2 = (((((((((((((1 + 1) + 1) + 1) + 1) + 1) + 1) + 1) + 1) + 1) + 1) + 1) + 1) + 1) + 1;
write (a);
write (b);
write (c);
write (d);
write (e);
write (f);
write (g);
write (h)
}
g
}
f (1, 2, 3, 4, 5, 6, 7, 8)(0)

View file

@ -0,0 +1,5 @@
1
2
3
4
5

View file

@ -0,0 +1,10 @@
1
2
3
4
5
6
7
8
9
10

View file

@ -0,0 +1,10 @@
1
2
3
4
5
6
7
8
9
10

View file

@ -0,0 +1,6 @@
97
98
99
100
97
98

View file

@ -0,0 +1,11 @@
1
2
5
6
7
8
5
6
7
8
3

View file

@ -5,7 +5,7 @@ fun f (x) {
A -> write (1)
| B -> write (2)
| C -> write (3)
| _ -> write (4)
| _ -> write (4)
esac
}

View file

@ -9,14 +9,14 @@ fun sort (x) {
for i := 0, i<n, i := i+1 do
for j := i+1, j<n, j := j+1 do
if x[j] < x[i] then
y := x[i];
x[i] := x[j];
x[j] := y
y := x[i];
x[i] := x[j];
x[j] := y
fi
od
od;
x
x
}
n := read ();

View file

@ -28,4 +28,3 @@ write (sum ({}));
write (sum ({1, 2, 3, 4, 5}));
write (sum (1:2:3:4:5:{}));
print_list (array_to_list ([1, 2, 3, 4, 5]))

View file

@ -1,5 +1,5 @@
infix ++ at + (a, b) { a+b}
infix +++ at + (a, b) { a+b}
var x = read ();
write (infix ++ (2, 3))
write (infix +++ (2, 3))

0
regression/test111.input Normal file
View file

21
regression/test111.lama Normal file
View file

@ -0,0 +1,21 @@
import Std;
fun printString (s) {
var i;
for i := 0, i < s.length, i := i + 1 do
write (s[i])
od
}
fun printListConcat ( str_list ) {
printString (stringcat ( str_list ))
}
var l = { "a" , "b" , "c" , "d" };
printListConcat (l);
l := { "ab" };
printListConcat (l);
l := {};
printListConcat (l)

0
regression/test112.input Normal file
View file

16
regression/test112.lama Normal file
View file

@ -0,0 +1,16 @@
-- let A (x,y) be A(5,6) in write(x); write(y) ni;
-- case A(5,6) of A(x,y) -> write(x); write(y) esac;
-- let A (x,y) be A(5,6) in ....
-- let A (x,y) be A(5,6) <----- void then ;
fun f (x) {
write(x)
}
f(let x = A(1,2) in x[0]);
f(let x = A(1,2) in x[1]);
let A (x,y) = A(5,6) in let B(z,e) = B(7,8) in write(x); write(y); write(z); write(e);
let A (x,y) = A(5,6) in (let B(z,e) = B(7,8) in write(x); write(y); write(z); write(e));
let x = (let x = C(1,2) in x[1]) + (let x = C(1,2) in x[0]) in write(x)

134
runtime/.clang-format Normal file
View file

@ -0,0 +1,134 @@
# Common settings
BasedOnStyle: LLVM
TabWidth: 2
IndentWidth: 2
UseTab: Never
ColumnLimit: 100
IndentCaseLabels: true
# https://clang.llvm.org/docs/ClangFormatStyleOptions.html
---
Language: Cpp
DisableFormat: false
Standard: Auto
AccessModifierOffset: -4
AlignAfterOpenBracket: true
AlignConsecutiveAssignments: Consecutive
AlignConsecutiveDeclarations: Consecutive
AlignEscapedNewlines: Right
AlignOperands: true
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: Always
AllowShortCaseLabelsOnASingleLine: true
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: AllIfsAndElse
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: false
BinPackParameters: true
BitFieldColonSpacing: Both
# Configure each individual brace in BraceWrapping
BreakBeforeBraces: Attach
# Control of individual brace wrapping cases
BraceWrapping:
AfterClass: true
AfterControlStatement: Always
AfterEnum : true
AfterFunction : true
AfterNamespace : true
AfterStruct : true
AfterUnion : true
BeforeCatch : true
BeforeElse : true
IndentBraces : false
AfterExternBlock : true
SplitEmptyFunction : false
SplitEmptyRecord : false
SplitEmptyNamespace : true
BreakAfterJavaFieldAnnotations: true
BreakBeforeInheritanceComma: false
BreakBeforeBinaryOperators: NonAssignment
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: true
BreakStringLiterals: true
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
SpaceBeforeCpp11BracedList: false
DerivePointerAlignment: false
ExperimentalAutoDetectBinPacking: false
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
IndentCaseLabels: true
FixNamespaceComments: true
IndentWrappedFunctionNames: true
KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockBegin: ''
MacroBlockEnd: ''
JavaScriptQuotes: Double
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Right
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: Custom
SpaceBeforeParensOptions:
AfterControlStatements: true
AfterForeachMacros: true
AfterFunctionDeclarationName: true
AfterFunctionDefinitionName: true
AfterIfMacros: true
AfterOverloadedOperator: true
AfterRequiresInClause: true
AfterRequiresInExpression: true
BeforeNonEmptyParentheses: false
SpaceBeforeRangeBasedForLoopColon: false
SpaceInEmptyBlock: true
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 3
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInConditionalStatement: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceAfterTemplateKeyword: true
SpaceBeforeInheritanceColon: true
SortUsingDeclarations: true
SortIncludes: CaseInsensitive
IndentGotoLabels: false
InsertBraces: false
# Comments are for developers, they should arrange them
ReflowComments: false
IncludeBlocks: Regroup
IndentPPDirectives: AfterHash
SeparateDefinitionBlocks: Always

View file

@ -1,12 +1,30 @@
UNAME_S := $(shell uname -s)
all: gc_runtime.o runtime.o
ar rc runtime.a gc_runtime.o runtime.o
ifeq ($(UNAME_S),Linux)
CC=gcc
else ifeq ($(UNAME_S),Darwin)
CC=clang
ARCH = -arch x86_64
endif
gc_runtime.o: gc_runtime.s
$(CC) -g -fstack-protector-all -m32 -c gc_runtime.s
DISABLE_WARNINGS=-Wno-shift-negative-value
COMMON_FLAGS=$(DISABLE_WARNINGS) -g -fstack-protector-all $(ARCH) --std=c11
PROD_FLAGS=$(COMMON_FLAGS) -DLAMA_ENV
TEST_FLAGS=$(COMMON_FLAGS) -DDEBUG_VERSION
UNIT_TESTS_FLAGS=$(TEST_FLAGS)
INVARIANTS_CHECK_FLAGS=$(TEST_FLAGS) -DFULL_INVARIANT_CHECKS
all: gc.o runtime.o printf.o
ar rc runtime.a runtime.o gc.o printf.o
gc.o: gc.c gc.h
$(CC) $(PROD_FLAGS) -c gc.c -o gc.o
runtime.o: runtime.c runtime.h
$(CC) -g -fstack-protector-all -m32 -c runtime.c
$(CC) $(PROD_FLAGS) -c runtime.c -o runtime.o
printf.o: printf.S
$(CC) $(PROD_FLAGS) -x assembler-with-cpp -c -g printf.S -o printf.o
clean:
$(RM) *.a *.o *~
$(RM) *.a *.o *~ negative_scenarios/*.err

956
runtime/gc.c Normal file
View file

@ -0,0 +1,956 @@
#define _GNU_SOURCE 1
// #define DEBUG_PRINT
#include "gc.h"
#include "runtime_common.h"
#include <assert.h>
#include <execinfo.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>
static const size_t INIT_HEAP_SIZE = MINIMUM_HEAP_CAPACITY;
#ifdef DEBUG_VERSION
size_t cur_id = 0;
#endif
static extra_roots_pool extra_roots;
size_t __gc_stack_top = 0, __gc_stack_bottom = 0;
#ifdef LAMA_ENV
#ifdef __linux__
extern const size_t __start_custom_data, __stop_custom_data;
#elif defined(__APPLE__)
extern const size_t __start_custom_data __asm("section$start$__DATA$custom_data");
extern const size_t __stop_custom_data __asm("section$end$__DATA$custom_data");
#endif
#endif
#ifdef DEBUG_VERSION
memory_chunk heap;
#else
static memory_chunk heap;
#endif
#ifdef DEBUG_VERSION
void dump_heap ();
#endif
void handler (int sig) {
void *array[10];
int size;
// get void*'s for all entries on the stack
size = backtrace(array, 10);
fprintf(stderr, "heap size is %zu\n", heap.size);
backtrace_symbols_fd(array, size, STDERR_FILENO);
exit(1);
}
void *alloc (size_t size) {
#ifdef DEBUG_VERSION
++cur_id;
#endif
size_t obj_size = size;
size = BYTES_TO_WORDS(size);
size_t padding = size * sizeof(size_t) - obj_size;
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "allocation of size %zu words (%zu bytes): ", size, bytes_sz);
#endif
void *p = gc_alloc_on_existing_heap(size);
if (!p) {
// fprintf(stderr, "Garbage collection is not implemented yet.\n");
// exit(149);
// not enough place in the heap, need to perform GC cycle
p = gc_alloc(size);
}
#ifdef DEBUG_PRINT
printf("Object allocated: content [%p, %p) padding [%p, %p)\n", p, p + obj_size, p + obj_size, p + size * sizeof(size_t));
fflush(stdout);
#endif
return p;
}
#ifdef FULL_INVARIANT_CHECKS
// precondition: obj_content is a valid address pointing to the content of an object
static void print_object_info (FILE *f, void *obj_content) {
data *d = TO_DATA(obj_content);
size_t obj_tag = TAG(d->data_header);
size_t obj_id = d->id;
fprintf(f, "id %zu tag %zu | ", obj_id, obj_tag);
}
static void print_unboxed (FILE *f, int unboxed) { fprintf(f, "unboxed %zu | ", unboxed); }
static FILE *print_stack_content (char *filename) {
FILE *f = fopen(filename, "w+");
ftruncate(fileno(f), 0);
fprintf(f, "Stack content:\n");
for (size_t *stack_ptr = (size_t *)((void *)__gc_stack_top + 4);
stack_ptr < (size_t *)__gc_stack_bottom;
++stack_ptr) {
size_t value = *stack_ptr;
if (is_valid_heap_pointer((size_t *)value)) {
fprintf(f, "%p, ", (void *)value);
print_object_info(f, (void *)value);
} else {
print_unboxed(f, (int)value);
}
fprintf(f, "\n");
}
fprintf(f, "Stack content end.\n");
return f;
}
// precondition: obj_content is a valid address pointing to the content of an object
static void objects_dfs (FILE *f, void *obj_content) {
void *obj_header = get_obj_header_ptr(obj_content);
data *obj_data = TO_DATA(obj_content);
// internal mark-bit for this dfs, should be recovered by the caller
if ((obj_data->forward_address & 2) != 0) { return; }
// set this bit as 1
obj_data->forward_address |= 2;
fprintf(f, "object at addr %p: ", obj_content);
print_object_info(f, obj_content);
/*fprintf(f, "object id: %zu | ", obj_data->id);*/
// first cycle: print object's fields
for (obj_field_iterator field_it = ptr_field_begin_iterator(obj_header);
!field_is_done_iterator(&field_it);
obj_next_field_iterator(&field_it)) {
size_t field_value = *(size_t *)field_it.cur_field;
if (is_valid_heap_pointer((size_t *)field_value)) {
print_object_info(f, (void *)field_value);
/*fprintf(f, "%zu ", TO_DATA(field_value)->id);*/
} else {
print_unboxed(f, (int)field_value);
}
}
fprintf(f, "\n");
for (obj_field_iterator field_it = ptr_field_begin_iterator(obj_header);
!field_is_done_iterator(&field_it);
obj_next_field_iterator(&field_it)) {
size_t field_value = *(size_t *)field_it.cur_field;
if (is_valid_heap_pointer((size_t *)field_value)) { objects_dfs(f, (void *)field_value); }
}
}
FILE *print_objects_traversal (char *filename, bool marked) {
FILE *f = fopen(filename, "w+");
ftruncate(fileno(f), 0);
for (heap_iterator it = heap_begin_iterator(); !heap_is_done_iterator(&it);
heap_next_obj_iterator(&it)) {
void *obj_header = it.current;
data *obj_data = TO_DATA(get_object_content_ptr(obj_header));
if ((obj_data->forward_address & 1) == marked) {
objects_dfs(f, get_object_content_ptr(obj_header));
}
}
// resetting bit that represent mark-bit for this internal dfs-traversal
for (heap_iterator it = heap_begin_iterator(); !heap_is_done_iterator(&it);
heap_next_obj_iterator(&it)) {
void *obj_header = it.current;
data *obj_data = TO_DATA(get_object_content_ptr(obj_header));
obj_data->forward_address &= (~2);
}
fflush(f);
// print extra roots
for (int i = 0; i < extra_roots.current_free; i++) {
fprintf(f, "extra root %p %p: ", extra_roots.roots[i], *(size_t **)extra_roots.roots[i]);
}
fflush(f);
return f;
}
int files_cmp (FILE *f1, FILE *f2) {
int symbol1, symbol2;
int position = 0;
while (true) {
symbol1 = fgetc(f1);
symbol2 = fgetc(f2);
if (symbol1 == EOF && symbol2 == EOF) { return -1; }
if (symbol1 != symbol2) { return position; }
++position;
}
}
#endif
void *gc_alloc_on_existing_heap (size_t size) {
if (heap.current + size <= heap.end) {
void *p = (void *)heap.current;
heap.current += size;
memset(p, 0, size * sizeof(size_t));
return p;
}
return NULL;
}
void *gc_alloc (size_t size) {
#ifdef DEBUG_PRINT
printf("Reallocation!\n");
#endif
fflush(stdout);
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "===============================GC cycle has started\n");
#endif
#ifdef FULL_INVARIANT_CHECKS
FILE *stack_before = print_stack_content("stack-dump-before-compaction");
FILE *heap_before = print_objects_traversal("before-mark", 0);
fclose(heap_before);
#endif
mark_phase();
#ifdef FULL_INVARIANT_CHECKS
FILE *heap_before_compaction = print_objects_traversal("after-mark", 1);
#endif
compact_phase(size);
#ifdef FULL_INVARIANT_CHECKS
FILE *stack_after = print_stack_content("stack-dump-after-compaction");
FILE *heap_after_compaction = print_objects_traversal("after-compaction", 0);
int pos = files_cmp(stack_before, stack_after);
if (pos >= 0) { // position of difference is found
fprintf(stderr, "Stack is modified incorrectly, see position %d\n", pos);
exit(1);
}
fclose(stack_before);
fclose(stack_after);
pos = files_cmp(heap_before_compaction, heap_after_compaction);
if (pos >= 0) { // position of difference is found
fprintf(stderr, "GC invariant is broken, pos is %d\n", pos);
exit(1);
}
fclose(heap_before_compaction);
fclose(heap_after_compaction);
#endif
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "===============================GC cycle has finished\n");
#endif
return gc_alloc_on_existing_heap(size);
}
static void gc_root_scan_stack () {
for (size_t *p = (size_t *)(__gc_stack_top + sizeof(size_t)); p < (size_t *)__gc_stack_bottom; ++p) {
gc_test_and_mark_root((size_t **)p);
}
}
void mark_phase (void) {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "marking has started\n");
fprintf(stderr,
"gc_root_scan_stack has started: gc_top=%p bot=%p\n",
(void *)__gc_stack_top,
(void *)__gc_stack_bottom);
#endif
gc_root_scan_stack();
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "gc_root_scan_stack has finished\n");
fprintf(stderr, "scan_extra_roots has started\n");
#endif
scan_extra_roots();
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "scan_extra_roots has finished\n");
fprintf(stderr, "scan_global_area has started\n");
#endif
#ifdef LAMA_ENV
scan_global_area();
#endif
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "scan_global_area has finished\n");
fprintf(stderr, "marking has finished\n");
#endif
}
void compact_phase (size_t additional_size) {
size_t live_size = compute_locations();
// all in words
size_t next_heap_size =
MAX(live_size * EXTRA_ROOM_HEAP_COEFFICIENT + additional_size, MINIMUM_HEAP_CAPACITY);
size_t next_heap_pseudo_size = MAX(next_heap_size, heap.size);
memory_chunk old_heap = heap;
heap.begin = mmap(NULL, WORDS_TO_BYTES(next_heap_pseudo_size), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
memcpy(heap.begin, old_heap.begin, WORDS_TO_BYTES(old_heap.size));
if (heap.begin == MAP_FAILED) {
perror("ERROR: compact_phase: mmap failed\n");
exit(1);
}
heap.end = heap.begin + next_heap_pseudo_size;
heap.size = next_heap_pseudo_size;
heap.current = heap.begin + (old_heap.current - old_heap.begin);
update_references(&old_heap);
physically_relocate(&old_heap);
heap.current = heap.begin + live_size;
if (munmap(old_heap.begin, old_heap.size) < 0) {
perror("ERROR: compact_phase: munmap failed\n");
exit(1);
}
}
size_t compute_locations () {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "GC compute_locations started\n");
#endif
size_t *free_ptr = heap.begin;
heap_iterator scan_iter = heap_begin_iterator();
for (; !heap_is_done_iterator(&scan_iter); heap_next_obj_iterator(&scan_iter)) {
void *header_ptr = scan_iter.current;
void *obj_content = get_object_content_ptr(header_ptr);
if (is_marked(obj_content)) {
size_t sz = BYTES_TO_WORDS(obj_size_header_ptr(header_ptr));
// forward address is responsible for object header pointer
set_forward_address(obj_content, (size_t)free_ptr);
free_ptr += sz;
}
}
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "GC compute_locations finished\n");
#endif
// it will return number of words
return free_ptr - heap.begin;
}
void scan_and_fix_region (memory_chunk *old_heap, void *start, void *end) {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "GC scan_and_fix_region started\n");
#endif
for (size_t *ptr = (size_t *)start; ptr < (size_t *)end; ++ptr) {
size_t ptr_value = *ptr;
// this can't be expressed via is_valid_heap_pointer, because this pointer may point area corresponding to the old
// heap
if (is_valid_pointer((size_t *)ptr_value) && (size_t)old_heap->begin <= ptr_value
&& ptr_value <= (size_t)old_heap->current) {
void *obj_ptr = (void *)heap.begin + ((void *)ptr_value - (void *)old_heap->begin);
void *new_addr =
(void *)heap.begin + ((void *)get_forward_address(obj_ptr) - (void *)old_heap->begin);
size_t content_offset = get_header_size(get_type_row_ptr(obj_ptr));
*(void **)ptr = new_addr + content_offset;
}
}
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "GC scan_and_fix_region finished\n");
#endif
}
void scan_and_fix_region_roots (memory_chunk *old_heap) {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "extra roots started: number of extra roots %i\n", extra_roots.current_free);
#endif
for (int i = 0; i < extra_roots.current_free; i++) {
size_t *ptr = (size_t *)extra_roots.roots[i];
size_t ptr_value = *ptr;
if (!is_valid_pointer((size_t *)ptr_value)) { continue; }
// skip this one since it was already fixed from scanning the stack
if ((extra_roots.roots[i] >= (void **)__gc_stack_top
&& extra_roots.roots[i] < (void **)__gc_stack_bottom)
#ifdef LAMA_ENV
|| (extra_roots.roots[i] <= (void **)&__stop_custom_data
&& extra_roots.roots[i] >= (void **)&__start_custom_data)
#endif
) {
#ifdef DEBUG_VERSION
if (is_valid_heap_pointer((size_t *)ptr_value)) {
# ifdef DEBUG_PRINT
fprintf(stderr,
"|\tskip extra root: %p (%p), since it points to Lama's stack top=%p bot=%p\n",
extra_roots.roots[i],
(void *)ptr_value,
(void *)__gc_stack_top,
(void *)__gc_stack_bottom);
# endif
}
# ifdef LAMA_ENV
else if ((extra_roots.roots[i] <= (void *)&__stop_custom_data
&& extra_roots.roots[i] >= (void *)&__start_custom_data)) {
fprintf(
stderr,
"|\tskip extra root: %p (%p), since it points to Lama's static area stop=%p start=%p\n",
extra_roots.roots[i],
(void *)ptr_value,
(void *)&__stop_custom_data,
(void *)&__start_custom_data);
exit(1);
}
# endif
else {
# ifdef DEBUG_PRINT
fprintf(stderr,
"|\tskip extra root: %p (%p): not a valid Lama pointer \n",
extra_roots.roots[i],
(void *)ptr_value);
# endif
}
#endif
continue;
}
if ((size_t)old_heap->begin <= ptr_value && ptr_value <= (size_t)old_heap->current) {
void *obj_ptr = (void *)heap.begin + ((void *)ptr_value - (void *)old_heap->begin);
void *new_addr =
(void *)heap.begin + ((void *)get_forward_address(obj_ptr) - (void *)old_heap->begin);
size_t content_offset = get_header_size(get_type_row_ptr(obj_ptr));
*(void **)ptr = new_addr + content_offset;
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr,
"|\textra root (%p) %p -> %p\n",
extra_roots.roots[i],
(void *)ptr_value,
(void *)*ptr);
#endif
}
}
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "|\textra roots finished\n");
#endif
}
void update_references (memory_chunk *old_heap) {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "GC update_references started\n");
#endif
heap_iterator it = heap_begin_iterator();
while (!heap_is_done_iterator(&it)) {
if (is_marked(get_object_content_ptr(it.current))) {
for (obj_field_iterator field_iter = ptr_field_begin_iterator(it.current);
!field_is_done_iterator(&field_iter);
obj_next_ptr_field_iterator(&field_iter)) {
size_t *field_value = *(size_t **)field_iter.cur_field;
if (field_value < old_heap->begin || field_value > old_heap->current) { continue; }
// this pointer should also be modified according to old_heap->begin
void *field_obj_content_addr =
(void *)heap.begin + (*(void **)field_iter.cur_field - (void *)old_heap->begin);
// important, we calculate new_addr very carefully here, because objects may relocate to another memory chunk
void *new_addr =
heap.begin
+ ((size_t *)get_forward_address(field_obj_content_addr) - (size_t *)old_heap->begin);
// update field reference to point to new_addr
// since, we want fields to point to an actual content, we need to add this extra content_offset
// because forward_address itself is a pointer to the object's header
size_t content_offset = get_header_size(get_type_row_ptr(field_obj_content_addr));
#ifdef DEBUG_VERSION
if (!is_valid_heap_pointer((void *)(new_addr + content_offset))) {
# ifdef DEBUG_PRINT
fprintf(stderr,
"ur: incorrect pointer assignment: on object with id %d",
TO_DATA(get_object_content_ptr(it.current))->id);
# endif
exit(1);
}
#endif
*(void **)field_iter.cur_field = new_addr + content_offset;
}
}
heap_next_obj_iterator(&it);
}
// fix pointers from stack
scan_and_fix_region(old_heap, (void *)__gc_stack_top + sizeof(size_t), (void *)__gc_stack_bottom + sizeof(size_t));
// fix pointers from extra_roots
scan_and_fix_region_roots(old_heap);
#ifdef LAMA_ENV
assert((void *)&__stop_custom_data >= (void *)&__start_custom_data);
scan_and_fix_region(old_heap, (void *)&__start_custom_data, (void *)&__stop_custom_data);
#endif
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "GC update_references finished\n");
#endif
}
void physically_relocate (memory_chunk *old_heap) {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "GC physically_relocate started\n");
#endif
heap_iterator from_iter = heap_begin_iterator();
while (!heap_is_done_iterator(&from_iter)) {
void *obj = get_object_content_ptr(from_iter.current);
heap_iterator next_iter = from_iter;
heap_next_obj_iterator(&next_iter);
if (is_marked(obj)) {
// Move the object from its old location to its new location relative to
// the heap's (possibly new) location, 'to' points to future object header
size_t *to = heap.begin + ((size_t *)get_forward_address(obj) - (size_t *)old_heap->begin);
memmove(to, from_iter.current, obj_size_header_ptr(from_iter.current));
unmark_object(get_object_content_ptr(to));
}
from_iter = next_iter;
}
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "GC physically_relocate finished\n");
#endif
}
inline bool is_valid_heap_pointer (const size_t *p) {
return !UNBOXED(p) && (size_t)heap.begin <= (size_t)p && (size_t)p <= (size_t)heap.current;
}
static inline bool is_valid_pointer (const size_t *p) { return !UNBOXED(p); }
static inline void queue_enqueue (heap_iterator *tail_iter, void *obj) {
void *tail = tail_iter->current;
void *tail_content = get_object_content_ptr(tail);
set_forward_address(tail_content, (size_t)obj);
make_enqueued(obj);
heap_next_obj_iterator(tail_iter);
}
static inline void *queue_dequeue (heap_iterator *head_iter) {
void *head = head_iter->current;
void *head_content = get_object_content_ptr(head);
void *value = (void *)get_forward_address(head_content);
make_dequeued(value);
heap_next_obj_iterator(head_iter);
return value;
}
void mark (void *obj) {
if (!is_valid_heap_pointer(obj) || is_marked(obj)) { return; }
// TL;DR: [q_head_iter, q_tail_iter) q_head_iter -- current dequeue's victim, q_tail_iter -- place for next enqueue
// in forward_address of corresponding element we store address of element to be removed after dequeue operation
heap_iterator q_head_iter = heap_begin_iterator();
// iterator where we will write address of the element that is going to be enqueued
heap_iterator q_tail_iter = q_head_iter;
queue_enqueue(&q_tail_iter, obj);
// invariant: queue contains only objects that are valid heap pointers (each corresponding to content of unmarked
// object) also each object is in queue only once
while (q_head_iter.current != q_tail_iter.current) {
// while the queue is non-empty
void *cur_obj = queue_dequeue(&q_head_iter);
mark_object(cur_obj);
void *header_ptr = get_obj_header_ptr(cur_obj);
for (obj_field_iterator ptr_field_it = ptr_field_begin_iterator(header_ptr);
!field_is_done_iterator(&ptr_field_it);
obj_next_ptr_field_iterator(&ptr_field_it)) {
void *field_value = *(void **)ptr_field_it.cur_field;
if (!is_valid_heap_pointer(field_value) || is_marked(field_value)
|| is_enqueued(field_value)) {
continue;
}
// if we came to this point it must be true that field_value is unmarked and not currently in queue
// thus, we maintain the invariant
queue_enqueue(&q_tail_iter, field_value);
}
}
}
void scan_extra_roots (void) {
for (int i = 0; i < extra_roots.current_free; ++i) {
// this dereferencing is safe since runtime is pushing correct pointers into extra_roots
mark(*extra_roots.roots[i]);
}
}
#ifdef LAMA_ENV
void scan_global_area (void) {
// __start_custom_data is pointing to beginning of global area, thus all dereferencings are safe
for (size_t *ptr = (size_t *)&__start_custom_data; ptr < (size_t *)&__stop_custom_data; ++ptr) {
mark(*(void **)ptr);
}
}
#endif
extern void gc_test_and_mark_root (size_t **root) {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr,
"\troot = %p (%p), stack addresses: [%p, %p)\n",
root,
*root,
(void *)__gc_stack_top + 4,
(void *)__gc_stack_bottom);
#endif
mark((void *)*root);
}
void __gc_init (void) {
__gc_stack_bottom = (size_t)__builtin_frame_address(1) + sizeof(size_t);
__init();
}
void __init (void) {
signal(SIGSEGV, handler);
size_t space_size = INIT_HEAP_SIZE * sizeof(size_t);
srandom(time(NULL));
heap.begin = mmap(
NULL, space_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (heap.begin == MAP_FAILED) {
perror("ERROR: __init: mmap failed\n");
exit(1);
}
heap.end = heap.begin + INIT_HEAP_SIZE;
heap.size = INIT_HEAP_SIZE;
heap.current = heap.begin;
clear_extra_roots();
}
extern void __shutdown (void) {
munmap(heap.begin, heap.size);
#ifdef DEBUG_VERSION
cur_id = 0;
#endif
heap.begin = NULL;
heap.end = NULL;
heap.size = 0;
heap.current = NULL;
__gc_stack_top = 0;
__gc_stack_bottom = 0;
}
void clear_extra_roots (void) { extra_roots.current_free = 0; }
void push_extra_root (void **p) {
if (extra_roots.current_free >= MAX_EXTRA_ROOTS_NUMBER) {
perror("ERROR: push_extra_roots: extra_roots_pool overflow\n");
exit(1);
}
assert(p >= (void **)__gc_stack_top || p < (void **)__gc_stack_bottom);
extra_roots.roots[extra_roots.current_free] = p;
extra_roots.current_free++;
}
void pop_extra_root (void **p) {
if (extra_roots.current_free == 0) {
perror("ERROR: pop_extra_root: extra_roots are empty\n");
exit(1);
}
extra_roots.current_free--;
if (extra_roots.roots[extra_roots.current_free] != p) {
perror("ERROR: pop_extra_root: stack invariant violation\n");
exit(1);
}
}
/* Functions for tests */
#if defined(DEBUG_VERSION)
size_t objects_snapshot (int *object_ids_buf, size_t object_ids_buf_size) {
size_t *ids_ptr = (size_t *)object_ids_buf;
size_t i = 0;
for (heap_iterator it = heap_begin_iterator();
!heap_is_done_iterator(&it) && i < object_ids_buf_size;
heap_next_obj_iterator(&it), ++i) {
void *header_ptr = it.current;
data *d = TO_DATA(get_object_content_ptr(header_ptr));
ids_ptr[i] = d->id;
}
return i;
}
#endif
#ifdef DEBUG_VERSION
extern char *de_hash (int);
void dump_heap () {
size_t i = 0;
for (heap_iterator it = heap_begin_iterator(); !heap_is_done_iterator(&it);
heap_next_obj_iterator(&it), ++i) {
void *header_ptr = it.current;
void *content_ptr = get_object_content_ptr(header_ptr);
data *d = TO_DATA(content_ptr);
lama_type t = get_type_header_ptr(header_ptr);
switch (t) {
case ARRAY: fprintf(stderr, "of kind ARRAY\n"); break;
case CLOSURE: fprintf(stderr, "of kind CLOSURE\n"); break;
case STRING: fprintf(stderr, "of kind STRING\n"); break;
case SEXP:
fprintf(stderr, "of kind SEXP with tag %s\n", de_hash(TO_SEXP(content_ptr)->tag));
break;
}
}
}
void set_stack (size_t stack_top, size_t stack_bottom) {
__gc_stack_top = stack_top;
__gc_stack_bottom = stack_bottom;
}
void set_extra_roots (size_t extra_roots_size, void **extra_roots_ptr) {
memcpy(extra_roots.roots, extra_roots_ptr, MIN(sizeof(extra_roots.roots), extra_roots_size));
clear_extra_roots();
}
#endif
/* Utility functions */
size_t get_forward_address (void *obj) {
data *d = TO_DATA(obj);
return GET_FORWARD_ADDRESS(d->forward_address);
}
void set_forward_address (void *obj, size_t addr) {
data *d = TO_DATA(obj);
SET_FORWARD_ADDRESS(d->forward_address, addr);
}
bool is_marked (void *obj) {
data *d = TO_DATA(obj);
aint mark_bit = GET_MARK_BIT(d->forward_address);
return mark_bit;
}
void mark_object (void *obj) {
data *d = TO_DATA(obj);
SET_MARK_BIT(d->forward_address);
}
void unmark_object (void *obj) {
data *d = TO_DATA(obj);
RESET_MARK_BIT(d->forward_address);
}
bool is_enqueued (void *obj) {
data *d = TO_DATA(obj);
return IS_ENQUEUED(d->forward_address) != 0;
}
void make_enqueued (void *obj) {
data *d = TO_DATA(obj);
MAKE_ENQUEUED(d->forward_address);
}
void make_dequeued (void *obj) {
data *d = TO_DATA(obj);
MAKE_DEQUEUED(d->forward_address);
}
heap_iterator heap_begin_iterator () {
heap_iterator it = {.current = heap.begin};
return it;
}
void heap_next_obj_iterator (heap_iterator *it) {
void *ptr = it->current;
size_t obj_size = obj_size_header_ptr(ptr);
// make sure we take alignment into consideration
obj_size = BYTES_TO_WORDS(obj_size);
it->current += obj_size;
}
bool heap_is_done_iterator (heap_iterator *it) { return it->current >= heap.current; }
lama_type get_type_row_ptr (void *ptr) {
data *data_ptr = TO_DATA(ptr);
return get_type_header_ptr(data_ptr);
}
lama_type get_type_header_ptr (void *ptr) {
auint *header = (auint *)ptr;
switch (TAG(*header)) {
case ARRAY_TAG: return ARRAY;
case STRING_TAG: return STRING;
case CLOSURE_TAG: return CLOSURE;
case SEXP_TAG: return SEXP;
default: {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "ERROR: get_type_header_ptr: unknown object header, cur_id=%d", cur_id);
raise(SIGINT); // only for debug purposes
#else
# ifdef FULL_INVARIANT_CHECKS
# ifdef DEBUG_PRINT
fprintf(stderr,
"ERROR: get_type_header_ptr: unknown object header, ptr is %p, tag %i, heap size is "
"%d cur_id=%d stack_top=%p stack_bot=%p ",
ptr,
TAG(*header),
heap.size,
cur_id,
(void *)__gc_stack_top,
(void *)__gc_stack_bottom);
# endif
FILE *heap_before_compaction = print_objects_traversal("dump_kill", 1);
fclose(heap_before_compaction);
# endif
kill(getpid(), SIGSEGV);
#endif
exit(1);
}
}
}
size_t obj_size_row_ptr (void *ptr) {
data *data_ptr = TO_DATA(ptr);
return obj_size_header_ptr(data_ptr);
}
size_t obj_size_header_ptr (void *ptr) {
ptrt len = LEN(*(ptrt *)ptr);
switch (get_type_header_ptr(ptr)) {
case ARRAY: return array_size(len);
case STRING: return string_size(len);
case CLOSURE: return closure_size(len);
case SEXP: return sexp_size(len);
default: {
#ifdef DEBUG_VERSION
fprintf(stderr, "ERROR: obj_size_header_ptr: unknown object header, cur_id=%d", cur_id);
raise(SIGINT); // only for debug purposes
#else
perror("ERROR: obj_size_header_ptr: unknown object header\n");
#endif
exit(1);
}
}
}
size_t array_size (size_t sz) { return get_header_size(ARRAY) + MEMBER_SIZE * sz; }
size_t string_size (size_t len) {
// string should be null terminated
return get_header_size(STRING) + len + 1;
}
size_t closure_size (size_t sz) { return get_header_size(CLOSURE) + MEMBER_SIZE * sz; }
size_t sexp_size (size_t members) { return get_header_size(SEXP) + MEMBER_SIZE * (members + 1); }
obj_field_iterator field_begin_iterator (void *obj) {
lama_type type = get_type_header_ptr(obj);
obj_field_iterator it = {.type = type, .obj_ptr = obj, .cur_field = get_object_content_ptr(obj)};
switch (type) {
case STRING: {
it.cur_field = get_end_of_obj(it.obj_ptr);
break;
}
case CLOSURE:
case SEXP: {
it.cur_field += MEMBER_SIZE;
break;
}
default: break;
}
return it;
}
obj_field_iterator ptr_field_begin_iterator (void *obj) {
obj_field_iterator it = field_begin_iterator(obj);
// corner case when obj has no fields
if (field_is_done_iterator(&it)) { return it; }
if (is_valid_pointer(*(size_t **)it.cur_field)) { return it; }
obj_next_ptr_field_iterator(&it);
return it;
}
void obj_next_field_iterator (obj_field_iterator *it) { it->cur_field += MEMBER_SIZE; }
void obj_next_ptr_field_iterator (obj_field_iterator *it) {
do {
obj_next_field_iterator(it);
} while (!field_is_done_iterator(it) && !is_valid_pointer(*(size_t **)it->cur_field));
}
bool field_is_done_iterator (obj_field_iterator *it) {
return it->cur_field >= get_end_of_obj(it->obj_ptr);
}
void *get_obj_header_ptr (void *ptr) {
lama_type type = get_type_row_ptr(ptr);
return ptr - get_header_size(type);
}
void *get_object_content_ptr (void *header_ptr) {
lama_type type = get_type_header_ptr(header_ptr);
return header_ptr + get_header_size(type);
}
void *get_end_of_obj (void *header_ptr) { return header_ptr + obj_size_header_ptr(header_ptr); }
size_t get_header_size (lama_type type) {
switch (type) {
case STRING:
case CLOSURE:
case ARRAY:
case SEXP: return DATA_HEADER_SZ;
default: perror("ERROR: get_header_size: unknown object type\n");
#ifdef DEBUG_VERSION
raise(SIGINT); // only for debug purposes
#endif
exit(1);
}
}
void *alloc_string (auint len) {
data *obj = alloc(string_size(len));
obj->data_header = STRING_TAG | (len << 3);
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "%p, [STRING] tag=%zu\n", obj, TAG(obj->data_header));
#endif
#ifdef DEBUG_VERSION
obj->id = cur_id;
#endif
obj->forward_address = 0;
#ifdef DEBUG_PRINT
printf("Allocated string\n");
#endif
return obj;
}
void *alloc_array (auint len) {
data *obj = alloc(array_size(len));
obj->data_header = ARRAY_TAG | (len << 3);
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "%p, [ARRAY] tag=%zu\n", obj, TAG(obj->data_header));
#endif
#ifdef DEBUG_VERSION
obj->id = cur_id;
#endif
obj->forward_address = 0;
#ifdef DEBUG_PRINT
printf("Allocated array\n");
#endif
return obj;
}
void *alloc_sexp (auint members) {
sexp *obj = alloc(sexp_size(members));
obj->data_header = SEXP_TAG | (members << 3);
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "%p, SEXP tag=%zu\n", obj, TAG(obj->data_header));
#endif
#ifdef DEBUG_VERSION
obj->id = cur_id;
#endif
obj->forward_address = 0;
obj->tag = 0;
#ifdef DEBUG_PRINT
printf("Allocated sexp\n");
#endif
return obj;
}
void *alloc_closure (auint captured) {
data *obj = alloc(closure_size(captured));
obj->data_header = CLOSURE_TAG | (captured << 3);
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "%p, [CLOSURE] tag=%zu\n", obj, TAG(obj->data_header));
#endif
#ifdef DEBUG_VERSION
obj->id = cur_id;
#endif
obj->forward_address = 0;
#ifdef DEBUG_PRINT
printf("Allocated closure\n");
#endif
return obj;
}

240
runtime/gc.h Normal file
View file

@ -0,0 +1,240 @@
// ============================================================================
// GC
// ============================================================================
// This is an implementation of a compactifying garbage collection algorithm.
// GC algorithm itself consists of two major stages:
// 1. Marking roots
// 2. Compacting stage
// Compacting is implemented in a very similar fashion to LISP2 algorithm,
// which is well-known.
// Most important pieces of code to discover to understand how everything works:
// - void *gc_alloc (size_t): this function is basically called whenever we are
// not able to allocate memory on the existing heap via simple bump allocator.
// - mark_phase(): this function will tell you everything you need to know
// about marking. I would also recommend to pay attention to the fact that
// marking is implemented without usage of any additional memory. Already
// allocated space is sufficient (for details see 'void mark (void *obj)').
// - void compact_phase (size_t additional_size): the whole compaction phase
// can be understood by looking at this piece of code plus couple of other
// functions used in there. It is basically an implementation of LISP2.
#ifndef __LAMA_GC__
#define __LAMA_GC__
#include "runtime_common.h"
#define GET_MARK_BIT(x) (((ptrt)(x)) & 1)
#define SET_MARK_BIT(x) (x = (((ptrt)(x)) | 1))
#define IS_ENQUEUED(x) (((ptrt)(x)) & 2)
#define MAKE_ENQUEUED(x) (x = (((ptrt)(x)) | 2))
#define MAKE_DEQUEUED(x) (x = (((ptrt)(x)) & (~2)))
#define RESET_MARK_BIT(x) (x = (((ptrt)(x)) & (~1)))
// since last 2 bits are used for mark-bit and enqueued-bit and due to correct
// alignment we can expect that last 2 bits don't influence address (they
// should always be zero)
#define GET_FORWARD_ADDRESS(x) (((ptrt)(x)) & (~3))
// take the last two bits as they are and make all others zero
#define SET_FORWARD_ADDRESS(x, addr) (x = ((x & 3) | ((ptrt)(addr))))
// if heap is full after gc shows in how many times it has to be extended
#define EXTRA_ROOM_HEAP_COEFFICIENT 2
#define MINIMUM_HEAP_CAPACITY (64)
#include <stdbool.h>
#include <stddef.h>
typedef enum { ARRAY, CLOSURE, STRING, SEXP } lama_type;
typedef struct {
size_t *current;
} heap_iterator;
typedef struct {
lama_type type; // holds type of object, which fields we are iterating over
void *obj_ptr; // place to store a pointer to the object header
void *cur_field;
} obj_field_iterator;
// Memory pool for linear memory allocation
typedef struct {
size_t *begin;
size_t *end;
size_t *current;
size_t size;
} memory_chunk;
// the only GC-related function that should be exposed, others are useful for tests and internal implementation
// allocates object of the given size on the heap
void *alloc(size_t);
// takes number of words as a parameter
void *gc_alloc(size_t);
// takes number of words as a parameter
void *gc_alloc_on_existing_heap(size_t);
// specific for mark-and-compact_phase gc
void mark (void *obj);
void mark_phase (void);
// marks each pointer from extra roots
void scan_extra_roots (void);
#ifdef LAMA_ENV
// marks each valid pointer from global area
void scan_global_area (void);
#endif
// takes number of words that are required to be allocated somewhere on the heap
void compact_phase (size_t additional_size);
// specific for Lisp-2 algorithm
size_t compute_locations ();
void update_references (memory_chunk *);
void physically_relocate (memory_chunk *);
// ============================================================================
// GC extra roots
// ============================================================================
// Lama's program stack is continuous, i.e. it never interleaves with runtime
// function's activation records. But some valid Lama's pointers can escape
// into runtime. Those values (theirs stack addresses) has to be registered in
// an auxiliary data structure called `extra_roots_pool`.
// extra_roots_pool is a simple LIFO stack. During `pop` it compares that pop's
// argument is equal to the current stack top.
#define MAX_EXTRA_ROOTS_NUMBER 32
typedef struct {
int current_free;
void **roots[MAX_EXTRA_ROOTS_NUMBER];
} extra_roots_pool;
void clear_extra_roots (void);
void push_extra_root (void **p);
void pop_extra_root (void **p);
// ============================================================================
// Implemented in GASM: see gc_runtime.s
// ============================================================================
// MANDATORY TO CALL BEFORE ANY INTERACTION WITH GC (apart from cases where we
// are working with virtual stack as happens in tests)
void __gc_init (void);
// should be called before interaction with GC in case of using in tests with
// virtual stack, otherwise it is automatically invoked by `__gc_init`
void __init (void);
// mostly useful for tests but basically you want to call this in case you want
// to deallocate all object allocated via GC
extern void __shutdown (void);
// ============================================================================
// invoked from GASM: see gc_runtime.s
// ============================================================================
extern void gc_test_and_mark_root (size_t **root);
bool is_valid_heap_pointer (const size_t *);
static inline bool is_valid_pointer (const size_t *);
// ============================================================================
// Auxiliary functions for tests
// ============================================================================
#if defined(DEBUG_VERSION)
// makes a snapshot of current objects in heap (both alive and dead), writes these ids to object_ids_buf,
// returns number of ids dumped
// object_ids_buf is pointer to area preallocated by user for dumping ids of objects in heap
// object_ids_buf_size is in WORDS, NOT BYTES
size_t objects_snapshot (int *object_ids_buf, size_t object_ids_buf_size);
#endif
#ifdef DEBUG_VERSION
// essential function to mock program stack
void set_stack (size_t stack_top, size_t stack_bottom);
// function to mock extra roots (Lama specific)
void set_extra_roots (size_t extra_roots_size, void **extra_roots_ptr);
#endif
// ============================================================================
// Utility functions
// ============================================================================
// accepts pointer to the start of the region and to the end of the region
// scans it and if it meets a pointer, it should be modified in according to forward address
void scan_and_fix_region (memory_chunk *old_heap, void *start, void *end);
// takes a pointer to an object content as an argument, returns forwarding address
size_t get_forward_address (void *obj);
// takes a pointer to an object content as an argument, sets forwarding address to value 'addr'
void set_forward_address (void *obj, size_t addr);
// takes a pointer to an object content as an argument, returns whether this object was marked as live
bool is_marked (void *obj);
// takes a pointer to an object content as an argument, marks the object as live
void mark_object (void *obj);
// takes a pointer to an object content as an argument, marks the object as dead
void unmark_object (void *obj);
// takes a pointer to an object content as an argument, returns whether this object was enqueued to the queue (which is used in mark phase)
bool is_enqueued (void *obj);
// takes a pointer to an object content as an argument, marks object as enqueued
void make_enqueued (void *obj);
// takes a pointer to an object content as an argument, unmarks object as enqueued
void make_dequeued (void *obj);
// returns iterator to an object with the lowest address
heap_iterator heap_begin_iterator ();
void heap_next_obj_iterator (heap_iterator *it);
bool heap_is_done_iterator (heap_iterator *it);
// returns correct type when pointer to actual data is passed (header is excluded)
lama_type get_type_row_ptr (void *ptr);
// returns correct type when pointer to an object header is passed
lama_type get_type_header_ptr (void *ptr);
// returns correct object size (together with header) of an object, ptr is pointer to an actual data is passed (header is excluded)
size_t obj_size_row_ptr (void *ptr);
// returns correct object size (together with header) of an object, ptr is pointer to an object header
size_t obj_size_header_ptr (void *ptr);
// returns total padding size that we need to store given object type
size_t get_header_size (lama_type type);
// returns number of bytes that are required to allocate array with 'sz' elements (header included)
size_t array_size (size_t sz);
// returns number of bytes that are required to allocate string of length 'l' (header included)
size_t string_size (size_t len);
// returns number of bytes that are required to allocate closure with 'sz-1' captured values (header included)
size_t closure_size (size_t sz);
// returns number of bytes that are required to allocate s-expression with 'members' fields (header included)
size_t sexp_size (size_t members);
// returns an iterator over object fields, obj is ptr to object header
// (in case of s-exp, it is mandatory that obj ptr is very beginning of the object,
// considering that now we store two versions of header in there)
obj_field_iterator field_begin_iterator (void *obj);
// returns an iterator over object fields which are actual pointers, obj is ptr to object header
// (in case of s-exp, it is mandatory that obj ptr is very beginning of the object,
// considering that now we store two versions of header in there)
obj_field_iterator ptr_field_begin_iterator (void *obj);
// moves the iterator to next object field
void obj_next_field_iterator (obj_field_iterator *it);
// moves the iterator to the next object field which is an actual pointer
void obj_next_ptr_field_iterator (obj_field_iterator *it);
// returns if we are done iterating over fields of the object
bool field_is_done_iterator (obj_field_iterator *it);
// ptr is pointer to the actual object content, returns pointer to the very beginning of the object (header)
void *get_obj_header_ptr (void *ptr);
void *get_object_content_ptr (void *header_ptr);
void *get_end_of_obj (void *header_ptr);
void *alloc_string (auint len);
void *alloc_array (auint len);
void *alloc_sexp (auint members);
void *alloc_closure (auint captured);
#endif

View file

@ -1,116 +0,0 @@
.data
printf_format: .string "Stack root: %lx\n"
printf_format2: .string "BOT: %lx\n"
printf_format3: .string "TOP: %lx\n"
printf_format4: .string "EAX: %lx\n"
printf_format5: .string "LOL\n"
__gc_stack_bottom: .long 0
__gc_stack_top: .long 0
.globl __pre_gc
.globl __post_gc
.globl __gc_init
.globl __gc_root_scan_stack
.globl __gc_stack_top
.globl __gc_stack_bottom
.extern init_pool
.extern gc_test_and_copy_root
.text
__gc_init: movl %ebp, __gc_stack_bottom
addl $4, __gc_stack_bottom
call __init
ret
// if __gc_stack_top is equal to 0
// then set __gc_stack_top to %ebp
// else return
__pre_gc:
pushl %eax
movl __gc_stack_top, %eax
cmpl $0, %eax
jne __pre_gc_2
movl %ebp, %eax
// addl $8, %eax
movl %eax, __gc_stack_top
__pre_gc_2:
popl %eax
ret
// if __gc_stack_top has been set by the caller
// (i.e. it is equal to its %ebp)
// then set __gc_stack_top to 0
// else return
__post_gc:
pushl %eax
movl __gc_stack_top, %eax
cmpl %eax, %ebp
jnz __post_gc2
movl $0, __gc_stack_top
__post_gc2:
popl %eax
ret
// Scan stack for roots
// strting from __gc_stack_top
// till __gc_stack_bottom
__gc_root_scan_stack:
pushl %ebp
movl %esp, %ebp
pushl %ebx
pushl %edx
movl __gc_stack_top, %eax
jmp next
loop:
movl (%eax), %ebx
// check that it is not a pointer to code section
// i.e. the following is not true:
// __executable_start <= (%eax) <= __etext
check11:
leal __executable_start, %edx
cmpl %ebx, %edx
jna check12
jmp check21
check12:
leal __etext, %edx
cmpl %ebx, %edx
jnb next
// check that it is not a pointer into the program stack
// i.e. the following is not true:
// __gc_stack_bottom <= (%eax) <= __gc_stack_top
check21:
cmpl %ebx, __gc_stack_top
jna check22
jmp loop2
check22:
cmpl %ebx, __gc_stack_bottom
jnb next
// check if it a valid pointer
// i.e. the lastest bit is set to zero
loop2:
andl $0x00000001, %ebx
jnz next
gc_run_t:
pushl %eax
pushl %eax
call gc_test_and_copy_root
addl $4, %esp
popl %eax
next:
addl $4, %eax
cmpl %eax, __gc_stack_bottom
jne loop
returnn:
movl $0, %eax
popl %edx
popl %ebx
movl %ebp, %esp
popl %ebp
ret

169
runtime/printf.S Normal file
View file

@ -0,0 +1,169 @@
#ifdef __linux__
#define PREFIXED(name) name
#elif defined(__APPLE__)
#define PREFIXED(name) _##name
#endif
.data
.global PREFIXED(Lprintf)
.extern PREFIXED(Bprintf)
.global PREFIXED(Lfprintf)
.extern PREFIXED(Bfprintf)
.global PREFIXED(Lsprintf)
.extern PREFIXED(Bsprintf)
.global PREFIXED(Lfailure)
.extern PREFIXED(failure)
.extern cnt_percentage_sign
.text
PREFIXED(Lprintf):
# save return address
popq %r14
pushq %r9
pushq %r8
pushq %rcx
pushq %rdx
pushq %rsi
movq %rsp, %rax
# rdi --- format string
# r11 --- number of arguments except format string
PREFIXED(Lprintf_loop):
movq $0, %r12
cmpq %r11, %r12
jz PREFIXED(Lprintf_continue)
decq %r11
movq (%rax), %r10
testq $1, %r10
jz PREFIXED(Lprintf_loop_end)
# unbox value
sarq %r10
movq %r10, (%rax)
PREFIXED(Lprintf_loop_end):
addq $8, %rax
jmp PREFIXED(Lprintf_loop)
PREFIXED(Lprintf_continue):
popq %rsi
popq %rdx
popq %rcx
popq %r8
popq %r9
# restore return address
pushq %r14
jmp PREFIXED(Bprintf)
PREFIXED(Lfprintf):
# save return address
popq %r14
pushq %r9
pushq %r8
pushq %rcx
pushq %rdx
movq %rsp, %rax
# rdi --- FILE*
# rsi --- format string
# r11 --- number of arguments except format string
PREFIXED(Lfprintf_loop):
movq $0, %r12
cmpq %r11, %r12
jz PREFIXED(Lfprintf_continue)
decq %r11
movq (%rax), %r10
testq $1, %r10
jz PREFIXED(Lfprintf_loop_end)
# unbox value
sarq %r10
movq %r10, (%rax)
PREFIXED(Lfprintf_loop_end):
addq $8, %rax
jmp PREFIXED(Lfprintf_loop)
PREFIXED(Lfprintf_continue):
popq %rdx
popq %rcx
popq %r8
popq %r9
# restore return address
pushq %r14
jmp PREFIXED(Bfprintf)
PREFIXED(Lsprintf):
# save return address
popq %r14
pushq %r9
pushq %r8
pushq %rcx
pushq %rdx
pushq %rsi
movq %rsp, %rax
# rdi --- format string
# r11 --- number of arguments except format string
PREFIXED(Lsprintf_loop):
movq $0, %r12
cmpq %r11, %r12
jz PREFIXED(Lsprintf_continue)
decq %r11
movq (%rax), %r10
testq $1, %r10
jz PREFIXED(Lsprintf_loop_end)
# unbox value
sarq %r10
movq %r10, (%rax)
PREFIXED(Lsprintf_loop_end):
addq $8, %rax
jmp PREFIXED(Lsprintf_loop)
PREFIXED(Lsprintf_continue):
popq %rsi
popq %rdx
popq %rcx
popq %r8
popq %r9
# restore return address
pushq %r14
jmp PREFIXED(Bsprintf)
PREFIXED(Lfailure):
# save return address
popq %r14
pushq %r9
pushq %r8
pushq %rcx
pushq %rdx
pushq %rsi
movq %rsp, %rax
# rdi --- format string
# r11 --- number of arguments except format string
PREFIXED(Lfailure_loop):
movq $0, %r12
cmpq %r11, %r12
jz PREFIXED(Lfailure_continue)
decq %r11
movq (%rax), %r10
testq $1, %r10
jz PREFIXED(Lfailure_loop_end)
# unbox value
sarq %r10
movq %r10, (%rax)
PREFIXED(Lfailure_loop_end):
addq $8, %rax
jmp PREFIXED(Lfailure_loop)
PREFIXED(Lfailure_continue):
popq %rsi
popq %rdx
popq %rcx
popq %r8
popq %r9
# restore return address
pushq %r14
jmp PREFIXED(failure)

File diff suppressed because it is too large Load diff

View file

@ -1,21 +1,21 @@
# ifndef __LAMA_RUNTIME__
# define __LAMA_RUNTIME__
#ifndef __LAMA_RUNTIME__
#define __LAMA_RUNTIME__
# include <stdio.h>
# include <stdio.h>
# include <string.h>
# include <stdarg.h>
# include <stdlib.h>
# include <sys/mman.h>
# include <assert.h>
# include <errno.h>
# include <regex.h>
# include <time.h>
# include <limits.h>
# include <ctype.h>
#include "runtime_common.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <regex.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
# define WORD_SIZE (CHAR_BIT * sizeof(int))
#define WORD_SIZE (CHAR_BIT * sizeof(ptrt))
void failure (char *s, ...);
_Noreturn void failure (char *s, ...);
# endif
#endif

95
runtime/runtime_common.h Normal file
View file

@ -0,0 +1,95 @@
#ifndef __LAMA_RUNTIME_COMMON__
#define __LAMA_RUNTIME_COMMON__
#include <stddef.h>
#include <inttypes.h>
#include <limits.h>
// this flag makes GC behavior a bit different for testing purposes.
//#define DEBUG_VERSION
//#define FULL_INVARIANT_CHECKS
#if defined(__x86_64__) || defined(__ppc64__)
#define X86_64
#endif
typedef size_t ptrt; // pointer type, because can hold a pointer on a corresponding platform
#ifdef X86_64
typedef int64_t aint; // adaptive int
typedef uint64_t auint; // adaptive unsigned int
#define PRIdAI PRId64
#define SCNdAI SCNd64
#else
typedef int32_t aint; // adaptive int
typedef uint32_t auint; // adaptive unsigned int
#define PRIdAI PRId32
#define SCNdAI SCNd32
#endif
#define STRING_TAG 0x00000001
#define ARRAY_TAG 0x00000003
#define SEXP_TAG 0x00000005
#define CLOSURE_TAG 0x00000007
#define UNBOXED_TAG 0x00000009 // Not actually a data_header; used to return from LkindOf
#ifdef X86_64
#define LEN_MASK (UINT64_MAX^7)
#else
#define LEN_MASK (UINT32_MAX^7)
#endif
#define LEN(x) (ptrt)(((ptrt)x & LEN_MASK) >> 3)
#define TAG(x) (x & 7)
#ifndef DEBUG_VERSION
# define DATA_HEADER_SZ (sizeof(auint) + sizeof(ptrt))
#else
# define DATA_HEADER_SZ (sizeof(auint) + sizeof(ptrt) + sizeof(auint))
#endif
#define MEMBER_SIZE sizeof(ptrt)
#define TO_DATA(x) ((data *)((char *)(x)-DATA_HEADER_SZ))
#define TO_SEXP(x) ((sexp *)((char *)(x)-DATA_HEADER_SZ))
#define UNBOXED(x) (((aint)(x)) & 1)
#define UNBOX(x) (((aint)(x)) >> 1)
#define BOX(x) ((((aint)(x)) << 1) | 1)
#define BYTES_TO_WORDS(bytes) (((bytes) - 1) / sizeof(size_t) + 1)
#define WORDS_TO_BYTES(words) ((words) * sizeof(size_t))
// CAREFUL WITH DOUBLE EVALUATION!
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
typedef struct {
// store tag in the last three bits to understand what structure this is, other bits are filled with
// other utility info (i.e., size for array, number of fields for s-expression)
auint data_header;
#ifdef DEBUG_VERSION
size_t id;
#endif
// last bit is used as MARK-BIT, the rest are used to store address where object should move
// last bit can be used because due to alignment we can assume that last two bits are always 0's
ptrt forward_address;
char contents[];
} data;
typedef struct {
// store tag in the last three bits to understand what structure this is, other bits are filled with
// other utility info (i.e., size for array, number of fields for s-expression)
auint data_header;
#ifdef DEBUG_VERSION
size_t id;
#endif
// last bit is used as MARK-BIT, the rest are used to store address where object should move
// last bit can be used because due to alignment we can assume that last two bits are always 0's
ptrt forward_address;
auint tag;
char contents[];
} sexp;
#endif

View file

@ -29,9 +29,9 @@ of runtime behaviors, including those which a typical type system is called to p
the language can be used in future as a raw substrate to apply various ways of software verification (including
type systems) on.
The current implementation contains a native code compiler for \textsc{x86-32}, written
The current implementation contains a native code compiler for \textsc{x86-64}, written
in \textsc{OCaml}, a runtime library with garbage-collection support, written in \textsc{C}, and a small
standard library, written in \lama itself. The native code compiler uses \textsc{gcc} as a toolchain.
standard library, written in \lama itself.
In addition, a source-level reference interpreter is implemented as well as a compiler to a small
stack machine. The stack machine code can in turn be either interpreted on a stack machine interpreter, or

View file

@ -2,7 +2,7 @@
\label{sec:wellformedness}
{
\newcommand{\Ref}{\primi{Ref}{}}
\renewcommand{\Ref}{\primi{Ref}{}}
\newcommand{\Val}{\primi{Val}{}}
\newcommand{\Void}{\primi{Void}{}}
\newcommand{\Weak}{\primi{Weak}{}}

View file

@ -6,7 +6,7 @@
\label{sec:lexical_structure}
The character set for the language is \textsc{ASCII}, case-sensitive. In the following lexical description we will use
the GNU Regexp syntax~\cite{GNULib} in lexical definitions.
the POSIX-Extended Regular Expressions in lexical definitions.
\subsection{Whitespaces and Comments}
@ -83,7 +83,7 @@ The following identifiers are reserved for keywords:
after array at before box case do elif else
esac eta false fi for fun if import infix
infixl infixr lazy od of public sexp skip str
syntax then true val var while
syntax then true val var while let in
\end{lstlisting}
\subsection{Infix Operators}

View file

@ -40,7 +40,7 @@ property ``$e$ is a reference''. The result of assignment operator coincides wit
assigns 3 to both "\lstinline|x|" and "\lstinline|y|".
\begin{figure}[h]
\newcommand{\Ref}[1]{\mathcal{R}\,({#1})}
\renewcommand{\Ref}[1]{\mathcal{R}\,({#1})}
\renewcommand{\arraystretch}{4}
\[
\begin{array}{cc}
@ -108,7 +108,8 @@ designates an anonymous functional value in the form of a closure.
& & \nonterm{whileDoExpression}&\alt\\
& & \nonterm{doWhileExpression}&\alt\\
& & \nonterm{forExpression}&\alt\\
& & \nonterm{caseExpression}&
& & \nonterm{caseExpression}&\alt\\
& & \nonterm{letExpression}&
\end{array}
\]
\caption{Expression concrete syntax}
@ -134,7 +135,15 @@ Expression \lstinline|skip| can be used to designate a no-value when no action i
\label{composite_expressions}
\end{figure}
There are three forms of expressions to specify composite values: arrays, lists and S-expressions (see Fig.~\ref{composite_expressions}).
There are three forms of expressions to specify composite values: arrays, lists and S-expressions (see Fig.~\ref{composite_expressions}).
\FloatBarrier
\subsection{Let Expressions}
TODO
\FloatBarrier
\subsection{Conditional Expressions}

View file

@ -4,11 +4,10 @@
\chapter{Implementation-dependent Limitations}
\label{sec:limitations}
The following limitations are in effect for \textsc{x86-32} platform implementation:
The following limitations are in effect for \textsc{x86-64} platform implementation:
\begin{itemize}
\item the range of representable integers is [-1073741824..1073741823] (31-bit signed in two-complement representation);
\item the maximal length of array/string/number of S-expression parameters is 536870911 (29-bit unsigned integer);
\item the minimal address space size is 2GB (garbage collector requirement);
\item the maximal number of S-expression constructor name symbols taken into account is 5.
\item the range of representable integers is \newline [-4611686018427387905..4611686018427387904] (63-bit signed in two-complement representation);
\item the maximal length of array/string/number of S-expression parameters is 2305843009213693952 (61-bit unsigned integer);
\item the maximal number of S-expression constructor name symbols taken into account is 9.
\end{itemize}

View file

@ -4,7 +4,8 @@
\chapter{Debugging Support}
\label{sec:debugging}
Current implementation supports a minimalistic debugging with \textsc{GDB}~\cite{gdb}. In order to include the debug information into object files/executable these files
Current implementation supports a minimalistic debugging with \textsc{GDB}~\cite{gdb} for the Linux target only.
In order to include the debug information into object files/executable these files
have to be compiled with the command-line option "\texttt{-g}" (see Section~\ref{sec:driver}).
The following debugging features are supported:
@ -19,7 +20,7 @@ The following debugging features are supported:
\end{itemize}
\item stepping over/into;
\item inspecting the values of global variables by their source names;
\item inspecting the values of function arguments and local variables (include those in nested scopes) by their source names;
\item inspecting the values of local variables (include those in nested scopes) by their source names;
\item inspecting the values in closures by their indices; the indices for closure elements can be found in stack machine
program dump (option "\texttt{-ds}", see Section~\ref{sec:driver}).
\end{itemize}
@ -36,4 +37,5 @@ The following customized commands are available:
\item "\texttt{pc }$i$", where "$i$" is an integer number. The commands prints a value of $i$-component of current closure.
\end{itemize}
For the MacOS target the debugging is supported with \textsc{LLDB}.
But debugging features are not available.

View file

@ -95,7 +95,7 @@ is automatically created and closed within the call.}
\descr{\lstinline|fun fprintf (file, fmt, ...)|}{Same as "\lstinline|printf|", but outputs to a given file. The file argument should be that acquired
by \lstinline|fopen| function.}
\descr{\lstinline|fun regexp (str)|}{Compiles a string representation of a regular expression (as per GNULib's regexp~\cite{GNULib}) into
\descr{\lstinline|fun regexp (str)|}{Compiles a string representation of a regular expression (as per POSIX-Extended Regular Expressions syntax) into
an internal representation. The return value is a external pointer to the internal representation.}
\descr{\lstinline|fun regexpMatch (pattern, subj, pos)|}{Matches a string "\lstinline{subj}", starting from the position "\lstinline|pos|",

View file

@ -93,7 +93,7 @@
\newcommand{\descr}[2]{\smallskip{#1}\begin{itemize}[noitemsep,topsep=0pt]\item[]{#2}\end{itemize}}
\lstdefinelanguage{abslama}{
keywords={skip,if,then,else,elif,fi,while,do,od,for,fun,public,import,
keywords={skip,if,then,else,elif,fi,while,do,od,for,fun,public,import,let,in,
box,val,var,case,of,esac,when,box,str,sexp,array,infix,infixl,infixr,at,before,after,true,false,eta,lazy,syntax,ref,ignore,elemRef},
sensitive=true,
basicstyle=\small,
@ -109,8 +109,8 @@ morecomment=[l][\ttfamily]{--}
}
\lstdefinelanguage{lama}{
keywords={skip,if,then,else,elif,fi,while,do,od,for,fun,public,import,
box,val, var,case,of,esac,when,box,str,sexp,array,infix,infixl,infixr,at,before,after,true,false,eta,lazy,syntax},
keywords={skip,if,then,else,elif,fi,while,do,od,for,fun,public,import,let,in,
box,val,var,case,of,esac,when,box,str,sexp,array,infix,infixl,infixr,at,before,after,true,false,eta,lazy,syntax},
sensitive=true,
basicstyle=\small,
%commentstyle=\scriptsize\rmfamily,
@ -168,7 +168,7 @@ language=lama
{\huge\bfseries \lama Language Specification}\\[0.4cm] % Title of your document
{\textsc{Version 1.10}}
{\textsc{Version 1.30}}
\HRule\\[1.5cm]
@ -183,18 +183,6 @@ language=lama
Dmitry \textsc{Boulytchev} % Your name
\end{flushleft}
\end{minipage}
%~
%\begin{minipage}{0.4\textwidth}
% \begin{flushright}
% \large
% \textit{Supervisor}\\
% Dr. Caroline \textsc{Becker} % Supervisor's name
% \end{flushright}
%\end{minipage}
% If you don't want a supervisor, uncomment the two lines below and comment the code above
%{\large\textit{Author}}\\
%John \textsc{Smith} % Your name
%------------------------------------------------
% Date

1
src/.ocamlformat Normal file
View file

@ -0,0 +1 @@
profile=default

View file

@ -1,185 +1,40 @@
exception Commandline_error of string
open Options
class options args =
let n = Array.length args in
let dump_ast = 0b1 in
let dump_sm = 0b010 in
let dump_source = 0b100 in
(* Kakadu: binary masks are cool for C code, but for OCaml I don't see any reason to save memory like this *)
let help_string =
"Lama compiler. (C) JetBrains Reserach, 2017-2020.\n" ^
"Usage: lamac <options> <input file>\n\n" ^
"When no options specified, builds the source file into executable.\n" ^
"Options:\n" ^
" -c --- compile into object file\n" ^
" -o <file> --- write executable into file <file>\n" ^
" -I <path> --- add <path> into unit search path list\n" ^
" -i --- interpret on a source-level interpreter\n" ^
" -s --- compile into stack machine code and interpret on the stack machine initerpreter\n" ^
" -dp --- dump AST (the output will be written into .ast file)\n" ^
" -dsrc --- dump pretty-printed source code\n" ^
" -ds --- dump stack machine code (the output will be written into .sm file; has no\n" ^
" effect if -i option is specfied)\n" ^
" -b --- compile to a stack machine bytecode\n" ^
" -v --- show version\n" ^
" -h --- show this help\n"
in
object (self)
val version = ref false
val help = ref false
val i = ref 1
val infile = ref (None : string option)
val outfile = ref (None : string option)
val paths = ref [X86.get_std_path ()]
val mode = ref (`Default : [`Default | `Eval | `SM | `Compile | `BC])
val curdir = Unix.getcwd ()
val debug = ref false
(* Workaround until Ostap starts to memoize properly *)
val const = ref false
(* end of the workaround *)
val dump = ref 0
initializer
let rec loop () =
match self#peek with
| Some opt ->
(match opt with
(* Workaround until Ostap starts to memoize properly *)
| "-w" -> self#set_workaround
(* end of the workaround *)
| "-c" -> self#set_mode `Compile
| "-o" -> (match self#peek with None -> raise (Commandline_error "File name expected after '-o' specifier") | Some fname -> self#set_outfile fname)
| "-I" -> (match self#peek with None -> raise (Commandline_error "Path expected after '-I' specifier") | Some path -> self#add_include_path path)
| "-s" -> self#set_mode `SM
| "-b" -> self#set_mode `BC
| "-i" -> self#set_mode `Eval
| "-ds" -> self#set_dump dump_sm
| "-dsrc" -> self#set_dump dump_source
| "-dp" -> self#set_dump dump_ast
| "-h" -> self#set_help
| "-v" -> self#set_version
| "-g" -> self#set_debug
| _ ->
if opt.[0] = '-'
then raise (Commandline_error (Printf.sprintf "Invalid command line specifier ('%s')" opt))
else self#set_infile opt
);
loop ()
| None -> ()
in loop ()
(* Workaround until Ostap starts to memoize properly *)
method is_workaround = !const
method private set_workaround =
const := true
(* end of the workaround *)
method private set_help = help := true
method private set_version = version := true
method private set_dump mask =
dump := !dump lor mask
method private set_infile name =
match !infile with
| None -> infile := Some name
| Some name' -> raise (Commandline_error (Printf.sprintf "Input file ('%s') already specified" name'))
method private set_outfile name =
match !outfile with
| None -> outfile := Some name
| Some name' -> raise (Commandline_error (Printf.sprintf "Output file ('%s') already specified" name'))
method private add_include_path path =
paths := path :: !paths
method private set_mode s =
match !mode with
| `Default -> mode := s
| _ -> raise (Commandline_error "Extra compilation mode specifier")
method private peek =
let j = !i in
if j < n
then (incr i; Some (args.(j)))
else None
method get_mode = !mode
method get_output_option =
match !outfile with
| None -> Printf.sprintf "-o %s" self#basename
| Some name -> Printf.sprintf "-o %s" name
method get_absolute_infile =
let f = self#get_infile in
if Filename.is_relative f then Filename.concat curdir f else f
method get_infile =
match !infile with
| None -> raise (Commandline_error "Input file not specified")
| Some name -> name
method get_help = !help
method get_include_paths = !paths
method basename = Filename.chop_suffix (Filename.basename self#get_infile) ".lama"
method topname =
match !mode with
| `Compile -> "init" ^ self#basename
| _ -> "main"
method dump_file ext contents =
let name = self#basename in
let outf = open_out (Printf.sprintf "%s.%s" name ext) in
Printf.fprintf outf "%s" contents;
close_out outf
method dump_AST ast =
if (!dump land dump_ast) > 0
then (
let buf = Buffer.create 1024 in
Buffer.add_string buf "<html>";
Buffer.add_string buf (Printf.sprintf "<title> %s </title>" self#get_infile);
Buffer.add_string buf "<body><li>";
GT.html(Language.Expr.t) ast buf;
Buffer.add_string buf "</li></body>";
Buffer.add_string buf "</html>";
self#dump_file "html" (Buffer.contents buf)
)
method dump_source (ast: Language.Expr.t) =
if (!dump land dump_source) > 0
then Pprinter.pp Format.std_formatter ast;
method dump_SM sm =
if (!dump land dump_sm) > 0
then self#dump_file "sm" (SM.show_prg sm)
else ()
method greet =
(match !outfile with
| None -> ()
| Some _ -> (match !mode with `Default -> () | _ -> Printf.printf "Output file option ignored in this mode.\n")
);
if !version then Printf.printf "%s\n" Version.version;
if !help then Printf.printf "%s" help_string
method get_debug =
if !debug then "" else "-g"
method set_debug =
debug := true
end
let main =
let[@ocaml.warning "-32"] main =
try
let cmd = new options Sys.argv in
cmd#greet;
match (try Language.run_parser cmd with Language.Semantic_error msg -> `Fail msg) with
| `Ok prog ->
cmd#dump_AST (snd prog);
cmd#dump_source (snd prog);
(match cmd#get_mode with
| `Default | `Compile ->
ignore @@ X86.build cmd prog
| `BC ->
SM.ByteCode.compile cmd (SM.compile cmd prog)
match
try Language.run_parser cmd
with Language.Semantic_error msg -> `Fail msg
with
| `Ok prog -> (
cmd#dump_AST (snd prog);
cmd#dump_source (snd prog);
match cmd#get_mode with
| `Default | `Compile -> ignore @@ X86_64.build cmd prog
| `BC -> SM.ByteCode.compile cmd (SM.compile cmd prog)
| _ ->
let rec read acc =
try
let r = read_int () in
Printf.printf "> ";
read (acc @ [r])
with End_of_file -> acc
in
let input = read [] in
let output =
if cmd#get_mode = `Eval
then Language.eval prog input
else SM.run (SM.compile cmd prog) input
in
List.iter (fun i -> Printf.printf "%d\n" i) output
)
| `Fail er -> Printf.eprintf "Error: %s\n" er; exit 255
let rec read acc =
try
let r = read_int () in
Printf.printf "> ";
read (acc @ [ r ])
with End_of_file -> acc
in
let input = read [] in
let output =
if cmd#get_mode = `Eval then Language.eval prog input
else SM.run (SM.compile cmd prog) input
in
List.iter (fun i -> Printf.printf "%d\n" i) output)
| `Fail er ->
Printf.eprintf "Error: %s\n" er;
exit 255
with
| Language.Semantic_error msg -> Printf.printf "Error: %s\n" msg; exit 255
| Commandline_error msg -> Printf.printf "%s\n" msg; exit 255
| Language.Semantic_error msg ->
Printf.printf "Error: %s\n" msg;
exit 255
| Commandline_error msg ->
Printf.printf "%s\n" msg;
exit 255

View file

@ -3,6 +3,8 @@
*)
module OrigList = List
[@@@ocaml.warning "-7-8-13-15-20-26-27-32"]
open GT
(* Opening a library for combinator-based syntax analysis *)
@ -55,7 +57,7 @@ module Loc =
let report_error ?(loc=None) str =
raise (Semantic_error (str ^ match loc with None -> "" | Some (l, c) -> Printf.sprintf " at (%d, %d)" l c));;
@type k = Unmut | Mut | FVal with show, html, foldl
(* Values *)
@ -85,7 +87,7 @@ module Value =
with show, html, foldl
let is_int = function Int _ -> true | _ -> false
let to_int = function
| Int n -> n
| x -> failwith (Printf.sprintf "int value expected (%s)\n" (show(t) (fun _ -> "<not supported>") (fun _ -> "<not supported>") x))
@ -114,6 +116,7 @@ module Value =
match x with
| Sexp (_, a) | Array a -> ignore (update_array a i v)
| String a -> ignore (update_string a i (Char.chr @@ to_int v))
| _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
let string_val v =
let buf = Buffer.create 128 in
@ -121,8 +124,7 @@ module Value =
let rec inner = function
| Int n -> append (string_of_int n)
| String s -> append "\""; append @@ Bytes.to_string s; append "\""
| Array a -> let n = Array.length a in
append "["; Array.iteri (fun i a -> (if i > 0 then append ", "); inner a) a; append "]"
| Array a -> append "["; Array.iteri (fun i a -> (if i > 0 then append ", "); inner a) a; append "]"
| Sexp (t, a) -> let n = Array.length a in
if t = "cons"
then (
@ -131,6 +133,7 @@ module Value =
| [||] -> ()
| [|x; Int 0|] -> inner x
| [|x; Sexp ("cons", a)|] -> inner x; append ", "; inner_list a
| _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
in inner_list a;
append "}"
)
@ -139,6 +142,7 @@ module Value =
(if n > 0 then (append " ("; Array.iteri (fun i a -> (if i > 0 then append ", "); inner a) a;
append ")"))
)
| _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
in
inner v;
Bytes.of_string @@ Buffer.contents buf
@ -156,24 +160,27 @@ module Builtin =
let eval (st, i, o, vs) args = function
| "read" -> (match i with z::i' -> (st, i', o, (Value.of_int z)::vs) | _ -> failwith "Unexpected end of input")
| "write" -> (st, i, o @ [Value.to_int @@ List.hd args], Value.Empty :: vs)
| ".elem" -> let [b; j] = args in
(st, i, o, let i = Value.to_int j in
(match b with
| Value.String s -> Value.of_int @@ Char.code (Bytes.get s i)
| Value.Array a -> a.(i)
| Value.Sexp (_, a) -> a.(i)
) :: vs
| ".elem" -> (match args with
| [b; j] -> (st, i, o, let i = Value.to_int j in
(match b with
| Value.String s -> Value.of_int @@ Char.code (Bytes.get s i)
| Value.Array a -> a.(i)
| Value.Sexp (_, a) -> a.(i)
| _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
) :: vs
)
| _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
)
| "length" -> (st, i, o, (Value.of_int (match List.hd args with Value.Sexp (_, a) | Value.Array a -> Array.length a | Value.String s -> Bytes.length s))::vs)
| "length" -> (st, i, o, (Value.of_int (match List.hd args with Value.Sexp (_, a) | Value.Array a -> Array.length a | Value.String s -> Bytes.length s | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)))::vs)
| ".array" -> (st, i, o, (Value.of_array @@ Array.of_list args)::vs)
| "string" -> let [a] = args in (st, i, o, (Value.of_string @@ Value.string_val a)::vs)
| "string" -> (match args with | [a] -> (st, i, o, (Value.of_string @@ Value.string_val a)::vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))
| _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
end
(* States *)
module State =
struct
(* State: global state, local state, scope variables *)
@type 'a t =
| I
@ -273,7 +280,7 @@ module State =
| _ -> L (xs, s, st)
(* Drop a local scope *)
let drop = function L (_, _, e) -> e | G _ -> I
let drop = function L (_, _, e) -> e | G _ -> I | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
(* Observe a variable in a state and print it to stderr *)
let observe st x =
@ -440,19 +447,18 @@ module Expr =
let seq x = function Skip -> x | y -> Seq (x, y)
let schedule_list h::tl =
List.fold_left seq h tl
let schedule_list = function h::tl -> List.fold_left seq h tl | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
let rec take = function
| 0 -> fun rest -> [], rest
| n -> fun h::tl -> let tl', rest = take (n-1) tl in h :: tl', rest
| n -> function h::tl -> let tl', rest = take (n-1) tl in h :: tl', rest | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
let rec eval ((st, i, o, vs) as conf) k expr =
let print_values vs =
(* let print_values vs =
Printf.eprintf "Values:\n%!";
List.iter (fun v -> Printf.eprintf "%s\n%!" @@ show(Value.t) (fun _ -> "<expr>") (fun _ -> "<state>") v) vs;
Printf.eprintf "End Values\n%!"
in
in *)
match expr with
| Lambda (args, body) ->
eval (st, i, o, Value.Closure (args, body, [|st|]) :: vs) Skip k
@ -500,73 +506,78 @@ module Expr =
| Sexp (t, xs) ->
eval conf k (schedule_list (xs @ [Intrinsic (fun (st, i, o, vs) -> let es, vs' = take (List.length xs) vs in (st, i, o, Value.Sexp (t, Array.of_list (List.rev es)) :: vs'))]))
| Binop (op, x, y) ->
eval conf k (schedule_list [x; y; Intrinsic (fun (st, i, o, y::x::vs) -> (st, i, o, (Value.of_int @@ to_func op (Value.to_int x) (Value.to_int y)) :: vs))])
eval conf k (schedule_list [x; y; Intrinsic (function (st, i, o, y::x::vs) -> (st, i, o, (Value.of_int @@ to_func op (Value.to_int x) (Value.to_int y)) :: vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))])
| Elem (b, i) ->
eval conf k (schedule_list [b; i; Intrinsic (fun (st, i, o, j::b::vs) -> Builtin.eval (st, i, o, vs) [b; j] ".elem")])
eval conf k (schedule_list [b; i; Intrinsic (function (st, i, o, j::b::vs) -> Builtin.eval (st, i, o, vs) [b; j] ".elem" | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))])
| ElemRef (b, i) ->
eval conf k (schedule_list [b; i; Intrinsic (fun (st, i, o, j::b::vs) -> (st, i, o, (Value.Elem (b, Value.to_int j))::vs))])
eval conf k (schedule_list [b; i; Intrinsic (function (st, i, o, j::b::vs) -> (st, i, o, (Value.Elem (b, Value.to_int j))::vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))])
| Call (f, args) ->
eval conf k (schedule_list (f :: args @ [Intrinsic (fun (st, i, o, vs) ->
let es, vs' = take (List.length args + 1) vs in
let f :: es = List.rev es in
(match f with
| Value.Builtin name ->
Builtin.eval (st, i, o, vs') es name
| Value.Closure (args, body, closure) ->
let st' = State.push (State.leave st closure.(0)) (State.from_list @@ List.combine args es) (List.map (fun x -> x, Mut) args) in
let st'', i', o', vs'' = eval (st', i, o, []) Skip body in
closure.(0) <- st'';
(State.leave st'' st, i', o', match vs'' with [v] -> v::vs' | _ -> Value.Empty :: vs')
| _ -> report_error (Printf.sprintf "callee did not evaluate to a function: \"%s\"" (show(Value.t) (fun _ -> "<expr>") (fun _ -> "<state>") f))
))]))
match List.rev es with
| f :: es ->
(match f with
| Value.Builtin name ->
Builtin.eval (st, i, o, vs') es name
| Value.Closure (args, body, closure) ->
let st' = State.push (State.leave st closure.(0)) (State.from_list @@ List.combine args es) (List.map (fun x -> x, Mut) args) in
let st'', i', o', vs'' = eval (st', i, o, []) Skip body in
closure.(0) <- st'';
(State.leave st'' st, i', o', match vs'' with [v] -> v::vs' | _ -> Value.Empty :: vs')
| _ -> report_error (Printf.sprintf "callee did not evaluate to a function: \"%s\"" (show(Value.t) (fun _ -> "<expr>") (fun _ -> "<state>") f))
)
| _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
)]))
| Leave -> eval (State.drop st, i, o, vs) Skip k
| Assign (x, e) ->
eval conf k (schedule_list [x; e; Intrinsic (fun (st, i, o, v::x::vs) -> (update st x v, i, o, v::vs))])
eval conf k (schedule_list [x; e; Intrinsic (function (st, i, o, v::x::vs) -> (update st x v, i, o, v::vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))])
| Seq (s1, s2) ->
eval conf (seq s2 k) s1
| Skip ->
(match k with Skip -> conf | _ -> eval conf Skip k)
| If (e, s1, s2) ->
eval conf k (schedule_list [e; Control (fun (st, i, o, e::vs) -> (if Value.to_int e <> 0 then s1 else s2), (st, i, o, vs))])
eval conf k (schedule_list [e; Control (function (st, i, o, e::vs) -> (if Value.to_int e <> 0 then s1 else s2), (st, i, o, vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))])
| While (e, s) ->
eval conf k (schedule_list [e; Control (fun (st, i, o, e::vs) -> (if Value.to_int e <> 0 then seq s expr else Skip), (st, i, o, vs))])
eval conf k (schedule_list [e; Control (function (st, i, o, e::vs) -> (if Value.to_int e <> 0 then seq s expr else Skip), (st, i, o, vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))])
| DoWhile (s, e) ->
eval conf (seq (While (e, s)) k) s
| Case (e, bs, _, _)->
let rec branch ((st, i, o, v::vs) as conf) = function
| [] -> failwith (Printf.sprintf "Pattern matching failed: no branch is selected while matching %s\n" (show(Value.t) (fun _ -> "<expr>") (fun _ -> "<state>") v))
| (patt, body)::tl ->
let rec match_patt patt v st =
let update x v = function
| None -> None
| Some s -> Some (State.bind x v s)
in
match patt, v with
| Pattern.Named (x, p), v -> update x v (match_patt p v st )
| Pattern.Wildcard , _ -> st
| Pattern.Sexp (t, ps), Value.Sexp (t', vs) when t = t' && List.length ps = Array.length vs -> match_list ps (Array.to_list vs) st
| Pattern.Array ps , Value.Array vs when List.length ps = Array.length vs -> match_list ps (Array.to_list vs) st
| Pattern.Const n , Value.Int n' when n = n' -> st
| Pattern.String s , Value.String s' when s = Bytes.to_string s' -> st
| Pattern.Boxed , Value.String _
| Pattern.Boxed , Value.Array _
| Pattern.UnBoxed , Value.Int _
| Pattern.Boxed , Value.Sexp (_, _)
| Pattern.StringTag , Value.String _
| Pattern.ArrayTag , Value.Array _
| Pattern.ClosureTag , Value.Closure _
| Pattern.SexpTag , Value.Sexp (_, _) -> st
| _ -> None
and match_list ps vs s =
match ps, vs with
| [], [] -> s
| p::ps, v::vs -> match_list ps vs (match_patt p v s)
| _ -> None
in
match match_patt patt v (Some State.undefined) with
| None -> branch conf tl
| Some st' -> eval (State.push st st' (List.map (fun x -> x, Unmut) @@ Pattern.vars patt), i, o, vs) k (Seq (body, Leave))
let rec branch =
function (_,_,_,[]) -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
| ((st, i, o, v::vs) as conf) -> function
| [] -> failwith (Printf.sprintf "Pattern matching failed: no branch is selected while matching %s\n" (show(Value.t) (fun _ -> "<expr>") (fun _ -> "<state>") v))
| (patt, body)::tl ->
let rec match_patt patt v st =
let update x v = function
| None -> None
| Some s -> Some (State.bind x v s)
in
match patt, v with
| Pattern.Named (x, p), v -> update x v (match_patt p v st )
| Pattern.Wildcard , _ -> st
| Pattern.Sexp (t, ps), Value.Sexp (t', vs) when t = t' && List.length ps = Array.length vs -> match_list ps (Array.to_list vs) st
| Pattern.Array ps , Value.Array vs when List.length ps = Array.length vs -> match_list ps (Array.to_list vs) st
| Pattern.Const n , Value.Int n' when n = n' -> st
| Pattern.String s , Value.String s' when s = Bytes.to_string s' -> st
| Pattern.Boxed , Value.String _
| Pattern.Boxed , Value.Array _
| Pattern.UnBoxed , Value.Int _
| Pattern.Boxed , Value.Sexp (_, _)
| Pattern.StringTag , Value.String _
| Pattern.ArrayTag , Value.Array _
| Pattern.ClosureTag , Value.Closure _
| Pattern.SexpTag , Value.Sexp (_, _) -> st
| _ -> None
and match_list ps vs s =
match ps, vs with
| [], [] -> s
| p::ps, v::vs -> match_list ps vs (match_patt p v s)
| _ -> None
in
match match_patt patt v (Some State.undefined) with
| None -> branch conf tl
| Some st' -> eval (State.push st st' (List.map (fun x -> x, Unmut) @@ Pattern.vars patt), i, o, vs) k (Seq (body, Leave))
in
eval conf Skip (schedule_list [e; Intrinsic (fun conf -> branch conf bs)])
@ -593,6 +604,7 @@ module Expr =
match s with
| ":" -> Sexp ("cons", [x; y])
| ":=" -> Assign (x, y)
| "=" -> Binop ("==", Call (Var ("compare"), [x; y]), Const (0))
| _ -> Binop (s, x, y)
in
match x with
@ -635,16 +647,21 @@ module Expr =
let not_a_reference s = new Reason.t (Msg.make "not a reference" [||] (Msg.Locator.Point s#coord))
(* UGLY! *)
let predefined_op : (Obj.t -> Obj.t -> Obj.t) ref = Pervasives.ref (fun _ _ -> invalid_arg "must not happen")
let predefined_op : (Obj.t -> Obj.t -> Obj.t) ref = Stdlib.ref (fun _ _ -> invalid_arg "must not happen")
let defCell = Pervasives.ref 0
let defCell = Stdlib.ref 0
(* ======= *)
let makeParsers env =
let makeParser, makeBasicParser, makeScopeParser =
let def s = let Some def = Obj.magic !defCell in def s in
let [@ocaml.warning "-26"] makeParser, makeBasicParser, makeScopeParser =
let [@ocaml.warning "-20"] def s = let [@ocaml.warning "-8"] Some def = Obj.magic !defCell in def s in
let ostap (
parse[infix][atr]: h:basic[infix][Void] -";" t:parse[infix][atr] {Seq (h, t)} | basic[infix][atr];
(* parse[infix][atr]: h:basic[infix][Void] -";" t:parse[infix][atr] {Seq (h, t)} | basic[infix][atr]; *)
parse[infix][atr]:
%"let" l:$ pat:(!(Pattern.parse) -"=") e:parse[infix][Val] %"in" body:parse[infix][atr] {Case (e, [(pat, body)], l#coord, atr )}
| h:basic[infix][Void] -";" t:parse[infix][atr] {Seq (h, t)}
| basic[infix][atr];
scope[infix][atr]: <(d, infix')> : def[infix] expr:parse[infix'][atr] {Scope (d, expr)} | {isVoid atr} => <(d, infix')> : def[infix] => {d <> []} => {Scope (d, materialize atr Skip)};
basic[infix][atr]: !(expr (fun x -> x) (Array.map (fun (a, (atr, l)) -> a, (atr, List.map (fun (s, _, f) -> ostap (- $(s)), f) l)) infix) (primary infix) atr);
primary[infix][atr]:
@ -771,6 +788,10 @@ module Expr =
Scope (defs, DoWhile (s, e))
| _ -> DoWhile (s, e)
}
(* Let-in as expression doesn't work due to lack of greed; In case of expressions we need closing "ni" *)
(* | %"let" l:$ pat:!(Pattern.parse) %"be" e:parse[infix][Val] %"in" body:scope[infix][atr] %"ni" {Case (e, [(pat, body)], l#coord, atr)} *)
(* | %"let" l:$ pat:(!(Pattern.parse) -"=") e:parse[infix][Val] %"in" body:scope[infix][atr] {Case (e, [(pat, body)], l#coord, Val )} *)
(* | %"let" l:$ pat:(!(Pattern.parse) -"=") e:parse[infix][Val] %"in" body:parse[infix][atr] {Case (e, [(pat, body)], l#coord, Val )} *)
| %"case" l:$ e:parse[infix][Val] %"of" bs:!(Util.listBy)[ostap ("|")][ostap (!(Pattern.parse) -"->" scope[infix][atr])] %"esac"{Case (e, bs, l#coord, atr)}
| l:$ %"lazy" e:basic[infix][Val] => {notRef atr} :: (not_a_reference l) => {env#add_import "Lazy"; ignore atr (Call (Var "makeLazy", [Lambda ([], e)]))}
| l:$ %"eta" e:basic[infix][Val] => {notRef atr} :: (not_a_reference l) => {let name = env#get_tmp in ignore atr (Lambda ([name], Call (e, [Var name])))}
@ -872,7 +893,7 @@ module Infix =
show(showable) @@ Array.map (fun (ass, (_, l)) -> List.map (fun (str, kind, _) -> ass, str, kind) l) infix
let extract_exports infix =
let ass_string = function `Lefta -> "L" | `Righta -> "R" | _ -> "I" in
(* let ass_string = function `Lefta -> "L" | `Righta -> "R" | _ -> "I" in *)
let exported =
Array.map
(fun (ass, (_, ops)) ->
@ -901,7 +922,7 @@ module Infix =
in List.rev exports
let is_predefined op =
List.exists (fun x -> op = x) [":"; "!!"; "&&"; "=="; "!="; "<="; "<"; ">="; ">"; "+"; "-"; "*" ; "/"; "%"; ":="]
List.exists (fun x -> op = x) [":"; "!!"; "&&"; "="; "=="; "!="; "<="; "<"; ">="; ">"; "+"; "-"; "*" ; "/"; "%"; ":="]
(*
List.iter (fun op ->
@ -925,11 +946,11 @@ module Infix =
[|
`Righta, [":="];
`Righta, [":"];
`Lefta , ["!!"];
`Lefta , ["&&"];
`Nona , ["=="; "!="; "<="; "<"; ">="; ">"];
`Lefta , ["+" ; "-"];
`Lefta , ["*" ; "/"; "%"];
`Lefta , ["!!"];
`Lefta , ["&&"];
`Nona , ["=";"=="; "!="; "<="; "<"; ">="; ">"];
`Lefta , ["+" ; "-"];
`Lefta , ["*" ; "/"; "%"];
|]
exception Break of [`Ok of t | `Fail of string]
@ -1013,7 +1034,7 @@ module Definition =
(* end of the workaround *)
)
let makeParser env exprBasic exprScope =
let [@ocaml.warning "-26"] makeParser env exprBasic exprScope =
let ostap (
arg : l:$ x:LIDENT {Loc.attach x l#coord; x};
position[pub][ass][coord][newp]:
@ -1107,7 +1128,7 @@ module Interface =
Buffer.contents buf
(* Read an interface file *)
let read fname =
let [@ocaml.warning "-26"] read fname =
let ostap (
funspec: "F" "," i:IDENT ";" {`Fun i};
varspec: "V" "," i:IDENT ";" {`Variable i};
@ -1201,8 +1222,8 @@ ostap (
let parse cmd =
let env =
object
val imports = Pervasives.ref ([] : string list)
val tmp_index = Pervasives.ref 0
val imports = Stdlib.ref ([] : string list)
val tmp_index = Stdlib.ref 0
method add_import imp = imports := imp :: !imports
method get_tmp = let index = !tmp_index in incr tmp_index; Printf.sprintf "__tmp%d" index
@ -1223,7 +1244,7 @@ let parse cmd =
definitions
in
let definitions = Pervasives.ref None in
let definitions = Stdlib.ref None in
let (makeParser, makeBasicParser, makeScopeParser) = Expr.makeParsers env in
@ -1233,7 +1254,7 @@ let parse cmd =
definitions := Some (makeDefinitions env exprBasic exprScope);
let Some definitions = !definitions in
let [@ocaml.warning "-8-20"] Some definitions = !definitions in
let ostap (
parse[cmd]:
@ -1252,10 +1273,11 @@ let run_parser cmd =
let kws = [
"skip";
"if"; "then"; "else"; "elif"; "fi";
"let"; "in";
"while"; "do"; "od";
"for";
"fun"; "var"; "public"; "external"; "import";
"case"; "of"; "esac";
"case"; "of"; "esac";
"box"; "val"; "str"; "sexp"; "array";
"infix"; "infixl"; "infixr"; "at"; "before"; "after";
"true"; "false"; "lazy"; "eta"; "syntax"]

View file

@ -8,7 +8,9 @@ PXFLAGS = $(CAMLP5)
BFLAGS = -rectypes -g -w -13-58 -package GT,ostap,unix
OFLAGS = $(BFLAGS)
all: depend metagen $(TOPFILE)
all: # depend metagen # $(TOPFILE)
dune build ./Driver.exe
ln -sf ../_build/default/src/Driver.exe lamac
metagen:
echo "let version = \"Version `git rev-parse --abbrev-ref HEAD`, `git rev-parse --short HEAD`, `git rev-parse --verify HEAD |git show --no-patch --no-notes --pretty='%cd'`\"" > version.ml
@ -25,6 +27,7 @@ $(TOPFILE).byte: $(SOURCES:.ml=.cmo)
clean:
$(RM) $(TOPFILE) *.cm[ioxa] *.annot *.o *.opt *.byte *~ .depend
dune clean
-include .depend
# generic rules
@ -44,4 +47,4 @@ clean:
$(OCAMLOPT) -c $(OFLAGS) $(STATIC) $(PXFLAGS) $<
%.cmx: %.ml
$(OCAMLOPT) -c $(OFLAGS) $(STATIC) $(PXFLAGS) $<
$(OCAMLOPT) -c $(OFLAGS) $(STATIC) $(PXFLAGS) $<

205
src/Options.ml Normal file
View file

@ -0,0 +1,205 @@
exception Commandline_error of string
type os_t = Linux | Darwin
class options args =
let n = Array.length args in
let dump_ast = 0b1 in
let dump_sm = 0b010 in
let dump_source = 0b100 in
(* Kakadu: binary masks are cool for C code, but for OCaml I don't see any reason to save memory like this *)
let runtime_path_ =
match Sys.getenv_opt "LAMA" with Some s -> s | None -> Stdpath.path
in
let host_os =
let uname = Posix_uname.uname () in
match uname.sysname with
| "Darwin" -> Darwin
| "Linux" -> Linux
| _ -> failwith "Unsupported OS"
in
let help_string =
"Lama compiler. (C) JetBrains Reserach, 2017-2024.\n"
^ "Usage: lamac <options> <input file>\n\n"
^ "When no options specified, builds the source file into executable.\n"
^ "Options:\n" ^ " -c --- compile into object file\n"
^ " -o <file> --- write executable into file <file>\n"
^ " -I <path> --- add <path> into unit search path list\n"
^ " -i --- interpret on a source-level interpreter\n"
^ " -s --- compile into stack machine code and interpret on the \
stack machine initerpreter\n"
^ " -g --- add more debug info and runtime checks\n"
^ " -dp --- dump AST (the output will be written into .ast file)\n"
^ " -dsrc --- dump pretty-printed source code\n"
^ " -ds --- dump stack machine code (the output will be written \
into .sm file; has no\n"
^ " effect if -i option is specfied)\n"
^ " -b --- compile to a stack machine bytecode\n"
^ " -v --- show version\n" ^ " -h --- show this help\n"
in
object (self)
val version = ref false
val help = ref false
val i = ref 1
val infile = ref (None : string option)
val outfile = ref (None : string option)
val runtime_path = runtime_path_
val paths = ref [ runtime_path_ ]
val mode = ref (`Default : [ `Default | `Eval | `SM | `Compile | `BC ])
val curdir = Unix.getcwd ()
val debug = ref false
val target_os = host_os
(* Workaround until Ostap starts to memoize properly *)
val const = ref false
(* end of the workaround *)
val dump = ref 0
initializer
let set_debug () = debug := true in
let rec loop () =
match self#peek with
| Some opt ->
(match opt with
(* Workaround until Ostap starts to memoize properly *)
| "-w" -> self#set_workaround
(* end of the workaround *)
| "-c" -> self#set_mode `Compile
| "-o" -> (
match self#peek with
| None ->
raise
(Commandline_error
"File name expected after '-o' specifier")
| Some fname -> self#set_outfile fname)
| "-I" -> (
match self#peek with
| None ->
raise
(Commandline_error "Path expected after '-I' specifier")
| Some path -> self#add_include_path path)
| "-s" -> self#set_mode `SM
| "-b" -> self#set_mode `BC
| "-i" -> self#set_mode `Eval
| "-ds" -> self#set_dump dump_sm
| "-dsrc" -> self#set_dump dump_source
| "-dp" -> self#set_dump dump_ast
| "-h" -> self#set_help
| "-v" -> self#set_version
| "-g" -> set_debug ()
| _ ->
if opt.[0] = '-' then
raise
(Commandline_error
(Printf.sprintf "Invalid command line specifier ('%s')"
opt))
else self#set_infile opt);
loop ()
| None -> ()
in
loop ()
(* Workaround until Ostap starts to memoize properly *)
method is_workaround = !const
method private set_workaround = const := true
(* end of the workaround *)
method private set_help = help := true
method private set_version = version := true
method private set_dump mask = dump := !dump lor mask
method private set_infile name =
match !infile with
| None -> infile := Some name
| Some name' ->
raise
(Commandline_error
(Printf.sprintf "Input file ('%s') already specified" name'))
method private set_outfile name =
match !outfile with
| None -> outfile := Some name
| Some name' ->
raise
(Commandline_error
(Printf.sprintf "Output file ('%s') already specified" name'))
method private add_include_path path = paths := path :: !paths
method private set_mode s =
match !mode with
| `Default -> mode := s
| _ -> raise (Commandline_error "Extra compilation mode specifier")
method private peek =
let j = !i in
if j < n then (
incr i;
Some args.(j))
else None
method get_mode = !mode
method get_output_option =
match !outfile with
| None -> Printf.sprintf "-o %s" self#basename
| Some name -> Printf.sprintf "-o %s" name
method get_absolute_infile =
let f = self#get_infile in
if Filename.is_relative f then Filename.concat curdir f else f
method get_infile =
match !infile with
| None -> raise (Commandline_error "Input file not specified")
| Some name -> name
method get_help = !help
method get_include_paths = !paths
method get_runtime_path = runtime_path
method basename =
Filename.chop_suffix (Filename.basename self#get_infile) ".lama"
method topname =
match !mode with `Compile -> "init" ^ self#basename | _ -> "main"
method dump_file ext contents =
let name = self#basename in
let outf = open_out (Printf.sprintf "%s.%s" name ext) in
Printf.fprintf outf "%s" contents;
close_out outf
method dump_AST ast =
if !dump land dump_ast > 0 then (
let buf = Buffer.create 1024 in
Buffer.add_string buf "<html>";
Buffer.add_string buf
(Printf.sprintf "<title> %s </title>" self#get_infile);
Buffer.add_string buf "<body><li>";
GT.html Language.Expr.t ast buf;
Buffer.add_string buf "</li></body>";
Buffer.add_string buf "</html>";
self#dump_file "html" (Buffer.contents buf))
method dump_source (ast : Language.Expr.t) =
if !dump land dump_source > 0 then Pprinter.pp Format.std_formatter ast
method dump_SM sm =
if !dump land dump_sm > 0 then self#dump_file "sm" (SM.show_prg sm)
else ()
method greet =
(match !outfile with
| None -> ()
| Some _ -> (
match !mode with
| `Default -> ()
| _ -> Printf.printf "Output file option ignored in this mode.\n"));
if !version then Printf.printf "%s\n" Version.version;
if !help then Printf.printf "%s" help_string
method is_debug = !debug
method target_os = target_os
end

2494
src/SM.ml

File diff suppressed because it is too large Load diff

View file

@ -1,848 +0,0 @@
open GT
open Language
open SM
(* X86 codegeneration interface *)
(* The registers: *)
let regs = [|"%ebx"; "%ecx"; "%esi"; "%edi"; "%eax"; "%edx"; "%ebp"; "%esp"|]
(* We can not freely operate with all register; only 3 by now *)
let num_of_regs = Array.length regs - 5
(* We need to know the word size to calculate offsets correctly *)
let word_size = 4;;
(* We need to distinguish the following operand types: *)
@type opnd =
| R of int (* hard register *)
| S of int (* a position on the hardware stack *)
| C (* a saved closure *)
| M of string (* a named memory location *)
| L of int (* an immediate operand *)
| I of int * opnd (* an indirect operand with offset *)
with show
let show_opnd = show(opnd)
(* For convenience we define the following synonyms for the registers: *)
let ebx = R 0
let ecx = R 1
let esi = R 2
let edi = R 3
let eax = R 4
let edx = R 5
let ebp = R 6
let esp = R 7
(* Now x86 instruction (we do not need all of them): *)
type instr =
(* copies a value from the first to the second operand *) | Mov of opnd * opnd
(* loads an address of the first operand into the second *) | Lea of opnd * opnd
(* makes a binary operation; note, the first operand *) | Binop of string * opnd * opnd
(* designates x86 operator, not the source language one *)
(* x86 integer division, see instruction set reference *) | IDiv of opnd
(* see instruction set reference *) | Cltd
(* sets a value from flags; the first operand is the *) | Set of string * string
(* suffix, which determines the value being set, the *)
(* the second --- (sub)register name *)
(* pushes the operand on the hardware stack *) | Push of opnd
(* pops from the hardware stack to the operand *) | Pop of opnd
(* call a function by a name *) | Call of string
(* call a function by indirect address *) | CallI of opnd
(* returns from a function *) | Ret
(* a label in the code *) | Label of string
(* a conditional jump *) | CJmp of string * string
(* a non-conditional jump *) | Jmp of string
(* directive *) | Meta of string
(* arithmetic correction: decrement *) | Dec of opnd
(* arithmetic correction: or 0x0001 *) | Or1 of opnd
(* arithmetic correction: shl 1 *) | Sal1 of opnd
(* arithmetic correction: shr 1 *) | Sar1 of opnd
| Repmovsl
(* Instruction printer *)
let stack_offset i =
if i >= 0
then (i+1) * word_size
else 8 + (-i-1) * word_size
let show instr =
let rec opnd = function
| R i -> regs.(i)
| C -> "4(%ebp)"
| S i -> if i >= 0
then Printf.sprintf "-%d(%%ebp)" (stack_offset i)
else Printf.sprintf "%d(%%ebp)" (stack_offset i)
| M x -> x
| L i -> Printf.sprintf "$%d" i
| I (0, x) -> Printf.sprintf "(%s)" (opnd x)
| I (n, x) -> Printf.sprintf "%d(%s)" n (opnd x)
in
let binop = function
| "+" -> "addl"
| "-" -> "subl"
| "*" -> "imull"
| "&&" -> "andl"
| "!!" -> "orl"
| "^" -> "xorl"
| "cmp" -> "cmpl"
| "test" -> "test"
| _ -> failwith "unknown binary operator"
in
match instr with
| Cltd -> "\tcltd"
| Set (suf, s) -> Printf.sprintf "\tset%s\t%s" suf s
| IDiv s1 -> Printf.sprintf "\tidivl\t%s" (opnd s1)
| Binop (op, s1, s2) -> Printf.sprintf "\t%s\t%s,\t%s" (binop op) (opnd s1) (opnd s2)
| Mov (s1, s2) -> Printf.sprintf "\tmovl\t%s,\t%s" (opnd s1) (opnd s2)
| Lea (x, y) -> Printf.sprintf "\tleal\t%s,\t%s" (opnd x) (opnd y)
| Push s -> Printf.sprintf "\tpushl\t%s" (opnd s)
| Pop s -> Printf.sprintf "\tpopl\t%s" (opnd s)
| Ret -> "\tret"
| Call p -> Printf.sprintf "\tcall\t%s" p
| CallI o -> Printf.sprintf "\tcall\t*(%s)" (opnd o)
| Label l -> Printf.sprintf "%s:\n" l
| Jmp l -> Printf.sprintf "\tjmp\t%s" l
| CJmp (s , l) -> Printf.sprintf "\tj%s\t%s" s l
| Meta s -> Printf.sprintf "%s\n" s
| Dec s -> Printf.sprintf "\tdecl\t%s" (opnd s)
| Or1 s -> Printf.sprintf "\torl\t$0x0001,\t%s" (opnd s)
| Sal1 s -> Printf.sprintf "\tsall\t%s" (opnd s)
| Sar1 s -> Printf.sprintf "\tsarl\t%s" (opnd s)
| Repmovsl -> Printf.sprintf "\trep movsl\t"
(* Opening stack machine to use instructions without fully qualified names *)
open SM
(* Symbolic stack machine evaluator
compile : env -> prg -> env * instr list
Take an environment, a stack machine program, and returns a pair --- the updated environment and the list
of x86 instructions
*)
let compile cmd env imports code =
(* SM.print_prg code; *)
flush stdout;
let suffix = function
| "<" -> "l"
| "<=" -> "le"
| "==" -> "e"
| "!=" -> "ne"
| ">=" -> "ge"
| ">" -> "g"
| _ -> failwith "unknown operator"
in
let box n = (n lsl 1) lor 1 in
let rec compile' env scode =
let on_stack = function S _ -> true | _ -> false in
let mov x s = if on_stack x && on_stack s then [Mov (x, eax); Mov (eax, s)] else [Mov (x, s)] in
let callc env n tail =
let tail = tail && env#nargs = n in
if tail
then (
let rec push_args env acc = function
| 0 -> env, acc
| n -> let x, env = env#pop in
if x = env#loc (Value.Arg (n-1))
then push_args env acc (n-1)
else push_args env ((mov x (env#loc (Value.Arg (n-1)))) @ acc) (n-1)
in
let env , pushs = push_args env [] n in
let closure, env = env#pop in
let y , env = env#allocate in
env, pushs @ [Mov (closure, edx);
Mov (I(0, edx), eax);
Mov (ebp, esp);
Pop (ebp)] @
(if env#has_closure then [Pop ebx] else []) @
[Jmp "*%eax"] (* UGLY!!! *)
)
else (
let pushr, popr =
List.split @@ List.map (fun r -> (Push r, Pop r)) (env#live_registers n)
in
let pushr, popr = env#save_closure @ pushr, env#rest_closure @ popr in
let env, code =
let rec push_args env acc = function
| 0 -> env, acc
| n -> let x, env = env#pop in
push_args env ((Push x)::acc) (n-1)
in
let env, pushs = push_args env [] n in
let pushs = List.rev pushs in
let closure, env = env#pop in
let call_closure =
if on_stack closure
then [Mov (closure, edx); Mov (edx, eax); CallI eax]
else [Mov (closure, edx); CallI closure]
in
env, pushr @ pushs @ call_closure @ [Binop ("+", L (word_size * List.length pushs), esp)] @ (List.rev popr)
in
let y, env = env#allocate in env, code @ [Mov (eax, y)]
)
in
let call env f n tail =
let tail = tail && env#nargs = n && f.[0] <> '.' in
let f =
match f.[0] with '.' -> "B" ^ String.sub f 1 (String.length f - 1) | _ -> f
in
if tail
then (
let rec push_args env acc = function
| 0 -> env, acc
| n -> let x, env = env#pop in
if x = env#loc (Value.Arg (n-1))
then push_args env acc (n-1)
else push_args env ((mov x (env#loc (Value.Arg (n-1)))) @ acc) (n-1)
in
let env, pushs = push_args env [] n in
let y, env = env#allocate in
env, pushs @ [Mov (ebp, esp); Pop (ebp)] @ (if env#has_closure then [Pop ebx] else []) @ [Jmp f]
)
else (
let pushr, popr =
List.split @@ List.map (fun r -> (Push r, Pop r)) (env#live_registers n)
in
let pushr, popr = env#save_closure @ pushr, env#rest_closure @ popr in
let env, code =
let rec push_args env acc = function
| 0 -> env, acc
| n -> let x, env = env#pop in
push_args env ((Push x)::acc) (n-1)
in
let env, pushs = push_args env [] n in
let pushs =
match f with
| "Barray" -> List.rev @@ (Push (L (box n))) :: pushs
| "Bsexp" -> List.rev @@ (Push (L (box n))) :: pushs
| "Bsta" -> pushs
| _ -> List.rev pushs
in
env, pushr @ pushs @ [Call f; Binop ("+", L (word_size * List.length pushs), esp)] @ (List.rev popr)
in
let y, env = env#allocate in env, code @ [Mov (eax, y)]
)
in
match scode with
| [] -> env, []
| instr :: scode' ->
let stack = "" (* env#show_stack*) in
(* Printf.printf "insn=%s, stack=%s\n%!" (GT.show(insn) instr) (env#show_stack); *)
let env', code' =
if env#is_barrier
then match instr with
| LABEL s -> if env#has_stack s then (env#drop_barrier)#retrieve_stack s, [Label s] else env#drop_stack, []
| FLABEL s -> env#drop_barrier, [Label s]
| SLABEL s -> env, [Label s]
| _ -> env, []
else
match instr with
| PUBLIC name -> env#register_public name, []
| EXTERN name -> env#register_extern name, []
| IMPORT name -> env, []
| CLOSURE (name, closure) ->
let pushr, popr =
List.split @@ List.map (fun r -> (Push r, Pop r)) (env#live_registers 0)
in
let closure_len = List.length closure in
let push_closure =
List.map (fun d -> Push (env#loc d)) @@ List.rev closure
in
let s, env = env#allocate in
(env,
pushr @
push_closure @
[Push (M ("$" ^ name));
Push (L (box closure_len));
Call "Bclosure";
Binop ("+", L (word_size * (closure_len + 2)), esp);
Mov (eax, s)] @
List.rev popr @ env#reload_closure)
| CONST n ->
let s, env' = env#allocate in
(env', [Mov (L (box n), s)])
| STRING s ->
let s, env = env#string s in
let l, env = env#allocate in
let env, call = call env ".string" 1 false in
(env, Mov (M ("$" ^ s), l) :: call)
| LDA x ->
let s, env' = (env #variable x)#allocate in
let s', env''= env'#allocate in
env'',
[Lea (env'#loc x, eax); Mov (eax, s); Mov (eax, s')]
| LD x ->
let s, env' = (env#variable x)#allocate in
env',
(match s with
| S _ | M _ -> [Mov (env'#loc x, eax); Mov (eax, s)]
| _ -> [Mov (env'#loc x, s)]
)
| ST x ->
let env' = env#variable x in
let s = env'#peek in
env',
(match s with
| S _ | M _ -> [Mov (s, eax); Mov (eax, env'#loc x)]
| _ -> [Mov (s, env'#loc x)]
)
| STA ->
call env ".sta" 3 false
| STI ->
let v, x, env' = env#pop2 in
env'#push x,
(match x with
| S _ | M _ -> [Mov (v, edx); Mov (x, eax); Mov (edx, I (0, eax)); Mov (edx, x)] @ env#reload_closure
| _ -> [Mov (v, eax); Mov (eax, I (0, x)); Mov (eax, x)]
)
| BINOP op ->
let x, y, env' = env#pop2 in
env'#push y,
(match op with
| "/" ->
[Mov (y, eax);
Sar1 eax;
Cltd;
(* x := x >> 1 ?? *)
Sar1 x; (*!!!*)
IDiv x;
Sal1 eax;
Or1 eax;
Mov (eax, y)
]
| "%" ->
[Mov (y, eax);
Sar1 eax;
Cltd;
(* x := x >> 1 ?? *)
Sar1 x; (*!!!*)
IDiv x;
Sal1 edx;
Or1 edx;
Mov (edx, y)
] @ env#reload_closure
| "<" | "<=" | "==" | "!=" | ">=" | ">" ->
(match x with
| M _ | S _ ->
[Binop ("^", eax, eax);
Mov (x, edx);
Binop ("cmp", edx, y);
Set (suffix op, "%al");
Sal1 eax;
Or1 eax;
Mov (eax, y)
] @ env#reload_closure
| _ ->
[Binop ("^" , eax, eax);
Binop ("cmp", x, y);
Set (suffix op, "%al");
Sal1 eax;
Or1 eax;
Mov (eax, y)
]
)
| "*" ->
if on_stack y
then [Dec y; Mov (x, eax); Sar1 eax; Binop (op, y, eax); Or1 eax; Mov (eax, y)]
else [Dec y; Mov (x, eax); Sar1 eax; Binop (op, eax, y); Or1 y]
| "&&" ->
[Dec x; (*!!!*)
Mov (x, eax);
Binop (op, x, eax);
Mov (L 0, eax);
Set ("ne", "%al");
Dec y; (*!!!*)
Mov (y, edx);
Binop (op, y, edx);
Mov (L 0, edx);
Set ("ne", "%dl");
Binop (op, edx, eax);
Set ("ne", "%al");
Sal1 eax;
Or1 eax;
Mov (eax, y)
] @ env#reload_closure
| "!!" ->
[Mov (y, eax);
Sar1 eax;
Sar1 x; (*!!!*)
Binop (op, x, eax);
Mov (L 0, eax);
Set ("ne", "%al");
Sal1 eax;
Or1 eax;
Mov (eax, y)
]
| "+" ->
if on_stack x && on_stack y
then [Mov (x, eax); Dec eax; Binop ("+", eax, y)]
else [Binop (op, x, y); Dec y]
| "-" ->
if on_stack x && on_stack y
then [Mov (x, eax); Binop (op, eax, y); Or1 y]
else [Binop (op, x, y); Or1 y]
)
| LABEL s
| FLABEL s
| SLABEL s -> env, [Label s]
| JMP l -> (env#set_stack l)#set_barrier, [Jmp l]
| CJMP (s, l) ->
let x, env = env#pop in
env#set_stack l, [Sar1 x; (*!!!*) Binop ("cmp", L 0, x); CJmp (s, l)]
| BEGIN (f, nargs, nlocals, closure, args, scopes) ->
let rec stabs_scope scope =
let names =
List.map
(fun (name, index) ->
Meta (Printf.sprintf "\t.stabs \"%s:1\",128,0,0,-%d" name (stack_offset index))
)
scope.names
in
names @
(if names = [] then [] else [Meta (Printf.sprintf "\t.stabn 192,0,0,%s-%s" scope.blab f)]) @
(List.flatten @@ List.map stabs_scope scope.subs) @
(if names = [] then [] else [Meta (Printf.sprintf "\t.stabn 224,0,0,%s-%s" scope.elab f)])
in
let name =
if f.[0] = 'L' then String.sub f 1 (String.length f - 1) else f
in
env#assert_empty_stack;
let has_closure = closure <> [] in
let env = env#enter f nargs nlocals has_closure in
env, [Meta (Printf.sprintf "\t.type %s, @function" name)] @
(if f = "main"
then []
else
[Meta (Printf.sprintf "\t.stabs \"%s:F1\",36,0,0,%s" name f)] @
(List.mapi (fun i a -> Meta (Printf.sprintf "\t.stabs \"%s:p1\",160,0,0,%d" a ((i*4) + 8))) args) @
(List.flatten @@ List.map stabs_scope scopes)
)
@
[Meta "\t.cfi_startproc"] @
(if has_closure then [Push edx] else []) @
(if f = cmd#topname
then
[Mov (M "_init", eax);
Binop ("test", eax, eax);
CJmp ("z", "_continue");
Ret;
Label "_continue";
Mov (L 1, M "_init");
]
else []
) @
[Push ebp;
Meta ("\t.cfi_def_cfa_offset\t" ^ if has_closure then "12" else "8");
Meta ("\t.cfi_offset 5, -" ^ if has_closure then "12" else "8");
Mov (esp, ebp);
Meta "\t.cfi_def_cfa_register\t5";
Binop ("-", M ("$" ^ env#lsize), esp);
Mov (esp, edi);
Mov (M "$filler", esi);
Mov (M ("$" ^ (env#allocated_size)), ecx);
Repmovsl
] @
(if f = "main"
then [Call "__gc_init"; Push (I (12, ebp)); Push (I (8, ebp)); Call "set_args"; Binop ("+", L 8, esp)]
else []
) @
(if f = cmd#topname
then List.map (fun i -> Call ("init" ^ i)) (List.filter (fun i -> i <> "Std") imports)
else []
)
| END ->
let x, env = env#pop in
env#assert_empty_stack;
let name = env#fname in
env#leave, [
Mov (x, eax); (*!!*)
Label env#epilogue;
Mov (ebp, esp);
Pop ebp;
] @
env#rest_closure @
(if name = "main" then [Binop ("^", eax, eax)] else []) @
[Meta "\t.cfi_restore\t5";
Meta "\t.cfi_def_cfa\t4, 4";
Ret;
Meta "\t.cfi_endproc";
Meta (Printf.sprintf "\t.set\t%s,\t%d" env#lsize (env#allocated * word_size));
Meta (Printf.sprintf "\t.set\t%s,\t%d" env#allocated_size env#allocated);
Meta (Printf.sprintf "\t.size %s, .-%s" name name);
]
| RET ->
let x = env#peek in
env, [Mov (x, eax); Jmp env#epilogue]
| ELEM -> call env ".elem" 2 false
| CALL (f, n, tail) -> call env f n tail
| CALLC (n, tail) -> callc env n tail
| SEXP (t, n) ->
let s, env = env#allocate in
let env, code = call env ".sexp" (n+1) false in
env, [Mov (L (box (env#hash t)), s)] @ code
| DROP ->
snd env#pop, []
| DUP ->
let x = env#peek in
let s, env = env#allocate in
env, mov x s
| SWAP ->
let x, y = env#peek2 in
env, [Push x; Push y; Pop x; Pop y]
| TAG (t, n) ->
let s1, env = env#allocate in
let s2, env = env#allocate in
let env, code = call env ".tag" 3 false in
env, [Mov (L (box (env#hash t)), s1); Mov (L (box n), s2)] @ code
| ARRAY n ->
let s, env = env#allocate in
let env, code = call env ".array_patt" 2 false in
env, [Mov (L (box n), s)] @ code
| PATT StrCmp -> call env ".string_patt" 2 false
| PATT patt ->
call env
(match patt with
| Boxed -> ".boxed_patt"
| UnBoxed -> ".unboxed_patt"
| Array -> ".array_tag_patt"
| String -> ".string_tag_patt"
| Sexp -> ".sexp_tag_patt"
| Closure -> ".closure_tag_patt"
) 1 false
| LINE (line) ->
env#gen_line line
| FAIL ((line, col), value) ->
let v, env = if value then env#peek, env else env#pop in
let s, env = env#string cmd#get_infile in
env, [Push (L (box col)); Push (L (box line)); Push (M ("$" ^ s)); Push v; Call "Bmatch_failure"; Binop ("+", L (4 * word_size), esp)]
| i ->
invalid_arg (Printf.sprintf "invalid SM insn: %s\n" (GT.show(insn) i))
in
let env'', code'' = compile' env' scode' in
env'', [Meta (Printf.sprintf "# %s / % s" (GT.show(SM.insn) instr) stack)] @ code' @ code''
in
compile' env code
(* A set of strings *)
module S = Set.Make (String)
(* A map indexed by strings *)
module M = Map.Make (String)
(* Environment implementation *)
class env prg =
let chars = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'" in
let make_assoc l i = List.combine l (List.init (List.length l) (fun x -> x + i)) in
let rec assoc x = function [] -> raise Not_found | l :: ls -> try List.assoc x l with Not_found -> assoc x ls in
object (self)
inherit SM.indexer prg
val globals = S.empty (* a set of global variables *)
val stringm = M.empty (* a string map *)
val scount = 0 (* string count *)
val stack_slots = 0 (* maximal number of stack positions *)
val static_size = 0 (* static data size *)
val stack = [] (* symbolic stack *)
val nargs = 0 (* number of function arguments *)
val locals = [] (* function local variables *)
val fname = "" (* function name *)
val stackmap = M.empty (* labels to stack map *)
val barrier = false (* barrier condition *)
val max_locals_size = 0
val has_closure = false
val publics = S.empty
val externs = S.empty
val nlabels = 0
val first_line = true
method publics = S.elements publics
method register_public name = {< publics = S.add name publics >}
method register_extern name = {< externs = S.add name externs >}
method max_locals_size = max_locals_size
method has_closure = has_closure
method save_closure =
if has_closure then [Push edx] else []
method rest_closure =
if has_closure then [Pop edx] else []
method reload_closure =
if has_closure then [Mov (C (*S 0*), edx)] else []
method fname = fname
method leave =
if stack_slots > max_locals_size
then {< max_locals_size = stack_slots >}
else self
method show_stack =
GT.show(list) (GT.show(opnd)) stack
method print_locals =
Printf.printf "LOCALS: size = %d\n" static_size;
List.iter
(fun l ->
Printf.printf "(";
List.iter (fun (a, i) -> Printf.printf "%s=%d " a i) l;
Printf.printf ")\n"
) locals;
Printf.printf "END LOCALS\n"
(* Assert empty stack *)
method assert_empty_stack = assert (stack = [])
(* check barrier condition *)
method is_barrier = barrier
(* set barrier *)
method set_barrier = {< barrier = true >}
(* drop barrier *)
method drop_barrier = {< barrier = false >}
(* drop stack *)
method drop_stack = {< stack = [] >}
(* associates a stack to a label *)
method set_stack l = (*Printf.printf "Setting stack for %s\n" l;*)
{< stackmap = M.add l stack stackmap >}
(* retrieves a stack for a label *)
method retrieve_stack l = (*Printf.printf "Retrieving stack for %s\n" l;*)
try {< stack = M.find l stackmap >} with Not_found -> self
(* checks if there is a stack for a label *)
method has_stack l = (*Printf.printf "Retrieving stack for %s\n" l;*)
M.mem l stackmap
(* gets a name for a global variable *)
method loc x =
match x with
| Value.Global name -> M ("global_" ^ name)
| Value.Fun name -> M ("$" ^ name)
| Value.Local i -> S i
| Value.Arg i -> S (- (i + if has_closure then 2 else 1))
| Value.Access i -> I (word_size * (i+1), edx)
(* allocates a fresh position on a symbolic stack *)
method allocate =
let x, n =
let rec allocate' = function
| [] -> ebx , 0
| (S n)::_ -> S (n+1) , n+2
| (R n)::_ when n < num_of_regs -> R (n+1) , stack_slots
| _ -> S static_size, static_size+1
in
allocate' stack
in
x, {< stack_slots = max n stack_slots; stack = x::stack >}
(* pushes an operand to the symbolic stack *)
method push y = {< stack = y::stack >}
(* pops one operand from the symbolic stack *)
method pop = let x::stack' = stack in x, {< stack = stack' >}
(* pops two operands from the symbolic stack *)
method pop2 = let x::y::stack' = stack in x, y, {< stack = stack' >}
(* peeks the top of the stack (the stack does not change) *)
method peek = List.hd stack
(* peeks two topmost values from the stack (the stack itself does not change) *)
method peek2 = let x::y::_ = stack in x, y
(* tag hash: gets a hash for a string tag *)
method hash tag =
let h = Pervasives.ref 0 in
for i = 0 to min (String.length tag - 1) 4 do
h := (!h lsl 6) lor (String.index chars tag.[i])
done;
!h
(* registers a variable in the environment *)
method variable x =
match x with
| Value.Global name -> {< globals = S.add ("global_" ^ name) globals >}
| _ -> self
(* registers a string constant *)
method string x =
let escape x =
let n = String.length x in
let buf = Buffer.create (n*2) in
let rec iterate i =
if i < n
then (
(match x.[i] with
| '"' -> Buffer.add_string buf "\\\""
| '\n' -> Buffer.add_string buf "\n"
| '\t' -> Buffer.add_string buf "\t"
| c -> Buffer.add_char buf c
);
iterate (i+1)
)
in
iterate 0;
Buffer.contents buf
in
let x = escape x in
try M.find x stringm, self
with Not_found ->
let y = Printf.sprintf "string_%d" scount in
let m = M.add x y stringm in
y, {< scount = scount + 1; stringm = m>}
(* gets number of arguments in the current function *)
method nargs = nargs
(* gets all global variables *)
method globals = S.elements (S.diff globals externs)
(* gets all string definitions *)
method strings = M.bindings stringm
(* gets a number of stack positions allocated *)
method allocated = stack_slots
method allocated_size = Printf.sprintf "LS%s_SIZE" fname
(* enters a function *)
method enter f nargs nlocals has_closure =
{< nargs = nargs; static_size = nlocals; stack_slots = nlocals; stack = []; fname = f; has_closure = has_closure; first_line = true >}
(* returns a label for the epilogue *)
method epilogue = Printf.sprintf "L%s_epilogue" fname
(* returns a name for local size meta-symbol *)
method lsize = Printf.sprintf "L%s_SIZE" fname
(* returns a list of live registers *)
method live_registers depth =
let rec inner d acc = function
| [] -> acc
| (R _ as r)::tl -> inner (d+1) (if d >= depth then (r::acc) else acc) tl
| _::tl -> inner (d+1) acc tl
in
inner 0 [] stack
(* generate a line number information for current function *)
method gen_line line =
let lab = Printf.sprintf ".L%d" nlabels in
{< nlabels = nlabels + 1; first_line = false >},
if fname = "main"
then
[Meta (Printf.sprintf "\t.stabn 68,0,%d,%s" line lab); Label lab]
else
(if first_line then [Meta (Printf.sprintf "\t.stabn 68,0,%d,0" line)] else []) @
[Meta (Printf.sprintf "\t.stabn 68,0,%d,%s-%s" line lab fname); Label lab]
end
(* Generates an assembler text for a program: first compiles the program into
the stack code, then generates x86 assember code, then prints the assembler file
*)
let genasm cmd prog =
let sm = SM.compile cmd prog in
let env, code = compile cmd (new env sm) (fst (fst prog)) sm in
let globals =
List.map (fun s -> Meta (Printf.sprintf "\t.globl\t%s" s)) env#publics
in
let data = [Meta "\t.data"] @
(List.map (fun (s, v) -> Meta (Printf.sprintf "%s:\t.string\t\"%s\"" v s)) env#strings) @
[Meta "_init:\t.int 0";
Meta "\t.section custom_data,\"aw\",@progbits";
Meta (Printf.sprintf "filler:\t.fill\t%d, 4, 1" env#max_locals_size)] @
(List.concat @@
List.map
(fun s -> [Meta (Printf.sprintf "\t.stabs \"%s:S1\",40,0,0,%s" (String.sub s (String.length "global_") (String.length s - String.length "global_")) s);
Meta (Printf.sprintf "%s:\t.int\t1" s)])
env#globals
)
in
let asm = Buffer.create 1024 in
List.iter
(fun i -> Buffer.add_string asm (Printf.sprintf "%s\n" @@ show i))
([Meta (Printf.sprintf "\t.file \"%s\"" cmd#get_absolute_infile);
Meta (Printf.sprintf "\t.stabs \"%s\",100,0,0,.Ltext" cmd#get_absolute_infile)] @
globals @
data @
[Meta "\t.text"; Label ".Ltext"; Meta "\t.stabs \"data:t1=r1;0;4294967295;\",128,0,0,0"] @
code);
Buffer.contents asm
let get_std_path () =
match Sys.getenv_opt "LAMA" with
| Some s -> s
| None -> Stdpath.path
(* Builds a program: generates the assembler file and compiles it with the gcc toolchain *)
let build cmd prog =
let find_objects imports paths =
let module S = Set.Make (String) in
let rec iterate acc s = function
| [] -> acc
| import::imports ->
if S.mem import s
then iterate acc s imports
else
let path, intfs = Interface.find import paths in
iterate
((Filename.concat path (import ^ ".o")) :: acc)
(S.add import s)
((List.map (function `Import name -> name | _ -> invalid_arg "must not happen") @@
List.filter (function `Import _ -> true | _ -> false) intfs) @
imports)
in
iterate [] (S.add "Std" S.empty) imports
in
cmd#dump_file "s" (genasm cmd prog);
cmd#dump_file "i" (Interface.gen prog);
let inc = get_std_path () in
match cmd#get_mode with
| `Default ->
let objs = find_objects (fst @@ fst prog) cmd#get_include_paths in
let buf = Buffer.create 255 in
List.iter (fun o -> Buffer.add_string buf o; Buffer.add_string buf " ") objs;
let gcc_cmdline = Printf.sprintf "gcc %s -m32 %s %s.s %s %s/runtime.a" cmd#get_debug cmd#get_output_option cmd#basename (Buffer.contents buf) inc in
Sys.command gcc_cmdline
| `Compile ->
Sys.command (Printf.sprintf "gcc %s -m32 -c %s.s" cmd#get_debug cmd#basename)
| _ -> invalid_arg "must not happen"

1494
src/X86_64.ml Normal file

File diff suppressed because it is too large Load diff

111
src/dune Normal file
View file

@ -0,0 +1,111 @@
(env
(dev
(flags
(:standard -warn-error -3-7-8-13-15-20-26-27-32-33-39))))
(rule
(targets version.ml)
(action
(progn
(with-stdout-to
version2.ml
(progn
(run echo let version = "\"")
(run echo Version)
(run git rev-parse --abbrev-ref HEAD)
(run echo , " ")
(run git rev-parse --short HEAD)
(run echo , " ")
(pipe-stdout
(run git rev-parse --verify HEAD)
(run git show --no-patch --no-notes --pretty='%cd'))
(run echo "\"")))
(with-stdout-to
version.ml
(pipe-stdout
(run cat version2.ml)
(run tr -d '\n'))))))
(rule
(targets stdpath.ml)
(action
(progn
(with-stdout-to
stdpath2.ml
(progn
(run echo let path = "\"")
(run opam var share)
(run echo /Lama)
(run echo "\"")))
(with-stdout-to
stdpath.ml
(pipe-stdout
(run cat stdpath2.ml)
(run tr -d '\n'))))))
(library
(name liba)
(modules Language Pprinter stdpath version X86_64 SM Options)
(libraries GT ostap posix-uname)
(flags
(:standard
-rectypes
;-dsource
))
; (ocamlopt_flags
; (:standard -dsource))
(wrapped false)
(preprocess
(per_module
((pps GT.ppx_all)
SM
X86)
((action
(run %{project_root}/src/pp5+gt+plugins+ostap+dump.byte %{input-file}))
Language
Pprinter
stdpath
version)))
(preprocessor_deps
(file %{project_root}/src/pp5+gt+plugins+ostap+dump.byte)
;(file %{project_root}/src/pp5+gt+plugins+ostap+dump.exe)
)
;(inline_tests)
)
(executable
(name Driver)
(flags
(:standard
-rectypes
;-dsource
))
(modules Driver)
(libraries liba unix))
; (rule
; (targets pp5+gt+plugins+ostap+dump.exe)
; (deps
; (package GT))
; (action
; (run
; mkcamlp5.opt
; -package
; camlp5,camlp5.pa_o,camlp5.pr_dump,camlp5.extend,camlp5.quotations,ostap.syntax,GT.syntax.all,GT.syntax
; -o
; %{targets})))
(rule
(targets pp5+gt+plugins+ostap+dump.byte)
(deps
(package GT))
(action
(run
mkcamlp5
-package
camlp5,camlp5.pa_o,camlp5.pr_o,ostap.syntax,GT.syntax.all,GT.syntax
-o
%{targets})))
(cram
(deps ./Driver.exe))

View file

@ -2,7 +2,7 @@ SHELL := /bin/bash
FILES=$(wildcard *.lama)
ALL=$(sort $(FILES:.lama=.o))
LAMAC=../src/lamac -g
LAMAC=../src/lamac
all: $(ALL)
@ -21,7 +21,7 @@ Buffer.o: List.o
STM.o: List.o Fun.o
%.o: %.lama
LAMA=../runtime $(LAMAC) -I . -c $<
LAMA=../runtime $(LAMAC) -g -I . -c $<
clean:
rm -Rf *.s *.o *.i *~

View file

@ -1,4 +1,4 @@
TESTS=$(sort $(basename $(wildcard test*.lama)))
TESTS=$(sort $(filter-out test30, $(basename $(wildcard test*.lama))))
LAMAC=../../src/lamac
@ -7,8 +7,8 @@ LAMAC=../../src/lamac
check: $(TESTS)
$(TESTS): %: %.lama
@echo $@
LAMA=../../runtime $(LAMAC) -I .. -ds -dp $< && ./$@ > $@.log && diff $@.log orig/$@.log
@echo "stdlib/regression/$@"
@LAMA=../../runtime $(LAMAC) -I .. -ds -dp $< && ./$@ > $@.log && diff $@.log orig/$@.log
clean:
$(RM) test*.log *.s *~ $(TESTS) *.i

View file

@ -14,5 +14,5 @@
1
0
0
31
1
-1

View file

@ -1,5 +1,5 @@
HashTab internal structure: [0, 0, {[{1, 2, 3}, 100]}, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
HashTab internal structure: [0, 0, {[{1, 2, 3}, 200], [{1, 2, 3}, 100]}, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
HashTab internal structure: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, {[{1, 2, 3}, 100]}, 0, 0, 0]
HashTab internal structure: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, {[{1, 2, 3}, 200], [{1, 2, 3}, 100]}, 0, 0, 0]
Searching: Some (200)
Searching: Some (200)
Replaced: Some (800)

View file

@ -0,0 +1,2 @@
' " ` % \ \r
\h @ $ # ; [ ]

View file

@ -1,3 +1,7 @@
fun normalize (x) {
if x < 0 then -1 else if x > 0 then 1 else 0 fi fi
}
fun f (x) {
fun (y) {x + y}
}
@ -8,8 +12,8 @@ write (compare (3, 3));
write (compare (2, "abc"));
write (compare ("abc", 2));
write (compare ("abc", "abc"));
write (compare ("ab", "abc"));
write (compare ("abc", "ab"));
write (normalize (compare ("ab", "abc")));
write (normalize (compare ("abc", "ab")));
write (compare ([], []));
write (compare (A, A));
write (compare (A, B));

View file

@ -87,8 +87,6 @@ fun normalize (x) {
fun not (x) {0 - x}
disableGC ();
for var i=0;, i<25, i:=i+1
do
case genCyclicArrays (1000, true, false) of

View file

@ -0,0 +1,3 @@
var s = " ' "" ` % \ \r \n\t \h @ $ # ; [ ] ";
printf ("%s", s)