Merge branch '1.2' of https://github.com/PLTools/Lama into blonded04/readme-installation-fixes

This commit is contained in:
blonded04 2023-10-12 03:41:04 +03:00
commit 1b91a663e0
43 changed files with 5289 additions and 3504 deletions

View file

@ -1,12 +1,10 @@
name: Build
name: Main workflow
on:
pull_request:
push:
paths-ignore:
- 'README.md'
branches:
- '1.10'
permissions: read-all
jobs:
build:
@ -14,27 +12,26 @@ jobs:
fail-fast: false
matrix:
os:
#- macos-latest
- ubuntu-20.04
#- windows-latest
ocaml-version:
#- 4.11.0
- 4.10.1
#- 4.09.1
#- 4.08.1
- ubuntu-latest
ocaml-compiler:
- 4.13.1
runs-on: ${{ matrix.os }}
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Use OCaml ${{ matrix.ocaml-version }}
uses: avsm/setup-ocaml@v1
- name: Checkout tree
uses: actions/checkout@v3
- name: Set-up OCaml ${{ matrix.ocaml-compiler }}
uses: ocaml/setup-ocaml@v2
with:
ocaml-version: ${{ matrix.ocaml-version }}
ocaml-compiler: ${{ matrix.ocaml-compiler }}
- run: opam pin add Lama.dev . --no-action
- run: opam depext Lama.dev --yes --with-test
- run: opam install . --deps-only --with-test
- run: opam exec -- make #dune build
- run: opam exec -- make regression # dune runtest
- run: eval $(opam env)
- run: opam exec -- make
- run: opam exec -- make regression-all
- run: opam exec -- make unit_tests
- run: opam exec -- make negative_scenarios_tests

1
.gitignore vendored
View file

@ -5,3 +5,4 @@
*.o
.merlin
.vscode

View file

@ -1,23 +1,27 @@
opam-version: "2.0"
version: "1.10"
version: "1.2"
synopsis: "Lama programming system"
synopsis: "Lama programming language"
maintainer: "dboulytchev@gmail.com"
authors: "dboulytchev@gmail.com"
homepage: "https://github.com/JetBrains-Research/Lama"
bug-reports: "https://github.com/JetBrains-Research/Lama/issues"
authors: [
"Dmitry Boulytchev <dboulytchev@gmail.com>"
"Daniil Berezun <danya.berezun@gmail.com>"
"Egor Sheremetov <egor.sheremetov.dev@gmail.com>"
]
homepage: "https://github.com/PLTools/Lama"
bug-reports: "https://github.com/PLTools/Lama/issues"
depends: [
"ocaml" { >= "4.07.1" }
"ocaml" { >= "4.13.1" }
"ocamlfind" { build }
"camlp5" { >= "8.00.05" }
"ostap" { >= "0.5"}
"GT" { >= "0.5.0" }
"GT" { >= "0.5.1" }
]
build: [
[make]
[make "regression"] {with-test}
[make "regression-all"] {with-test}
]
install: [make "install"]
@ -25,7 +29,7 @@ depexts: [
["gcc-multilib"] {os-family = "debian"}
]
dev-repo: "git+https://github.com/JetBrains-Research/Lama.git"
dev-repo: "git+https://github.com/PLTools/Lama.git"
url {
src: "git+https://github.com/JetBrains-Research/Lama.git#1.10+ocaml4.10"
src: "git+https://github.com/PLTools/Lama.git#1.20"
}

View file

@ -9,6 +9,9 @@ all:
$(MAKE) -C runtime
$(MAKE) -C byterun
$(MAKE) -C stdlib
$(MAKE) -C runtime unit_tests.o
$(MAKE) -C runtime invariants_check.o
$(MAKE) -C runtime invariants_check_debug_print.o
STD_FILES=$(shell ls stdlib/*.[oi] stdlib/*.lama runtime/runtime.a runtime/Std.i)
@ -21,13 +24,28 @@ uninstall:
$(RM) -r `opam var share`/Lama
$(RM) `opam var bin`/$(EXECUTABLE)
regression-all: regression regression-expressions
regression:
$(MAKE) clean check -C regression
$(MAKE) clean check -C stdlib/regression
$(MAKE) clean check -j8 -C regression
$(MAKE) clean check -j8 -C stdlib/regression
regression-expressions:
$(MAKE) clean check -j8 -C regression/expressions
$(MAKE) clean check -j8 -C regression/deep-expressions
unit_tests:
./runtime/unit_tests.o
./runtime/invariants_check.o
./runtime/invariants_check_debug_print.o
negative_scenarios_tests:
$(MAKE) -C runtime negative_tests
clean:
$(MAKE) clean -C src
$(MAKE) clean -C runtime
$(MAKE) clean -C stdlib
$(MAKE) clean -C regression
$(MAKE) clean -C byterun
$(MAKE) clean -C bench

View file

@ -1,11 +1,9 @@
| Lama 1.10 | Lama-devel 1.10 |
| -------------------- | -------------------------- |
| [![Lama 1.10][1]][2] | [![Lama-devel 1.10][3]][4] |
| Lama 1.2 |
| ------------------- |
| [![Lama 1.2][1]][2] |
[1]: https://github.com/JetBrains-Research/Lama/workflows/Build/badge.svg?branch=1.10
[2]: https://github.com/JetBrains-Research/Lama/actions
[3]: https://github.com/JetBrains-Research/Lama-devel/workflows/Build/badge.svg?branch=1.10
[4]: https://github.com/JetBrains-Research/Lama-devel/actions
[1]: https://github.com/PLTools/Lama/workflows/Build/badge.svg?branch=1.10
[2]: https://github.com/PLTools/Lama/actions
# Lama
@ -26,13 +24,13 @@ The name ![lama](lama.svg) is an acronym for *Lambda-Algol* since the language h
The main purpose of ![lama](lama.svg) is to present a repertoire of constructs with certain runtime behavior and relevant implementation techniques.
The lack of a type system (a vital feature for a real-world language
for software engineering) is an intensional decision that allows showing the unchained diversity of runtime behaviors, including those that a typical type system is called to prevent.
for software engineering) is an intensional decision that allows showing the unchained diversity of runtime behaviors, including those that a typical type system is called to prevent.
On the other hand the language can be used in the future as a raw substrate to apply various ways of software verification (including type systems).
The current implementation contains a native code compiler for **x86-32**, written in **OCaml**, a runtime library with garbage-collection support, written in **C**, and a small standard library, written in ![lama](lama.svg) itself.
The native code compiler uses **gcc** as a toolchain.
In addition, a source-level reference interpreter is implemented as well as a compiler to a small stack machine.
In addition, a source-level reference interpreter is implemented as well as a compiler to a small stack machine.
The stack machine code can in turn be either interpreted on a stack machine interpreter, or used as an intermediate representation by the native code compiler.
## Language Specification
@ -51,9 +49,9 @@ Ubuntu-based variant of WSL is recommended.
* System-wide prerequisites:
- `gcc-multilib`
For example, (for Debian-based GNU/Linux):
```bash
```bash
$ sudo apt install gcc-multilib
```
@ -73,10 +71,10 @@ Ubuntu-based variant of WSL is recommended.
1. Install the right [switch](https://opam.ocaml.org/doc/Manual.html#Switches) for the OCaml compiler
```bash
# for fresh opam
$ opam switch create lama --packages=ocaml-variants.4.14.0+options,ocaml-option-flambda
# for old opam
$ opam switch create lama ocaml-variants.4.10.1+flambda
# for fresh opam
$ opam switch create lama --packages=ocaml-variants.4.14.0+options,ocaml-option-flambda
# for old opam
$ opam switch create lama ocaml-variants.4.13.1+flambda
```
* In the above command:
@ -100,7 +98,7 @@ Ubuntu-based variant of WSL is recommended.
3. Pin Lama package using `opam` and right URL (remember of "#" being a comment character in various shells)
```bash
$ opam pin add Lama https://github.com/JetBrains-Research/Lama.git\#1.10 --no-action
$ opam pin add Lama https://github.com/PLTools/Lama.git\#v1.2 --no-action
```
The extra '#' sign is added because in various Shells it is the start of a comment
@ -120,5 +118,13 @@ Ubuntu-based variant of WSL is recommended.
### Smoke-testing (optional)
Clone the repository and run `make -C tutorial`.
Clone the repository and run `make -C tutorial`.
It should build a local compiler `src/lamac` and a few tutorial executables in `tutorial/`.
### Useful links
* [Plugin for VS Code](https://marketplace.visualstudio.com/items?itemName=arsavelev.lsp-lama)
### Changes in Lama 1.2
* New garbage collector: single-threaded stop-the-world `LISP2` (see GC Handbook for details: [1st edition](https://www.cs.kent.ac.uk/people/staff/rej/gcbook/), [2nd edition](http://gchandbook.org/)) [mark-compact](https://www.memorymanagement.org/glossary/m.html#term-mark-compact).

View file

@ -1,8 +1,10 @@
FLAGS=-m32 -g2 -fstack-protector-all
all: byterun.o
$(CC) -m32 -g -o byterun byterun.o ../runtime/runtime.a
$(CC) $(FLAGS) -o byterun byterun.o ../runtime/runtime.a
byterun.o: byterun.c
$(CC) -g -fstack-protector-all -m32 -c byterun.c
$(CC) $(FLAGS) -g -c byterun.c
clean:
$(RM) *.a *.o *~
$(RM) *.a *.o *~ byterun

3
dune-project Normal file
View file

@ -0,0 +1,3 @@
(lang dune 3.3)
(cram enable)

View file

@ -1,18 +1,24 @@
TESTS=$(sort $(basename $(wildcard test*.lama)))
DEBUG_FILES=stack-dump-before data-dump-before extra-roots-dump-before heap-dump-before stack-dump-after data-dump-after extra-roots-dump-after heap-dump-after
TESTS=$(sort $(filter-out test111, $(basename $(wildcard test*.lama))))
LAMAC=../src/lamac
.PHONY: check $(TESTS)
check: $(TESTS)
check: ctest111 $(TESTS)
$(TESTS): %: %.lama
@echo $@
cat $@.input | LAMA=../runtime $(LAMAC) -i $< > $@.log && diff $@.log orig/$@.log
cat $@.input | LAMA=../runtime $(LAMAC) -ds -s $< > $@.log && diff $@.log orig/$@.log
LAMA=../runtime $(LAMAC) $< && cat $@.input | ./$@ > $@.log && diff $@.log orig/$@.log
@echo "regression/$@"
@cat $@.input | LAMA=../runtime $(LAMAC) -i $< > $@.log && diff $@.log orig/$@.log
@cat $@.input | LAMA=../runtime $(LAMAC) -ds -s $< > $@.log && diff $@.log orig/$@.log
@LAMA=../runtime $(LAMAC) $< && cat $@.input | ./$@ > $@.log && diff $@.log orig/$@.log
ctest111:
@echo "regression/test111"
@LAMA=../runtime $(LAMAC) test111.lama && cat test111.input | ./test111 > test111.log && diff test111.log orig/test111.log
clean:
$(RM) test*.log *.s *~ $(TESTS) *.i
$(RM) test*.log *.s *.sm *~ $(TESTS) *.i $(DEBUG_FILES) test111
$(MAKE) clean -C expressions
$(MAKE) clean -C deep-expressions

View file

@ -7,10 +7,10 @@ LAMAC = ../../src/lamac
check: $(TESTS)
$(TESTS): %: %.lama
@echo $@
@echo "regression/deep-expressions/$@"
@LAMA=../../runtime $(LAMAC) $< && cat $@.input | ./$@ > $@.log && diff $@.log orig/$@.log
@cat $@.input | $(LAMAC) -i $< > $@.log && diff $@.log orig/$@.log
@cat $@.input | $(LAMAC) -s $< > $@.log && diff $@.log orig/$@.log
@cat $@.input | LAMA=../../runtime $(LAMAC) -i $< > $@.log && diff $@.log orig/$@.log
@cat $@.input | LAMA=../../runtime $(LAMAC) -s $< > $@.log && diff $@.log orig/$@.log
clean:
rm -f *.log *.s *~

View file

@ -7,10 +7,10 @@ RC = ../../src/lamac
check: $(TESTS)
$(TESTS): %: %.lama
@echo $@
@echo "regression/expressions/$@"
@LAMA=../../runtime $(RC) $< && cat $@.input | ./$@ > $@.log && diff $@.log orig/$@.log
@cat $@.input | $(RC) -i $< > $@.log && diff $@.log orig/$@.log
@cat $@.input | $(RC) -s $< > $@.log && diff $@.log orig/$@.log
@cat $@.input | LAMA=../../runtime $(RC) -i $< > $@.log && diff $@.log orig/$@.log
@cat $@.input | LAMA=../../runtime $(RC) -s $< > $@.log && diff $@.log orig/$@.log
clean:
rm -f *.log *.s *~

View file

@ -0,0 +1,6 @@
97
98
99
100
97
98

0
regression/test111.input Normal file
View file

21
regression/test111.lama Normal file
View file

@ -0,0 +1,21 @@
import Std;
fun printString (s) {
var i;
for i := 0, i < s.length, i := i + 1 do
write (s[i])
od
}
fun printListConcat ( str_list ) {
printString (stringcat ( str_list ))
}
var l = { "a" , "b" , "c" , "d" };
printListConcat (l);
l := { "ab" };
printListConcat (l);
l := {};
printListConcat (l)

134
runtime/.clang-format Normal file
View file

@ -0,0 +1,134 @@
# Common settings
BasedOnStyle: LLVM
TabWidth: 2
IndentWidth: 2
UseTab: Never
ColumnLimit: 100
IndentCaseLabels: true
# https://clang.llvm.org/docs/ClangFormatStyleOptions.html
---
Language: Cpp
DisableFormat: false
Standard: Auto
AccessModifierOffset: -4
AlignAfterOpenBracket: true
AlignConsecutiveAssignments: Consecutive
AlignConsecutiveDeclarations: Consecutive
AlignEscapedNewlines: Right
AlignOperands: true
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: Always
AllowShortCaseLabelsOnASingleLine: true
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: AllIfsAndElse
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: false
BinPackParameters: true
BitFieldColonSpacing: Both
# Configure each individual brace in BraceWrapping
BreakBeforeBraces: Attach
# Control of individual brace wrapping cases
BraceWrapping:
AfterClass: true
AfterControlStatement: Always
AfterEnum : true
AfterFunction : true
AfterNamespace : true
AfterStruct : true
AfterUnion : true
BeforeCatch : true
BeforeElse : true
IndentBraces : false
AfterExternBlock : true
SplitEmptyFunction : false
SplitEmptyRecord : false
SplitEmptyNamespace : true
BreakAfterJavaFieldAnnotations: true
BreakBeforeInheritanceComma: false
BreakBeforeBinaryOperators: NonAssignment
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: true
BreakStringLiterals: true
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
SpaceBeforeCpp11BracedList: false
DerivePointerAlignment: false
ExperimentalAutoDetectBinPacking: false
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
IndentCaseLabels: true
FixNamespaceComments: true
IndentWrappedFunctionNames: true
KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockBegin: ''
MacroBlockEnd: ''
JavaScriptQuotes: Double
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Right
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: Custom
SpaceBeforeParensOptions:
AfterControlStatements: true
AfterForeachMacros: true
AfterFunctionDeclarationName: true
AfterFunctionDefinitionName: true
AfterIfMacros: true
AfterOverloadedOperator: true
AfterRequiresInClause: true
AfterRequiresInExpression: true
BeforeNonEmptyParentheses: false
SpaceBeforeRangeBasedForLoopColon: false
SpaceInEmptyBlock: true
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 3
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInConditionalStatement: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceAfterTemplateKeyword: true
SpaceBeforeInheritanceColon: true
SortUsingDeclarations: true
SortIncludes: CaseInsensitive
IndentGotoLabels: false
InsertBraces: false
# Comments are for developers, they should arrange them
ReflowComments: false
IncludeBlocks: Regroup
IndentPPDirectives: AfterHash
SeparateDefinitionBlocks: Always

View file

@ -1,12 +1,44 @@
CC=gcc
COMMON_FLAGS=-m32 -g2 -fstack-protector-all
PROD_FLAGS=$(COMMON_FLAGS) -DLAMA_ENV
TEST_FLAGS=$(COMMON_FLAGS) -DDEBUG_VERSION
UNIT_TESTS_FLAGS=$(TEST_FLAGS)
INVARIANTS_CHECK_FLAGS=$(TEST_FLAGS) -DFULL_INVARIANT_CHECKS
all: gc_runtime.o runtime.o
ar rc runtime.a gc_runtime.o runtime.o
# this target is the most important one, its' artefacts should be used as a runtime of Lama
all: gc.o runtime.o
ar rc runtime.a runtime.o gc.o
gc_runtime.o: gc_runtime.s
$(CC) -g -fstack-protector-all -m32 -c gc_runtime.s
NEGATIVE_TESTS=$(sort $(basename $(notdir $(wildcard negative_scenarios/*_neg.c))))
$(NEGATIVE_TESTS): %: negative_scenarios/%.c
@echo "Running test $@"
@$(CC) -o $@.o $(COMMON_FLAGS) negative_scenarios/$@.c gc.c
@./$@.o 2> negative_scenarios/$@.err || diff negative_scenarios/$@.err negative_scenarios/expected/$@.err
negative_tests: $(NEGATIVE_TESTS)
# this is a target that runs unit tests, scenarios are written in a single file `test_main.c`
unit_tests.o: gc.c gc.h runtime.c runtime.h runtime_common.h virt_stack.c virt_stack.h test_main.c test_util.s
$(CC) -o unit_tests.o $(UNIT_TESTS_FLAGS) gc.c virt_stack.c runtime.c test_main.c test_util.s
# this target also runs unit tests but with additional expensive checks of GC invariants which aren't used in production version
invariants_check.o: gc.c gc.h runtime.c runtime.h runtime_common.h virt_stack.c virt_stack.h test_main.c test_util.s
$(CC) -o invariants_check.o $(INVARIANTS_CHECK_FLAGS) gc.c virt_stack.c runtime.c test_main.c test_util.s
# this target also runs unit tests but with additional expensive checks of GC invariants which aren't used in production version
# additionally, it prints debug information
invariants_check_debug_print.o: gc.c gc.h runtime.c runtime.h runtime_common.h virt_stack.c virt_stack.h test_main.c test_util.s
$(CC) -o invariants_check_debug_print.o $(INVARIANTS_CHECK_FLAGS) -DDEBUG_PRINT gc.c virt_stack.c runtime.c test_main.c test_util.s
virt_stack.o: virt_stack.h virt_stack.c
$(CC) $(PROD_FLAGS) -c virt_stack.c
gc.o: gc.c gc.h
$(CC) -rdynamic $(PROD_FLAGS) -c gc.c
runtime.o: runtime.c runtime.h
$(CC) -g -fstack-protector-all -m32 -c runtime.c
$(CC) $(PROD_FLAGS) -c runtime.c
clean:
$(RM) *.a *.o *~
$(RM) *.a *.o *~ negative_scenarios/*.err

19
runtime/TODO.md Normal file
View file

@ -0,0 +1,19 @@
### TODO list
- [x] Fix heap&stack&extra_roots dump
- [x] Remove extra and dead code
- [x] Debug print -> DEBUG_PRINT mode
- [x] Check `mmap`/`remap`/...
- [x] Check: `__gc_stack_bot`: same issue as `__gc_stack_top`?
- [x] Check: Can we get rid of `__gc_init` (as an assembly (implement in C instead))? (answer: if we make main in which every Lama file is compiled set `__gc_stack_bottom` to current `ebp` then yes, otherwise we need access to registers)
- [x] Check: runtime tags: should always the last bit be 1? (Answer: not really, however, we still need to distinguish between 5 different options (because unboxed values should have its own value to be returned from `LkindOf`))
- [x] Fix warnings in ML code
- [x] TODO: debug flag doesn't compile
- [x] Sexp: move the tag to be `contents[0]` instead of the word in sexp header; i.e. get rid of sexp as separate data structure
- [x] Run Lama compiler on Lama
- [ ] Add more stress tests (for graph-like structures) to `stdlib/regression` and unit tests
- [ ] Magic constants
- [ ] Normal documentation: a-la doxygen
- [ ] Think: normal debug mode
- [ ] Fix warnings in C code
- [ ] Modes (like FULL_INVARIANTS) -> separate files

922
runtime/gc.c Normal file
View file

@ -0,0 +1,922 @@
#define _GNU_SOURCE 1
#include "gc.h"
#include "runtime_common.h"
#include <assert.h>
#include <execinfo.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>
static const size_t INIT_HEAP_SIZE = MINIMUM_HEAP_CAPACITY;
#ifdef DEBUG_VERSION
size_t cur_id = 0;
#endif
static extra_roots_pool extra_roots;
size_t __gc_stack_top = 0, __gc_stack_bottom = 0;
#ifdef LAMA_ENV
extern const size_t __start_custom_data, __stop_custom_data;
#endif
#ifdef DEBUG_VERSION
memory_chunk heap;
#else
static memory_chunk heap;
#endif
#ifdef DEBUG_VERSION
void dump_heap ();
#endif
void handler (int sig) {
void *array[10];
int size;
// get void*'s for all entries on the stack
size = backtrace(array, 10);
fprintf(stderr, "heap size is %zu\n", heap.size);
backtrace_symbols_fd(array, size, STDERR_FILENO);
exit(1);
}
void *alloc (size_t size) {
#ifdef DEBUG_VERSION
++cur_id;
#endif
size_t bytes_sz = size;
size = BYTES_TO_WORDS(size);
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "allocation of size %zu words (%zu bytes): ", size, bytes_sz);
#endif
void *p = gc_alloc_on_existing_heap(size);
if (!p) {
// not enough place in the heap, need to perform GC cycle
p = gc_alloc(size);
}
return p;
}
#ifdef FULL_INVARIANT_CHECKS
// precondition: obj_content is a valid address pointing to the content of an object
static void print_object_info (FILE *f, void *obj_content) {
data *d = TO_DATA(obj_content);
size_t obj_tag = TAG(d->data_header);
size_t obj_id = d->id;
fprintf(f, "id %zu tag %zu | ", obj_id, obj_tag);
}
static void print_unboxed (FILE *f, int unboxed) { fprintf(f, "unboxed %zu | ", unboxed); }
static FILE *print_stack_content (char *filename) {
FILE *f = fopen(filename, "w+");
ftruncate(fileno(f), 0);
fprintf(f, "Stack content:\n");
for (size_t *stack_ptr = (size_t *)((void *)__gc_stack_top + 4);
stack_ptr < (size_t *)__gc_stack_bottom;
++stack_ptr) {
size_t value = *stack_ptr;
if (is_valid_heap_pointer((size_t *)value)) {
fprintf(f, "%p, ", (void *)value);
print_object_info(f, (void *)value);
} else {
print_unboxed(f, (int)value);
}
fprintf(f, "\n");
}
fprintf(f, "Stack content end.\n");
return f;
}
// precondition: obj_content is a valid address pointing to the content of an object
static void objects_dfs (FILE *f, void *obj_content) {
void *obj_header = get_obj_header_ptr(obj_content);
data *obj_data = TO_DATA(obj_content);
// internal mark-bit for this dfs, should be recovered by the caller
if ((obj_data->forward_address & 2) != 0) { return; }
// set this bit as 1
obj_data->forward_address |= 2;
fprintf(f, "object at addr %p: ", obj_content);
print_object_info(f, obj_content);
/*fprintf(f, "object id: %zu | ", obj_data->id);*/
// first cycle: print object's fields
for (obj_field_iterator field_it = ptr_field_begin_iterator(obj_header);
!field_is_done_iterator(&field_it);
obj_next_field_iterator(&field_it)) {
size_t field_value = *(size_t *)field_it.cur_field;
if (is_valid_heap_pointer((size_t *)field_value)) {
print_object_info(f, (void *)field_value);
/*fprintf(f, "%zu ", TO_DATA(field_value)->id);*/
} else {
print_unboxed(f, (int)field_value);
}
}
fprintf(f, "\n");
for (obj_field_iterator field_it = ptr_field_begin_iterator(obj_header);
!field_is_done_iterator(&field_it);
obj_next_field_iterator(&field_it)) {
size_t field_value = *(size_t *)field_it.cur_field;
if (is_valid_heap_pointer((size_t *)field_value)) { objects_dfs(f, (void *)field_value); }
}
}
FILE *print_objects_traversal (char *filename, bool marked) {
FILE *f = fopen(filename, "w+");
ftruncate(fileno(f), 0);
for (heap_iterator it = heap_begin_iterator(); !heap_is_done_iterator(&it);
heap_next_obj_iterator(&it)) {
void *obj_header = it.current;
data *obj_data = TO_DATA(get_object_content_ptr(obj_header));
if ((obj_data->forward_address & 1) == marked) {
objects_dfs(f, get_object_content_ptr(obj_header));
}
}
// resetting bit that represent mark-bit for this internal dfs-traversal
for (heap_iterator it = heap_begin_iterator(); !heap_is_done_iterator(&it);
heap_next_obj_iterator(&it)) {
void *obj_header = it.current;
data *obj_data = TO_DATA(get_object_content_ptr(obj_header));
obj_data->forward_address &= (~2);
}
fflush(f);
// print extra roots
for (int i = 0; i < extra_roots.current_free; i++) {
fprintf(f, "extra root %p %p: ", extra_roots.roots[i], *(size_t **)extra_roots.roots[i]);
}
fflush(f);
return f;
}
int files_cmp (FILE *f1, FILE *f2) {
int symbol1, symbol2;
int position = 0;
while (true) {
symbol1 = fgetc(f1);
symbol2 = fgetc(f2);
if (symbol1 == EOF && symbol2 == EOF) { return -1; }
if (symbol1 != symbol2) { return position; }
++position;
}
}
#endif
void *gc_alloc_on_existing_heap (size_t size) {
if (heap.current + size <= heap.end) {
void *p = (void *)heap.current;
heap.current += size;
memset(p, 0, size * sizeof(size_t));
return p;
}
return NULL;
}
void *gc_alloc (size_t size) {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "===============================GC cycle has started\n");
#endif
#ifdef FULL_INVARIANT_CHECKS
FILE *stack_before = print_stack_content("stack-dump-before-compaction");
FILE *heap_before = print_objects_traversal("before-mark", 0);
fclose(heap_before);
#endif
mark_phase();
#ifdef FULL_INVARIANT_CHECKS
FILE *heap_before_compaction = print_objects_traversal("after-mark", 1);
#endif
compact_phase(size);
#ifdef FULL_INVARIANT_CHECKS
FILE *stack_after = print_stack_content("stack-dump-after-compaction");
FILE *heap_after_compaction = print_objects_traversal("after-compaction", 0);
int pos = files_cmp(stack_before, stack_after);
if (pos >= 0) { // position of difference is found
fprintf(stderr, "Stack is modified incorrectly, see position %d\n", pos);
exit(1);
}
fclose(stack_before);
fclose(stack_after);
pos = files_cmp(heap_before_compaction, heap_after_compaction);
if (pos >= 0) { // position of difference is found
fprintf(stderr, "GC invariant is broken, pos is %d\n", pos);
exit(1);
}
fclose(heap_before_compaction);
fclose(heap_after_compaction);
#endif
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "===============================GC cycle has finished\n");
#endif
return gc_alloc_on_existing_heap(size);
}
static void gc_root_scan_stack () {
for (size_t *p = (size_t *)(__gc_stack_top + 4); p < (size_t *)__gc_stack_bottom; ++p) {
gc_test_and_mark_root((size_t **)p);
}
}
void mark_phase (void) {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "marking has started\n");
fprintf(stderr,
"gc_root_scan_stack has started: gc_top=%p bot=%p\n",
(void *)__gc_stack_top,
(void *)__gc_stack_bottom);
#endif
gc_root_scan_stack();
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "gc_root_scan_stack has finished\n");
fprintf(stderr, "scan_extra_roots has started\n");
#endif
scan_extra_roots();
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "scan_extra_roots has finished\n");
fprintf(stderr, "scan_global_area has started\n");
#endif
#ifdef LAMA_ENV
scan_global_area();
#endif
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "scan_global_area has finished\n");
fprintf(stderr, "marking has finished\n");
#endif
}
void compact_phase (size_t additional_size) {
size_t live_size = compute_locations();
// all in words
size_t next_heap_size =
MAX(live_size * EXTRA_ROOM_HEAP_COEFFICIENT + additional_size, MINIMUM_HEAP_CAPACITY);
size_t next_heap_pseudo_size = MAX(next_heap_size, heap.size);
memory_chunk old_heap = heap;
heap.begin = mremap(
heap.begin, WORDS_TO_BYTES(heap.size), WORDS_TO_BYTES(next_heap_pseudo_size), MREMAP_MAYMOVE);
if (heap.begin == MAP_FAILED) {
perror("ERROR: compact_phase: mremap failed\n");
exit(1);
}
heap.end = heap.begin + next_heap_pseudo_size;
heap.size = next_heap_pseudo_size;
heap.current = heap.begin + (old_heap.current - old_heap.begin);
update_references(&old_heap);
physically_relocate(&old_heap);
heap.current = heap.begin + live_size;
}
size_t compute_locations () {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "GC compute_locations started\n");
#endif
size_t *free_ptr = heap.begin;
heap_iterator scan_iter = heap_begin_iterator();
for (; !heap_is_done_iterator(&scan_iter); heap_next_obj_iterator(&scan_iter)) {
void *header_ptr = scan_iter.current;
void *obj_content = get_object_content_ptr(header_ptr);
if (is_marked(obj_content)) {
size_t sz = BYTES_TO_WORDS(obj_size_header_ptr(header_ptr));
// forward address is responsible for object header pointer
set_forward_address(obj_content, (size_t)free_ptr);
free_ptr += sz;
}
}
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "GC compute_locations finished\n");
#endif
// it will return number of words
return free_ptr - heap.begin;
}
void scan_and_fix_region (memory_chunk *old_heap, void *start, void *end) {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "GC scan_and_fix_region started\n");
#endif
for (size_t *ptr = (size_t *)start; ptr < (size_t *)end; ++ptr) {
size_t ptr_value = *ptr;
// this can't be expressed via is_valid_heap_pointer, because this pointer may point area corresponding to the old
// heap
if (is_valid_pointer((size_t *)ptr_value) && (size_t)old_heap->begin <= ptr_value
&& ptr_value <= (size_t)old_heap->current) {
void *obj_ptr = (void *)heap.begin + ((void *)ptr_value - (void *)old_heap->begin);
void *new_addr =
(void *)heap.begin + ((void *)get_forward_address(obj_ptr) - (void *)old_heap->begin);
size_t content_offset = get_header_size(get_type_row_ptr(obj_ptr));
*(void **)ptr = new_addr + content_offset;
}
}
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "GC scan_and_fix_region finished\n");
#endif
}
void scan_and_fix_region_roots (memory_chunk *old_heap) {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "extra roots started: number of extra roots %i\n", extra_roots.current_free);
#endif
for (int i = 0; i < extra_roots.current_free; i++) {
size_t *ptr = (size_t *)extra_roots.roots[i];
size_t ptr_value = *ptr;
if (!is_valid_pointer((size_t *)ptr_value)) { continue; }
// skip this one since it was already fixed from scanning the stack
if ((extra_roots.roots[i] >= (void **)__gc_stack_top
&& extra_roots.roots[i] < (void **)__gc_stack_bottom)
#ifdef LAMA_ENV
|| (extra_roots.roots[i] <= (void **)&__stop_custom_data
&& extra_roots.roots[i] >= (void **)&__start_custom_data)
#endif
) {
#ifdef DEBUG_VERSION
if (is_valid_heap_pointer((size_t *)ptr_value)) {
# ifdef DEBUG_PRINT
fprintf(stderr,
"|\tskip extra root: %p (%p), since it points to Lama's stack top=%p bot=%p\n",
extra_roots.roots[i],
(void *)ptr_value,
(void *)__gc_stack_top,
(void *)__gc_stack_bottom);
# endif
}
# ifdef LAMA_ENV
else if ((extra_roots.roots[i] <= (void *)&__stop_custom_data
&& extra_roots.roots[i] >= (void *)&__start_custom_data)) {
fprintf(
stderr,
"|\tskip extra root: %p (%p), since it points to Lama's static area stop=%p start=%p\n",
extra_roots.roots[i],
(void *)ptr_value,
(void *)&__stop_custom_data,
(void *)&__start_custom_data);
exit(1);
}
# endif
else {
# ifdef DEBUG_PRINT
fprintf(stderr,
"|\tskip extra root: %p (%p): not a valid Lama pointer \n",
extra_roots.roots[i],
(void *)ptr_value);
# endif
}
#endif
continue;
}
if ((size_t)old_heap->begin <= ptr_value && ptr_value <= (size_t)old_heap->current) {
void *obj_ptr = (void *)heap.begin + ((void *)ptr_value - (void *)old_heap->begin);
void *new_addr =
(void *)heap.begin + ((void *)get_forward_address(obj_ptr) - (void *)old_heap->begin);
size_t content_offset = get_header_size(get_type_row_ptr(obj_ptr));
*(void **)ptr = new_addr + content_offset;
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr,
"|\textra root (%p) %p -> %p\n",
extra_roots.roots[i],
(void *)ptr_value,
(void *)*ptr);
#endif
}
}
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "|\textra roots finished\n");
#endif
}
void update_references (memory_chunk *old_heap) {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "GC update_references started\n");
#endif
heap_iterator it = heap_begin_iterator();
while (!heap_is_done_iterator(&it)) {
if (is_marked(get_object_content_ptr(it.current))) {
for (obj_field_iterator field_iter = ptr_field_begin_iterator(it.current);
!field_is_done_iterator(&field_iter);
obj_next_ptr_field_iterator(&field_iter)) {
size_t *field_value = *(size_t **)field_iter.cur_field;
if (field_value < old_heap->begin || field_value > old_heap->current) { continue; }
// this pointer should also be modified according to old_heap->begin
void *field_obj_content_addr =
(void *)heap.begin + (*(void **)field_iter.cur_field - (void *)old_heap->begin);
// important, we calculate new_addr very carefully here, because objects may relocate to another memory chunk
void *new_addr =
heap.begin
+ ((size_t *)get_forward_address(field_obj_content_addr) - (size_t *)old_heap->begin);
// update field reference to point to new_addr
// since, we want fields to point to an actual content, we need to add this extra content_offset
// because forward_address itself is a pointer to the object's header
size_t content_offset = get_header_size(get_type_row_ptr(field_obj_content_addr));
#ifdef DEBUG_VERSION
if (!is_valid_heap_pointer((void *)(new_addr + content_offset))) {
# ifdef DEBUG_PRINT
fprintf(stderr,
"ur: incorrect pointer assignment: on object with id %d",
TO_DATA(get_object_content_ptr(it.current))->id);
# endif
exit(1);
}
#endif
*(void **)field_iter.cur_field = new_addr + content_offset;
}
}
heap_next_obj_iterator(&it);
}
// fix pointers from stack
scan_and_fix_region(old_heap, (void *)__gc_stack_top + 4, (void *)__gc_stack_bottom + 4);
// fix pointers from extra_roots
scan_and_fix_region_roots(old_heap);
#ifdef LAMA_ENV
assert((void *)&__stop_custom_data >= (void *)&__start_custom_data);
scan_and_fix_region(old_heap, (void *)&__start_custom_data, (void *)&__stop_custom_data);
#endif
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "GC update_references finished\n");
#endif
}
void physically_relocate (memory_chunk *old_heap) {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "GC physically_relocate started\n");
#endif
heap_iterator from_iter = heap_begin_iterator();
while (!heap_is_done_iterator(&from_iter)) {
void *obj = get_object_content_ptr(from_iter.current);
heap_iterator next_iter = from_iter;
heap_next_obj_iterator(&next_iter);
if (is_marked(obj)) {
// Move the object from its old location to its new location relative to
// the heap's (possibly new) location, 'to' points to future object header
size_t *to = heap.begin + ((size_t *)get_forward_address(obj) - (size_t *)old_heap->begin);
memmove(to, from_iter.current, obj_size_header_ptr(from_iter.current));
unmark_object(get_object_content_ptr(to));
}
from_iter = next_iter;
}
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "GC physically_relocate finished\n");
#endif
}
inline bool is_valid_heap_pointer (const size_t *p) {
return !UNBOXED(p) && (size_t)heap.begin <= (size_t)p && (size_t)p <= (size_t)heap.current;
}
static inline bool is_valid_pointer (const size_t *p) { return !UNBOXED(p); }
static inline void queue_enqueue (heap_iterator *tail_iter, void *obj) {
void *tail = tail_iter->current;
void *tail_content = get_object_content_ptr(tail);
set_forward_address(tail_content, (size_t)obj);
make_enqueued(obj);
heap_next_obj_iterator(tail_iter);
}
static inline void *queue_dequeue (heap_iterator *head_iter) {
void *head = head_iter->current;
void *head_content = get_object_content_ptr(head);
void *value = (void *)get_forward_address(head_content);
make_dequeued(value);
heap_next_obj_iterator(head_iter);
return value;
}
void mark (void *obj) {
if (!is_valid_heap_pointer(obj) || is_marked(obj)) { return; }
// TL;DR: [q_head_iter, q_tail_iter) q_head_iter -- current dequeue's victim, q_tail_iter -- place for next enqueue
// in forward_address of corresponding element we store address of element to be removed after dequeue operation
heap_iterator q_head_iter = heap_begin_iterator();
// iterator where we will write address of the element that is going to be enqueued
heap_iterator q_tail_iter = q_head_iter;
queue_enqueue(&q_tail_iter, obj);
// invariant: queue contains only objects that are valid heap pointers (each corresponding to content of unmarked
// object) also each object is in queue only once
while (q_head_iter.current != q_tail_iter.current) {
// while the queue is non-empty
void *cur_obj = queue_dequeue(&q_head_iter);
mark_object(cur_obj);
void *header_ptr = get_obj_header_ptr(cur_obj);
for (obj_field_iterator ptr_field_it = ptr_field_begin_iterator(header_ptr);
!field_is_done_iterator(&ptr_field_it);
obj_next_ptr_field_iterator(&ptr_field_it)) {
void *field_value = *(void **)ptr_field_it.cur_field;
if (!is_valid_heap_pointer(field_value) || is_marked(field_value)
|| is_enqueued(field_value)) {
continue;
}
// if we came to this point it must be true that field_value is unmarked and not currently in queue
// thus, we maintain the invariant
queue_enqueue(&q_tail_iter, field_value);
}
}
}
void scan_extra_roots (void) {
for (int i = 0; i < extra_roots.current_free; ++i) {
// this dereferencing is safe since runtime is pushing correct pointers into extra_roots
mark(*extra_roots.roots[i]);
}
}
#ifdef LAMA_ENV
void scan_global_area (void) {
// __start_custom_data is pointing to beginning of global area, thus all dereferencings are safe
for (size_t *ptr = (size_t *)&__start_custom_data; ptr < (size_t *)&__stop_custom_data; ++ptr) {
mark(*(void **)ptr);
}
}
#endif
extern void gc_test_and_mark_root (size_t **root) {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr,
"\troot = %p (%p), stack addresses: [%p, %p)\n",
root,
*root,
(void *)__gc_stack_top + 4,
(void *)__gc_stack_bottom);
#endif
mark((void *)*root);
}
void __gc_init (void) {
__gc_stack_bottom = (size_t)__builtin_frame_address(1) + 4;
__init();
}
void __init (void) {
signal(SIGSEGV, handler);
size_t space_size = INIT_HEAP_SIZE * sizeof(size_t);
srandom(time(NULL));
heap.begin = mmap(
NULL, space_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
if (heap.begin == MAP_FAILED) {
perror("ERROR: __init: mmap failed\n");
exit(1);
}
heap.end = heap.begin + INIT_HEAP_SIZE;
heap.size = INIT_HEAP_SIZE;
heap.current = heap.begin;
clear_extra_roots();
}
extern void __shutdown (void) {
munmap(heap.begin, heap.size);
#ifdef DEBUG_VERSION
cur_id = 0;
#endif
heap.begin = NULL;
heap.end = NULL;
heap.size = 0;
heap.current = NULL;
__gc_stack_top = 0;
__gc_stack_bottom = 0;
}
void clear_extra_roots (void) { extra_roots.current_free = 0; }
void push_extra_root (void **p) {
if (extra_roots.current_free >= MAX_EXTRA_ROOTS_NUMBER) {
perror("ERROR: push_extra_roots: extra_roots_pool overflow\n");
exit(1);
}
assert(p >= (void **)__gc_stack_top || p < (void **)__gc_stack_bottom);
extra_roots.roots[extra_roots.current_free] = p;
extra_roots.current_free++;
}
void pop_extra_root (void **p) {
if (extra_roots.current_free == 0) {
perror("ERROR: pop_extra_root: extra_roots are empty\n");
exit(1);
}
extra_roots.current_free--;
if (extra_roots.roots[extra_roots.current_free] != p) {
perror("ERROR: pop_extra_root: stack invariant violation\n");
exit(1);
}
}
/* Functions for tests */
#if defined(DEBUG_VERSION)
size_t objects_snapshot (int *object_ids_buf, size_t object_ids_buf_size) {
size_t *ids_ptr = (size_t *)object_ids_buf;
size_t i = 0;
for (heap_iterator it = heap_begin_iterator();
!heap_is_done_iterator(&it) && i < object_ids_buf_size;
heap_next_obj_iterator(&it), ++i) {
void *header_ptr = it.current;
data *d = TO_DATA(get_object_content_ptr(header_ptr));
ids_ptr[i] = d->id;
}
return i;
}
#endif
#ifdef DEBUG_VERSION
extern char *de_hash (int);
void dump_heap () {
size_t i = 0;
for (heap_iterator it = heap_begin_iterator(); !heap_is_done_iterator(&it);
heap_next_obj_iterator(&it), ++i) {
void *header_ptr = it.current;
void *content_ptr = get_object_content_ptr(header_ptr);
data *d = TO_DATA(content_ptr);
lama_type t = get_type_header_ptr(header_ptr);
switch (t) {
case ARRAY: fprintf(stderr, "of kind ARRAY\n"); break;
case CLOSURE: fprintf(stderr, "of kind CLOSURE\n"); break;
case STRING: fprintf(stderr, "of kind STRING\n"); break;
case SEXP:
fprintf(stderr, "of kind SEXP with tag %s\n", de_hash(TO_SEXP(content_ptr)->tag));
break;
}
}
}
void set_stack (size_t stack_top, size_t stack_bottom) {
__gc_stack_top = stack_top;
__gc_stack_bottom = stack_bottom;
}
void set_extra_roots (size_t extra_roots_size, void **extra_roots_ptr) {
memcpy(extra_roots.roots, extra_roots_ptr, MIN(sizeof(extra_roots.roots), extra_roots_size));
clear_extra_roots();
}
#endif
/* Utility functions */
size_t get_forward_address (void *obj) {
data *d = TO_DATA(obj);
return GET_FORWARD_ADDRESS(d->forward_address);
}
void set_forward_address (void *obj, size_t addr) {
data *d = TO_DATA(obj);
SET_FORWARD_ADDRESS(d->forward_address, addr);
}
bool is_marked (void *obj) {
data *d = TO_DATA(obj);
int mark_bit = GET_MARK_BIT(d->forward_address);
return mark_bit;
}
void mark_object (void *obj) {
data *d = TO_DATA(obj);
SET_MARK_BIT(d->forward_address);
}
void unmark_object (void *obj) {
data *d = TO_DATA(obj);
RESET_MARK_BIT(d->forward_address);
}
bool is_enqueued (void *obj) {
data *d = TO_DATA(obj);
return IS_ENQUEUED(d->forward_address) != 0;
}
void make_enqueued (void *obj) {
data *d = TO_DATA(obj);
MAKE_ENQUEUED(d->forward_address);
}
void make_dequeued (void *obj) {
data *d = TO_DATA(obj);
MAKE_DEQUEUED(d->forward_address);
}
heap_iterator heap_begin_iterator () {
heap_iterator it = {.current = heap.begin};
return it;
}
void heap_next_obj_iterator (heap_iterator *it) {
void *ptr = it->current;
size_t obj_size = obj_size_header_ptr(ptr);
// make sure we take alignment into consideration
obj_size = BYTES_TO_WORDS(obj_size);
it->current += obj_size;
}
bool heap_is_done_iterator (heap_iterator *it) { return it->current >= heap.current; }
lama_type get_type_row_ptr (void *ptr) {
data *data_ptr = TO_DATA(ptr);
return get_type_header_ptr(data_ptr);
}
lama_type get_type_header_ptr (void *ptr) {
int *header = (int *)ptr;
switch (TAG(*header)) {
case ARRAY_TAG: return ARRAY;
case STRING_TAG: return STRING;
case CLOSURE_TAG: return CLOSURE;
case SEXP_TAG: return SEXP;
default: {
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "ERROR: get_type_header_ptr: unknown object header, cur_id=%d", cur_id);
raise(SIGINT); // only for debug purposes
#else
# ifdef FULL_INVARIANT_CHECKS
# ifdef DEBUG_PRINT
fprintf(stderr,
"ERROR: get_type_header_ptr: unknown object header, ptr is %p, tag %i, heap size is "
"%d cur_id=%d stack_top=%p stack_bot=%p ",
ptr,
TAG(*header),
heap.size,
cur_id,
(void *)__gc_stack_top,
(void *)__gc_stack_bottom);
# endif
FILE *heap_before_compaction = print_objects_traversal("dump_kill", 1);
fclose(heap_before_compaction);
# endif
kill(getpid(), SIGSEGV);
#endif
exit(1);
}
}
}
size_t obj_size_row_ptr (void *ptr) {
data *data_ptr = TO_DATA(ptr);
return obj_size_header_ptr(data_ptr);
}
size_t obj_size_header_ptr (void *ptr) {
int len = LEN(*(int *)ptr);
switch (get_type_header_ptr(ptr)) {
case ARRAY: return array_size(len);
case STRING: return string_size(len);
case CLOSURE: return closure_size(len);
case SEXP: return sexp_size(len);
default: {
#ifdef DEBUG_VERSION
fprintf(stderr, "ERROR: obj_size_header_ptr: unknown object header, cur_id=%d", cur_id);
raise(SIGINT); // only for debug purposes
#else
perror("ERROR: obj_size_header_ptr: unknown object header\n");
#endif
exit(1);
}
}
}
size_t array_size (size_t sz) { return get_header_size(ARRAY) + MEMBER_SIZE * sz; }
size_t string_size (size_t len) {
// string should be null terminated
return get_header_size(STRING) + len + 1;
}
size_t closure_size (size_t sz) { return get_header_size(CLOSURE) + MEMBER_SIZE * sz; }
size_t sexp_size (size_t members) { return get_header_size(SEXP) + MEMBER_SIZE * (members + 1); }
obj_field_iterator field_begin_iterator (void *obj) {
lama_type type = get_type_header_ptr(obj);
obj_field_iterator it = {.type = type, .obj_ptr = obj, .cur_field = get_object_content_ptr(obj)};
switch (type) {
case STRING: {
it.cur_field = get_end_of_obj(it.obj_ptr);
break;
}
case CLOSURE:
case SEXP: {
it.cur_field += MEMBER_SIZE;
break;
}
default: break;
}
return it;
}
obj_field_iterator ptr_field_begin_iterator (void *obj) {
obj_field_iterator it = field_begin_iterator(obj);
// corner case when obj has no fields
if (field_is_done_iterator(&it)) { return it; }
if (is_valid_pointer(*(size_t **)it.cur_field)) { return it; }
obj_next_ptr_field_iterator(&it);
return it;
}
void obj_next_field_iterator (obj_field_iterator *it) { it->cur_field += MEMBER_SIZE; }
void obj_next_ptr_field_iterator (obj_field_iterator *it) {
do {
obj_next_field_iterator(it);
} while (!field_is_done_iterator(it) && !is_valid_pointer(*(size_t **)it->cur_field));
}
bool field_is_done_iterator (obj_field_iterator *it) {
return it->cur_field >= get_end_of_obj(it->obj_ptr);
}
void *get_obj_header_ptr (void *ptr) {
lama_type type = get_type_row_ptr(ptr);
return ptr - get_header_size(type);
}
void *get_object_content_ptr (void *header_ptr) {
lama_type type = get_type_header_ptr(header_ptr);
return header_ptr + get_header_size(type);
}
void *get_end_of_obj (void *header_ptr) { return header_ptr + obj_size_header_ptr(header_ptr); }
size_t get_header_size (lama_type type) {
switch (type) {
case STRING:
case CLOSURE:
case ARRAY:
case SEXP: return DATA_HEADER_SZ;
default: perror("ERROR: get_header_size: unknown object type\n");
#ifdef DEBUG_VERSION
raise(SIGINT); // only for debug purposes
#endif
exit(1);
}
}
void *alloc_string (int len) {
data *obj = alloc(string_size(len));
obj->data_header = STRING_TAG | (len << 3);
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "%p, [STRING] tag=%zu\n", obj, TAG(obj->data_header));
#endif
#ifdef DEBUG_VERSION
obj->id = cur_id;
#endif
obj->forward_address = 0;
return obj;
}
void *alloc_array (int len) {
data *obj = alloc(array_size(len));
obj->data_header = ARRAY_TAG | (len << 3);
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "%p, [ARRAY] tag=%zu\n", obj, TAG(obj->data_header));
#endif
#ifdef DEBUG_VERSION
obj->id = cur_id;
#endif
obj->forward_address = 0;
return obj;
}
void *alloc_sexp (int members) {
sexp *obj = alloc(sexp_size(members));
obj->data_header = SEXP_TAG | (members << 3);
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "%p, SEXP tag=%zu\n", obj, TAG(obj->data_header));
#endif
#ifdef DEBUG_VERSION
obj->id = cur_id;
#endif
obj->forward_address = 0;
obj->tag = 0;
return obj;
}
void *alloc_closure (int captured) {
data *obj = alloc(closure_size(captured));
obj->data_header = CLOSURE_TAG | (captured << 3);
#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT)
fprintf(stderr, "%p, [CLOSURE] tag=%zu\n", obj, TAG(obj->data_header));
#endif
#ifdef DEBUG_VERSION
obj->id = cur_id;
#endif
obj->forward_address = 0;
return obj;
}

251
runtime/gc.h Normal file
View file

@ -0,0 +1,251 @@
// ============================================================================
// GC
// ============================================================================
// This is an implementation of a compactifying garbage collection algorithm.
// GC algorithm itself consists of two major stages:
// 1. Marking roots
// 2. Compacting stage
// Compacting is implemented in a very similar fashion to LISP2 algorithm,
// which is well-known.
// Most important pieces of code to discover to understand how everything works:
// - void *gc_alloc (size_t): this function is basically called whenever we are
// not able to allocate memory on the existing heap via simple bump allocator.
// - mark_phase(): this function will tell you everything you need to know
// about marking. I would also recommend to pay attention to the fact that
// marking is implemented without usage of any additional memory. Already
// allocated space is sufficient (for details see 'void mark (void *obj)').
// - void compact_phase (size_t additional_size): the whole compaction phase
// can be understood by looking at this piece of code plus couple of other
// functions used in there. It is basically an implementation of LISP2.
#ifndef __LAMA_GC__
#define __LAMA_GC__
#include "runtime_common.h"
#define GET_MARK_BIT(x) (((int)(x)) & 1)
#define SET_MARK_BIT(x) (x = (((int)(x)) | 1))
#define IS_ENQUEUED(x) (((int)(x)) & 2)
#define MAKE_ENQUEUED(x) (x = (((int)(x)) | 2))
#define MAKE_DEQUEUED(x) (x = (((int)(x)) & (~2)))
#define RESET_MARK_BIT(x) (x = (((int)(x)) & (~1)))
// since last 2 bits are used for mark-bit and enqueued-bit and due to correct
// alignment we can expect that last 2 bits don't influence address (they
// should always be zero)
#define GET_FORWARD_ADDRESS(x) (((size_t)(x)) & (~3))
// take the last two bits as they are and make all others zero
#define SET_FORWARD_ADDRESS(x, addr) (x = ((x & 3) | ((int)(addr))))
// if heap is full after gc shows in how many times it has to be extended
#define EXTRA_ROOM_HEAP_COEFFICIENT 2
#ifdef DEBUG_VERSION
# define MINIMUM_HEAP_CAPACITY (8)
#else
# define MINIMUM_HEAP_CAPACITY (1 << 2)
#endif
#include <stdbool.h>
#include <stddef.h>
typedef enum { ARRAY, CLOSURE, STRING, SEXP } lama_type;
typedef struct {
size_t *current;
} heap_iterator;
typedef struct {
lama_type type; // holds type of object, which fields we are iterating over
void *obj_ptr; // place to store a pointer to the object header
void *cur_field;
} obj_field_iterator;
// Memory pool for linear memory allocation
typedef struct {
size_t *begin;
size_t *end;
size_t *current;
size_t size;
} memory_chunk;
// the only GC-related function that should be exposed, others are useful for tests and internal implementation
// allocates object of the given size on the heap
void *alloc(size_t);
// takes number of words as a parameter
void *gc_alloc(size_t);
// takes number of words as a parameter
void *gc_alloc_on_existing_heap(size_t);
// specific for mark-and-compact_phase gc
void mark (void *obj);
void mark_phase (void);
// marks each pointer from extra roots
void scan_extra_roots (void);
#ifdef LAMA_ENV
// marks each valid pointer from global area
void scan_global_area (void);
#endif
// takes number of words that are required to be allocated somewhere on the heap
void compact_phase (size_t additional_size);
// specific for Lisp-2 algorithm
size_t compute_locations ();
void update_references (memory_chunk *);
void physically_relocate (memory_chunk *);
// ============================================================================
// GC extra roots
// ============================================================================
// Lama's program stack is continuous, i.e. it never interleaves with runtime
// function's activation records. But some valid Lama's pointers can escape
// into runtime. Those values (theirs stack addresses) has to be registered in
// an auxiliary data structure called `extra_roots_pool`.
// extra_roots_pool is a simple LIFO stack. During `pop` it compares that pop's
// argument is equal to the current stack top.
#define MAX_EXTRA_ROOTS_NUMBER 32
typedef struct {
int current_free;
void **roots[MAX_EXTRA_ROOTS_NUMBER];
} extra_roots_pool;
void clear_extra_roots (void);
void push_extra_root (void **p);
void pop_extra_root (void **p);
// ============================================================================
// Implemented in GASM: see gc_runtime.s
// ============================================================================
// MANDATORY TO CALL BEFORE ANY INTERACTION WITH GC (apart from cases where we
// are working with virtual stack as happens in tests)
void __gc_init (void);
// should be called before interaction with GC in case of using in tests with
// virtual stack, otherwise it is automatically invoked by `__gc_init`
void __init (void);
// mostly useful for tests but basically you want to call this in case you want
// to deallocate all object allocated via GC
extern void __shutdown (void);
// ============================================================================
// invoked from GASM: see gc_runtime.s
// ============================================================================
extern void gc_test_and_mark_root (size_t **root);
bool is_valid_heap_pointer (const size_t *);
static inline bool is_valid_pointer (const size_t *);
// ============================================================================
// Auxiliary functions for tests
// ============================================================================
#if defined(DEBUG_VERSION)
// makes a snapshot of current objects in heap (both alive and dead), writes these ids to object_ids_buf,
// returns number of ids dumped
// object_ids_buf is pointer to area preallocated by user for dumping ids of objects in heap
// object_ids_buf_size is in WORDS, NOT BYTES
size_t objects_snapshot (int *object_ids_buf, size_t object_ids_buf_size);
#endif
#ifdef DEBUG_VERSION
// essential function to mock program stack
void set_stack (size_t stack_top, size_t stack_bottom);
// function to mock extra roots (Lama specific)
void set_extra_roots (size_t extra_roots_size, void **extra_roots_ptr);
#endif
// ============================================================================
// Utility functions
// ============================================================================
// accepts pointer to the start of the region and to the end of the region
// scans it and if it meets a pointer, it should be modified in according to forward address
void scan_and_fix_region (memory_chunk *old_heap, void *start, void *end);
// takes a pointer to an object content as an argument, returns forwarding address
size_t get_forward_address (void *obj);
// takes a pointer to an object content as an argument, sets forwarding address to value 'addr'
void set_forward_address (void *obj, size_t addr);
// takes a pointer to an object content as an argument, returns whether this object was marked as live
bool is_marked (void *obj);
// takes a pointer to an object content as an argument, marks the object as live
void mark_object (void *obj);
// takes a pointer to an object content as an argument, marks the object as dead
void unmark_object (void *obj);
// takes a pointer to an object content as an argument, returns whether this object was enqueued to the queue (which is used in mark phase)
bool is_enqueued (void *obj);
// takes a pointer to an object content as an argument, marks object as enqueued
void make_enqueued (void *obj);
// takes a pointer to an object content as an argument, unmarks object as enqueued
void make_dequeued (void *obj);
// returns iterator to an object with the lowest address
heap_iterator heap_begin_iterator ();
void heap_next_obj_iterator (heap_iterator *it);
bool heap_is_done_iterator (heap_iterator *it);
// returns correct type when pointer to actual data is passed (header is excluded)
lama_type get_type_row_ptr (void *ptr);
// returns correct type when pointer to an object header is passed
lama_type get_type_header_ptr (void *ptr);
// returns correct object size (together with header) of an object, ptr is pointer to an actual data is passed (header is excluded)
size_t obj_size_row_ptr (void *ptr);
// returns correct object size (together with header) of an object, ptr is pointer to an object header
size_t obj_size_header_ptr (void *ptr);
// returns total padding size that we need to store given object type
size_t get_header_size (lama_type type);
// returns number of bytes that are required to allocate array with 'sz' elements (header included)
size_t array_size (size_t sz);
// returns number of bytes that are required to allocate string of length 'l' (header included)
size_t string_size (size_t len);
// returns number of bytes that are required to allocate closure with 'sz-1' captured values (header included)
size_t closure_size (size_t sz);
// returns number of bytes that are required to allocate s-expression with 'members' fields (header included)
size_t sexp_size (size_t members);
// returns an iterator over object fields, obj is ptr to object header
// (in case of s-exp, it is mandatory that obj ptr is very beginning of the object,
// considering that now we store two versions of header in there)
obj_field_iterator field_begin_iterator (void *obj);
// returns an iterator over object fields which are actual pointers, obj is ptr to object header
// (in case of s-exp, it is mandatory that obj ptr is very beginning of the object,
// considering that now we store two versions of header in there)
obj_field_iterator ptr_field_begin_iterator (void *obj);
// moves the iterator to next object field
void obj_next_field_iterator (obj_field_iterator *it);
// moves the iterator to the next object field which is an actual pointer
void obj_next_ptr_field_iterator (obj_field_iterator *it);
// returns if we are done iterating over fields of the object
bool field_is_done_iterator (obj_field_iterator *it);
// ptr is pointer to the actual object content, returns pointer to the very beginning of the object (header)
void *get_obj_header_ptr (void *ptr);
void *get_object_content_ptr (void *header_ptr);
void *get_end_of_obj (void *header_ptr);
void *alloc_string (int len);
void *alloc_array (int len);
void *alloc_sexp (int members);
void *alloc_closure (int captured);
#endif

View file

@ -1,116 +0,0 @@
.data
printf_format: .string "Stack root: %lx\n"
printf_format2: .string "BOT: %lx\n"
printf_format3: .string "TOP: %lx\n"
printf_format4: .string "EAX: %lx\n"
printf_format5: .string "LOL\n"
__gc_stack_bottom: .long 0
__gc_stack_top: .long 0
.globl __pre_gc
.globl __post_gc
.globl __gc_init
.globl __gc_root_scan_stack
.globl __gc_stack_top
.globl __gc_stack_bottom
.extern init_pool
.extern gc_test_and_copy_root
.text
__gc_init: movl %ebp, __gc_stack_bottom
addl $4, __gc_stack_bottom
call __init
ret
// if __gc_stack_top is equal to 0
// then set __gc_stack_top to %ebp
// else return
__pre_gc:
pushl %eax
movl __gc_stack_top, %eax
cmpl $0, %eax
jne __pre_gc_2
movl %ebp, %eax
// addl $8, %eax
movl %eax, __gc_stack_top
__pre_gc_2:
popl %eax
ret
// if __gc_stack_top has been set by the caller
// (i.e. it is equal to its %ebp)
// then set __gc_stack_top to 0
// else return
__post_gc:
pushl %eax
movl __gc_stack_top, %eax
cmpl %eax, %ebp
jnz __post_gc2
movl $0, __gc_stack_top
__post_gc2:
popl %eax
ret
// Scan stack for roots
// strting from __gc_stack_top
// till __gc_stack_bottom
__gc_root_scan_stack:
pushl %ebp
movl %esp, %ebp
pushl %ebx
pushl %edx
movl __gc_stack_top, %eax
jmp next
loop:
movl (%eax), %ebx
// check that it is not a pointer to code section
// i.e. the following is not true:
// __executable_start <= (%eax) <= __etext
check11:
leal __executable_start, %edx
cmpl %ebx, %edx
jna check12
jmp check21
check12:
leal __etext, %edx
cmpl %ebx, %edx
jnb next
// check that it is not a pointer into the program stack
// i.e. the following is not true:
// __gc_stack_bottom <= (%eax) <= __gc_stack_top
check21:
cmpl %ebx, __gc_stack_top
jna check22
jmp loop2
check22:
cmpl %ebx, __gc_stack_bottom
jnb next
// check if it a valid pointer
// i.e. the lastest bit is set to zero
loop2:
andl $0x00000001, %ebx
jnz next
gc_run_t:
pushl %eax
pushl %eax
call gc_test_and_copy_root
addl $4, %esp
popl %eax
next:
addl $4, %eax
cmpl %eax, __gc_stack_bottom
jne loop
returnn:
movl $0, %eax
popl %edx
popl %ebx
movl %ebp, %esp
popl %ebp
ret

View file

@ -0,0 +1,2 @@
ERROR: pop_extra_root: extra_roots are empty
: Success

View file

@ -0,0 +1,2 @@
ERROR: push_extra_roots: extra_roots_pool overflow
: Success

View file

@ -0,0 +1,2 @@
ERROR: pop_extra_root: stack invariant violation
: Success

View file

@ -0,0 +1,5 @@
#include "../gc.h"
#include <stddef.h>
int main () { pop_extra_root((void **)NULL); }

View file

@ -0,0 +1,7 @@
#include "../gc.h"
#include <stddef.h>
int main () {
for (size_t i = 0; i < MAX_EXTRA_ROOTS_NUMBER + 1; ++i) { push_extra_root(NULL); }
}

View file

@ -0,0 +1,6 @@
#include "../gc.h"
int main () {
push_extra_root(NULL);
pop_extra_root((void **)239);
}

File diff suppressed because it is too large Load diff

View file

@ -1,21 +1,20 @@
# ifndef __LAMA_RUNTIME__
# define __LAMA_RUNTIME__
#ifndef __LAMA_RUNTIME__
#define __LAMA_RUNTIME__
# include <stdio.h>
# include <stdio.h>
# include <string.h>
# include <stdarg.h>
# include <stdlib.h>
# include <sys/mman.h>
# include <assert.h>
# include <errno.h>
# include <regex.h>
# include <time.h>
# include <limits.h>
# include <ctype.h>
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <regex.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
# define WORD_SIZE (CHAR_BIT * sizeof(int))
#define WORD_SIZE (CHAR_BIT * sizeof(int))
void failure (char *s, ...);
# endif
#endif

73
runtime/runtime_common.h Normal file
View file

@ -0,0 +1,73 @@
#ifndef __LAMA_RUNTIME_COMMON__
#define __LAMA_RUNTIME_COMMON__
#include <stddef.h>
// this flag makes GC behavior a bit different for testing purposes.
//#define DEBUG_VERSION
//#define FULL_INVARIANT_CHECKS
#define STRING_TAG 0x00000001
#define ARRAY_TAG 0x00000003
#define SEXP_TAG 0x00000005
#define CLOSURE_TAG 0x00000007
#define UNBOXED_TAG 0x00000009 // Not actually a data_header; used to return from LkindOf
#define LEN(x) ((x & 0xFFFFFFF8) >> 3)
#define TAG(x) (x & 0x00000007)
#define SEXP_ONLY_HEADER_SZ (sizeof(int))
#ifndef DEBUG_VERSION
# define DATA_HEADER_SZ (sizeof(size_t) + sizeof(int))
#else
# define DATA_HEADER_SZ (sizeof(size_t) + sizeof(size_t) + sizeof(int))
#endif
#define MEMBER_SIZE sizeof(int)
#define TO_DATA(x) ((data *)((char *)(x)-DATA_HEADER_SZ))
#define TO_SEXP(x) ((sexp *)((char *)(x)-DATA_HEADER_SZ))
#define UNBOXED(x) (((int)(x)) & 0x0001)
#define UNBOX(x) (((int)(x)) >> 1)
#define BOX(x) ((((int)(x)) << 1) | 0x0001)
#define BYTES_TO_WORDS(bytes) (((bytes)-1) / sizeof(size_t) + 1)
#define WORDS_TO_BYTES(words) ((words) * sizeof(size_t))
// CAREFUL WITH DOUBLE EVALUATION!
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
typedef struct {
// store tag in the last three bits to understand what structure this is, other bits are filled with
// other utility info (i.e., size for array, number of fields for s-expression)
int data_header;
#ifdef DEBUG_VERSION
size_t id;
#endif
// last bit is used as MARK-BIT, the rest are used to store address where object should move
// last bit can be used because due to alignment we can assume that last two bits are always 0's
size_t forward_address;
char contents[0];
} data;
typedef struct {
// store tag in the last three bits to understand what structure this is, other bits are filled with
// other utility info (i.e., size for array, number of fields for s-expression)
int data_header;
#ifdef DEBUG_VERSION
size_t id;
#endif
// last bit is used as MARK-BIT, the rest are used to store address where object should move
// last bit can be used because due to alignment we can assume that last two bits are always 0's
size_t forward_address;
int tag;
int contents[0];
} sexp;
#endif

275
runtime/test_main.c Normal file
View file

@ -0,0 +1,275 @@
#include "gc.h"
#include "runtime_common.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef DEBUG_VERSION
// function from runtime that maps string to int value
extern int LtagHash (char *s);
extern void *Bsexp (int n, ...);
extern void *Barray (int bn, ...);
extern void *Bstring (void *);
extern void *Bclosure (int bn, void *entry, ...);
extern size_t __gc_stack_top, __gc_stack_bottom;
void test_correct_structure_sizes (void) {
// something like induction base
assert((array_size(0) == get_header_size(ARRAY)));
assert((string_size(0) == get_header_size(STRING) + 1)); // +1 is because of '\0'
assert((sexp_size(0) == get_header_size(SEXP) + MEMBER_SIZE));
assert((closure_size(0) == get_header_size(CLOSURE)));
// just check correctness for some small sizes
for (int k = 1; k < 20; ++k) {
assert((array_size(k) == get_header_size(ARRAY) + MEMBER_SIZE * k));
assert((string_size(k) == get_header_size(STRING) + k + 1));
assert((sexp_size(k) == get_header_size(SEXP) + MEMBER_SIZE * (k + 1)));
assert((closure_size(k) == get_header_size(CLOSURE) + MEMBER_SIZE * k));
}
}
void no_gc_tests (void) { test_correct_structure_sizes(); }
// unfortunately there is no generic function pointer that can hold pointer to function with arbitrary signature
extern size_t call_runtime_function (void *virt_stack_pointer, void *function_pointer,
size_t num_args, ...);
# include "virt_stack.h"
virt_stack *init_test () {
__init();
virt_stack *st = vstack_create();
vstack_init(st);
__gc_stack_bottom = (size_t)vstack_top(st);
return st;
}
void cleanup_test (virt_stack *st) {
vstack_destruct(st);
__shutdown();
}
void force_gc_cycle (virt_stack *st) {
__gc_stack_top = (size_t)vstack_top(st) - 4;
gc_alloc(0);
__gc_stack_top = 0;
}
void test_simple_string_alloc (void) {
virt_stack *st = init_test();
for (int i = 0; i < 5; ++i) { vstack_push(st, BOX(i)); }
vstack_push(st, call_runtime_function(vstack_top(st) - 4, Bstring, 1, "abc"));
const int N = 10;
int ids[N];
size_t alive = objects_snapshot(ids, N);
assert((alive == 1));
cleanup_test(st);
}
void test_simple_array_alloc (void) {
virt_stack *st = init_test();
// allocate array [ BOX(1) ] and push it onto the stack
vstack_push(st, call_runtime_function(vstack_top(st) - 4, Barray, 2, BOX(1), BOX(1)));
const int N = 10;
int ids[N];
size_t alive = objects_snapshot(ids, N);
assert((alive == 1));
cleanup_test(st);
}
void test_simple_sexp_alloc (void) {
virt_stack *st = init_test();
// allocate sexp with one boxed field and push it onto the stack
// calling runtime function Bsexp(BOX(2), BOX(1), LtagHash("test"))
vstack_push(
st, call_runtime_function(vstack_top(st) - 4, Bsexp, 3, BOX(2), BOX(1), LtagHash("test")));
const int N = 10;
int ids[N];
size_t alive = objects_snapshot(ids, N);
assert((alive == 1));
cleanup_test(st);
}
void test_simple_closure_alloc (void) {
virt_stack *st = init_test();
// allocate closure with boxed captured value and push it onto the stack
vstack_push(st, call_runtime_function(vstack_top(st) - 4, Bclosure, 3, BOX(1), NULL, BOX(1)));
const int N = 10;
int ids[N];
size_t alive = objects_snapshot(ids, N);
assert((alive == 1));
cleanup_test(st);
}
void test_single_object_allocation_with_collection_virtual_stack (void) {
virt_stack *st = init_test();
vstack_push(st,
call_runtime_function(
vstack_top(st) - 4, Bstring, 1, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"));
const int N = 10;
int ids[N];
size_t alive = objects_snapshot(ids, N);
assert((alive == 1));
cleanup_test(st);
}
void test_garbage_is_reclaimed (void) {
virt_stack *st = init_test();
call_runtime_function(vstack_top(st) - 4, Bstring, 1, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
force_gc_cycle(st);
const int N = 10;
int ids[N];
size_t alive = objects_snapshot(ids, N);
assert((alive == 0));
cleanup_test(st);
}
void test_alive_are_not_reclaimed (void) {
virt_stack *st = init_test();
vstack_push(st,
call_runtime_function(
vstack_top(st) - 4, Bstring, 1, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"));
force_gc_cycle(st);
const int N = 10;
int ids[N];
size_t alive = objects_snapshot(ids, N);
assert((alive == 1));
cleanup_test(st);
}
void test_small_tree_compaction (void) {
virt_stack *st = init_test();
// this one will increase heap size
call_runtime_function(vstack_top(st) - 4, Bstring, 1, "aaaaaaaaaaaaaaaaaaaaaa");
vstack_push(st, call_runtime_function(vstack_top(st) - 4, Bstring, 1, "left-s"));
vstack_push(st, call_runtime_function(vstack_top(st) - 4, Bstring, 1, "right-s"));
vstack_push(st,
call_runtime_function(vstack_top(st) - 4,
Bsexp,
4,
BOX(3),
vstack_kth_from_start(st, 0),
vstack_kth_from_start(st, 1),
LtagHash("tree")));
force_gc_cycle(st);
const int SZ = 10;
int ids[SZ];
size_t alive = objects_snapshot(ids, SZ);
assert((alive == 3));
// check that order is indeed preserved
for (int i = 0; i < alive - 1; ++i) { assert((ids[i] < ids[i + 1])); }
cleanup_test(st);
}
extern size_t cur_id;
size_t generate_random_obj_forest (virt_stack *st, int cnt, int seed) {
srand(seed);
int cur_sz = 0;
size_t alive = 0;
while (cnt) {
--cnt;
if (cur_sz == 0) {
vstack_push(st, BOX(1));
++cur_sz;
continue;
}
size_t pos[2] = {rand() % vstack_size(st), rand() % vstack_size(st)};
size_t field[2];
for (int t = 0; t < 2; ++t) { field[t] = vstack_kth_from_start(st, pos[t]); }
size_t obj;
if (rand() % 2) {
obj = call_runtime_function(
vstack_top(st) - 4, Bsexp, 4, BOX(3), field[0], field[1], LtagHash("test"));
} else {
obj = BOX(1);
}
// whether object is stored on stack
if (rand() % 2 != 0) {
vstack_push(st, obj);
if ((obj & 1) == 0) { ++alive; }
}
++cur_sz;
}
force_gc_cycle(st);
return alive;
}
void run_stress_test_random_obj_forest (int seed) {
virt_stack *st = init_test();
const int SZ = 100000;
size_t expectedAlive = generate_random_obj_forest(st, SZ, seed);
int ids[SZ];
size_t alive = objects_snapshot(ids, SZ);
assert(alive == expectedAlive);
// check that order is indeed preserved
for (int i = 0; i < alive - 1; ++i) { assert((ids[i] < ids[i + 1])); }
cleanup_test(st);
}
#endif
#include <time.h>
int main (int argc, char **argv) {
#ifdef DEBUG_VERSION
no_gc_tests();
test_simple_string_alloc();
test_simple_array_alloc();
test_simple_sexp_alloc();
test_simple_closure_alloc();
test_single_object_allocation_with_collection_virtual_stack();
test_garbage_is_reclaimed();
test_alive_are_not_reclaimed();
test_small_tree_compaction();
time_t start, end;
double diff;
time(&start);
// stress test
for (int s = 0; s < 100; ++s) { run_stress_test_random_obj_forest(s); }
time(&end);
diff = difftime(end, start);
printf("Stress tests took %.2lf seconds to complete\n", diff);
#endif
}

40
runtime/test_util.s Normal file
View file

@ -0,0 +1,40 @@
# this is equivalent C-signature for this function
# size_t call_runtime_function(void *stack, void *func_ptr, int num_args, ...)
.globl call_runtime_function
.type call_runtime_function, @function
call_runtime_function:
pushl %ebp
movl %esp, %ebp
# store old stack pointer
movl %esp, %edi
# move esp to point to the virtual stack
movl 8(%ebp), %esp
# push arguments onto the stack
movl 16(%ebp), %ecx # num_args
test %ecx, %ecx
jz f_call # in case function doesn't have any parameters
leal 16(%ebp), %eax # pointer to value BEFORE first argument
leal (%eax,%ecx,4), %edx # pointer to last argument (right-to-left)
push_args_loop:
pushl (%edx)
subl $4, %edx
subl $1, %ecx
jnz push_args_loop
# call the function
f_call:
movl 12(%ebp), %eax
call *%eax
# restore the old stack pointer
movl %edi, %esp
# pop the old frame pointer and return
popl %ebp # epilogue
ret

34
runtime/virt_stack.c Normal file
View file

@ -0,0 +1,34 @@
#include "virt_stack.h"
#include <malloc.h>
virt_stack *vstack_create () { return malloc(sizeof(virt_stack)); }
void vstack_destruct (virt_stack *st) { free(st); }
void vstack_init (virt_stack *st) {
st->cur = RUNTIME_VSTACK_SIZE;
st->buf[st->cur] = 0;
}
void vstack_push (virt_stack *st, size_t value) {
if (st->cur == 0) { assert(0); }
--st->cur;
st->buf[st->cur] = value;
}
size_t vstack_pop (virt_stack *st) {
if (st->cur == RUNTIME_VSTACK_SIZE) { assert(0); }
size_t value = st->buf[st->cur];
++st->cur;
return value;
}
void *vstack_top (virt_stack *st) { return st->buf + st->cur; }
size_t vstack_size (virt_stack *st) { return RUNTIME_VSTACK_SIZE - st->cur; }
size_t vstack_kth_from_start (virt_stack *st, size_t k) {
assert(vstack_size(st) > k);
return st->buf[RUNTIME_VSTACK_SIZE - 1 - k];
}

33
runtime/virt_stack.h Normal file
View file

@ -0,0 +1,33 @@
//
// Created by egor on 24.04.23.
//
#ifndef LAMA_RUNTIME_VIRT_STACK_H
#define LAMA_RUNTIME_VIRT_STACK_H
#define RUNTIME_VSTACK_SIZE 100000
#include <assert.h>
#include <stddef.h>
struct {
size_t buf[RUNTIME_VSTACK_SIZE + 1];
size_t cur;
} typedef virt_stack;
virt_stack *vstack_create ();
void vstack_destruct (virt_stack *st);
void vstack_init (virt_stack *st);
void vstack_push (virt_stack *st, size_t value);
size_t vstack_pop (virt_stack *st);
void *vstack_top (virt_stack *st);
size_t vstack_size (virt_stack *st);
size_t vstack_kth_from_start (virt_stack *st, size_t k);
#endif //LAMA_RUNTIME_VIRT_STACK_H

1
src/.ocamlformat Normal file
View file

@ -0,0 +1 @@
profile=default

View file

@ -2,184 +2,223 @@ exception Commandline_error of string
class options args =
let n = Array.length args in
let dump_ast = 0b1 in
let dump_sm = 0b010 in
let dump_ast = 0b1 in
let dump_sm = 0b010 in
let dump_source = 0b100 in
(* Kakadu: binary masks are cool for C code, but for OCaml I don't see any reason to save memory like this *)
let help_string =
"Lama compiler. (C) JetBrains Reserach, 2017-2020.\n" ^
"Usage: lamac <options> <input file>\n\n" ^
"When no options specified, builds the source file into executable.\n" ^
"Options:\n" ^
" -c --- compile into object file\n" ^
" -o <file> --- write executable into file <file>\n" ^
" -I <path> --- add <path> into unit search path list\n" ^
" -i --- interpret on a source-level interpreter\n" ^
" -s --- compile into stack machine code and interpret on the stack machine initerpreter\n" ^
" -dp --- dump AST (the output will be written into .ast file)\n" ^
" -dsrc --- dump pretty-printed source code\n" ^
" -ds --- dump stack machine code (the output will be written into .sm file; has no\n" ^
" effect if -i option is specfied)\n" ^
" -b --- compile to a stack machine bytecode\n" ^
" -v --- show version\n" ^
" -h --- show this help\n"
"Lama compiler. (C) JetBrains Reserach, 2017-2020.\n"
^ "Usage: lamac <options> <input file>\n\n"
^ "When no options specified, builds the source file into executable.\n"
^ "Options:\n" ^ " -c --- compile into object file\n"
^ " -o <file> --- write executable into file <file>\n"
^ " -I <path> --- add <path> into unit search path list\n"
^ " -i --- interpret on a source-level interpreter\n"
^ " -s --- compile into stack machine code and interpret on the \
stack machine initerpreter\n"
^ " -dp --- dump AST (the output will be written into .ast file)\n"
^ " -dsrc --- dump pretty-printed source code\n"
^ " -ds --- dump stack machine code (the output will be written \
into .sm file; has no\n"
^ " effect if -i option is specfied)\n"
^ " -b --- compile to a stack machine bytecode\n"
^ " -v --- show version\n" ^ " -h --- show this help\n"
in
object (self)
val version = ref false
val help = ref false
val i = ref 1
val infile = ref (None : string option)
val help = ref false
val i = ref 1
val infile = ref (None : string option)
val outfile = ref (None : string option)
val paths = ref [X86.get_std_path ()]
val mode = ref (`Default : [`Default | `Eval | `SM | `Compile | `BC])
val curdir = Unix.getcwd ()
val debug = ref false
val paths = ref [ X86.get_std_path () ]
val mode = ref (`Default : [ `Default | `Eval | `SM | `Compile | `BC ])
val curdir = Unix.getcwd ()
val debug = ref false
(* Workaround until Ostap starts to memoize properly *)
val const = ref false
val const = ref false
(* end of the workaround *)
val dump = ref 0
val dump = ref 0
initializer
let rec loop () =
match self#peek with
| Some opt ->
(match opt with
(* Workaround until Ostap starts to memoize properly *)
| "-w" -> self#set_workaround
(* end of the workaround *)
| "-c" -> self#set_mode `Compile
| "-o" -> (match self#peek with None -> raise (Commandline_error "File name expected after '-o' specifier") | Some fname -> self#set_outfile fname)
| "-I" -> (match self#peek with None -> raise (Commandline_error "Path expected after '-I' specifier") | Some path -> self#add_include_path path)
| "-s" -> self#set_mode `SM
| "-b" -> self#set_mode `BC
| "-i" -> self#set_mode `Eval
| "-ds" -> self#set_dump dump_sm
| "-dsrc" -> self#set_dump dump_source
| "-dp" -> self#set_dump dump_ast
| "-h" -> self#set_help
| "-v" -> self#set_version
| "-g" -> self#set_debug
| _ ->
if opt.[0] = '-'
then raise (Commandline_error (Printf.sprintf "Invalid command line specifier ('%s')" opt))
else self#set_infile opt
);
loop ()
| None -> ()
in loop ()
let rec loop () =
match self#peek with
| Some opt ->
(match opt with
(* Workaround until Ostap starts to memoize properly *)
| "-w" -> self#set_workaround
(* end of the workaround *)
| "-c" -> self#set_mode `Compile
| "-o" -> (
match self#peek with
| None ->
raise
(Commandline_error "File name expected after '-o' specifier")
| Some fname -> self#set_outfile fname)
| "-I" -> (
match self#peek with
| None ->
raise (Commandline_error "Path expected after '-I' specifier")
| Some path -> self#add_include_path path)
| "-s" -> self#set_mode `SM
| "-b" -> self#set_mode `BC
| "-i" -> self#set_mode `Eval
| "-ds" -> self#set_dump dump_sm
| "-dsrc" -> self#set_dump dump_source
| "-dp" -> self#set_dump dump_ast
| "-h" -> self#set_help
| "-v" -> self#set_version
| "-g" -> self#set_debug
| _ ->
if opt.[0] = '-' then
raise
(Commandline_error
(Printf.sprintf "Invalid command line specifier ('%s')" opt))
else self#set_infile opt);
loop ()
| None -> ()
in
loop ()
(* Workaround until Ostap starts to memoize properly *)
method is_workaround = !const
method private set_workaround =
const := true
method private set_workaround = const := true
(* end of the workaround *)
method private set_help = help := true
method private set_help = help := true
method private set_version = version := true
method private set_dump mask =
dump := !dump lor mask
method private set_dump mask = dump := !dump lor mask
method private set_infile name =
match !infile with
| None -> infile := Some name
| Some name' -> raise (Commandline_error (Printf.sprintf "Input file ('%s') already specified" name'))
| None -> infile := Some name
| Some name' ->
raise
(Commandline_error
(Printf.sprintf "Input file ('%s') already specified" name'))
method private set_outfile name =
match !outfile with
| None -> outfile := Some name
| Some name' -> raise (Commandline_error (Printf.sprintf "Output file ('%s') already specified" name'))
method private add_include_path path =
paths := path :: !paths
| None -> outfile := Some name
| Some name' ->
raise
(Commandline_error
(Printf.sprintf "Output file ('%s') already specified" name'))
method private add_include_path path = paths := path :: !paths
method private set_mode s =
match !mode with
| `Default -> mode := s
| _ -> raise (Commandline_error "Extra compilation mode specifier")
method private peek =
let j = !i in
if j < n
then (incr i; Some (args.(j)))
if j < n then (
incr i;
Some args.(j))
else None
method get_mode = !mode
method get_output_option =
match !outfile with
| None -> Printf.sprintf "-o %s" self#basename
| None -> Printf.sprintf "-o %s" self#basename
| Some name -> Printf.sprintf "-o %s" name
method get_absolute_infile =
let f = self#get_infile in
if Filename.is_relative f then Filename.concat curdir f else f
method get_infile =
match !infile with
| None -> raise (Commandline_error "Input file not specified")
| None -> raise (Commandline_error "Input file not specified")
| Some name -> name
method get_help = !help
method get_include_paths = !paths
method basename = Filename.chop_suffix (Filename.basename self#get_infile) ".lama"
method basename =
Filename.chop_suffix (Filename.basename self#get_infile) ".lama"
method topname =
match !mode with
| `Compile -> "init" ^ self#basename
| _ -> "main"
match !mode with `Compile -> "init" ^ self#basename | _ -> "main"
method dump_file ext contents =
let name = self#basename in
let outf = open_out (Printf.sprintf "%s.%s" name ext) in
Printf.fprintf outf "%s" contents;
close_out outf
method dump_AST ast =
if (!dump land dump_ast) > 0
then (
if !dump land dump_ast > 0 then (
let buf = Buffer.create 1024 in
Buffer.add_string buf "<html>";
Buffer.add_string buf (Printf.sprintf "<title> %s </title>" self#get_infile);
Buffer.add_string buf
(Printf.sprintf "<title> %s </title>" self#get_infile);
Buffer.add_string buf "<body><li>";
GT.html(Language.Expr.t) ast buf;
GT.html Language.Expr.t ast buf;
Buffer.add_string buf "</li></body>";
Buffer.add_string buf "</html>";
self#dump_file "html" (Buffer.contents buf)
)
method dump_source (ast: Language.Expr.t) =
if (!dump land dump_source) > 0
then Pprinter.pp Format.std_formatter ast;
method dump_SM sm =
if (!dump land dump_sm) > 0
then self#dump_file "sm" (SM.show_prg sm)
self#dump_file "html" (Buffer.contents buf))
method dump_source (ast : Language.Expr.t) =
if !dump land dump_source > 0 then Pprinter.pp Format.std_formatter ast
method dump_SM sm =
if !dump land dump_sm > 0 then self#dump_file "sm" (SM.show_prg sm)
else ()
method greet =
(match !outfile with
| None -> ()
| Some _ -> (match !mode with `Default -> () | _ -> Printf.printf "Output file option ignored in this mode.\n")
);
| None -> ()
| Some _ -> (
match !mode with
| `Default -> ()
| _ -> Printf.printf "Output file option ignored in this mode.\n"));
if !version then Printf.printf "%s\n" Version.version;
if !help then Printf.printf "%s" help_string
method get_debug =
if !debug then "" else "-g"
method set_debug =
debug := true
if !help then Printf.printf "%s" help_string
method get_debug = if !debug then "" else "-g"
method set_debug = debug := true
end
let main =
let[@ocaml.warning "-32"] main =
try
let cmd = new options Sys.argv in
cmd#greet;
match (try Language.run_parser cmd with Language.Semantic_error msg -> `Fail msg) with
| `Ok prog ->
cmd#dump_AST (snd prog);
cmd#dump_source (snd prog);
(match cmd#get_mode with
| `Default | `Compile ->
ignore @@ X86.build cmd prog
| `BC ->
SM.ByteCode.compile cmd (SM.compile cmd prog)
match
try Language.run_parser cmd
with Language.Semantic_error msg -> `Fail msg
with
| `Ok prog -> (
cmd#dump_AST (snd prog);
cmd#dump_source (snd prog);
match cmd#get_mode with
| `Default | `Compile -> ignore @@ X86.build cmd prog
| `BC -> SM.ByteCode.compile cmd (SM.compile cmd prog)
| _ ->
let rec read acc =
try
let r = read_int () in
Printf.printf "> ";
read (acc @ [r])
with End_of_file -> acc
in
let input = read [] in
let output =
if cmd#get_mode = `Eval
then Language.eval prog input
else SM.run (SM.compile cmd prog) input
in
List.iter (fun i -> Printf.printf "%d\n" i) output
)
| `Fail er -> Printf.eprintf "Error: %s\n" er; exit 255
let rec read acc =
try
let r = read_int () in
Printf.printf "> ";
read (acc @ [ r ])
with End_of_file -> acc
in
let input = read [] in
let output =
if cmd#get_mode = `Eval then Language.eval prog input
else SM.run (SM.compile cmd prog) input
in
List.iter (fun i -> Printf.printf "%d\n" i) output)
| `Fail er ->
Printf.eprintf "Error: %s\n" er;
exit 255
with
| Language.Semantic_error msg -> Printf.printf "Error: %s\n" msg; exit 255
| Commandline_error msg -> Printf.printf "%s\n" msg; exit 255
| Language.Semantic_error msg ->
Printf.printf "Error: %s\n" msg;
exit 255
| Commandline_error msg ->
Printf.printf "%s\n" msg;
exit 255

View file

@ -3,6 +3,8 @@
*)
module OrigList = List
[@@@ocaml.warning "-7-8-13-15-20-26-27-32"]
open GT
(* Opening a library for combinator-based syntax analysis *)
@ -55,7 +57,7 @@ module Loc =
let report_error ?(loc=None) str =
raise (Semantic_error (str ^ match loc with None -> "" | Some (l, c) -> Printf.sprintf " at (%d, %d)" l c));;
@type k = Unmut | Mut | FVal with show, html, foldl
(* Values *)
@ -85,7 +87,7 @@ module Value =
with show, html, foldl
let is_int = function Int _ -> true | _ -> false
let to_int = function
| Int n -> n
| x -> failwith (Printf.sprintf "int value expected (%s)\n" (show(t) (fun _ -> "<not supported>") (fun _ -> "<not supported>") x))
@ -114,6 +116,7 @@ module Value =
match x with
| Sexp (_, a) | Array a -> ignore (update_array a i v)
| String a -> ignore (update_string a i (Char.chr @@ to_int v))
| _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
let string_val v =
let buf = Buffer.create 128 in
@ -121,8 +124,7 @@ module Value =
let rec inner = function
| Int n -> append (string_of_int n)
| String s -> append "\""; append @@ Bytes.to_string s; append "\""
| Array a -> let n = Array.length a in
append "["; Array.iteri (fun i a -> (if i > 0 then append ", "); inner a) a; append "]"
| Array a -> append "["; Array.iteri (fun i a -> (if i > 0 then append ", "); inner a) a; append "]"
| Sexp (t, a) -> let n = Array.length a in
if t = "cons"
then (
@ -131,6 +133,7 @@ module Value =
| [||] -> ()
| [|x; Int 0|] -> inner x
| [|x; Sexp ("cons", a)|] -> inner x; append ", "; inner_list a
| _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
in inner_list a;
append "}"
)
@ -139,6 +142,7 @@ module Value =
(if n > 0 then (append " ("; Array.iteri (fun i a -> (if i > 0 then append ", "); inner a) a;
append ")"))
)
| _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
in
inner v;
Bytes.of_string @@ Buffer.contents buf
@ -156,24 +160,27 @@ module Builtin =
let eval (st, i, o, vs) args = function
| "read" -> (match i with z::i' -> (st, i', o, (Value.of_int z)::vs) | _ -> failwith "Unexpected end of input")
| "write" -> (st, i, o @ [Value.to_int @@ List.hd args], Value.Empty :: vs)
| ".elem" -> let [b; j] = args in
(st, i, o, let i = Value.to_int j in
(match b with
| Value.String s -> Value.of_int @@ Char.code (Bytes.get s i)
| Value.Array a -> a.(i)
| Value.Sexp (_, a) -> a.(i)
) :: vs
| ".elem" -> (match args with
| [b; j] -> (st, i, o, let i = Value.to_int j in
(match b with
| Value.String s -> Value.of_int @@ Char.code (Bytes.get s i)
| Value.Array a -> a.(i)
| Value.Sexp (_, a) -> a.(i)
| _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
) :: vs
)
| _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
)
| "length" -> (st, i, o, (Value.of_int (match List.hd args with Value.Sexp (_, a) | Value.Array a -> Array.length a | Value.String s -> Bytes.length s))::vs)
| "length" -> (st, i, o, (Value.of_int (match List.hd args with Value.Sexp (_, a) | Value.Array a -> Array.length a | Value.String s -> Bytes.length s | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)))::vs)
| ".array" -> (st, i, o, (Value.of_array @@ Array.of_list args)::vs)
| "string" -> let [a] = args in (st, i, o, (Value.of_string @@ Value.string_val a)::vs)
| "string" -> (match args with | [a] -> (st, i, o, (Value.of_string @@ Value.string_val a)::vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))
| _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
end
(* States *)
module State =
struct
(* State: global state, local state, scope variables *)
@type 'a t =
| I
@ -273,7 +280,7 @@ module State =
| _ -> L (xs, s, st)
(* Drop a local scope *)
let drop = function L (_, _, e) -> e | G _ -> I
let drop = function L (_, _, e) -> e | G _ -> I | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
(* Observe a variable in a state and print it to stderr *)
let observe st x =
@ -440,19 +447,18 @@ module Expr =
let seq x = function Skip -> x | y -> Seq (x, y)
let schedule_list h::tl =
List.fold_left seq h tl
let schedule_list = function h::tl -> List.fold_left seq h tl | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
let rec take = function
| 0 -> fun rest -> [], rest
| n -> fun h::tl -> let tl', rest = take (n-1) tl in h :: tl', rest
| n -> function h::tl -> let tl', rest = take (n-1) tl in h :: tl', rest | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
let rec eval ((st, i, o, vs) as conf) k expr =
let print_values vs =
(* let print_values vs =
Printf.eprintf "Values:\n%!";
List.iter (fun v -> Printf.eprintf "%s\n%!" @@ show(Value.t) (fun _ -> "<expr>") (fun _ -> "<state>") v) vs;
Printf.eprintf "End Values\n%!"
in
in *)
match expr with
| Lambda (args, body) ->
eval (st, i, o, Value.Closure (args, body, [|st|]) :: vs) Skip k
@ -500,73 +506,78 @@ module Expr =
| Sexp (t, xs) ->
eval conf k (schedule_list (xs @ [Intrinsic (fun (st, i, o, vs) -> let es, vs' = take (List.length xs) vs in (st, i, o, Value.Sexp (t, Array.of_list (List.rev es)) :: vs'))]))
| Binop (op, x, y) ->
eval conf k (schedule_list [x; y; Intrinsic (fun (st, i, o, y::x::vs) -> (st, i, o, (Value.of_int @@ to_func op (Value.to_int x) (Value.to_int y)) :: vs))])
eval conf k (schedule_list [x; y; Intrinsic (function (st, i, o, y::x::vs) -> (st, i, o, (Value.of_int @@ to_func op (Value.to_int x) (Value.to_int y)) :: vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))])
| Elem (b, i) ->
eval conf k (schedule_list [b; i; Intrinsic (fun (st, i, o, j::b::vs) -> Builtin.eval (st, i, o, vs) [b; j] ".elem")])
eval conf k (schedule_list [b; i; Intrinsic (function (st, i, o, j::b::vs) -> Builtin.eval (st, i, o, vs) [b; j] ".elem" | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))])
| ElemRef (b, i) ->
eval conf k (schedule_list [b; i; Intrinsic (fun (st, i, o, j::b::vs) -> (st, i, o, (Value.Elem (b, Value.to_int j))::vs))])
eval conf k (schedule_list [b; i; Intrinsic (function (st, i, o, j::b::vs) -> (st, i, o, (Value.Elem (b, Value.to_int j))::vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))])
| Call (f, args) ->
eval conf k (schedule_list (f :: args @ [Intrinsic (fun (st, i, o, vs) ->
let es, vs' = take (List.length args + 1) vs in
let f :: es = List.rev es in
(match f with
| Value.Builtin name ->
Builtin.eval (st, i, o, vs') es name
| Value.Closure (args, body, closure) ->
let st' = State.push (State.leave st closure.(0)) (State.from_list @@ List.combine args es) (List.map (fun x -> x, Mut) args) in
let st'', i', o', vs'' = eval (st', i, o, []) Skip body in
closure.(0) <- st'';
(State.leave st'' st, i', o', match vs'' with [v] -> v::vs' | _ -> Value.Empty :: vs')
| _ -> report_error (Printf.sprintf "callee did not evaluate to a function: \"%s\"" (show(Value.t) (fun _ -> "<expr>") (fun _ -> "<state>") f))
))]))
match List.rev es with
| f :: es ->
(match f with
| Value.Builtin name ->
Builtin.eval (st, i, o, vs') es name
| Value.Closure (args, body, closure) ->
let st' = State.push (State.leave st closure.(0)) (State.from_list @@ List.combine args es) (List.map (fun x -> x, Mut) args) in
let st'', i', o', vs'' = eval (st', i, o, []) Skip body in
closure.(0) <- st'';
(State.leave st'' st, i', o', match vs'' with [v] -> v::vs' | _ -> Value.Empty :: vs')
| _ -> report_error (Printf.sprintf "callee did not evaluate to a function: \"%s\"" (show(Value.t) (fun _ -> "<expr>") (fun _ -> "<state>") f))
)
| _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
)]))
| Leave -> eval (State.drop st, i, o, vs) Skip k
| Assign (x, e) ->
eval conf k (schedule_list [x; e; Intrinsic (fun (st, i, o, v::x::vs) -> (update st x v, i, o, v::vs))])
eval conf k (schedule_list [x; e; Intrinsic (function (st, i, o, v::x::vs) -> (update st x v, i, o, v::vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))])
| Seq (s1, s2) ->
eval conf (seq s2 k) s1
| Skip ->
(match k with Skip -> conf | _ -> eval conf Skip k)
| If (e, s1, s2) ->
eval conf k (schedule_list [e; Control (fun (st, i, o, e::vs) -> (if Value.to_int e <> 0 then s1 else s2), (st, i, o, vs))])
eval conf k (schedule_list [e; Control (function (st, i, o, e::vs) -> (if Value.to_int e <> 0 then s1 else s2), (st, i, o, vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))])
| While (e, s) ->
eval conf k (schedule_list [e; Control (fun (st, i, o, e::vs) -> (if Value.to_int e <> 0 then seq s expr else Skip), (st, i, o, vs))])
eval conf k (schedule_list [e; Control (function (st, i, o, e::vs) -> (if Value.to_int e <> 0 then seq s expr else Skip), (st, i, o, vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))])
| DoWhile (s, e) ->
eval conf (seq (While (e, s)) k) s
| Case (e, bs, _, _)->
let rec branch ((st, i, o, v::vs) as conf) = function
| [] -> failwith (Printf.sprintf "Pattern matching failed: no branch is selected while matching %s\n" (show(Value.t) (fun _ -> "<expr>") (fun _ -> "<state>") v))
| (patt, body)::tl ->
let rec match_patt patt v st =
let update x v = function
| None -> None
| Some s -> Some (State.bind x v s)
in
match patt, v with
| Pattern.Named (x, p), v -> update x v (match_patt p v st )
| Pattern.Wildcard , _ -> st
| Pattern.Sexp (t, ps), Value.Sexp (t', vs) when t = t' && List.length ps = Array.length vs -> match_list ps (Array.to_list vs) st
| Pattern.Array ps , Value.Array vs when List.length ps = Array.length vs -> match_list ps (Array.to_list vs) st
| Pattern.Const n , Value.Int n' when n = n' -> st
| Pattern.String s , Value.String s' when s = Bytes.to_string s' -> st
| Pattern.Boxed , Value.String _
| Pattern.Boxed , Value.Array _
| Pattern.UnBoxed , Value.Int _
| Pattern.Boxed , Value.Sexp (_, _)
| Pattern.StringTag , Value.String _
| Pattern.ArrayTag , Value.Array _
| Pattern.ClosureTag , Value.Closure _
| Pattern.SexpTag , Value.Sexp (_, _) -> st
| _ -> None
and match_list ps vs s =
match ps, vs with
| [], [] -> s
| p::ps, v::vs -> match_list ps vs (match_patt p v s)
| _ -> None
in
match match_patt patt v (Some State.undefined) with
| None -> branch conf tl
| Some st' -> eval (State.push st st' (List.map (fun x -> x, Unmut) @@ Pattern.vars patt), i, o, vs) k (Seq (body, Leave))
let rec branch =
function (_,_,_,[]) -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
| ((st, i, o, v::vs) as conf) -> function
| [] -> failwith (Printf.sprintf "Pattern matching failed: no branch is selected while matching %s\n" (show(Value.t) (fun _ -> "<expr>") (fun _ -> "<state>") v))
| (patt, body)::tl ->
let rec match_patt patt v st =
let update x v = function
| None -> None
| Some s -> Some (State.bind x v s)
in
match patt, v with
| Pattern.Named (x, p), v -> update x v (match_patt p v st )
| Pattern.Wildcard , _ -> st
| Pattern.Sexp (t, ps), Value.Sexp (t', vs) when t = t' && List.length ps = Array.length vs -> match_list ps (Array.to_list vs) st
| Pattern.Array ps , Value.Array vs when List.length ps = Array.length vs -> match_list ps (Array.to_list vs) st
| Pattern.Const n , Value.Int n' when n = n' -> st
| Pattern.String s , Value.String s' when s = Bytes.to_string s' -> st
| Pattern.Boxed , Value.String _
| Pattern.Boxed , Value.Array _
| Pattern.UnBoxed , Value.Int _
| Pattern.Boxed , Value.Sexp (_, _)
| Pattern.StringTag , Value.String _
| Pattern.ArrayTag , Value.Array _
| Pattern.ClosureTag , Value.Closure _
| Pattern.SexpTag , Value.Sexp (_, _) -> st
| _ -> None
and match_list ps vs s =
match ps, vs with
| [], [] -> s
| p::ps, v::vs -> match_list ps vs (match_patt p v s)
| _ -> None
in
match match_patt patt v (Some State.undefined) with
| None -> branch conf tl
| Some st' -> eval (State.push st st' (List.map (fun x -> x, Unmut) @@ Pattern.vars patt), i, o, vs) k (Seq (body, Leave))
in
eval conf Skip (schedule_list [e; Intrinsic (fun conf -> branch conf bs)])
@ -635,14 +646,14 @@ module Expr =
let not_a_reference s = new Reason.t (Msg.make "not a reference" [||] (Msg.Locator.Point s#coord))
(* UGLY! *)
let predefined_op : (Obj.t -> Obj.t -> Obj.t) ref = Pervasives.ref (fun _ _ -> invalid_arg "must not happen")
let predefined_op : (Obj.t -> Obj.t -> Obj.t) ref = Stdlib.ref (fun _ _ -> invalid_arg "must not happen")
let defCell = Pervasives.ref 0
let defCell = Stdlib.ref 0
(* ======= *)
let makeParsers env =
let makeParser, makeBasicParser, makeScopeParser =
let def s = let Some def = Obj.magic !defCell in def s in
let [@ocaml.warning "-26"] makeParser, makeBasicParser, makeScopeParser =
let [@ocaml.warning "-20"] def s = let [@ocaml.warning "-8"] Some def = Obj.magic !defCell in def s in
let ostap (
parse[infix][atr]: h:basic[infix][Void] -";" t:parse[infix][atr] {Seq (h, t)} | basic[infix][atr];
scope[infix][atr]: <(d, infix')> : def[infix] expr:parse[infix'][atr] {Scope (d, expr)} | {isVoid atr} => <(d, infix')> : def[infix] => {d <> []} => {Scope (d, materialize atr Skip)};
@ -872,7 +883,7 @@ module Infix =
show(showable) @@ Array.map (fun (ass, (_, l)) -> List.map (fun (str, kind, _) -> ass, str, kind) l) infix
let extract_exports infix =
let ass_string = function `Lefta -> "L" | `Righta -> "R" | _ -> "I" in
(* let ass_string = function `Lefta -> "L" | `Righta -> "R" | _ -> "I" in *)
let exported =
Array.map
(fun (ass, (_, ops)) ->
@ -1013,7 +1024,7 @@ module Definition =
(* end of the workaround *)
)
let makeParser env exprBasic exprScope =
let [@ocaml.warning "-26"] makeParser env exprBasic exprScope =
let ostap (
arg : l:$ x:LIDENT {Loc.attach x l#coord; x};
position[pub][ass][coord][newp]:
@ -1107,7 +1118,7 @@ module Interface =
Buffer.contents buf
(* Read an interface file *)
let read fname =
let [@ocaml.warning "-26"] read fname =
let ostap (
funspec: "F" "," i:IDENT ";" {`Fun i};
varspec: "V" "," i:IDENT ";" {`Variable i};
@ -1201,8 +1212,8 @@ ostap (
let parse cmd =
let env =
object
val imports = Pervasives.ref ([] : string list)
val tmp_index = Pervasives.ref 0
val imports = Stdlib.ref ([] : string list)
val tmp_index = Stdlib.ref 0
method add_import imp = imports := imp :: !imports
method get_tmp = let index = !tmp_index in incr tmp_index; Printf.sprintf "__tmp%d" index
@ -1223,7 +1234,7 @@ let parse cmd =
definitions
in
let definitions = Pervasives.ref None in
let definitions = Stdlib.ref None in
let (makeParser, makeBasicParser, makeScopeParser) = Expr.makeParsers env in
@ -1233,7 +1244,7 @@ let parse cmd =
definitions := Some (makeDefinitions env exprBasic exprScope);
let Some definitions = !definitions in
let [@ocaml.warning "-8-20"] Some definitions = !definitions in
let ostap (
parse[cmd]:
@ -1255,7 +1266,7 @@ let run_parser cmd =
"while"; "do"; "od";
"for";
"fun"; "var"; "public"; "external"; "import";
"case"; "of"; "esac";
"case"; "of"; "esac";
"box"; "val"; "str"; "sexp"; "array";
"infix"; "infixl"; "infixr"; "at"; "before"; "after";
"true"; "false"; "lazy"; "eta"; "syntax"]

View file

@ -8,7 +8,9 @@ PXFLAGS = $(CAMLP5)
BFLAGS = -rectypes -g -w -13-58 -package GT,ostap,unix
OFLAGS = $(BFLAGS)
all: depend metagen $(TOPFILE)
all: # depend metagen # $(TOPFILE)
dune build ./Driver.exe
ln -sf ../_build/default/src/Driver.exe lamac
metagen:
echo "let version = \"Version `git rev-parse --abbrev-ref HEAD`, `git rev-parse --short HEAD`, `git rev-parse --verify HEAD |git show --no-patch --no-notes --pretty='%cd'`\"" > version.ml
@ -25,6 +27,7 @@ $(TOPFILE).byte: $(SOURCES:.ml=.cmo)
clean:
$(RM) $(TOPFILE) *.cm[ioxa] *.annot *.o *.opt *.byte *~ .depend
dune clean
-include .depend
# generic rules
@ -44,4 +47,4 @@ clean:
$(OCAMLOPT) -c $(OFLAGS) $(STATIC) $(PXFLAGS) $<
%.cmx: %.ml
$(OCAMLOPT) -c $(OFLAGS) $(STATIC) $(PXFLAGS) $<
$(OCAMLOPT) -c $(OFLAGS) $(STATIC) $(PXFLAGS) $<

2482
src/SM.ml

File diff suppressed because it is too large Load diff

1394
src/X86.ml

File diff suppressed because it is too large Load diff

111
src/dune Normal file
View file

@ -0,0 +1,111 @@
(env
(dev
(flags
(:standard -warn-error -3-7-8-13-15-20-26-27-32-33-39))))
(rule
(targets version.ml)
(action
(progn
(with-stdout-to
version2.ml
(progn
(run echo let version = "\"")
(run echo "Version 1.2: ")
(run git rev-parse --abbrev-ref HEAD)
(run echo , " ")
(run git rev-parse --short HEAD)
(run echo , " ")
(pipe-stdout
(run git rev-parse --verify HEAD)
(run git show --no-patch --no-notes --pretty='%cd'))
(run echo "\"")))
(with-stdout-to
version.ml
(pipe-stdout
(run cat version2.ml)
(run tr -d '\n'))))))
(rule
(targets stdpath.ml)
(action
(progn
(with-stdout-to
stdpath2.ml
(progn
(run echo let path = "\"")
(run opam var share)
(run echo /Lama)
(run echo "\"")))
(with-stdout-to
stdpath.ml
(pipe-stdout
(run cat stdpath2.ml)
(run tr -d '\n'))))))
(library
(name liba)
(modules Language Pprinter stdpath version X86 SM)
(libraries GT ostap)
(flags
(:standard
-rectypes
;-dsource
))
; (ocamlopt_flags
; (:standard -dsource))
(wrapped false)
(preprocess
(per_module
((pps GT.ppx_all)
SM
X86)
((action
(run %{project_root}/src/pp5+gt+plugins+ostap+dump.byte %{input-file}))
Language
Pprinter
stdpath
version)))
(preprocessor_deps
(file %{project_root}/src/pp5+gt+plugins+ostap+dump.byte)
;(file %{project_root}/src/pp5+gt+plugins+ostap+dump.exe)
)
;(inline_tests)
)
(executable
(name Driver)
(flags
(:standard
-rectypes
;-dsource
))
(modules Driver)
(libraries liba unix))
; (rule
; (targets pp5+gt+plugins+ostap+dump.exe)
; (deps
; (package GT))
; (action
; (run
; mkcamlp5.opt
; -package
; camlp5,camlp5.pa_o,camlp5.pr_dump,camlp5.extend,camlp5.quotations,ostap.syntax,GT.syntax.all,GT.syntax
; -o
; %{targets})))
(rule
(targets pp5+gt+plugins+ostap+dump.byte)
(deps
(package GT))
(action
(run
mkcamlp5
-package
camlp5,camlp5.pa_o,camlp5.pr_o,ostap.syntax,GT.syntax.all,GT.syntax
-o
%{targets})))
(cram
(deps ./Driver.exe))

View file

@ -1 +0,0 @@
let path = "/home/db/.opam/4.14.0+flambda/share/Lama"

View file

@ -1 +0,0 @@
let version = "Version 1.10, 1bafe839d, Sun Mar 12 05:30:58 2023 +0300"

View file

@ -1,4 +1,4 @@
TESTS=$(sort $(basename $(wildcard test*.lama)))
TESTS=$(sort $(filter-out test30, $(basename $(wildcard test*.lama))))
LAMAC=../../src/lamac
@ -7,8 +7,8 @@ LAMAC=../../src/lamac
check: $(TESTS)
$(TESTS): %: %.lama
@echo $@
LAMA=../../runtime $(LAMAC) -I .. -ds -dp $< && ./$@ > $@.log && diff $@.log orig/$@.log
@echo "stdlib/regression/$@"
@LAMA=../../runtime $(LAMAC) -I .. -ds -dp $< && ./$@ > $@.log && diff $@.log orig/$@.log
clean:
$(RM) test*.log *.s *~ $(TESTS) *.i

View file

@ -14,5 +14,5 @@
1
0
0
31
1
-1