diff --git a/byterun/.gitignore b/byterun/.gitignore index 68995d9a0..514b6ec32 100644 --- a/byterun/.gitignore +++ b/byterun/.gitignore @@ -1 +1,10 @@ -/byterun.exe \ No newline at end of file +/byterun.exe + +build/ +.xmake/ +.cache/ + +compile_commands.json + +*.a +*.o diff --git a/byterun/Makefile b/byterun/Makefile index 64cf0ae60..c339568b7 100644 --- a/byterun/Makefile +++ b/byterun/Makefile @@ -3,7 +3,7 @@ FLAGS=-m32 -g2 -fstack-protector-all all: parser.o $(CC) $(FLAGS) -o byterun parser.o ../runtime/runtime.a -parser.o: src/parser.c +interpreter.o: src/parser.c $(CC) $(FLAGS) -Iinclude/ -g -c src/parser.c clean: diff --git a/byterun/include/builtin.h b/byterun/include/builtin.h index 7a7f9bba0..b3a3d20f8 100644 --- a/byterun/include/builtin.h +++ b/byterun/include/builtin.h @@ -22,8 +22,6 @@ inline void f_length(struct State *s) { if (type == ARRAY_T || type == STR_T) { s_put_i(s, dh_param(x->array.data_header)); - } else if (type == CONST_STR_T) { - s_put_i(s, strlen(x->const_str.value)); } else if (type == STR_T) { s_put_i(s, strlen(x->str.value)); } else { // TODO: lists ?? @@ -38,8 +36,10 @@ inline size_t str_sz(union VarT *var) { return strlen(""); case INT_T: // int return snprintf(nullptr, 0, "%d", var->int_t.value); - case CONST_STR_T: // "str" - return strlen(var->const_str.value); + case BOX_T: // ":..." + return strlen("") + (var->box.value != NULL + ? 
str_sz((union VarT *)&var->box.value) + 1 + : 0); case STR_T: // "str" return strlen(var->str.value); case CLOJURE_T: // // TODO @@ -49,7 +49,7 @@ inline size_t str_sz(union VarT *var) { size_t sz = 0; if (var->array.values != NULL) { for (size_t i = 0; i < dh_param(var->array.data_header); ++i) { - sz += str_sz((VarT *)var->array.values[i]) + 1; + sz += str_sz((union VarT *)var->array.values[i]) + 1; } --sz; // extra space } @@ -58,11 +58,11 @@ inline size_t str_sz(union VarT *var) { case SEXP_T: { // tag:{a_1 a_2 ...} size_t sz = 0; if (var->sexp.tag != NULL) { - sz += strlen(var->sexp.tag) + 1 // tag and ':' + sz += strlen(var->sexp.tag) + 1; // tag and ':' } if (var->sexp.values != NULL) { for (size_t i = 0; i < dh_param(var->sexp.data_header); ++i) { - sz += str_sz((VarT *)var->sexp.values[i]) + 1; + sz += str_sz((union VarT *)var->sexp.values[i]) + 1; } --sz; // extra space } @@ -83,10 +83,13 @@ inline char *to_str(union VarT *var, char *str, size_t max_sz) { case INT_T: snprintf(str, max_sz, "%d", var->int_t.value); break; - case CONST_STR_T: - strcat(str, "\""); - strcat(str, var->const_str.value); - strcat(str, "\""); + case BOX_T: + strcat(str, ""); + if (var->box.value != NULL) { + strcat(str, ":"); + str += strlen(str); + str = to_str((union VarT *)&var->box.value, str, max_sz); + } break; case STR_T: strcat(str, "\""); @@ -100,7 +103,7 @@ inline char *to_str(union VarT *var, char *str, size_t max_sz) { strcat(str, "["); ++str; for (size_t i = 0; i < dh_param(var->array.data_header); ++i) { - str = to_str((VarT *)var->array.values[i], str, max_sz); + str = to_str((union VarT *)var->array.values[i], str, max_sz); strcat(str, " "); ++str; } @@ -114,7 +117,7 @@ inline char *to_str(union VarT *var, char *str, size_t max_sz) { strcat(str, "{"); str += strlen(str); for (size_t i = 0; i < dh_param(var->sexp.data_header); ++i) { - str = to_str((VarT *)var->sexp.values[i], str, max_sz); + str = to_str((union VarT *)var->sexp.values[i], str, max_sz); strcat(str, 
" "); ++str; } @@ -161,7 +164,7 @@ inline void f_binop(struct State *s, const char *opr) { z = x - y; break; case '*': - z - x *y; + z = x * y; break; case '/': if (y == 0) { diff --git a/byterun/include/gc.h b/byterun/include/gc.h new file mode 100644 index 000000000..627577c15 --- /dev/null +++ b/byterun/include/gc.h @@ -0,0 +1,251 @@ +// ============================================================================ +// GC +// ============================================================================ +// This is an implementation of a compactifying garbage collection algorithm. +// GC algorithm itself consists of two major stages: +// 1. Marking roots +// 2. Compacting stage +// Compacting is implemented in a very similar fashion to LISP2 algorithm, +// which is well-known. +// Most important pieces of code to discover to understand how everything works: +// - void *gc_alloc (size_t): this function is basically called whenever we are +// not able to allocate memory on the existing heap via simple bump allocator. +// - mark_phase(): this function will tell you everything you need to know +// about marking. I would also recommend to pay attention to the fact that +// marking is implemented without usage of any additional memory. Already +// allocated space is sufficient (for details see 'void mark (void *obj)'). +// - void compact_phase (size_t additional_size): the whole compaction phase +// can be understood by looking at this piece of code plus couple of other +// functions used in there. It is basically an implementation of LISP2. 
+ +#ifndef __LAMA_GC__ +#define __LAMA_GC__ + +#include "runtime_common.h" + +#define GET_MARK_BIT(x) (((int)(x)) & 1) +#define SET_MARK_BIT(x) (x = (((int)(x)) | 1)) +#define IS_ENQUEUED(x) (((int)(x)) & 2) +#define MAKE_ENQUEUED(x) (x = (((int)(x)) | 2)) +#define MAKE_DEQUEUED(x) (x = (((int)(x)) & (~2))) +#define RESET_MARK_BIT(x) (x = (((int)(x)) & (~1))) +// since last 2 bits are used for mark-bit and enqueued-bit and due to correct +// alignment we can expect that last 2 bits don't influence address (they +// should always be zero) +#define GET_FORWARD_ADDRESS(x) (((size_t)(x)) & (~3)) +// take the last two bits as they are and make all others zero +#define SET_FORWARD_ADDRESS(x, addr) (x = ((x & 3) | ((int)(addr)))) +// if heap is full after gc shows in how many times it has to be extended +#define EXTRA_ROOM_HEAP_COEFFICIENT 2 +#ifdef DEBUG_VERSION +# define MINIMUM_HEAP_CAPACITY (8) +#else +# define MINIMUM_HEAP_CAPACITY (1 << 2) +#endif + +#include +#include + +typedef enum { ARRAY, CLOSURE, STRING, SEXP } lama_type; + +typedef struct { + size_t *current; +} heap_iterator; + +typedef struct { + lama_type type; // holds type of object, which fields we are iterating over + void *obj_ptr; // place to store a pointer to the object header + void *cur_field; +} obj_field_iterator; + +// Memory pool for linear memory allocation +typedef struct { + size_t *begin; + size_t *end; + size_t *current; + size_t size; +} memory_chunk; + + +// the only GC-related function that should be exposed, others are useful for tests and internal implementation +// allocates object of the given size on the heap +void *alloc(size_t); +// takes number of words as a parameter +void *gc_alloc(size_t); +// takes number of words as a parameter +void *gc_alloc_on_existing_heap(size_t); + +// specific for mark-and-compact_phase gc +void mark (void *obj); +void mark_phase (void); +// marks each pointer from extra roots +void scan_extra_roots (void); +#ifdef LAMA_ENV +// marks each valid 
pointer from global area +void scan_global_area (void); +#endif +// takes number of words that are required to be allocated somewhere on the heap +void compact_phase (size_t additional_size); +// specific for Lisp-2 algorithm +size_t compute_locations (); +void update_references (memory_chunk *); +void physically_relocate (memory_chunk *); + + +// ============================================================================ +// GC extra roots +// ============================================================================ +// Lama's program stack is continuous, i.e. it never interleaves with runtime +// function's activation records. But some valid Lama pointers can escape +// into runtime. Those values (their stack addresses) have to be registered in +// an auxiliary data structure called `extra_roots_pool`. +// extra_roots_pool is a simple LIFO stack. During `pop` it checks that pop's +// argument is equal to the current stack top. +#define MAX_EXTRA_ROOTS_NUMBER 32 + +typedef struct { + int current_free; + void **roots[MAX_EXTRA_ROOTS_NUMBER]; +} extra_roots_pool; + +void clear_extra_roots (void); +void push_extra_root (void **p); +void pop_extra_root (void **p); + + +// ============================================================================ +// Implemented in GASM: see gc_runtime.s +// ============================================================================ +// MANDATORY TO CALL BEFORE ANY INTERACTION WITH GC (apart from cases where we +// are working with virtual stack as happens in tests) +void __gc_init (void); + +// should be called before interaction with GC in case of using in tests with +// virtual stack, otherwise it is automatically invoked by `__gc_init` +void __init (void); + +// mostly useful for tests but basically you want to call this in case you want +// to deallocate all objects allocated via GC +extern void __shutdown (void); + + +// ============================================================================ +// invoked from GASM: 
see gc_runtime.s +// ============================================================================ +extern void gc_test_and_mark_root (size_t **root); +bool is_valid_heap_pointer (const size_t *); +static inline bool is_valid_pointer (const size_t *); + + +// ============================================================================ +// Auxiliary functions for tests +// ============================================================================ +#if defined(DEBUG_VERSION) +// makes a snapshot of current objects in heap (both alive and dead), writes their ids to object_ids_buf, +// returns number of ids dumped +// object_ids_buf is a pointer to an area preallocated by the user for dumping ids of objects in heap +// object_ids_buf_size is in WORDS, NOT BYTES +size_t objects_snapshot (int *object_ids_buf, size_t object_ids_buf_size); +#endif + + +#ifdef DEBUG_VERSION +// essential function to mock program stack +void set_stack (size_t stack_top, size_t stack_bottom); + +// function to mock extra roots (Lama specific) +void set_extra_roots (size_t extra_roots_size, void **extra_roots_ptr); +#endif + + +// ============================================================================ +// Utility functions +// ============================================================================ +// accepts pointer to the start of the region and to the end of the region +// scans it and if it meets a pointer, modifies it according to the forwarding address +void scan_and_fix_region (memory_chunk *old_heap, void *start, void *end); + +// takes a pointer to an object content as an argument, returns forwarding address +size_t get_forward_address (void *obj); + +// takes a pointer to an object content as an argument, sets forwarding address to value 'addr' +void set_forward_address (void *obj, size_t addr); + +// takes a pointer to an object content as an argument, returns whether this object was marked as live +bool is_marked (void *obj); + +// takes a pointer to an object content as an 
argument, marks the object as live +void mark_object (void *obj); + +// takes a pointer to an object content as an argument, marks the object as dead +void unmark_object (void *obj); + +// takes a pointer to an object content as an argument, returns whether this object was enqueued to the queue (which is used in mark phase) +bool is_enqueued (void *obj); + +// takes a pointer to an object content as an argument, marks object as enqueued +void make_enqueued (void *obj); + +// takes a pointer to an object content as an argument, unmarks object as enqueued +void make_dequeued (void *obj); + +// returns iterator to an object with the lowest address +heap_iterator heap_begin_iterator (); +void heap_next_obj_iterator (heap_iterator *it); +bool heap_is_done_iterator (heap_iterator *it); + +// returns correct type when pointer to actual data is passed (header is excluded) +lama_type get_type_row_ptr (void *ptr); +// returns correct type when pointer to an object header is passed +lama_type get_type_header_ptr (void *ptr); + +// returns correct object size (together with header) of an object, ptr is a pointer to the actual data (header is excluded) +size_t obj_size_row_ptr (void *ptr); +// returns correct object size (together with header) of an object, ptr is a pointer to the object header +size_t obj_size_header_ptr (void *ptr); + +// returns total padding size that we need to store given object type +size_t get_header_size (lama_type type); + +// returns number of bytes that are required to allocate array with 'sz' elements (header included) +size_t array_size (size_t sz); + +// returns number of bytes that are required to allocate string of length 'len' (header included) +size_t string_size (size_t len); + +// returns number of bytes that are required to allocate closure with 'sz-1' captured values (header included) +size_t closure_size (size_t sz); + +// returns number of bytes that are required to allocate s-expression with 'members' fields (header included) +size_t 
sexp_size (size_t members); + +// returns an iterator over object fields, obj is ptr to object header +// (in case of s-exp, it is mandatory that obj ptr is very beginning of the object, +// considering that now we store two versions of header in there) +obj_field_iterator field_begin_iterator (void *obj); + +// returns an iterator over object fields which are actual pointers, obj is ptr to object header +// (in case of s-exp, it is mandatory that obj ptr is very beginning of the object, +// considering that now we store two versions of header in there) +obj_field_iterator ptr_field_begin_iterator (void *obj); + +// moves the iterator to next object field +void obj_next_field_iterator (obj_field_iterator *it); + +// moves the iterator to the next object field which is an actual pointer +void obj_next_ptr_field_iterator (obj_field_iterator *it); + +// returns if we are done iterating over fields of the object +bool field_is_done_iterator (obj_field_iterator *it); + +// ptr is pointer to the actual object content, returns pointer to the very beginning of the object (header) +void *get_obj_header_ptr (void *ptr); +void *get_object_content_ptr (void *header_ptr); +void *get_end_of_obj (void *header_ptr); + +void *alloc_string (int len); +void *alloc_array (int len); +void *alloc_sexp (int members); +void *alloc_closure (int captured); + +#endif diff --git a/byterun/include/operations.h b/byterun/include/operations.h index 0cb4bed37..f0a181a01 100644 --- a/byterun/include/operations.h +++ b/byterun/include/operations.h @@ -1,7 +1,7 @@ #pragma once -#include "../../runtime/gc.h" -#include "../../runtime/runtime.h" +#include "gc.h" +#include "runtime.h" #include "types.h" #include "stdlib.h" @@ -16,18 +16,16 @@ inline void free_var(union VarT var) { break; case INT_T: break; - case CONST_STR_T: + case BOX_T: + // pointer, do not free original object break; case STR_T: - free(var.str.value); + if (dh_param(var.str.data_header)) { // not const string + // 
free(var.str.value); // FIXME + } break; case CLOJURE_T: - if (var.list.value != NULL) { - free_var_ptr(to_var(var.list.value)); - } - if (var.list.next != NULL) { - free_var_ptr(to_var(var.list.next)); - } + // TODO break; case ARRAY_T: // dh param is size @@ -38,8 +36,11 @@ inline void free_var(union VarT var) { break; case SEXP_T: // tag is const string, no need to free - if (var.sexp.next != NULL) { - // free(var.sexp.next); // FIXME + if (var.sexp.values != NULL) { + for (size_t i = 0; i < dh_param(var.sexp.data_header); ++i) { + free_var_ptr(to_var(var.sexp.values[i])); + } + // free(var.sexp.values); // FIXME } break; case FUN_T: @@ -55,17 +56,20 @@ inline void free_var_ptr(union VarT *var) { // -inline struct NilT clear_var() { return NilT{.data_header = NIL_T}; } +inline struct NilT clear_var() { + struct NilT var = {.data_header = NIL_T}; + return var; +} // ------ put on stack --- inline void s_put_ptr(struct State *s, char *val) { // any var - *s->vp = (NilT *)val; + *s->vp = (struct NilT *)val; ++s->vp; } inline void s_put_var_ptr(struct State *s, struct NilT **val) { // any var - *s->vp = (NilT *)val; + *s->vp = (struct NilT *)val; ++s->vp; } @@ -75,7 +79,7 @@ inline void s_put_var(struct State *s, struct NilT *val) { // any var } inline void s_put_nil(struct State *s) { - struct NilT *var = (NilT *)alloc(sizeof(NilT)); + struct NilT *var = (struct NilT *)alloc(sizeof(struct NilT)); var->data_header = NIL_T; // no param s_put_var(s, var); } @@ -87,28 +91,35 @@ inline void s_putn_nil(struct State *s, size_t n) { } inline void s_put_i(struct State *s, int val) { - struct IntT *var = (IntT *)alloc(sizeof(IntT)); + struct IntT *var = (struct IntT *)alloc(sizeof(struct IntT)); var->data_header = INT_T; // no param var->value = val; - s_put_var(s, (NilT *)var); + s_put_var(s, (struct NilT *)var); +} + +inline void s_put_box(struct State *s, struct NilT **val) { + struct BoxT *var = (struct BoxT *)alloc(sizeof(struct BoxT)); + var->data_header = BOX_T; // no 
param + var->value = val; + s_put_var(s, (struct NilT *)var); } inline void s_put_const_str(struct State *s, const char *val) { - struct ConstStrT *var = (ConstStrT *)alloc(sizeof(ConstStrT)); - var->data_header = CONST_STR_T; // no param + struct StrT *var = (struct StrT *)alloc(sizeof(struct StrT)); + var->data_header = (0 << 3) | STR_T; // low 3 bits: type tag; param (upper bits) 0 - is const var->value = val; - s_put_var(s, (NilT *)var); + s_put_var(s, (struct NilT *)var); } inline void s_put_str(struct State *s, char *val) { - struct StrT *var = (StrT *)alloc(sizeof(StrT)); - var->data_header = STR_T; // no param + struct StrT *var = (struct StrT *)alloc(sizeof(struct StrT)); + var->data_header = (1 << 3) | STR_T; // low 3 bits: type tag; param (upper bits) 1 - is not const var->value = val; - s_put_var(s, (NilT *)var); + s_put_var(s, (struct NilT *)var); } inline void s_put_array(struct State *s, int sz) { - struct ArrayT *var = (ArrayT *)alloc(sizeof(ArrayT)); + struct ArrayT *var = (struct ArrayT *)alloc(sizeof(struct ArrayT)); if (sz < 0) { failure("array size < 0"); @@ -119,17 +130,17 @@ inline void s_put_array(struct State *s, int sz) { } var->data_header = sz & ARRAY_T; - var->values = (NilT **)alloc(sizeof(NilT *) * sz); + var->values = (struct NilT **)alloc(sizeof(struct NilT *) * sz); for (size_t i = 0; i < sz; ++i) { var->values[i] = NULL; } - s_put_var(s, (NilT *)var); + s_put_var(s, (struct NilT *)var); } inline union VarT *s_take_var(struct State *s); inline void s_put_sexp(struct State *s, const char *tag, int sz) { - struct SExpT *var = (SExpT *)alloc(sizeof(SExpT)); + struct SExpT *var = (struct SExpT *)alloc(sizeof(struct SExpT)); if (sz < 0) { failure("array size < 0"); @@ -140,14 +151,14 @@ inline void s_put_sexp(struct State *s, const char *tag, int sz) { } var->data_header = sz & SEXP_T; - var->values = (NilT **)alloc(sizeof(NilT *) * sz); + var->values = (struct NilT **)alloc(sizeof(struct NilT *) * sz); var->tag = tag; for (size_t i = 0; i < sz; ++i) { - var->values[i] = (NilT *)s_take_var(s); + var->values[i] = (struct 
NilT *)s_take_var(s); } - s_put_var(s, (NilT *)var); + s_put_var(s, (struct NilT *)var); } // inline void s_put_empty_list(struct State *s, struct NilT *first_elem) { @@ -156,15 +167,11 @@ inline void s_put_sexp(struct State *s, const char *tag, int sz) { // var->value = first_elem; // var->next = NULL; -// s_put_var(s, (NilT *)var); +// s_put_var(s, (struct NilT *)var); // *first_elem = clear_var(); // } -inline void s_put_sexp(struct State *s, , int args_sz) { - // TODO FIXME -} - // ------ take from stack ------ inline union VarT *s_take_var(struct State *s) { @@ -173,7 +180,7 @@ inline union VarT *s_take_var(struct State *s) { } --s->vp; - union VarT *ret = (VarT *)*s->vp; + union VarT *ret = (union VarT *)*s->vp; *s->vp = NULL; // clear top var return ret; } @@ -191,7 +198,7 @@ inline void s_drop_var(struct State *s) { failure("drop: no var"); } --s->vp; - free_var_ptr((VarT *)*s->vp); + free_var_ptr((union VarT *)*s->vp); *s->vp = NULL; } @@ -210,11 +217,11 @@ inline void s_dropn_var(struct State *s, size_t n) { // before / after new frame added inline void s_enter_f(struct State *s, char *func_ip, size_t params_sz, size_t locals_sz) { - if (params_sz > s->vp - s->stack or - (s->fp != NULL and params_sz > s->vp - s->fp->end)) { + if (params_sz > s->vp - s->stack || + (s->fp != NULL && params_sz > s->vp - s->fp->end)) { failure("not enough parameters in stack"); } - size_t frame_sz_in_ptr = sizeof(Frame) / sizeof(void *); + size_t frame_sz_in_ptr = sizeof(struct Frame) / sizeof(void *); struct Frame frame = { .ret = NULL, // field in frame itself .rp = s->ip, @@ -225,7 +232,7 @@ inline void s_enter_f(struct State *s, char *func_ip, size_t params_sz, }; // put frame on stack - s->fp = (Frame *)s->vp; + s->fp = (struct Frame *)s->vp; (*s->fp) = frame; // update stack pointer @@ -261,7 +268,7 @@ inline void s_exit_f(struct State *s) { inline union VarT **var_by_category(struct State *s, enum VarCategory category, int id) { - VarT **var = NULL; + union VarT **var 
= NULL; switch (category) { case VAR_GLOBAL: // TODO: FIXME @@ -277,7 +284,7 @@ inline union VarT **var_by_category(struct State *s, enum VarCategory category, failure("can't read local: too big id, %i >= %ul", frame_locals_sz(s->fp), id); } - var = (VarT **)&s->fp->locals[id]; + var = (union VarT **)&s->fp->locals[id]; break; case VAR_A: // TODO diff --git a/byterun/include/runtime.h b/byterun/include/runtime.h new file mode 100644 index 000000000..43ec9a50b --- /dev/null +++ b/byterun/include/runtime.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define WORD_SIZE (CHAR_BIT * sizeof(int)) + +inline void vfailure(char *s, va_list args) { + fprintf(stderr, "*** FAILURE: "); + vfprintf(stderr, s, + args); // vprintf (char *, va_list) <-> printf (char *, ...) + exit(255); +} + +inline void failure(char *s, ...) { + va_list args; + + va_start(args, s); + vfailure(s, args); +} diff --git a/byterun/include/runtime_common.h b/byterun/include/runtime_common.h new file mode 100644 index 000000000..9ebae0720 --- /dev/null +++ b/byterun/include/runtime_common.h @@ -0,0 +1,73 @@ +#ifndef __LAMA_RUNTIME_COMMON__ +#define __LAMA_RUNTIME_COMMON__ +#include + +// this flag makes GC behavior a bit different for testing purposes. 
+//#define DEBUG_VERSION +//#define FULL_INVARIANT_CHECKS + +#define STRING_TAG 0x00000001 +#define ARRAY_TAG 0x00000003 +#define SEXP_TAG 0x00000005 +#define CLOSURE_TAG 0x00000007 +#define UNBOXED_TAG 0x00000009 // Not actually a data_header; used to return from LkindOf + +#define LEN(x) ((x & 0xFFFFFFF8) >> 3) +#define TAG(x) (x & 0x00000007) + +#define SEXP_ONLY_HEADER_SZ (sizeof(int)) + +#ifndef DEBUG_VERSION +# define DATA_HEADER_SZ (sizeof(size_t) + sizeof(int)) +#else +# define DATA_HEADER_SZ (sizeof(size_t) + sizeof(size_t) + sizeof(int)) +#endif + +#define MEMBER_SIZE sizeof(int) + +#define TO_DATA(x) ((data *)((char *)(x) - DATA_HEADER_SZ)) +#define TO_SEXP(x) ((sexp *)((char *)(x) - DATA_HEADER_SZ)) + +#define UNBOXED(x) (((int)(x)) & 0x0001) +#define UNBOX(x) (((int)(x)) >> 1) +#define BOX(x) ((((int)(x)) << 1) | 0x0001) + +#define BYTES_TO_WORDS(bytes) (((bytes) - 1) / sizeof(size_t) + 1) +#define WORDS_TO_BYTES(words) ((words) * sizeof(size_t)) + +// CAREFUL WITH DOUBLE EVALUATION! +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define MIN(x, y) (((x) < (y)) ? 
(x) : (y)) + +typedef struct { + // store tag in the last three bits to understand what structure this is, other bits are filled with + // other utility info (i.e., size for array, number of fields for s-expression) + int data_header; + +#ifdef DEBUG_VERSION + size_t id; +#endif + + // last bit is used as MARK-BIT, the rest are used to store address where object should move + // last bit can be used because due to alignment we can assume that last two bits are always 0's + size_t forward_address; + char contents[0]; +} data; + +typedef struct { + // store tag in the last three bits to understand what structure this is, other bits are filled with + // other utility info (i.e., size for array, number of fields for s-expression) + int data_header; + +#ifdef DEBUG_VERSION + size_t id; +#endif + + // last bit is used as MARK-BIT, the rest are used to store address where object should move + // last bit can be used because due to alignment we can assume that last two bits are always 0's + size_t forward_address; + int tag; + int contents[0]; +} sexp; + +#endif diff --git a/byterun/include/types.h b/byterun/include/types.h index 24be61467..12305aecc 100644 --- a/byterun/include/types.h +++ b/byterun/include/types.h @@ -1,7 +1,7 @@ #pragma once -#include "../../runtime/runtime.h" #include "parser.h" +#include "runtime.h" #include // ------ Var ------ @@ -9,7 +9,7 @@ enum Type { NIL_T = 0x00000000, INT_T = 0x00000001, - CONST_STR_T = 0x00000002, + BOX_T = 0x00000002, STR_T = 0x00000003, CLOJURE_T = 0x00000004, ARRAY_T = 0x00000005, @@ -23,24 +23,24 @@ struct NilT { // AnyVarT too struct IntT { uint32_t data_header; - int32_t value; // int value => size = 1; + int32_t value; }; -struct ConstStrT { +struct BoxT { uint32_t data_header; - const char *value; + struct NilT **value; }; struct StrT { - uint32_t data_header; - char *value; + uint32_t data_header; // param - is not const (0 for const, 1 for not const) + const char *value; }; struct ClojureT { // TODO uint32_t 
data_header; char *fun_ip; struct ArrayT *vars; -} +}; // struct ListT { // uint32_t data_header; @@ -68,7 +68,7 @@ struct FunT { union VarT { struct NilT nil; struct IntT int_t; - struct ConstStrT const_str; + struct BoxT box; struct StrT str; struct ClojureT clojure; // struct ListT list; @@ -79,7 +79,7 @@ union VarT { // same to TAG in runtime inline enum Type dh_type(int data_header) { - return (Type)(data_header & 0x00000007); + return (enum Type)(data_header & 0x00000007); } // same to LEN in runtime @@ -141,5 +141,5 @@ inline enum VarCategory to_var_category(uint8_t category) { if (category > 3) { failure("unexpected variable category"); } - return (VarCategory)category; + return (enum VarCategory)category; } diff --git a/byterun/include/utils.h b/byterun/include/utils.h deleted file mode 100644 index 6f70f09be..000000000 --- a/byterun/include/utils.h +++ /dev/null @@ -1 +0,0 @@ -#pragma once diff --git a/byterun/src/cli.c b/byterun/src/cli.c new file mode 100644 index 000000000..c63092638 --- /dev/null +++ b/byterun/src/cli.c @@ -0,0 +1,21 @@ +#include "interpreter.h" +#include "parser.h" +#include "runtime.h" + +int main(int argc, char** argv) { + if (argc < 2) { + failure("no file name provided"); + } + if (argc > 2) { + failure("too many arguments"); + } + + + bytefile *f = read_file (argv[1]); + run(f); +// dump_file (stdout, f); + + free(f); + + return 0; +} diff --git a/byterun/src/gc.c b/byterun/src/gc.c new file mode 100644 index 000000000..829a82077 --- /dev/null +++ b/byterun/src/gc.c @@ -0,0 +1,922 @@ +#define _GNU_SOURCE 1 + +#include "gc.h" + +#include "runtime_common.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const size_t INIT_HEAP_SIZE = MINIMUM_HEAP_CAPACITY; + +#ifdef DEBUG_VERSION +size_t cur_id = 0; +#endif + +static extra_roots_pool extra_roots; + +size_t __gc_stack_top = 0, __gc_stack_bottom = 0; +#ifdef LAMA_ENV +extern const size_t __start_custom_data, __stop_custom_data; 
+#endif + +#ifdef DEBUG_VERSION +memory_chunk heap; +#else +static memory_chunk heap; +#endif + +#ifdef DEBUG_VERSION +void dump_heap (); +#endif + +void handler (int sig) { + void *array[10]; + int size; + + // get void*'s for all entries on the stack + size = backtrace(array, 10); + fprintf(stderr, "heap size is %zu\n", heap.size); + backtrace_symbols_fd(array, size, STDERR_FILENO); + exit(1); +} + +void *alloc (size_t size) { +#ifdef DEBUG_VERSION + ++cur_id; +#endif + size_t bytes_sz = size; + size = BYTES_TO_WORDS(size); +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "allocation of size %zu words (%zu bytes): ", size, bytes_sz); +#endif + void *p = gc_alloc_on_existing_heap(size); + if (!p) { + // not enough place in the heap, need to perform GC cycle + p = gc_alloc(size); + } + return p; +} + +#ifdef FULL_INVARIANT_CHECKS + +// precondition: obj_content is a valid address pointing to the content of an object +static void print_object_info (FILE *f, void *obj_content) { + data *d = TO_DATA(obj_content); + size_t obj_tag = TAG(d->data_header); + size_t obj_id = d->id; + fprintf(f, "id %zu tag %zu | ", obj_id, obj_tag); +} + +static void print_unboxed (FILE *f, int unboxed) { fprintf(f, "unboxed %zu | ", unboxed); } + +static FILE *print_stack_content (char *filename) { + FILE *f = fopen(filename, "w+"); + ftruncate(fileno(f), 0); + fprintf(f, "Stack content:\n"); + for (size_t *stack_ptr = (size_t *)((void *)__gc_stack_top + 4); + stack_ptr < (size_t *)__gc_stack_bottom; + ++stack_ptr) { + size_t value = *stack_ptr; + if (is_valid_heap_pointer((size_t *)value)) { + fprintf(f, "%p, ", (void *)value); + print_object_info(f, (void *)value); + } else { + print_unboxed(f, (int)value); + } + fprintf(f, "\n"); + } + fprintf(f, "Stack content end.\n"); + return f; +} + +// precondition: obj_content is a valid address pointing to the content of an object +static void objects_dfs (FILE *f, void *obj_content) { + void *obj_header = 
get_obj_header_ptr(obj_content); + data *obj_data = TO_DATA(obj_content); + // internal mark-bit for this dfs, should be recovered by the caller + if ((obj_data->forward_address & 2) != 0) { return; } + // set this bit as 1 + obj_data->forward_address |= 2; + fprintf(f, "object at addr %p: ", obj_content); + print_object_info(f, obj_content); + /*fprintf(f, "object id: %zu | ", obj_data->id);*/ + // first cycle: print object's fields + for (obj_field_iterator field_it = ptr_field_begin_iterator(obj_header); + !field_is_done_iterator(&field_it); + obj_next_field_iterator(&field_it)) { + size_t field_value = *(size_t *)field_it.cur_field; + if (is_valid_heap_pointer((size_t *)field_value)) { + print_object_info(f, (void *)field_value); + /*fprintf(f, "%zu ", TO_DATA(field_value)->id);*/ + } else { + print_unboxed(f, (int)field_value); + } + } + fprintf(f, "\n"); + for (obj_field_iterator field_it = ptr_field_begin_iterator(obj_header); + !field_is_done_iterator(&field_it); + obj_next_field_iterator(&field_it)) { + size_t field_value = *(size_t *)field_it.cur_field; + if (is_valid_heap_pointer((size_t *)field_value)) { objects_dfs(f, (void *)field_value); } + } +} + +FILE *print_objects_traversal (char *filename, bool marked) { + FILE *f = fopen(filename, "w+"); + ftruncate(fileno(f), 0); + for (heap_iterator it = heap_begin_iterator(); !heap_is_done_iterator(&it); + heap_next_obj_iterator(&it)) { + void *obj_header = it.current; + data *obj_data = TO_DATA(get_object_content_ptr(obj_header)); + if ((obj_data->forward_address & 1) == marked) { + objects_dfs(f, get_object_content_ptr(obj_header)); + } + } + + // resetting bit that represent mark-bit for this internal dfs-traversal + for (heap_iterator it = heap_begin_iterator(); !heap_is_done_iterator(&it); + heap_next_obj_iterator(&it)) { + void *obj_header = it.current; + data *obj_data = TO_DATA(get_object_content_ptr(obj_header)); + obj_data->forward_address &= (~2); + } + fflush(f); + + // print extra roots + for 
(int i = 0; i < extra_roots.current_free; i++) { + fprintf(f, "extra root %p %p: ", extra_roots.roots[i], *(size_t **)extra_roots.roots[i]); + } + fflush(f); + return f; +} + +int files_cmp (FILE *f1, FILE *f2) { + int symbol1, symbol2; + int position = 0; + while (true) { + symbol1 = fgetc(f1); + symbol2 = fgetc(f2); + if (symbol1 == EOF && symbol2 == EOF) { return -1; } + if (symbol1 != symbol2) { return position; } + ++position; + } +} + +#endif + +void *gc_alloc_on_existing_heap (size_t size) { + if (heap.current + size <= heap.end) { + void *p = (void *)heap.current; + heap.current += size; + memset(p, 0, size * sizeof(size_t)); + return p; + } + return NULL; +} + +void *gc_alloc (size_t size) { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "===============================GC cycle has started\n"); +#endif +#ifdef FULL_INVARIANT_CHECKS + FILE *stack_before = print_stack_content("stack-dump-before-compaction"); + FILE *heap_before = print_objects_traversal("before-mark", 0); + fclose(heap_before); +#endif + mark_phase(); +#ifdef FULL_INVARIANT_CHECKS + FILE *heap_before_compaction = print_objects_traversal("after-mark", 1); +#endif + + compact_phase(size); +#ifdef FULL_INVARIANT_CHECKS + FILE *stack_after = print_stack_content("stack-dump-after-compaction"); + FILE *heap_after_compaction = print_objects_traversal("after-compaction", 0); + + int pos = files_cmp(stack_before, stack_after); + if (pos >= 0) { // position of difference is found + fprintf(stderr, "Stack is modified incorrectly, see position %d\n", pos); + exit(1); + } + fclose(stack_before); + fclose(stack_after); + pos = files_cmp(heap_before_compaction, heap_after_compaction); + if (pos >= 0) { // position of difference is found + fprintf(stderr, "GC invariant is broken, pos is %d\n", pos); + exit(1); + } + fclose(heap_before_compaction); + fclose(heap_after_compaction); +#endif +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, 
"===============================GC cycle has finished\n"); +#endif + return gc_alloc_on_existing_heap(size); +} + +static void gc_root_scan_stack () { + for (size_t *p = (size_t *)(__gc_stack_top + 4); p < (size_t *)__gc_stack_bottom; ++p) { + gc_test_and_mark_root((size_t **)p); + } +} + +void mark_phase (void) { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "marking has started\n"); + fprintf(stderr, + "gc_root_scan_stack has started: gc_top=%p bot=%p\n", + (void *)__gc_stack_top, + (void *)__gc_stack_bottom); +#endif + gc_root_scan_stack(); +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "gc_root_scan_stack has finished\n"); + fprintf(stderr, "scan_extra_roots has started\n"); +#endif + scan_extra_roots(); +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "scan_extra_roots has finished\n"); + fprintf(stderr, "scan_global_area has started\n"); +#endif +#ifdef LAMA_ENV + scan_global_area(); +#endif +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "scan_global_area has finished\n"); + fprintf(stderr, "marking has finished\n"); +#endif +} + +void compact_phase (size_t additional_size) { + size_t live_size = compute_locations(); + + // all in words + size_t next_heap_size = + MAX(live_size * EXTRA_ROOM_HEAP_COEFFICIENT + additional_size, MINIMUM_HEAP_CAPACITY); + size_t next_heap_pseudo_size = MAX(next_heap_size, heap.size); + + memory_chunk old_heap = heap; + heap.begin = mremap( + heap.begin, WORDS_TO_BYTES(heap.size), WORDS_TO_BYTES(next_heap_pseudo_size), MREMAP_MAYMOVE); + if (heap.begin == MAP_FAILED) { + perror("ERROR: compact_phase: mremap failed\n"); + exit(1); + } + heap.end = heap.begin + next_heap_pseudo_size; + heap.size = next_heap_pseudo_size; + heap.current = heap.begin + (old_heap.current - old_heap.begin); + + update_references(&old_heap); + physically_relocate(&old_heap); + + heap.current = heap.begin + live_size; +} + +size_t compute_locations () { +#if 
defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "GC compute_locations started\n"); +#endif + size_t *free_ptr = heap.begin; + heap_iterator scan_iter = heap_begin_iterator(); + + for (; !heap_is_done_iterator(&scan_iter); heap_next_obj_iterator(&scan_iter)) { + void *header_ptr = scan_iter.current; + void *obj_content = get_object_content_ptr(header_ptr); + if (is_marked(obj_content)) { + size_t sz = BYTES_TO_WORDS(obj_size_header_ptr(header_ptr)); + // forward address is responsible for object header pointer + set_forward_address(obj_content, (size_t)free_ptr); + free_ptr += sz; + } + } + +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "GC compute_locations finished\n"); +#endif + // it will return number of words + return free_ptr - heap.begin; +} + +void scan_and_fix_region (memory_chunk *old_heap, void *start, void *end) { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "GC scan_and_fix_region started\n"); +#endif + for (size_t *ptr = (size_t *)start; ptr < (size_t *)end; ++ptr) { + size_t ptr_value = *ptr; + // this can't be expressed via is_valid_heap_pointer, because this pointer may point area corresponding to the old + // heap + if (is_valid_pointer((size_t *)ptr_value) && (size_t)old_heap->begin <= ptr_value + && ptr_value <= (size_t)old_heap->current) { + void *obj_ptr = (void *)heap.begin + ((void *)ptr_value - (void *)old_heap->begin); + void *new_addr = + (void *)heap.begin + ((void *)get_forward_address(obj_ptr) - (void *)old_heap->begin); + size_t content_offset = get_header_size(get_type_row_ptr(obj_ptr)); + *(void **)ptr = new_addr + content_offset; + } + } +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "GC scan_and_fix_region finished\n"); +#endif +} + +void scan_and_fix_region_roots (memory_chunk *old_heap) { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "extra roots started: number of extra roots %i\n", extra_roots.current_free); +#endif 
+ for (int i = 0; i < extra_roots.current_free; i++) { + size_t *ptr = (size_t *)extra_roots.roots[i]; + size_t ptr_value = *ptr; + if (!is_valid_pointer((size_t *)ptr_value)) { continue; } + // skip this one since it was already fixed from scanning the stack + if ((extra_roots.roots[i] >= (void **)__gc_stack_top + && extra_roots.roots[i] < (void **)__gc_stack_bottom) +#ifdef LAMA_ENV + || (extra_roots.roots[i] <= (void **)&__stop_custom_data + && extra_roots.roots[i] >= (void **)&__start_custom_data) +#endif + ) { +#ifdef DEBUG_VERSION + if (is_valid_heap_pointer((size_t *)ptr_value)) { +# ifdef DEBUG_PRINT + fprintf(stderr, + "|\tskip extra root: %p (%p), since it points to Lama's stack top=%p bot=%p\n", + extra_roots.roots[i], + (void *)ptr_value, + (void *)__gc_stack_top, + (void *)__gc_stack_bottom); +# endif + } +# ifdef LAMA_ENV + else if ((extra_roots.roots[i] <= (void *)&__stop_custom_data + && extra_roots.roots[i] >= (void *)&__start_custom_data)) { + fprintf( + stderr, + "|\tskip extra root: %p (%p), since it points to Lama's static area stop=%p start=%p\n", + extra_roots.roots[i], + (void *)ptr_value, + (void *)&__stop_custom_data, + (void *)&__start_custom_data); + exit(1); + } +# endif + else { +# ifdef DEBUG_PRINT + fprintf(stderr, + "|\tskip extra root: %p (%p): not a valid Lama pointer \n", + extra_roots.roots[i], + (void *)ptr_value); +# endif + } +#endif + continue; + } + if ((size_t)old_heap->begin <= ptr_value && ptr_value <= (size_t)old_heap->current) { + void *obj_ptr = (void *)heap.begin + ((void *)ptr_value - (void *)old_heap->begin); + void *new_addr = + (void *)heap.begin + ((void *)get_forward_address(obj_ptr) - (void *)old_heap->begin); + size_t content_offset = get_header_size(get_type_row_ptr(obj_ptr)); + *(void **)ptr = new_addr + content_offset; +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, + "|\textra root (%p) %p -> %p\n", + extra_roots.roots[i], + (void *)ptr_value, + (void *)*ptr); +#endif + } + } +#if 
defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "|\textra roots finished\n"); +#endif +} + +void update_references (memory_chunk *old_heap) { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "GC update_references started\n"); +#endif + heap_iterator it = heap_begin_iterator(); + while (!heap_is_done_iterator(&it)) { + if (is_marked(get_object_content_ptr(it.current))) { + for (obj_field_iterator field_iter = ptr_field_begin_iterator(it.current); + !field_is_done_iterator(&field_iter); + obj_next_ptr_field_iterator(&field_iter)) { + + size_t *field_value = *(size_t **)field_iter.cur_field; + if (field_value < old_heap->begin || field_value > old_heap->current) { continue; } + // this pointer should also be modified according to old_heap->begin + void *field_obj_content_addr = + (void *)heap.begin + (*(void **)field_iter.cur_field - (void *)old_heap->begin); + // important, we calculate new_addr very carefully here, because objects may relocate to another memory chunk + void *new_addr = + heap.begin + + ((size_t *)get_forward_address(field_obj_content_addr) - (size_t *)old_heap->begin); + // update field reference to point to new_addr + // since, we want fields to point to an actual content, we need to add this extra content_offset + // because forward_address itself is a pointer to the object's header + size_t content_offset = get_header_size(get_type_row_ptr(field_obj_content_addr)); +#ifdef DEBUG_VERSION + if (!is_valid_heap_pointer((void *)(new_addr + content_offset))) { +# ifdef DEBUG_PRINT + fprintf(stderr, + "ur: incorrect pointer assignment: on object with id %d", + TO_DATA(get_object_content_ptr(it.current))->id); +# endif + exit(1); + } +#endif + *(void **)field_iter.cur_field = new_addr + content_offset; + } + } + heap_next_obj_iterator(&it); + } + // fix pointers from stack + scan_and_fix_region(old_heap, (void *)__gc_stack_top + 4, (void *)__gc_stack_bottom + 4); + + // fix pointers from extra_roots + 
scan_and_fix_region_roots(old_heap); + +#ifdef LAMA_ENV + assert((void *)&__stop_custom_data >= (void *)&__start_custom_data); + scan_and_fix_region(old_heap, (void *)&__start_custom_data, (void *)&__stop_custom_data); +#endif +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "GC update_references finished\n"); +#endif +} + +void physically_relocate (memory_chunk *old_heap) { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "GC physically_relocate started\n"); +#endif + heap_iterator from_iter = heap_begin_iterator(); + + while (!heap_is_done_iterator(&from_iter)) { + void *obj = get_object_content_ptr(from_iter.current); + heap_iterator next_iter = from_iter; + heap_next_obj_iterator(&next_iter); + if (is_marked(obj)) { + // Move the object from its old location to its new location relative to + // the heap's (possibly new) location, 'to' points to future object header + size_t *to = heap.begin + ((size_t *)get_forward_address(obj) - (size_t *)old_heap->begin); + memmove(to, from_iter.current, obj_size_header_ptr(from_iter.current)); + unmark_object(get_object_content_ptr(to)); + } + from_iter = next_iter; + } +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "GC physically_relocate finished\n"); +#endif +} + +inline bool is_valid_heap_pointer (const size_t *p) { + return !UNBOXED(p) && (size_t)heap.begin <= (size_t)p && (size_t)p <= (size_t)heap.current; +} + +static inline bool is_valid_pointer (const size_t *p) { return !UNBOXED(p); } + +static inline void queue_enqueue (heap_iterator *tail_iter, void *obj) { + void *tail = tail_iter->current; + void *tail_content = get_object_content_ptr(tail); + set_forward_address(tail_content, (size_t)obj); + make_enqueued(obj); + heap_next_obj_iterator(tail_iter); +} + +static inline void *queue_dequeue (heap_iterator *head_iter) { + void *head = head_iter->current; + void *head_content = get_object_content_ptr(head); + void *value = (void 
*)get_forward_address(head_content); + make_dequeued(value); + heap_next_obj_iterator(head_iter); + return value; +} + +void mark (void *obj) { + if (!is_valid_heap_pointer(obj) || is_marked(obj)) { return; } + + // TL;DR: [q_head_iter, q_tail_iter) q_head_iter -- current dequeue's victim, q_tail_iter -- place for next enqueue + // in forward_address of corresponding element we store address of element to be removed after dequeue operation + heap_iterator q_head_iter = heap_begin_iterator(); + // iterator where we will write address of the element that is going to be enqueued + heap_iterator q_tail_iter = q_head_iter; + queue_enqueue(&q_tail_iter, obj); + + // invariant: queue contains only objects that are valid heap pointers (each corresponding to content of unmarked + // object) also each object is in queue only once + while (q_head_iter.current != q_tail_iter.current) { + // while the queue is non-empty + void *cur_obj = queue_dequeue(&q_head_iter); + mark_object(cur_obj); + void *header_ptr = get_obj_header_ptr(cur_obj); + for (obj_field_iterator ptr_field_it = ptr_field_begin_iterator(header_ptr); + !field_is_done_iterator(&ptr_field_it); + obj_next_ptr_field_iterator(&ptr_field_it)) { + void *field_value = *(void **)ptr_field_it.cur_field; + if (!is_valid_heap_pointer(field_value) || is_marked(field_value) + || is_enqueued(field_value)) { + continue; + } + // if we came to this point it must be true that field_value is unmarked and not currently in queue + // thus, we maintain the invariant + queue_enqueue(&q_tail_iter, field_value); + } + } +} + +void scan_extra_roots (void) { + for (int i = 0; i < extra_roots.current_free; ++i) { + // this dereferencing is safe since runtime is pushing correct pointers into extra_roots + mark(*extra_roots.roots[i]); + } +} + +#ifdef LAMA_ENV +void scan_global_area (void) { + // __start_custom_data is pointing to beginning of global area, thus all dereferencings are safe + for (size_t *ptr = (size_t *)&__start_custom_data; 
ptr < (size_t *)&__stop_custom_data; ++ptr) { + mark(*(void **)ptr); + } +} +#endif + +extern void gc_test_and_mark_root (size_t **root) { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, + "\troot = %p (%p), stack addresses: [%p, %p)\n", + root, + *root, + (void *)__gc_stack_top + 4, + (void *)__gc_stack_bottom); +#endif + mark((void *)*root); +} + +void __gc_init (void) { + __gc_stack_bottom = (size_t)__builtin_frame_address(1) + 4; + __init(); +} + +void __init (void) { + signal(SIGSEGV, handler); + size_t space_size = INIT_HEAP_SIZE * sizeof(size_t); + + srandom(time(NULL)); + + heap.begin = mmap( + NULL, space_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0); + if (heap.begin == MAP_FAILED) { + perror("ERROR: __init: mmap failed\n"); + exit(1); + } + heap.end = heap.begin + INIT_HEAP_SIZE; + heap.size = INIT_HEAP_SIZE; + heap.current = heap.begin; + clear_extra_roots(); +} + +extern void __shutdown (void) { + munmap(heap.begin, heap.size); +#ifdef DEBUG_VERSION + cur_id = 0; +#endif + heap.begin = NULL; + heap.end = NULL; + heap.size = 0; + heap.current = NULL; + __gc_stack_top = 0; + __gc_stack_bottom = 0; +} + +void clear_extra_roots (void) { extra_roots.current_free = 0; } + +void push_extra_root (void **p) { + if (extra_roots.current_free >= MAX_EXTRA_ROOTS_NUMBER) { + perror("ERROR: push_extra_roots: extra_roots_pool overflow\n"); + exit(1); + } + assert(p >= (void **)__gc_stack_top || p < (void **)__gc_stack_bottom); + extra_roots.roots[extra_roots.current_free] = p; + extra_roots.current_free++; +} + +void pop_extra_root (void **p) { + if (extra_roots.current_free == 0) { + perror("ERROR: pop_extra_root: extra_roots are empty\n"); + exit(1); + } + extra_roots.current_free--; + if (extra_roots.roots[extra_roots.current_free] != p) { + perror("ERROR: pop_extra_root: stack invariant violation\n"); + exit(1); + } +} + +/* Functions for tests */ + +#if defined(DEBUG_VERSION) +size_t objects_snapshot (int 
*object_ids_buf, size_t object_ids_buf_size) { + size_t *ids_ptr = (size_t *)object_ids_buf; + size_t i = 0; + for (heap_iterator it = heap_begin_iterator(); + !heap_is_done_iterator(&it) && i < object_ids_buf_size; + heap_next_obj_iterator(&it), ++i) { + void *header_ptr = it.current; + data *d = TO_DATA(get_object_content_ptr(header_ptr)); + ids_ptr[i] = d->id; + } + return i; +} +#endif + +#ifdef DEBUG_VERSION +extern char *de_hash (int); + +void dump_heap () { + size_t i = 0; + for (heap_iterator it = heap_begin_iterator(); !heap_is_done_iterator(&it); + heap_next_obj_iterator(&it), ++i) { + void *header_ptr = it.current; + void *content_ptr = get_object_content_ptr(header_ptr); + data *d = TO_DATA(content_ptr); + lama_type t = get_type_header_ptr(header_ptr); + switch (t) { + case ARRAY: fprintf(stderr, "of kind ARRAY\n"); break; + case CLOSURE: fprintf(stderr, "of kind CLOSURE\n"); break; + case STRING: fprintf(stderr, "of kind STRING\n"); break; + case SEXP: + fprintf(stderr, "of kind SEXP with tag %s\n", de_hash(TO_SEXP(content_ptr)->tag)); + break; + } + } +} + +void set_stack (size_t stack_top, size_t stack_bottom) { + __gc_stack_top = stack_top; + __gc_stack_bottom = stack_bottom; +} + +void set_extra_roots (size_t extra_roots_size, void **extra_roots_ptr) { + memcpy(extra_roots.roots, extra_roots_ptr, MIN(sizeof(extra_roots.roots), extra_roots_size)); + clear_extra_roots(); +} + +#endif + +/* Utility functions */ + +size_t get_forward_address (void *obj) { + data *d = TO_DATA(obj); + return GET_FORWARD_ADDRESS(d->forward_address); +} + +void set_forward_address (void *obj, size_t addr) { + data *d = TO_DATA(obj); + SET_FORWARD_ADDRESS(d->forward_address, addr); +} + +bool is_marked (void *obj) { + data *d = TO_DATA(obj); + int mark_bit = GET_MARK_BIT(d->forward_address); + return mark_bit; +} + +void mark_object (void *obj) { + data *d = TO_DATA(obj); + SET_MARK_BIT(d->forward_address); +} + +void unmark_object (void *obj) { + data *d = TO_DATA(obj); + 
RESET_MARK_BIT(d->forward_address); +} + +bool is_enqueued (void *obj) { + data *d = TO_DATA(obj); + return IS_ENQUEUED(d->forward_address) != 0; +} + +void make_enqueued (void *obj) { + data *d = TO_DATA(obj); + MAKE_ENQUEUED(d->forward_address); +} + +void make_dequeued (void *obj) { + data *d = TO_DATA(obj); + MAKE_DEQUEUED(d->forward_address); +} + +heap_iterator heap_begin_iterator () { + heap_iterator it = {.current = heap.begin}; + return it; +} + +void heap_next_obj_iterator (heap_iterator *it) { + void *ptr = it->current; + size_t obj_size = obj_size_header_ptr(ptr); + // make sure we take alignment into consideration + obj_size = BYTES_TO_WORDS(obj_size); + it->current += obj_size; +} + +bool heap_is_done_iterator (heap_iterator *it) { return it->current >= heap.current; } + +lama_type get_type_row_ptr (void *ptr) { + data *data_ptr = TO_DATA(ptr); + return get_type_header_ptr(data_ptr); +} + +lama_type get_type_header_ptr (void *ptr) { + int *header = (int *)ptr; + switch (TAG(*header)) { + case ARRAY_TAG: return ARRAY; + case STRING_TAG: return STRING; + case CLOSURE_TAG: return CLOSURE; + case SEXP_TAG: return SEXP; + default: { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "ERROR: get_type_header_ptr: unknown object header, cur_id=%d", cur_id); + raise(SIGINT); // only for debug purposes +#else +# ifdef FULL_INVARIANT_CHECKS +# ifdef DEBUG_PRINT + fprintf(stderr, + "ERROR: get_type_header_ptr: unknown object header, ptr is %p, tag %i, heap size is " + "%d cur_id=%d stack_top=%p stack_bot=%p ", + ptr, + TAG(*header), + heap.size, + cur_id, + (void *)__gc_stack_top, + (void *)__gc_stack_bottom); +# endif + FILE *heap_before_compaction = print_objects_traversal("dump_kill", 1); + fclose(heap_before_compaction); +# endif + kill(getpid(), SIGSEGV); +#endif + exit(1); + } + } +} + +size_t obj_size_row_ptr (void *ptr) { + data *data_ptr = TO_DATA(ptr); + return obj_size_header_ptr(data_ptr); +} + +size_t obj_size_header_ptr (void 
*ptr) { + int len = LEN(*(int *)ptr); + switch (get_type_header_ptr(ptr)) { + case ARRAY: return array_size(len); + case STRING: return string_size(len); + case CLOSURE: return closure_size(len); + case SEXP: return sexp_size(len); + default: { +#ifdef DEBUG_VERSION + fprintf(stderr, "ERROR: obj_size_header_ptr: unknown object header, cur_id=%d", cur_id); + raise(SIGINT); // only for debug purposes +#else + perror("ERROR: obj_size_header_ptr: unknown object header\n"); +#endif + exit(1); + } + } +} + +size_t array_size (size_t sz) { return get_header_size(ARRAY) + MEMBER_SIZE * sz; } + +size_t string_size (size_t len) { + // string should be null terminated + return get_header_size(STRING) + len + 1; +} + +size_t closure_size (size_t sz) { return get_header_size(CLOSURE) + MEMBER_SIZE * sz; } + +size_t sexp_size (size_t members) { return get_header_size(SEXP) + MEMBER_SIZE * (members + 1); } + +obj_field_iterator field_begin_iterator (void *obj) { + lama_type type = get_type_header_ptr(obj); + obj_field_iterator it = {.type = type, .obj_ptr = obj, .cur_field = get_object_content_ptr(obj)}; + switch (type) { + case STRING: { + it.cur_field = get_end_of_obj(it.obj_ptr); + break; + } + case CLOSURE: + case SEXP: { + it.cur_field += MEMBER_SIZE; + break; + } + default: break; + } + return it; +} + +obj_field_iterator ptr_field_begin_iterator (void *obj) { + obj_field_iterator it = field_begin_iterator(obj); + // corner case when obj has no fields + if (field_is_done_iterator(&it)) { return it; } + if (is_valid_pointer(*(size_t **)it.cur_field)) { return it; } + obj_next_ptr_field_iterator(&it); + return it; +} + +void obj_next_field_iterator (obj_field_iterator *it) { it->cur_field += MEMBER_SIZE; } + +void obj_next_ptr_field_iterator (obj_field_iterator *it) { + do { + obj_next_field_iterator(it); + } while (!field_is_done_iterator(it) && !is_valid_pointer(*(size_t **)it->cur_field)); +} + +bool field_is_done_iterator (obj_field_iterator *it) { + return it->cur_field 
>= get_end_of_obj(it->obj_ptr); +} + +void *get_obj_header_ptr (void *ptr) { + lama_type type = get_type_row_ptr(ptr); + return ptr - get_header_size(type); +} + +void *get_object_content_ptr (void *header_ptr) { + lama_type type = get_type_header_ptr(header_ptr); + return header_ptr + get_header_size(type); +} + +void *get_end_of_obj (void *header_ptr) { return header_ptr + obj_size_header_ptr(header_ptr); } + +size_t get_header_size (lama_type type) { + switch (type) { + case STRING: + case CLOSURE: + case ARRAY: + case SEXP: return DATA_HEADER_SZ; + default: perror("ERROR: get_header_size: unknown object type\n"); +#ifdef DEBUG_VERSION + raise(SIGINT); // only for debug purposes +#endif + exit(1); + } +} + +void *alloc_string (int len) { + data *obj = alloc(string_size(len)); + obj->data_header = STRING_TAG | (len << 3); +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "%p, [STRING] tag=%zu\n", obj, TAG(obj->data_header)); +#endif +#ifdef DEBUG_VERSION + obj->id = cur_id; +#endif + obj->forward_address = 0; + return obj; +} + +void *alloc_array (int len) { + data *obj = alloc(array_size(len)); + obj->data_header = ARRAY_TAG | (len << 3); +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "%p, [ARRAY] tag=%zu\n", obj, TAG(obj->data_header)); +#endif +#ifdef DEBUG_VERSION + obj->id = cur_id; +#endif + obj->forward_address = 0; + return obj; +} + +void *alloc_sexp (int members) { + sexp *obj = alloc(sexp_size(members)); + obj->data_header = SEXP_TAG | (members << 3); +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "%p, SEXP tag=%zu\n", obj, TAG(obj->data_header)); +#endif +#ifdef DEBUG_VERSION + obj->id = cur_id; +#endif + obj->forward_address = 0; + obj->tag = 0; + return obj; +} + +void *alloc_closure (int captured) { + + data *obj = alloc(closure_size(captured)); + obj->data_header = CLOSURE_TAG | (captured << 3); +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "%p, [CLOSURE] 
tag=%zu\n", obj, TAG(obj->data_header)); +#endif +#ifdef DEBUG_VERSION + obj->id = cur_id; +#endif + obj->forward_address = 0; + return obj; +} diff --git a/byterun/src/interpreter.c b/byterun/src/interpreter.c index 1de789dd2..ce7291de9 100644 --- a/byterun/src/interpreter.c +++ b/byterun/src/interpreter.c @@ -1,9 +1,9 @@ -#include "../include/interpreter.h" -#include "../include/types.h" -#include "../include/builtin.h" -#include "../include/operations.h" -#include "../../runtime/runtime.h" -#include "../../runtime/gc.h" +#include "interpreter.h" +#include "types.h" +#include "builtin.h" +#include "operations.h" +#include "runtime.h" +#include "gc.h" int ip_read_int(char** ip) { *ip += sizeof(int); diff --git a/byterun/src/parser.c b/byterun/src/parser.c index fda5d8653..906a79349 100644 --- a/byterun/src/parser.c +++ b/byterun/src/parser.c @@ -4,9 +4,9 @@ #include #include -#include "../../runtime/runtime.h" +#include "runtime.h" -#include "../include/parser.h" +#include "parser.h" void *__start_custom_data; void *__stop_custom_data; @@ -284,8 +284,3 @@ void dump_file (FILE *f, bytefile *bf) { disassemble (f, bf); } -int main (int argc, char* argv[]) { - bytefile *f = read_file (argv[1]); - dump_file (stdout, f); - return 0; -} diff --git a/byterun/src/types.c b/byterun/src/types.c index c00df02fd..eb5502c29 100644 --- a/byterun/src/types.c +++ b/byterun/src/types.c @@ -1,4 +1,4 @@ -#include "../include/types.h" +#include "types.h" #include diff --git a/byterun/src/utils.c b/byterun/src/utils.c deleted file mode 100644 index 8d0f08780..000000000 --- a/byterun/src/utils.c +++ /dev/null @@ -1 +0,0 @@ -#include "../include/utils.h" diff --git a/byterun/xmake.lua b/byterun/xmake.lua new file mode 100644 index 000000000..7e66280c0 --- /dev/null +++ b/byterun/xmake.lua @@ -0,0 +1,9 @@ +add_rules("mode.debug", "mode.release") + +set_languages("c23") + +target("byterun") + set_kind("binary") + add_includedirs("include") + add_headerfiles("include/*.h") + 
add_files("src/*.c")