From 3937ecf3872793a2320e66c7ddbd1e1a2761cd40 Mon Sep 17 00:00:00 2001 From: Egor Sheremetov Date: Wed, 9 Aug 2023 20:16:51 +0200 Subject: [PATCH] Removed extra word in sexp --- runtime/TODO.md | 18 +++++++ runtime/gc.c | 53 +++++++++----------- runtime/runtime.c | 102 ++++++++++++++++++++++----------------- runtime/runtime_common.h | 27 ++++++----- 4 files changed, 114 insertions(+), 86 deletions(-) create mode 100644 runtime/TODO.md diff --git a/runtime/TODO.md b/runtime/TODO.md new file mode 100644 index 000000000..3e67e4019 --- /dev/null +++ b/runtime/TODO.md @@ -0,0 +1,18 @@ +### TODO list + +- [x] Fix heap&stack&extra_roots dump +- [x] Remove extra and dead code +- [x] Debug print -> DEBUG_PRINT mode +- [x] Check `mmap`/`remap`/... +- [x] Check: `__gc_stack_bot`: same issue as `__gc_stack_top`? +- [x] Check: Can we get rid of `__gc_init` (as an assembly (implement in C instead))? (answer: if we make main in which every Lama file is compiled set `__gc_stack_bottom` to current `ebp` then yes, otherwise we need access to registers) +- [x] Check: runtime tags: should always the last bit be 1? (Answer: not really, however, we still need to distinguish between 5 different options (because unboxed values should have its own value to be returned from `LkindOf`)) +- [x] Fix warnings in ML code +- [x] TODO: debug flag doesn't compile +- [x] Sexp: move the tag to be `contents[0]` instead of the word in sexp header; i.e. get rid of sexp as separate data structure +- [ ] Magic constants +- [ ] Normal documentation: a-la doxygen +- [ ] Think: normal debug mode +- [ ] Fix warnings in C code +- [ ] Add more stress tests to `stdlib/regression` and unit tests +- [ ] Modes (like FULL_INVARIANTS) -> separate files \ No newline at end of file diff --git a/runtime/gc.c b/runtime/gc.c index 745f1eeb1..b3f11fcab 100644 --- a/runtime/gc.c +++ b/runtime/gc.c @@ -256,8 +256,6 @@ void compact_phase (size_t additional_size) { MAX(live_size * EXTRA_ROOM_HEAP_COEFFICIENT + additional_size, MINIMUM_HEAP_CAPACITY); size_t next_heap_pseudo_size = MAX(next_heap_size, heap.size); // this is weird but here is why it happens: - // if we allocate too little heap right now, we may lose access to some alive objects - // however, after we physically relocate all of our objects we will shrink allocated memory if it is possible memory_chunk old_heap = heap; heap.begin = mremap( @@ -273,21 +271,6 @@ void compact_phase (size_t additional_size) { update_references(&old_heap); physically_relocate(&old_heap); - /* // shrink it if possible, otherwise this code won't do anything, in both cases references - // will remain valid - heap.begin = mremap( - heap.begin, - WORDS_TO_BYTES(heap.size), - WORDS_TO_BYTES(next_heap_size), - 0 // in this case we don't set MREMAP_MAYMOVE because it shouldn't move :) - ); - if (heap.begin == MAP_FAILED) { - perror("ERROR: compact_phase: mremap failed\n"); - exit(1); - } - heap.end = heap.begin + next_heap_size; - heap.size = next_heap_size; -*/ heap.current = heap.begin + live_size; } @@ -741,7 +724,7 @@ lama_type get_type_header_ptr (void *ptr) { fprintf(stderr, "ERROR: get_type_header_ptr: unknown object header, cur_id=%d", cur_id); raise(SIGINT); // only for debug purposes #else -#ifdef FULL_INVARIANT_CHECKS +# ifdef FULL_INVARIANT_CHECKS fprintf(stderr, "ERROR: get_type_header_ptr: unknown object header, ptr is %p, tag %i, heap size is " "%d cur_id=%d stack_top=%p stack_bot=%p ", @@ -753,7 +736,7 @@ lama_type get_type_header_ptr (void *ptr) { (void *)__gc_stack_bottom); FILE *heap_before_compaction = print_objects_traversal("dump_kill", 1); fclose(heap_before_compaction); -#endif +# endif kill(getpid(), SIGSEGV); #endif exit(1); @@ -794,15 +777,23 @@ size_t string_size (size_t len) { size_t closure_size (size_t sz) { return get_header_size(CLOSURE) + MEMBER_SIZE * sz; } -size_t sexp_size (size_t members) { return get_header_size(SEXP) + MEMBER_SIZE * members; } +size_t sexp_size (size_t members) { return get_header_size(SEXP) + MEMBER_SIZE * (members + 1); } obj_field_iterator field_begin_iterator (void *obj) { lama_type type = get_type_header_ptr(obj); obj_field_iterator it = {.type = type, .obj_ptr = obj, .cur_field = get_object_content_ptr(obj)}; - // since string doesn't have any actual fields we set cur_field to the end of object - if (type == STRING) { it.cur_field = get_end_of_obj(it.obj_ptr); } - // skip first member which is basically pointer to the code - if (type == CLOSURE) { it.cur_field += MEMBER_SIZE; } + switch (type) { + case STRING: { + it.cur_field = get_end_of_obj(it.obj_ptr); + break; + } + case CLOSURE: + case SEXP: { + it.cur_field += MEMBER_SIZE; + break; + } + default: break; + } return it; } @@ -843,8 +834,8 @@ size_t get_header_size (lama_type type) { switch (type) { case STRING: case CLOSURE: - case ARRAY: return DATA_HEADER_SZ; - case SEXP: return SEXP_ONLY_HEADER_SZ + DATA_HEADER_SZ; + case ARRAY: + case SEXP: return DATA_HEADER_SZ; default: perror("ERROR: get_header_size: unknown object type\n"); #ifdef DEBUG_VERSION raise(SIGINT); // only for debug purposes @@ -881,15 +872,15 @@ void *alloc_array (int len) { void *alloc_sexp (int members) { sexp *obj = alloc(sexp_size(members)); - obj->sexp_header = obj->contents.data_header = SEXP_TAG | (members << 3); + obj->data_header = SEXP_TAG | (members << 3); #ifdef DEBUG_VERSION - fprintf(stderr, "%p, SEXP tag=%zu\n", obj, TAG(obj->contents.data_header)); + fprintf(stderr, "%p, SEXP tag=%zu\n", obj, TAG(obj->data_header)); #endif #ifdef FULL_INVARIANT_CHECKS - obj->contents.id = cur_id; + obj->id = cur_id; #endif - obj->contents.forward_address = 0; - obj->tag = 0; + obj->forward_address = 0; + obj->tag = 0; return obj; } diff --git a/runtime/runtime.c b/runtime/runtime.c index 667bbf063..79e7e25ae 100644 --- a/runtime/runtime.c +++ b/runtime/runtime.c @@ -354,7 +354,8 @@ static void printValue (void *p) { switch (TAG(a->data_header)) { case STRING_TAG: printStringBuf("\"%s\"", a->contents); break; - case CLOSURE_TAG: + case CLOSURE_TAG: { + printStringBuf("data_header); i++) { if (i) printValue((void *)((int *)a->contents)[i]); @@ -363,8 +364,8 @@ static void printValue (void *p) { } printStringBuf(">"); break; - - case ARRAY_TAG: + } + case ARRAY_TAG: { printStringBuf("["); for (i = 0; i < LEN(a->data_header); i++) { printValue((void *)((int *)a->contents)[i]); @@ -372,28 +373,31 @@ static void printValue (void *p) { } printStringBuf("]"); break; + } case SEXP_TAG: { - char *tag = de_hash(TO_SEXP(p)->tag); + sexp *sa = (sexp *)a; + char *tag = de_hash(sa->tag); if (strcmp(tag, "cons") == 0) { - data *b = a; + sexp *sb = sa; printStringBuf("{"); - while (LEN(b->data_header)) { - printValue((void *)((int *)b->contents)[0]); - b = (data *)((int *)b->contents)[1]; - if (!UNBOXED(b)) { + while (LEN(sb->data_header)) { + printValue((void *)((int *)sb->contents)[0]); + int list_next = ((int *)sb->contents)[1]; + if (!UNBOXED(list_next)) { printStringBuf(", "); - b = TO_DATA(b); + sb = TO_SEXP(list_next); } else break; } printStringBuf("}"); } else { printStringBuf("%s", tag); + sexp *sexp_a = (sexp *)a; if (LEN(a->data_header)) { printStringBuf(" ("); - for (i = 0; i < LEN(a->data_header); i++) { - printValue((void *)((int *)a->contents)[i]); - if (i != LEN(a->data_header) - 1) printStringBuf(", "); + for (i = 0; i < LEN(sexp_a->data_header); i++) { + printValue((void *)((int *)sexp_a->contents)[i]); + if (i != LEN(sexp_a->data_header) - 1) printStringBuf(", "); } printStringBuf(")"); } @@ -554,9 +558,9 @@ void *Lclone (void *p) { break; case SEXP_TAG: - sobj = (sexp *)alloc_sexp(l); - memcpy(sobj, TO_SEXP(p), sexp_size(l)); - res = (void *)sobj->contents.contents; + obj = (data *)alloc_sexp(l); + memcpy(obj, TO_DATA(p), sexp_size(l)); + res = (void *)obj->contents; break; default: failure("invalid data_header %d in clone *****\n", t); @@ -604,7 +608,8 @@ int inner_hash (int depth, unsigned acc, void *p) { case SEXP_TAG: { int ta = TO_SEXP(p)->tag; acc = HASH_APPEND(acc, ta); - i = 0; + i = 1; + ++l; break; } @@ -654,6 +659,7 @@ extern int Lcompare (void *p, void *q) { int ta = TAG(a->data_header), tb = TAG(b->data_header); int la = LEN(a->data_header), lb = LEN(b->data_header); int i; + int shift = 0; COMPARE_AND_RETURN(ta, tb); @@ -672,10 +678,11 @@ extern int Lcompare (void *p, void *q) { break; case SEXP_TAG: { - int ta = TO_SEXP(p)->tag, tb = TO_SEXP(q)->tag; - COMPARE_AND_RETURN(ta, tb); + int tag_a = TO_SEXP(p)->tag, tag_b = TO_SEXP(q)->tag; + COMPARE_AND_RETURN(tag_a, tag_b); COMPARE_AND_RETURN(la, lb); - i = 0; + i = 0; + shift = 1; break; } @@ -683,7 +690,7 @@ extern int Lcompare (void *p, void *q) { } for (; i < la; i++) { - int c = Lcompare(((void **)a->contents)[i], ((void **)b->contents)[i]); + int c = Lcompare(((void **)a->contents)[i + shift], ((void **)b->contents)[i + shift]); if (c != BOX(0)) return BOX(c); } @@ -697,15 +704,17 @@ extern int Lcompare (void *p, void *q) { extern void *Belem (void *p, int i) { data *a = (data *)BOX(NULL); - ASSERT_BOXED(".elem:1", p); + if (UNBOXED(p)) { ASSERT_BOXED(".elem:1", p); } ASSERT_UNBOXED(".elem:2", i); a = TO_DATA(p); i = UNBOX(i); - if (TAG(a->data_header) == STRING_TAG) { return (void *)BOX(a->contents[i]); } - - return (void *)((int *)a->contents)[i]; + switch (TAG(a->data_header)) { + case STRING_TAG: return (void *)BOX(a->contents[i]); + case SEXP_TAG: return (void *)((int *)a->contents)[i + 1]; + default: return (void *)((int *)a->contents)[i]; + } } extern void *LmakeArray (int length) { @@ -864,32 +873,29 @@ extern void *Bsexp (int bn, ...) { int i; int ai; size_t *p; - sexp *r; - data *d; + data *r; int n = UNBOX(bn); PRE_GC(); - int fields_cnt = n - 1; - r = (sexp *)alloc_sexp(fields_cnt); - d = &(r->contents); - r->tag = 0; + int fields_cnt = n - 1; + r = (data *)alloc_sexp(fields_cnt); + ((sexp *)r)->tag = 0; va_start(args, bn); - for (i = 0; i < n - 1; i++) { - ai = va_arg(args, int); - + for (i = 1; i < n; i++) { + ai = va_arg(args, int); p = (size_t *)ai; - ((int *)d->contents)[i] = ai; + ((int *)r->contents)[i] = ai; } - r->tag = UNBOX(va_arg(args, int)); + ((sexp *)r)->tag = UNBOX(va_arg(args, int)); va_end(args); POST_GC(); - return d->contents; + return (int *)r->contents; } extern int Btag (void *d, int t, int n) { @@ -964,15 +970,25 @@ extern int Bsexp_tag_patt (void *x) { extern void *Bsta (void *v, int i, void *x) { if (UNBOXED(i)) { ASSERT_BOXED(".sta:3", x); + data *d = TO_DATA(x); - if (TAG(TO_DATA(x)->data_header) == STRING_TAG) ((char *)x)[UNBOX(i)] = (char)UNBOX(v); - else ((int *)x)[UNBOX(i)] = (int)v; - - return v; + switch (TAG(d->data_header)) { + case STRING_TAG: { + ((char *)x)[UNBOX(i)] = (char)UNBOX(v); + break; + } + case SEXP_TAG: { + ((int *)x)[UNBOX(i) + 1] = (int)v; + break; + } + default: { + ((int *)x)[UNBOX(i)] = (int)v; + } + } + } else { + *(void **)x = v; } - *(void **)x = v; - return v; } diff --git a/runtime/runtime_common.h b/runtime/runtime_common.h index 718e16278..a386719e0 100644 --- a/runtime/runtime_common.h +++ b/runtime/runtime_common.h @@ -7,20 +7,15 @@ //#define FULL_INVARIANT_CHECKS #define STRING_TAG 0x00000001 -//# define STRING_TAG 0x00000000 #define ARRAY_TAG 0x00000003 -//# define ARRAY_TAG 0x00000002 #define SEXP_TAG 0x00000005 -//# define SEXP_TAG 0x00000004 #define CLOSURE_TAG 0x00000007 -//# define CLOSURE_TAG 0x00000006 #define UNBOXED_TAG 0x00000009 // Not actually a data_header; used to return from LkindOf #define LEN(x) ((x & 0xFFFFFFF8) >> 3) #define TAG(x) (x & 0x00000007) -//# define TAG(x) (x & 0x00000006) -#define SEXP_ONLY_HEADER_SZ (2 * sizeof(int)) +#define SEXP_ONLY_HEADER_SZ (sizeof(int)) #ifndef FULL_INVARIANT_CHECKS # define DATA_HEADER_SZ (sizeof(size_t) + sizeof(int)) @@ -31,7 +26,7 @@ #define MEMBER_SIZE sizeof(int) #define TO_DATA(x) ((data *)((char *)(x)-DATA_HEADER_SZ)) -#define TO_SEXP(x) ((sexp *)((char *)(x)-DATA_HEADER_SZ - SEXP_ONLY_HEADER_SZ)) +#define TO_SEXP(x) ((sexp *)((char *)(x)-DATA_HEADER_SZ)) #define UNBOXED(x) (((int)(x)) & 0x0001) #define UNBOX(x) (((int)(x)) >> 1) @@ -60,11 +55,19 @@ typedef struct { } data; typedef struct { - // duplicates contents.data_header in order to be able to understand if it is s-exp during iteration over heap - int sexp_header; - // stores hashed s-expression constructor name - int tag; - data contents; + // store tag in the last three bits to understand what structure this is, other bits are filled with + // other utility info (i.e., size for array, number of fields for s-expression) + int data_header; + +#ifdef FULL_INVARIANT_CHECKS + size_t id; +#endif + + // last bit is used as MARK-BIT, the rest are used to store address where object should move + // last bit can be used because due to alignment we can assume that last two bits are always 0's + size_t forward_address; + int tag; + int contents[0]; } sexp; #endif