Added tests + fixed bunch of bugs in GC implementation

This commit is contained in:
Egor Sheremetov 2023-04-26 14:22:14 +02:00
parent 3826c8dd32
commit 313997496d
9 changed files with 1577 additions and 1534 deletions

View file

@ -1,15 +1,22 @@
CC=gcc
all: gc_runtime.o gc.o runtime.o
all: gc_runtime.o gc.o runtime.o test.o
ar rc runtime.a gc_runtime.o runtime.o gc.o
test.o: gc.o gc_runtime.o runtime.o virt_stack.o test_main.c test_util.s
$(CC) -o test.o -g2 -fstack-protector-all -m32 gc.o gc_runtime.o virt_stack.o runtime.o test_main.c test_util.s
virt_stack.o: virt_stack.h virt_stack.c
$(CC) -g2 -fstack-protector-all -m32 -c virt_stack.c
gc.o: gc.c gc.h
$(CC) -g -fstack-protector-all -m32 -c gc.c
$(CC) -g2 -fstack-protector-all -m32 -c gc.c
gc_runtime.o: gc_runtime.s
$(CC) -g -fstack-protector-all -m32 -c gc_runtime.s
$(CC) -g2 -fstack-protector-all -m32 -c gc_runtime.s
runtime.o: runtime.c runtime.h
$(CC) -g -fstack-protector-all -m32 -c runtime.c
$(CC) -g2 -fstack-protector-all -m32 -c runtime.c
clean:
$(RM) *.a *.o *~

View file

@ -1,30 +1,46 @@
# define _GNU_SOURCE 1
#include "gc.h"
#include "runtime_common.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/mman.h>
#include <string.h>
#include "gc.h"
#include "runtime_common.h"
#include <assert.h>
#ifdef DEBUG_VERSION
#include <signal.h>
#endif
#ifndef DEBUG_VERSION
static const size_t INIT_HEAP_SIZE = 1 << 18;
#else
static const size_t INIT_HEAP_SIZE = 8;
#endif
static const size_t SIZE_T_CHARS = sizeof(size_t)/sizeof(char);
#ifdef DEBUG_VERSION
static const size_t cur_id = 1;
size_t cur_id = 0;
#endif
static extra_roots_pool extra_roots;
extern size_t __gc_stack_top, __gc_stack_bottom;
#ifndef DEBUG_VERSION
extern const size_t __start_custom_data, __stop_custom_data;
#endif
#ifdef DEBUG_VERSION
memory_chunk heap;
#else
static memory_chunk heap;
#endif
void* alloc(size_t size) {
void *alloc(size_t size) {
#ifdef DEBUG_VERSION
++cur_id;
#endif
size = BYTES_TO_WORDS(size);
void *p = gc_alloc_on_existing_heap(size);
if (!p) {
@ -34,55 +50,79 @@ void* alloc(size_t size) {
return p;
}
void* gc_alloc_on_existing_heap(size_t size) {
if (heap.current + size < heap.end) {
void *gc_alloc_on_existing_heap(size_t size) {
if (heap.current + size <= heap.end) {
void *p = (void *) heap.current;
heap.current += size;
memset(p, 0, size * sizeof(size_t));
return p;
}
return NULL;
}
void* gc_alloc(size_t size) {
// mark phase
// TODO: add extra roots and static area scan
__gc_root_scan_stack();
void *gc_alloc(size_t size) {
mark_phase();
// compact phase
compact(size);
compact_phase(size);
return gc_alloc_on_existing_heap(size);
}
void compact(size_t additional_size) {
// Mark phase of the collector: runs marking from every root set in turn.
// The roots visited are the stack (via the assembly routine
// __gc_root_scan_stack), the extra-roots pool and, in non-debug builds only,
// the global data area.
void mark_phase(void) {
    __gc_root_scan_stack();
    scan_extra_roots();
#ifndef DEBUG_VERSION
    scan_global_area();
#endif
}
void compact_phase(size_t additional_size) {
size_t live_size = compute_locations();
size_t next_heap_size = MAX(live_size * EXTRA_ROOM_HEAP_COEFFICIENT + additional_size, MINIMUM_HEAP_CAPACITY);
size_t next_heap_pseudo_size = MAX(next_heap_size, heap.size); // this is weird but here is why it happens:
// if we allocate too little heap right now, we may loose access to some alive objects
// however, after we physically relocate all of our objects we will shrink allocated memory if it is possible
memory_chunk new_memory;
new_memory.begin = mremap(
memory_chunk old_heap = heap;
heap.begin = mremap(
heap.begin,
WORDS_TO_BYTES(heap.size),
WORDS_TO_BYTES(next_heap_pseudo_size),
MREMAP_MAYMOVE
);
if (heap.begin == MAP_FAILED) {
perror("ERROR: compact_phase: mremap failed\n");
exit(1);
}
heap.end = heap.begin + next_heap_pseudo_size;
heap.size = next_heap_pseudo_size;
heap.current = heap.begin + (old_heap.current - old_heap.begin);
update_references(&old_heap);
physically_relocate(&old_heap);
// shrink it if possible, otherwise this code won't do anything, in both cases references will remain valid
heap.begin = mremap(
heap.begin,
WORDS_TO_BYTES(heap.size),
WORDS_TO_BYTES(next_heap_size),
MREMAP_MAYMOVE
);
if (new_memory.begin == MAP_FAILED) {
perror ("ERROR: compact: mremap failed\n");
exit (1);
0 // in this case we don't set MREMAP_MAYMOVE because it shouldn'test_small_tree_compaction move :)
);
if (heap.begin == MAP_FAILED) {
perror("ERROR: compact_phase: mremap failed\n");
exit(1);
}
new_memory.end = new_memory.begin + next_heap_size;
new_memory.size = next_heap_size;
new_memory.current = new_memory.begin + live_size;
update_references(&new_memory);
physically_relocate(&new_memory);
heap.end = heap.begin + next_heap_size;
heap.size = next_heap_size;
heap.current = heap.begin + live_size;
}
size_t compute_locations() {
size_t* free_ptr = heap.begin;
size_t *free_ptr = heap.begin;
heap_iterator scan_iter = heap_begin_iterator();
for (; heap_is_done_iterator(&scan_iter); heap_next_obj_iterator(&scan_iter)) {
for (; !heap_is_done_iterator(&scan_iter); heap_next_obj_iterator(&scan_iter)) {
void *header_ptr = scan_iter.current;
void *obj_content = get_object_content_ptr(header_ptr);
size_t sz = BYTES_TO_WORDS(obj_size_header_ptr(header_ptr));
@ -94,32 +134,66 @@ size_t compute_locations() {
}
// it will return number of words
return scan_iter.current - heap.begin;
return free_ptr - heap.begin;
}
// TODO: fix pointers on stack and in static area
void update_references(memory_chunk *next_memory) {
heap_iterator it = heap_begin_iterator();
while (!heap_is_done_iterator(&it)) {
for (
obj_field_iterator field_iter = ptr_field_begin_iterator(it.current);
!field_is_done_iterator(&field_iter);
obj_next_ptr_field_iterator(&field_iter)
void scan_and_fix_region(memory_chunk *old_heap, void *start, void *end) {
for (size_t *ptr = (size_t *) start; ptr < (size_t *) end; ++ptr) {
size_t ptr_value = *ptr;
// this can't be expressed via is_valid_heap_pointer, because this pointer may point area corresponding to the old heap
if (is_valid_pointer((size_t *) ptr_value)
&& (size_t) old_heap->begin <= ptr_value
&& ptr_value < (size_t) old_heap->current
) {
void *field_obj_content = *(void **) field_iter.cur_field; // TODO: create iterator method 'dereference', so that code would be a bit more readable
// important, we calculate new_addr very carefully here, because objects may relocate to another memory chunk
size_t *new_addr = next_memory->begin + ((size_t *) get_forward_address(field_obj_content) - heap.begin);
// update field reference to point to new_addr
// since, we want fields to point to actual content, we need to add this extra content_offset
// because forward_address itself is pointer to object header
size_t content_offset = get_header_size(get_type_row_ptr(field_obj_content));
* (void **) field_iter.cur_field = new_addr + content_offset;
void *obj_ptr = (void*) heap.begin + ((void *) ptr_value - (void *) old_heap->begin);
void *new_addr = (void*) heap.begin + ((void *) get_forward_address(obj_ptr) - (void *) old_heap->begin);
size_t content_offset = get_header_size(get_type_row_ptr(obj_ptr));
*(void **) ptr = new_addr + content_offset;
}
heap_next_obj_iterator(&it);
}
}
void physically_relocate(memory_chunk *next_memory) {
// Second pass of compaction: rewrites every reference so that it points to
// the location its target will occupy after relocation.
//
// old_heap describes the heap as it was before mremap; the global `heap`
// describes the current mapping. Field values still hold addresses relative
// to old_heap->begin, so each one is first translated into the current
// mapping before its forward address is read.
//
// After the heap objects are processed, the stack, the extra-roots pool and
// (non-debug builds only) the global data area are fixed up through
// scan_and_fix_region.
//
// NOTE(review): unlike scan_and_fix_region, field values are not range-checked
// against the old heap before being translated - confirm that every field
// yielded by the pointer-field iterator points into the heap.
void update_references(memory_chunk *old_heap) {
    for (
        heap_iterator obj_iter = heap_begin_iterator();
        !heap_is_done_iterator(&obj_iter);
        heap_next_obj_iterator(&obj_iter)
    ) {
        // only live objects have their fields rewritten
        if (!is_marked(get_object_content_ptr(obj_iter.current))) {
            continue;
        }
        obj_field_iterator field = ptr_field_begin_iterator(obj_iter.current);
        while (!field_is_done_iterator(&field)) {
            // the stored pointer is relative to the old mapping; rebase it onto the current one
            void *target_content = (void *) heap.begin + (*(void **) field.cur_field - (void *) old_heap->begin);
            // the forward address is also expressed relative to the old mapping
            void *target_new_header =
                heap.begin + ((size_t *) get_forward_address(target_content) - (size_t *) old_heap->begin);
            // fields must point at object content, while the forward address
            // designates the object's header, hence the extra offset
            size_t header_bytes = get_header_size(get_type_row_ptr(target_content));
            void *fixed_value = target_new_header + header_bytes;
            if (!is_valid_heap_pointer((void *) (fixed_value))) {
                fprintf(stderr, "ur: incorrect pointer assignment: on object with id %d", TO_DATA(get_object_content_ptr(obj_iter.current))->id);
                exit(1);
            }
            *(void **) field.cur_field = fixed_value;
            obj_next_ptr_field_iterator(&field);
        }
    }

    // fix pointers from stack
    scan_and_fix_region(old_heap, (void*) __gc_stack_top, (void*) __gc_stack_bottom);

    // fix pointers from extra_roots
    scan_and_fix_region(old_heap, (void*) extra_roots.roots, (size_t*) extra_roots.roots + extra_roots.current_free);

#ifndef DEBUG_VERSION
    // fix pointers from static area
    scan_and_fix_region(old_heap, (void*) &__start_custom_data, (void*) &__stop_custom_data);
#endif
}
void physically_relocate(memory_chunk *old_heap) {
heap_iterator from_iter = heap_begin_iterator();
while (!heap_is_done_iterator(&from_iter)) {
@ -127,16 +201,20 @@ void physically_relocate(memory_chunk *next_memory) {
if (is_marked(obj)) {
// Move the object from its old location to its new location relative to
// the heap's (possibly new) location, 'to' points to future object header
void* to = next_memory->begin + ((size_t *) get_forward_address(obj) - heap.begin);
memmove(to, from_iter.current, BYTES_TO_WORDS(obj_size_header_ptr(obj)));
unmark_object(to + ((size_t *) obj - from_iter.current));
size_t *to = heap.begin + ((size_t *) get_forward_address(obj) - (size_t *) old_heap->begin);
memmove(to, from_iter.current, obj_size_header_ptr(from_iter.current));
unmark_object(get_object_content_ptr(to));
}
heap_next_obj_iterator(&from_iter);
}
}
bool is_valid_heap_pointer(const size_t *p) {
return !UNBOXED(p) && (size_t) heap.begin <= (size_t) p && (size_t) p < (size_t) heap.end;
return !UNBOXED(p) && (size_t) heap.begin <= (size_t) p && (size_t) p < (size_t) heap.current;
}
// Returns true when the word p is not an unboxed value according to the
// UNBOXED macro. No heap-range check is performed here (compare with
// is_valid_heap_pointer).
bool is_valid_pointer(const size_t *p) {
    if (UNBOXED(p)) {
        return false;
    }
    return true;
}
void mark(void *obj) {
@ -152,54 +230,83 @@ void mark(void *obj) {
obj_field_iterator ptr_field_it = ptr_field_begin_iterator(header_ptr);
!field_is_done_iterator(&ptr_field_it);
obj_next_ptr_field_iterator(&ptr_field_it)
) {
mark(ptr_field_it.cur_field);
) {
mark(* (void **) ptr_field_it.cur_field);
}
}
extern void gc_test_and_mark_root(size_t ** root) {
mark((void*) *root);
// Marks the object referenced by every slot currently registered in the
// extra-roots pool (indices 0 .. current_free - 1).
void scan_extra_roots(void) {
    int idx = 0;
    while (idx < extra_roots.current_free) {
        // each pool entry is the address of a root slot (see push_extra_root),
        // so one dereference yields the value to mark
        mark(*extra_roots.roots[idx]);
        ++idx;
    }
}
extern void __init (void) {
#ifndef DEBUG_VERSION
// Walks the global data area word by word, from __start_custom_data up to
// (not including) __stop_custom_data, and passes every word to mark().
// Compiled only when DEBUG_VERSION is not defined.
void scan_global_area(void) {
    const size_t *word = &__start_custom_data;
    while (word < &__stop_custom_data) {
        mark(*(void **)word);
        ++word;
    }
}
#endif
extern void gc_test_and_mark_root(size_t **root) {
mark((void *) *root);
}
extern void __init(void) {
size_t space_size = INIT_HEAP_SIZE * sizeof(size_t);
srandom (time (NULL));
srandom(time(NULL));
heap.begin = mmap (NULL, space_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
heap.begin = mmap(NULL, space_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
if (heap.begin == MAP_FAILED) {
perror ("ERROR: __init: mmap failed\n");
exit (1);
perror("ERROR: __init: mmap failed\n");
exit(1);
}
heap.end = heap.begin + INIT_HEAP_SIZE;
heap.size = INIT_HEAP_SIZE;
heap.current = heap.begin;
heap.end = heap.begin + INIT_HEAP_SIZE;
heap.size = INIT_HEAP_SIZE;
heap.current = heap.begin;
clear_extra_roots();
}
void clear_extra_roots (void) {
// Releases the GC heap and resets all collector state, so that __init can be
// called again (used by the tests between test cases).
//
// heap.size is kept in words (it is set to INIT_HEAP_SIZE while the mapping
// itself is INIT_HEAP_SIZE * sizeof(size_t) bytes, and mremap is always given
// WORDS_TO_BYTES(heap.size)), whereas munmap expects a length in bytes.
// Passing the raw word count would release only part of the mapping.
extern void __shutdown(void) {
    munmap(heap.begin, WORDS_TO_BYTES(heap.size));
#ifdef DEBUG_VERSION
    cur_id = 0;
#endif
    heap.begin = NULL;
    heap.end = NULL;
    heap.size = 0;
    heap.current = NULL;
    __gc_stack_top = 0;
    __gc_stack_bottom = 0;
}
void clear_extra_roots(void) {
extra_roots.current_free = 0;
}
void push_extra_root (void ** p) {
void push_extra_root(void **p) {
if (extra_roots.current_free >= MAX_EXTRA_ROOTS_NUMBER) {
perror ("ERROR: push_extra_roots: extra_roots_pool overflow");
exit (1);
perror("ERROR: push_extra_roots: extra_roots_pool overflow");
exit(1);
}
extra_roots.roots[extra_roots.current_free] = p;
extra_roots.current_free++;
}
void pop_extra_root (void ** p) {
void pop_extra_root(void **p) {
if (extra_roots.current_free == 0) {
perror ("ERROR: pop_extra_root: extra_roots are empty");
exit (1);
perror("ERROR: pop_extra_root: extra_roots are empty");
exit(1);
}
extra_roots.current_free--;
if (extra_roots.roots[extra_roots.current_free] != p) {
perror ("ERROR: pop_extra_root: stack invariant violation");
exit (1);
perror("ERROR: pop_extra_root: stack invariant violation");
exit(1);
}
}
@ -207,18 +314,19 @@ void pop_extra_root (void ** p) {
#ifdef DEBUG_VERSION
void objects_snapshot(void *objects_ptr, size_t objects_cnt) {
size_t *ids_ptr = (size_t *) objects_ptr;
size_t objects_snapshot(int *object_ids_buf, size_t object_ids_buf_size) {
size_t *ids_ptr = (size_t *) object_ids_buf;
size_t i = 0;
for (
heap_iterator it = heap_begin_iterator();
!heap_is_done_iterator(&it) && i < objects_cnt;
heap_next_obj_iterator(&it)
) {
!heap_is_done_iterator(&it) && i < object_ids_buf_size;
heap_next_obj_iterator(&it), ++i
) {
void *header_ptr = it.current;
data *d = TO_DATA(get_object_content_ptr(header_ptr));
ids_ptr[i] = d->id;
}
return i;
}
void set_stack(size_t stack_top, size_t stack_bottom) {
@ -241,7 +349,7 @@ size_t get_forward_address(void *obj) {
return GET_FORWARD_ADDRESS(d->forward_address);
}
size_t set_forward_address(void *obj, size_t addr) {
void set_forward_address(void *obj, size_t addr) {
data *d = TO_DATA(obj);
SET_FORWARD_ADDRESS(d->forward_address, addr);
}
@ -263,7 +371,7 @@ void unmark_object(void *obj) {
}
heap_iterator heap_begin_iterator() {
heap_iterator it = { .current=heap.begin };
heap_iterator it = {.current=heap.begin};
return it;
}
@ -296,8 +404,13 @@ lama_type get_type_header_ptr(void *ptr) {
case SEXP_TAG:
return SEXP;
default:
perror ("ERROR: get_type_header_ptr: unknown object header");
exit (1);
#ifdef DEBUG_VERSION
fprintf(stderr, "ERROR: get_type_header_ptr: unknown object header, cur_id=%d", cur_id);
raise(SIGINT); // only for debug purposes
#else
perror("ERROR: get_type_header_ptr: unknown object header");
#endif
exit(1);
}
}
@ -318,8 +431,11 @@ size_t obj_size_header_ptr(void *ptr) {
case SEXP:
return sexp_size(len);
default:
perror ("ERROR: obj_size_header_ptr: unknown object header");
exit (1);
perror("ERROR: obj_size_header_ptr: unknown object header");
#ifdef DEBUG_VERSION
raise(SIGINT); // only for debug purposes
#endif
exit(1);
}
}
@ -336,18 +452,22 @@ size_t closure_size(size_t sz) {
return get_header_size(CLOSURE) + MEMBER_SIZE * sz;
}
size_t sexp_size(size_t sz) {
return get_header_size(SEXP) + MEMBER_SIZE * sz;
size_t sexp_size(size_t members) {
return get_header_size(SEXP) + MEMBER_SIZE * members;
}
obj_field_iterator field_begin_iterator(void *obj) {
lama_type type = get_type_row_ptr(obj);
obj_field_iterator it = { .type=type, .obj_ptr=get_obj_header_ptr(obj, type), .cur_field=obj };
lama_type type = get_type_header_ptr(obj);
obj_field_iterator it = {.type=type, .obj_ptr=obj, .cur_field=get_object_content_ptr(obj)};
// since string doesn't have any actual fields we set cur_field to the end of object
if (type == STRING) {
it.cur_field = get_end_of_obj(it.obj_ptr);
}
// skip first member which is basically pointer to the code
if (type == CLOSURE) {
it.cur_field += MEMBER_SIZE;
}
return it;
}
@ -357,7 +477,7 @@ obj_field_iterator ptr_field_begin_iterator(void *obj) {
if (field_is_done_iterator(&it)) {
return it;
}
if (is_valid_heap_pointer(it.cur_field)) {
if (is_valid_pointer(*(size_t **) it.cur_field)) {
return it;
}
obj_next_ptr_field_iterator(&it);
@ -371,23 +491,23 @@ void obj_next_field_iterator(obj_field_iterator *it) {
void obj_next_ptr_field_iterator(obj_field_iterator *it) {
do {
obj_next_field_iterator(it);
} while (!field_is_done_iterator(it) && !is_valid_heap_pointer(it->cur_field));
} while (!field_is_done_iterator(it) && !is_valid_pointer(*(size_t **) it->cur_field));
}
bool field_is_done_iterator(obj_field_iterator *it) {
return it->cur_field >= get_end_of_obj(it->obj_ptr);
}
void* get_obj_header_ptr(void *ptr, lama_type type) {
void *get_obj_header_ptr(void *ptr, lama_type type) {
return ptr - get_header_size(type);
}
void* get_object_content_ptr(void *header_ptr) {
void *get_object_content_ptr(void *header_ptr) {
lama_type type = get_type_header_ptr(header_ptr);
return header_ptr + get_header_size(type);
}
void* get_end_of_obj(void *header_ptr) {
void *get_end_of_obj(void *header_ptr) {
return header_ptr + obj_size_header_ptr(header_ptr);
}
@ -400,8 +520,43 @@ size_t get_header_size(lama_type type) {
case SEXP:
return SEXP_ONLY_HEADER_SZ + DATA_HEADER_SZ;
default:
perror ("ERROR: get_header_size: unknown object type");
exit (1);
perror("ERROR: get_header_size: unknown object type");
#ifdef DEBUG_VERSION
raise(SIGINT); // only for debug purposes
#endif
exit(1);
}
}
// Allocates string_size(len) bytes through alloc() and fills in the object
// header: STRING_TAG in the low bits with len stored shifted left by 3, the
// current object id, and a cleared forward address.
// Returns the pointer obtained from alloc() (the start of the data header).
// NOTE(review): cur_id is only defined under DEBUG_VERSION in this file, while
// this function is not guarded by that macro.
void *alloc_string(int len) {
    size_t bytes = string_size(len);
    data *obj = alloc(bytes);

    obj->forward_address = 0;
    obj->id = cur_id;
    obj->data_header = STRING_TAG | (len << 3);
    return obj;
}
// Allocates array_size(len) bytes through alloc() and fills in the object
// header: ARRAY_TAG in the low bits with len stored shifted left by 3, the
// current object id, and a cleared forward address.
// Returns the pointer obtained from alloc() (the start of the data header).
// NOTE(review): cur_id is only defined under DEBUG_VERSION in this file, while
// this function is not guarded by that macro.
void *alloc_array(int len) {
    size_t bytes = array_size(len);
    data *obj = alloc(bytes);

    obj->forward_address = 0;
    obj->id = cur_id;
    obj->data_header = ARRAY_TAG | (len << 3);
    return obj;
}
// Allocates sexp_size(members) bytes through alloc() and initializes the
// s-expression: both sexp_header and the embedded data header receive
// SEXP_TAG combined with members shifted left by 3; the id is taken from
// cur_id, the forward address and the tag are cleared.
// Returns the pointer obtained from alloc() (the start of the sexp).
// NOTE(review): cur_id is only defined under DEBUG_VERSION in this file, while
// this function is not guarded by that macro.
void *alloc_sexp(int members) {
    size_t bytes = sexp_size(members);
    sexp *obj = alloc(bytes);

    obj->tag = 0;
    obj->contents.forward_address = 0;
    obj->contents.id = cur_id;
    obj->sexp_header = obj->contents.data_header = SEXP_TAG | (members << 3);
    return obj;
}
// Allocates closure_size(captured) bytes through alloc() and fills in the
// object header: CLOSURE_TAG in the low bits with captured stored shifted
// left by 3, the current object id, and a cleared forward address.
// Returns the pointer obtained from alloc() (the start of the data header).
// NOTE(review): cur_id is only defined under DEBUG_VERSION in this file, while
// this function is not guarded by that macro.
void *alloc_closure(int captured) {
    size_t bytes = closure_size(captured);
    data *obj = alloc(bytes);

    obj->forward_address = 0;
    obj->id = cur_id;
    obj->data_header = CLOSURE_TAG | (captured << 3);
    return obj;
}

View file

@ -1,22 +1,26 @@
#ifndef __LAMA_GC__
#define __LAMA_GC__
// this flag makes GC behavior a bit different for testing purposes.
#define DEBUG_VERSION
# define GET_MARK_BIT(x) (((int) (x)) & 1)
# define SET_MARK_BIT(x) (x = (((int) (x)) | 1))
# define RESET_MARK_BIT(x) (x = (((int) (x)) & (~1)))
# define GET_FORWARD_ADDRESS(x) (((int) (x)) & (~1)) // since last bit is used as mark-bit and due to correct alignment we can expect that last bit doesn't influence address (it should always be zero)
# define SET_FORWARD_ADDRESS(x, addr) (x = (((int) (x)) | ((int) (addr))))
# define GET_FORWARD_ADDRESS(x) (((size_t) (x)) & (~1)) // since last bit is used as mark-bit and due to correct alignment we can expect that last bit doesn'test_small_tree_compaction influence address (it should always be zero)
# define SET_FORWARD_ADDRESS(x, addr) (x = (GET_MARK_BIT(x) | ((int) (addr))))
# define EXTRA_ROOM_HEAP_COEFFICIENT 2 // TODO: tune this parameter
#ifdef DEBUG_VERSION
# define MINIMUM_HEAP_CAPACITY (1<<8) // TODO: tune this parameter
#else
# define MINIMUM_HEAP_CAPACITY (1<<8) // TODO: tune this parameter
#endif
#include <stddef.h>
#include <stdbool.h>
#include "runtime_common.h"
// this flag makes GC behavior a bit different for testing purposes.
#define DEBUG_VERSION
typedef enum { ARRAY, CLOSURE, STRING, SEXP } lama_type;
typedef struct {
@ -53,27 +57,37 @@ void* gc_alloc(size_t);
// takes number of words as a parameter
void *gc_alloc_on_existing_heap(size_t);
void collect();
// specific for mark-and-compact gc
// specific for mark-and-compact_phase gc
void mark(void *obj);
void mark_phase(void);
// written in ASM, scans stack for pointers to the heap and starts marking process
extern void __gc_root_scan_stack(void); // TODO: write without ASM, since it is absolutely not necessary
// marks each pointer from extra roots
void scan_extra_roots(void);
#ifndef DEBUG_VERSION
// marks each valid pointer from global area
void scan_global_area(void);
#endif
// takes number of words that are required to be allocated somewhere on the heap
void compact(size_t additional_size);
void compact_phase(size_t additional_size);
// specific for Lisp-2 algorithm
size_t compute_locations();
void update_references(memory_chunk *);
void physically_relocate(memory_chunk *);
// written in ASM
extern void __gc_init (void); // MANDATORY TO CALL BEFORE ANY INTERACTION WITH GC (apart from cases where we are working with virtual stack as happens in tests)
extern void __init (void); // should be called before interaction with GC in case of using in tests with virtual stack, otherwise it is automatically invoked by __gc_init
extern void __shutdown (void); // mostly useful for tests but basically you want to call this in case you want to deallocate all object allocated via GC
// written in ASM
extern void __pre_gc (void);
// written in ASM
extern void __post_gc (void);
extern void __gc_root_scan_stack(void); // TODO: write without ASM, since it is absolutely not necessary
// invoked from ASM
extern void gc_test_and_mark_root(size_t ** root);
inline bool is_valid_heap_pointer(const size_t *);
inline bool is_valid_pointer(const size_t *);
void clear_extra_roots (void);
@ -86,8 +100,11 @@ void pop_extra_root (void ** p);
#ifdef DEBUG_VERSION
// test-only function, these pointer parameters are just a fancy way to return two values at a time
void objects_snapshot(void *objects_ptr, size_t objects_cnt);
// makes a snapshot of current objects in heap (both alive and dead), writes these ids to object_ids_buf,
// returns number of ids dumped
// object_ids_buf is pointer to area preallocated by user for dumping ids of objects in heap
// object_ids_buf_size is in WORDS, NOT BYTES
size_t objects_snapshot(int *object_ids_buf, size_t object_ids_buf_size);
// essential function to mock program stack
void set_stack(size_t stack_top, size_t stack_bottom);
@ -100,11 +117,15 @@ void set_extra_roots(size_t extra_roots_size, void** extra_roots_ptr);
/* Utility functions */
// accepts pointer to the start of the region and to the end of the region
// scans it and if it meets a pointer, it should be modified in according to forward address
void scan_and_fix_region(memory_chunk *old_heap, void *start, void *end);
// takes a pointer to an object content as an argument, returns forwarding address
size_t get_forward_address(void *obj);
// takes a pointer to an object content as an argument, sets forwarding address to value 'addr'
size_t set_forward_address(void *obj, size_t addr);
void set_forward_address(void *obj, size_t addr);
// takes a pointer to an object content as an argument, returns whether this object was marked as live
bool is_marked(void *obj);
@ -139,8 +160,8 @@ size_t string_size(size_t len);
// TODO: ask if it is actually so? number of captured elements is actually sz-1 and 1 extra word is code ptr?
// returns number of bytes that are required to allocate closure with 'sz-1' captured values (header included)
size_t closure_size(size_t sz);
// returns number of bytes that are required to allocate s-expression with 'sz' fields (header included)
size_t sexp_size(size_t sz);
// returns number of bytes that are required to allocate s-expression with 'members' fields (header included)
size_t sexp_size(size_t members);
// returns an iterator over object fields, obj is ptr to object header
// (in case of s-exp, it is mandatory that obj ptr is very beginning of the object,
@ -161,4 +182,9 @@ void* get_obj_header_ptr(void *ptr, lama_type type);
void* get_object_content_ptr(void *header_ptr);
void* get_end_of_obj(void *header_ptr);
void *alloc_string(int len);
void *alloc_array(int len);
void *alloc_sexp(int members);
void *alloc_closure(int captured);
#endif

View file

@ -17,7 +17,8 @@ __gc_stack_top: .long 0
.extern gc_test_and_copy_root
.text
__gc_init: movl %ebp, __gc_stack_bottom
__gc_init:
movl %ebp, __gc_stack_bottom
addl $4, __gc_stack_bottom
call __init
ret
@ -60,7 +61,9 @@ __gc_root_scan_stack:
pushl %ebx
pushl %edx
movl __gc_stack_top, %eax
jmp next
// jmp next
cmpl %eax, __gc_stack_bottom
jb returnn
loop:
movl (%eax), %ebx
@ -106,7 +109,7 @@ gc_run_t:
next:
addl $4, %eax
cmpl %eax, __gc_stack_bottom
jne loop
jnb loop
returnn:
movl $0, %eax
popl %edx

File diff suppressed because it is too large Load diff

259
runtime/test_main.c Normal file
View file

@ -0,0 +1,259 @@
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "gc.h"
#include "runtime_common.h"
// function from runtime that maps string to int value
extern int LtagHash (char *s);
extern void* Bsexp (int n, ...);
extern void* Barray (int bn, ...);
extern void* Bstring (void*);
extern void* Bclosure (int bn, void *entry, ...);
extern size_t __gc_stack_top, __gc_stack_bottom;
// Checks the size helpers against the header sizes: an empty object is
// exactly its header (plus the terminating '\0' for strings), and every extra
// element adds sizeof(int) bytes (one byte per character for strings).
void test_correct_structure_sizes(void) {
    // base case: zero elements
    assert((array_size(0) == get_header_size(ARRAY)));
    assert((string_size(0) == get_header_size(STRING) + 1)); // +1 is because of '\0'
    assert((sexp_size(0) == get_header_size(SEXP)));
    assert((closure_size(0) == get_header_size(CLOSURE)));

    // a handful of small sizes
    int k = 1;
    while (k < 20) {
        assert((array_size(k) == get_header_size(ARRAY) + sizeof (int) * k));
        assert((string_size(k) == get_header_size(STRING) + k + 1));
        assert((sexp_size(k) == get_header_size(SEXP) + sizeof (int) * k));
        assert((closure_size(k) == get_header_size(CLOSURE) + sizeof (int) * k));
        ++k;
    }
}
// Runs the checks that do not need an initialized heap or a virtual stack.
void no_gc_tests(void) {
    test_correct_structure_sizes();
}
// unfortunately there is no generic function pointer that can hold pointer to function with arbitrary signature
extern size_t call_runtime_function(void *virt_stack_pointer, void *function_pointer, size_t num_args, ...);
#include "virt_stack.h"
// Prepares a fresh environment for one test case: initializes the GC heap,
// creates and initializes a virtual stack, and registers the (empty) top of
// that stack as the GC's stack bottom.
// Returns the virtual stack; the caller releases it with cleanup_test.
virt_stack* init_test() {
    __init();
    virt_stack *st = vstack_create();
    // vstack_create is a thin malloc wrapper and may return NULL; fail loudly
    // here instead of dereferencing a null pointer inside vstack_init
    assert(st != NULL);
    vstack_init(st);
    __gc_stack_bottom = (size_t) vstack_top(st);
    return st;
}
// Tears down what init_test set up: frees the virtual stack first, then
// shuts the GC down.
void cleanup_test(virt_stack *st) {
    vstack_destruct(st);
    __shutdown();
}
// Forces one collection cycle: points the GC's stack top at the current top
// of the virtual stack, requests a zero-sized allocation through gc_alloc
// (which runs the mark and compact phases), then clears the stack top again.
void force_gc_cycle(virt_stack *st) {
    void *top = vstack_top(st);
    __gc_stack_top = (size_t) top;
    gc_alloc(0);
    __gc_stack_top = 0;
}
// Pushes five boxed integers, allocates the string "abc" through Bstring on
// the virtual stack, and expects the heap snapshot to hold exactly one object.
void test_simple_string_alloc(void) {
    virt_stack *st = init_test();

    int i = 0;
    while (i < 5) {
        vstack_push(st, BOX(i));
        ++i;
    }

    size_t str = call_runtime_function(vstack_top(st), Bstring, 1, "abc");
    vstack_push(st, str);

    enum { N = 10 };
    int ids[N];
    size_t alive = objects_snapshot(ids, N);
    assert((alive == 1));

    cleanup_test(st);
}
// Allocates a one-element array through Barray, keeps it on the virtual
// stack, and expects the heap snapshot to hold exactly one object.
void test_simple_array_alloc(void) {
    virt_stack* st = init_test();

    // allocate array [ BOX(1) ] and push it onto the stack
    size_t arr = call_runtime_function(vstack_top(st), Barray, 2, BOX(1), BOX(1));
    vstack_push(st, arr);

    enum { N = 10 };
    int ids[N];
    size_t alive = objects_snapshot(ids, N);
    assert((alive == 1));

    cleanup_test(st);
}
// Allocates an s-expression with one boxed field through Bsexp, keeps it on
// the virtual stack, and expects the heap snapshot to hold exactly one object.
void test_simple_sexp_alloc(void) {
    virt_stack* st = init_test();

    // equivalent to the runtime call Bsexp(BOX(2), BOX(1), LtagHash("test"))
    size_t sexp_obj = call_runtime_function(vstack_top(st), Bsexp, 3, BOX(2), BOX(1), LtagHash("test"));
    vstack_push(st, sexp_obj);

    enum { N = 10 };
    int ids[N];
    size_t alive = objects_snapshot(ids, N);
    assert((alive == 1));

    cleanup_test(st);
}
void test_simple_closure_alloc(void) {
virt_stack* st = init_test();
// allocate closure with boxed captured value and push it onto the stack
vstack_push(st, call_runtime_function(vstack_top(st), Bclosure, 3, BOX(1), NULL, BOX(1)));
const int N = 10;
int ids[N];
size_t alive = objects_snapshot(ids, N);
assert((alive == 1));
cleanup_test(st);
}
// Allocates one 36-character string through Bstring, keeps it on the virtual
// stack, and expects the heap snapshot to hold exactly one object.
void test_single_object_allocation_with_collection_virtual_stack(void) {
    virt_stack *st = init_test();

    size_t str = call_runtime_function(vstack_top(st), Bstring, 1, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
    vstack_push(st, str);

    enum { N = 10 };
    int ids[N];
    size_t alive = objects_snapshot(ids, N);
    assert((alive == 1));

    cleanup_test(st);
}
// Allocates a string without storing it anywhere, forces a GC cycle, and
// expects the heap snapshot to be empty afterwards.
void test_garbage_is_reclaimed(void) {
    virt_stack *st = init_test();

    // the result is deliberately dropped, so nothing references the string
    call_runtime_function(vstack_top(st), Bstring, 1, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
    force_gc_cycle(st);

    enum { N = 10 };
    int ids[N];
    size_t alive = objects_snapshot(ids, N);
    assert((alive == 0));

    cleanup_test(st);
}
// Allocates a string, keeps it on the virtual stack, forces a GC cycle, and
// expects the object to still be present in the heap snapshot.
void test_alive_are_not_reclaimed(void) {
    virt_stack *st = init_test();

    size_t str = call_runtime_function(vstack_top(st), Bstring, 1, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
    vstack_push(st, str);
    force_gc_cycle(st);

    enum { N = 10 };
    int ids[N];
    size_t alive = objects_snapshot(ids, N);
    assert((alive == 1));

    cleanup_test(st);
}
// Builds one unreferenced string followed by a two-leaf tree (an s-expression
// whose fields are two strings), keeps only the tree root on the virtual
// stack, forces a GC cycle, and checks that exactly three objects remain and
// that their ids are still in increasing order.
void test_small_tree_compaction(void) {
    virt_stack *st = init_test();

    // this one will increase heap size
    call_runtime_function(vstack_top(st), Bstring, 1, "aaaaaaaaaaaaaaaaaaaaaa");

    size_t l = call_runtime_function(vstack_top(st), Bstring, 1, "left-s");
    size_t r = call_runtime_function(vstack_top(st), Bstring, 1, "right-s");
    size_t root = call_runtime_function(vstack_top(st), Bsexp, 4, BOX(3), l, r, LtagHash("tree"));
    vstack_push(st, root);
    force_gc_cycle(st);

    enum { SZ = 10 };
    int ids[SZ];
    size_t alive = objects_snapshot(ids, SZ);
    assert((alive == 3));

    // check that order is indeed preserved
    int i = 0;
    while (i < alive - 1) {
        assert((ids[i] < ids[i + 1]));
        ++i;
    }

    cleanup_test(st);
}
extern size_t cur_id;
// Builds a pseudo-random forest of s-expressions on the virtual stack and
// then forces one GC cycle.
//
// st   - virtual stack used both as root set and as source of field values
// cnt  - number of generation steps to perform (non-positive means none)
// seed - seed passed to srand, so that a run can be reproduced
//
// The first step always pushes BOX(1). Every following step picks two values
// already on the stack, then either allocates a two-field s-expression
// referring to them or produces BOX(1), and with probability 1/2 pushes the
// result onto the stack.
//
// Returns the number of heap objects (values with a clear low bit) that were
// pushed onto the stack, i.e. the number of objects expected to survive.
size_t generate_random_obj_forest(virt_stack *st, int cnt, int seed) {
    srand(seed);
    int cur_sz = 0;
    size_t alive = 0;
    // `cnt > 0` rather than `cnt`: a negative count must not loop until overflow
    while (cnt > 0) {
        --cnt;
        if (cur_sz == 0) {
            vstack_push(st, BOX(1));
            ++cur_sz;
            continue;
        }

        // The two rand() calls are issued in separate statements on purpose:
        // inside a single initializer list their relative order is
        // unspecified, which would make a seeded run compiler-dependent.
        size_t field[2];
        for (int t = 0; t < 2; ++t) {
            size_t pos = rand() % vstack_size(st);
            field[t] = vstack_kth_from_start(st, pos);
        }

        size_t obj;
        if (rand() % 2) {
            obj = call_runtime_function(vstack_top(st), Bsexp, 4, BOX(3), field[0], field[1], LtagHash("test"));
        } else {
            obj = BOX(1);
        }

        // whether object is stored on stack
        if (rand() % 2 != 0) {
            vstack_push(st, obj);
            // only values with a clear low bit are heap objects
            if ((obj & 1) == 0) {
                ++alive;
            }
        }
        ++cur_sz;
    }
    force_gc_cycle(st);
    return alive;
}
// Stress test: generates a random object forest for the given seed, forces a
// collection (done inside generate_random_obj_forest), and checks that the
// number of objects left in the heap equals the number of objects kept on the
// virtual stack and that object ids are still strictly increasing.
void run_stress_test_random_obj_forest(int seed) {
    virt_stack *st = init_test();

    enum { SZ = 10000 };
    size_t expectedAlive = generate_random_obj_forest(st, SZ, seed);

    int ids[SZ];
    size_t alive = objects_snapshot(ids, SZ);
    assert(alive == expectedAlive);

    // check that order is indeed preserved
    // `i + 1 < alive` instead of `i < alive - 1`: alive is unsigned, so the
    // subtraction would wrap around when no object survived and the loop
    // would read far past the end of ids
    for (size_t i = 0; i + 1 < alive; ++i) {
        assert((ids[i] < ids[i + 1]));
    }

    cleanup_test(st);
}
int main(int argc, char ** argv) {
no_gc_tests();
test_simple_string_alloc();
test_simple_array_alloc();
test_simple_sexp_alloc();
test_simple_closure_alloc();
test_single_object_allocation_with_collection_virtual_stack();
test_garbage_is_reclaimed();
test_alive_are_not_reclaimed();
test_small_tree_compaction();
// stress test
for (int s = 0; s < 100; ++s) {
run_stress_test_random_obj_forest(s);
}
}

40
runtime/test_util.s Normal file
View file

@ -0,0 +1,40 @@
# this is equivalent C-signature for this function
# size_t call_runtime_function(void *stack, void *func_ptr, int num_args, ...)
#
# Switches %esp to the supplied (virtual) stack, copies the variadic arguments
# onto it right-to-left, calls func_ptr, then switches back to the original
# stack. The callee's return value is passed through in %eax untouched.
#
# %edi holds the original stack pointer across the call: it is callee-saved,
# so the called function must hand it back unchanged. For the same reason this
# routine has to preserve its own caller's %edi, hence the push/pop pair.

    .globl call_runtime_function
    .type call_runtime_function, @function
call_runtime_function:
    pushl   %ebp
    movl    %esp, %ebp

    # preserve the caller's %edi (callee-saved register)
    pushl   %edi

    # store old stack pointer
    movl    %esp, %edi

    # move esp to point to the virtual stack
    movl    8(%ebp), %esp

    # push arguments onto the stack
    movl    16(%ebp), %ecx          # num_args
    test    %ecx, %ecx
    jz      f_call                  # in case function doesn't have any parameters

    leal    16(%ebp), %eax          # pointer to value BEFORE first argument
    leal    (%eax,%ecx,4), %edx     # pointer to last argument (right-to-left)

push_args_loop:
    pushl   (%edx)
    subl    $4, %edx
    subl    $1, %ecx
    jnz     push_args_loop

    # call the function
f_call:
    movl    12(%ebp), %eax
    call    *%eax

    # restore the old stack pointer
    movl    %edi, %esp

    # restore the caller's %edi saved in the prologue
    popl    %edi

    # pop the old frame pointer and return
    popl    %ebp                    # epilogue
    ret

45
runtime/virt_stack.c Normal file
View file

@ -0,0 +1,45 @@
#include "virt_stack.h"
#include <malloc.h>
// Allocates storage for one virtual stack with malloc and returns it.
// The memory is not initialized here; vstack_init sets the stack up.
// Returns whatever malloc returns, so the result may be NULL.
virt_stack *vstack_create() {
    virt_stack *st = malloc(sizeof *st);
    return st;
}
// Releases a virtual stack previously obtained from vstack_create.
void vstack_destruct(virt_stack *st)
{
    free(st);
}
// Resets the stack to the empty state: the cursor is placed one past the last
// usable slot (the stack grows towards index 0) and that extra slot is zeroed.
void vstack_init(virt_stack *st) {
    st->cur = RUNTIME_VSTACK_SIZE;
    st->buf[RUNTIME_VSTACK_SIZE] = 0;
}
// Pushes value onto the stack: the cursor moves one slot towards index 0 and
// the value is stored there. Pushing onto a full stack (cursor already at 0)
// trips an assertion.
void vstack_push(virt_stack *st, size_t value) {
    if (st->cur == 0) {
        assert(0);
    }
    size_t slot = st->cur - 1;
    st->buf[slot] = value;
    st->cur = slot;
}
// Removes the top value from the stack and returns it. Popping from an empty
// stack (cursor at RUNTIME_VSTACK_SIZE) trips an assertion.
size_t vstack_pop(virt_stack *st) {
    if (st->cur == RUNTIME_VSTACK_SIZE) {
        assert(0);
    }
    return st->buf[st->cur++];
}
// Returns the address of the slot the cursor currently designates, i.e. the
// most recently pushed value (or the extra slot past the end when empty).
void* vstack_top(virt_stack *st) {
    return &st->buf[st->cur];
}
// Returns the number of values currently stored on the stack.
size_t vstack_size(virt_stack *st) {
    size_t used_slots = RUNTIME_VSTACK_SIZE - st->cur;
    return used_slots;
}
// Returns the k-th value counted from the start of the stack, where k == 0 is
// the value that was pushed first. Asserts that k is smaller than the number
// of stored values.
size_t vstack_kth_from_start(virt_stack *st, size_t k) {
    assert(vstack_size(st) > k);
    size_t index = RUNTIME_VSTACK_SIZE - 1 - k;
    return st->buf[index];
}

33
runtime/virt_stack.h Normal file
View file

@ -0,0 +1,33 @@
//
// Created by egor on 24.04.23.
//
#ifndef LAMA_RUNTIME_VIRT_STACK_H
#define LAMA_RUNTIME_VIRT_STACK_H

// Number of values a virtual stack can hold.
#define RUNTIME_VSTACK_SIZE 100000

#include <stddef.h>
#include <assert.h>

// Fixed-capacity stack of machine words that grows towards lower indices.
// buf has one slot more than the capacity: cur == RUNTIME_VSTACK_SIZE
// designates that extra slot and means "empty"; cur == 0 means "full".
typedef struct {
    size_t buf[RUNTIME_VSTACK_SIZE + 1];
    size_t cur;
} virt_stack;

// Allocates an uninitialized stack; call vstack_init before using it.
virt_stack *vstack_create(void);

// Frees a stack obtained from vstack_create.
void vstack_destruct(virt_stack *st);

// Puts the stack into the empty state.
void vstack_init(virt_stack *st);

// Pushes value; asserts when the stack is already full.
void vstack_push(virt_stack *st, size_t value);

// Pops and returns the top value; asserts when the stack is empty.
size_t vstack_pop(virt_stack *st);

// Returns the address of the slot currently designated by the cursor.
void* vstack_top(virt_stack *st);

// Returns the number of values currently stored.
size_t vstack_size(virt_stack *st);

// Returns the k-th value counted from the first one pushed (k == 0);
// asserts that k is smaller than the number of stored values.
size_t vstack_kth_from_start(virt_stack *st, size_t k);

#endif //LAMA_RUNTIME_VIRT_STACK_H