some refactorings, analyzer global var publics fix, include publics into merged bytefile

This commit is contained in:
ProgramSnail 2025-05-11 12:34:13 +03:00
parent 1f42c9ff4b
commit b1ab8ee4b3
8 changed files with 73 additions and 141 deletions

View file

@ -8,6 +8,9 @@ extern "C" {
#include "utils.h"
}
// Name prefix carried by public symbols that denote global variables
// (for example "global_sysargs").
// The characters live in a constexpr array so that the length below is a
// compile-time constant instead of the result of a runtime strlen() executed
// during static initialization.
static constexpr char GLOBAL_VAR_TAG_CHARS[] = "global_";
static constexpr const char *GLOBAL_VAR_TAG = GLOBAL_VAR_TAG_CHARS;
// Length of GLOBAL_VAR_TAG, excluding the terminating NUL.
static constexpr size_t GLOBAL_VAR_TAG_LEN = sizeof(GLOBAL_VAR_TAG_CHARS) - 1;
enum class Cmd : int8_t {
BINOP,
CONST,

View file

@ -48,10 +48,6 @@ static inline void **s_peek() {
s_failure(&s, "peek: empty function stack");
}
#endif
#ifdef DEBUG_VERSION
printf("--> peek\n");
#endif
return (void **)__gc_stack_top;
}
@ -62,9 +58,6 @@ static inline void s_push(void *val) {
if ((void **)__gc_stack_top == s.stack) {
s_failure(&s, "stack overflow");
}
#endif
#ifdef DEBUG_VERSION
printf("--> push\n");
#endif
__gc_stack_top -= sizeof(void *);
*(void **)__gc_stack_top = val;
@ -79,9 +72,6 @@ static inline void s_pushn_nil(size_t n) {
if ((void **)__gc_stack_top + (aint)n - 1 <= s.stack) {
s_failure(&s, "pushn: stack overflow");
}
#endif
#ifdef DEBUG_VERSION
printf("--> push %zu\n", n);
#endif
for (size_t i = 0; i < n; ++i) {
__gc_stack_top -= sizeof(void *);
@ -97,9 +87,6 @@ static inline void *s_pop() {
if (s.fp != NULL && (void **)__gc_stack_top == f_locals(s.fp)) {
s_failure(&s, "pop: empty function stack");
}
#endif
#ifdef DEBUG_VERSION
printf("--> pop\n");
#endif
void *value = *(void **)__gc_stack_top;
__gc_stack_top += sizeof(void *);
@ -116,9 +103,6 @@ static inline void s_popn(size_t n) {
if (s.fp != NULL && (void **)__gc_stack_top + (aint)n - 1 >= f_locals(s.fp)) {
s_failure(&s, "popn: empty function stack");
}
#endif
#ifdef DEBUG_VERSION
printf("--> popn %zu\n", n);
#endif
__gc_stack_top += n * sizeof(void *);
}
@ -180,11 +164,6 @@ static inline void s_rotate_n(size_t n) {
// location before / after new frame added
static inline void s_enter_f(char *rp, bool is_closure_call, auint args_sz,
auint locals_sz) {
#ifdef DEBUG_VERSION
printf("-> %i args sz\n", args_sz);
printf("-> %i locals sz\n", locals_sz);
#endif
// check that params count is valid
if ((void **)__gc_stack_top + (aint)args_sz - (is_closure_call ? 0 : 1) >=
s_top()) {
@ -232,15 +211,9 @@ static inline void s_exit_f() {
// drop stack entities, locals, frame
size_t to_pop = f_args(s.fp) - (void **)__gc_stack_top;
s.fp = (struct Frame *)f_prev_fp(&frame);
#ifdef DEBUG_VERSION
printf("-> %zu to pop\n", to_pop);
#endif
s_popn(to_pop);
// drop args
#ifdef DEBUG_VERSION
printf("-> + %zu to pop\n", f_args_sz(&frame));
#endif
s_popn(f_args_sz(&frame));
if (frame.closure) {
@ -321,9 +294,6 @@ static inline void **var_by_category(enum VarCategory category, size_t id) {
}
data *d = TO_DATA(s.fp->closure);
int count = get_len(d) - 1;
#ifdef DEBUG_VERSION
printf("id is %i, count is %i\n", id, count);
#endif
if ((int64_t)id >= count) {
s_failure(&s,
"can't read arguments: too big id"); //, %i >= %ul", id, count);

View file

@ -54,6 +54,12 @@ static inline size_t get_public_name_offset_unsafe(const Bytefile *bf,
return bf->public_ptr[i * 2];
}
/* Stores `offset` as the name offset of the i-th public symbol.
 * Every public entry takes two consecutive slots of `bf->public_ptr`; the
 * name offset is kept in the first of them (index 2 * i).
 * "unsafe": `i` is not range-checked here.
 * Note that `bf` is const-qualified, yet the table it points to is written. */
static inline void set_public_name_offset_unsafe(size_t offset,
                                                 const Bytefile *bf, size_t i) {
  const size_t name_slot = 2 * i;
  bf->public_ptr[name_slot] = offset;
}
/* Gets a name for a public symbol */
static inline const char *get_public_name_unsafe(const Bytefile *bf, size_t i) {
return get_string_unsafe(bf, get_public_name_offset_unsafe(bf, i));
@ -64,6 +70,12 @@ static inline size_t get_public_offset_unsafe(const Bytefile *bf, size_t i) {
return bf->public_ptr[i * 2 + 1];
}
/* Stores `offset` as the offset of the i-th public symbol.
 * Every public entry takes two consecutive slots of `bf->public_ptr`; the
 * symbol offset is kept in the second of them (index 2 * i + 1).
 * "unsafe": `i` is not range-checked here.
 * Note that `bf` is const-qualified, yet the table it points to is written. */
static inline void set_public_offset_unsafe(size_t offset, const Bytefile *bf,
                                            size_t i) {
  const size_t offset_slot = 2 * i + 1;
  bf->public_ptr[offset_slot] = offset;
}
// read from ip
static inline void ip_write_int_unsafe(char *ip, int32_t x) {

View file

@ -93,7 +93,10 @@ void analyze(Bytefile *bf, std::vector<size_t> &&add_publics) {
// add publics
to_visit_func.reserve(bf->public_symbols_number + to_visit_func.size());
// Seed the worklist with public *functions* only: a public whose name starts
// with GLOBAL_VAR_TAG is skipped and never queued as a function to visit.
// Each remaining public is pushed exactly once.
// strncmp (not memcmp) is used so that a name shorter than the tag is never
// read past its terminating NUL.
for (size_t i = 0; i < bf->public_symbols_number; ++i) {
  const char *name = get_public_name_unsafe(bf, i);
  if (strncmp(name, GLOBAL_VAR_TAG, GLOBAL_VAR_TAG_LEN) != 0) {
    func_to_visit_push(get_public_offset_safe(bf, i));
  }
}
if (to_visit_func.size() == 0) {

View file

@ -41,30 +41,13 @@ int main(int argc, char **argv) {
failure("no file name provided");
}
#ifdef DEBUG_VERSION
std::cout << "- read code file" << std::endl;
#endif
Bytefile *f = read_file(argv[2]);
if (do_print) {
#ifdef DEBUG_VERSION
std::cout << "- print code file" << std::endl;
#endif
print_file(*f, std::cout);
}
if (do_verification || do_interpretation) {
#ifdef DEBUG_VERSION
std::cout << "- init stack" << std::endl;
#endif
size_t stack[STACK_SIZE];
run_init(stack);
#ifdef DEBUG_VERSION
std::cout << "- run with imports" << std::endl;
#endif
f = run_with_imports(f, argc - 2, argv + 2, do_verification);
}

View file

@ -74,18 +74,9 @@ void set_argc_argv(int argc, char **argv) {
s_push(args_array);
// NOTE: V,sysargs from Std
*var_by_category(VAR_GLOBAL, 0) = args_array;
#ifdef DEBUG_VERSION
print_stack(&s);
printf("- state init done\n");
#endif
}
static inline void call_Bsexp(const char* name, size_t args_count) {
#ifdef DEBUG_VERSION
printf("tag hash is %i, n is %i\n", UNBOX(LtagHash((char *)name)),
args_count);
#endif
s_push((void *)LtagHash((char *)name));
s_rotate_n(args_count + 1);
@ -106,9 +97,6 @@ static inline void call_Barray(size_t elem_count) {
}
void call_builtin(uint builtin_id, uint args_count) {
#ifdef DEBUG_VERSION
printf("builtin id: %zu\n", builtin_id);
#endif
#ifndef WITH_CHECK
if (builtin_id >= BUILTIN_NONE) {
s_failure(&s, "invalid builtin");
@ -123,10 +111,6 @@ void call_builtin(uint builtin_id, uint args_count) {
}
void run_main(Bytefile* bf, int argc, char **argv) {
#ifdef DEBUG_VERSION
printf("--- init state ---\n");
#endif
prepare_state(bf, &s);
void *buffer[BUFFER_SIZE];
@ -438,12 +422,6 @@ void run_main(Bytefile* bf, int argc, char **argv) {
case CMD_CTRL_TAG: { // TAG %s %d
const char *name = ip_read_string(&s.ip);
aint args_count = ip_read_int(&s.ip);
#ifdef DEBUG_VERSION
printf("tag hash is %i, n is %i, peek is %i, unboxed: %li\n",
UNBOX(LtagHash((char *)name)), args_count, s_peek(&s), UNBOXED(s_peek(&s)));
#endif
s_push_i(Btag(s_pop(), LtagHash((char *)name), BOX(args_count)));
break;
}

View file

@ -1,5 +1,4 @@
#include <cstring>
#include <iostream>
extern "C" {
#include "interpreter.h"
#include "module_manager.h"
@ -17,9 +16,6 @@ extern "C" {
#include <unordered_map>
#include <vector>
// Name prefix carried by public symbols that denote global variables
// (for example "global_sysargs").
// The characters live in a constexpr array so that the length below is a
// compile-time constant instead of the result of a runtime strlen() executed
// during static initialization.
static constexpr char GLOBAL_VAR_TAG_CHARS[] = "global_";
static constexpr const char *GLOBAL_VAR_TAG = GLOBAL_VAR_TAG_CHARS;
// Length of GLOBAL_VAR_TAG, excluding the terminating NUL.
static constexpr size_t GLOBAL_VAR_TAG_LEN = sizeof(GLOBAL_VAR_TAG_CHARS) - 1;
template <size_t N, bool return_value, typename... Args>
requires(N == 0)
void call_func(void (*f)(), size_t n, Args... args) {
@ -65,6 +61,16 @@ struct Offsets {
};
void rewrite_code_with_offsets(Bytefile *bytefile, const Offsets &offsets) {
// rewrite publics
for (size_t i = 0; i < bytefile->public_symbols_number; ++i) {
set_public_name_offset_unsafe(get_public_name_offset_unsafe(bytefile, i) +
offsets.strings,
bytefile, i);
set_public_offset_unsafe(
get_public_offset_unsafe(bytefile, i) + offsets.code, bytefile, i);
}
// rewrite code
char *ip = bytefile->code_ptr;
while (ip - bytefile->code_ptr < bytefile->code_size) {
char *instr_ip = ip;
@ -143,10 +149,8 @@ void print_subst_to_bytes(BuiltinSubst subst, char **loc) {
using BuiltinSubstMap = std::map<BuiltinSubst,
/*generated builtin offset*/ size_t>;
// std::vector<size_t> /*subst offsets*/>;
// TODO: shared iteration over substs in functions
void add_subst_builtin_offsets(BuiltinSubstMap &subst_map, size_t code_offset,
void add_subst_builtin_offsets(BuiltinSubstMap &subst_map,
const Bytefile *bytefile) {
for (size_t i = 0; i < bytefile->substs_area_size; ++i) {
if (i + sizeof(uint32_t) >= bytefile->substs_area_size) {
@ -173,7 +177,7 @@ void add_subst_builtin_offsets(BuiltinSubstMap &subst_map, size_t code_offset,
char *ip = bytefile->code_ptr + offset;
ip_read_int_unsafe(&ip); // read ptr placeholder
uint32_t args_count = ip_read_int_unsafe(&ip); // read args count
subst_map[{builtin, args_count}] = 0; // .push_back(offset + code_offset);
subst_map[{builtin, args_count}] = 0;
}
}
}
@ -237,20 +241,42 @@ void subst_in_code(Bytefile *bytefile,
}
}
// Returns the offsets that apply before any bytefile has been merged.
// Everything starts at zero except `globals`, which starts at 1
// (NOTE: V,sysargs from Std).
Offsets initial_offsets() {
  Offsets start{};
  start.strings = 0;
  start.globals = 1; // NOTE: V,sysargs from, Std
  start.code = 0;
  start.publics_num = 0;
  return start;
}
// Computes the total section sizes of the merged bytefile.
// Starts from initial_offsets() and adds, for every input bytefile, its
// string table size, global area size, code size and number of public
// symbols.
// Returns the accumulated totals.
Offsets calc_merge_sizes(const std::vector<Bytefile *> &bytefiles) {
  Offsets sizes = initial_offsets();
  for (const Bytefile *bytefile : bytefiles) {
    sizes.strings += bytefile->stringtab_size;
    sizes.globals += bytefile->global_area_size;
    sizes.code += bytefile->code_size;
    sizes.publics_num += bytefile->public_symbols_number;
  }
  return sizes;
}
// Fills in the header fields of `result` for a merged bytefile described by
// `sizes`.
//  - code / string table / global area sizes and the public symbol count are
//    taken from `sizes`;
//  - substs area size, imports number and main offset are set to zero;
//  - the section pointers are laid out inside `result->buffer` in the order
//    [publics][string table][code];
//  - imports, globals and substs pointers are cleared.
void init_result_bytefile(Bytefile *result, const Offsets &sizes) {
  // sizes and counters
  result->code_size = sizes.code;
  result->stringtab_size = sizes.strings;
  result->global_area_size = sizes.globals;
  result->public_symbols_number = sizes.publics_num;
  result->substs_area_size = 0;
  result->imports_number = 0;
  result->main_offset = 0;

  // sections that live in the buffer, one right after another
  result->public_ptr = (int *)result->buffer;
  char *const publics_begin = (char *)result->public_ptr;
  result->string_ptr = publics_begin + calc_publics_size(sizes.publics_num);
  result->code_ptr = result->string_ptr + result->stringtab_size;

  // sections that are not stored in the merged file
  result->imports_ptr = nullptr;
  result->global_ptr = nullptr;
  result->substs_ptr = nullptr;
}
struct MergeResult {
Bytefile *bf;
std::vector<size_t> main_offsets;
@ -262,12 +288,8 @@ MergeResult merge_files(std::vector<Bytefile *> &&bytefiles) {
// find all builtin variations and extract them
BuiltinSubstMap builtins_map;
{
size_t code_offset = 0;
for (size_t i = 0; i < bytefiles.size(); ++i) {
add_subst_builtin_offsets(builtins_map, code_offset, bytefiles[i]);
code_offset += bytefiles[i]->code_size;
}
for (const auto &bytefile : bytefiles) {
add_subst_builtin_offsets(builtins_map, bytefile);
}
auto [builtins_code, builtins_code_size] =
gen_builtins(sizes.code, builtins_map);
@ -275,17 +297,15 @@ MergeResult merge_files(std::vector<Bytefile *> &&bytefiles) {
Bytefile *result =
(Bytefile *)malloc(sizeof(Bytefile) + sizes.strings + sizes.code +
public_symbols_size); // globals are on the stack
public_symbols_size); // NOTE: globals are on the stack
// collect publics
// TODO: add publics + updat name offsets too ?())
std::unordered_map<std::string, size_t> publics;
std::vector<size_t> main_offsets;
// NOTE: V,sysargs from, Std
publics.insert({"global_sysargs", 0});
{
// NOTE: V,sysargs from Std
publics.insert({"global_sysargs", 0});
size_t code_offset = 0;
size_t globals_offset = 1; // NOTE: V,sysargs from, Std
for (size_t i = 0; i < bytefiles.size(); ++i) {
@ -310,27 +330,10 @@ MergeResult merge_files(std::vector<Bytefile *> &&bytefiles) {
}
// init result
result->code_size = sizes.code;
result->stringtab_size = sizes.strings;
result->global_area_size = sizes.globals;
result->substs_area_size = 0;
result->imports_number = 0;
result->public_symbols_number =
0; // sizes.publics_num; // TODO: correctly set and update publics
result->main_offset = 0; // TODO: save al main offsets in some way (?)
result->public_ptr = (int *)result->buffer;
result->string_ptr = (char *)result->public_ptr + public_symbols_size;
result->code_ptr = result->string_ptr + result->stringtab_size;
result->imports_ptr = NULL;
result->global_ptr = NULL;
result->substs_ptr = NULL;
init_result_bytefile(result, sizes);
// update & merge code segments
Offsets offsets{.strings = 0,
.globals = 1, // NOTE: V,sysargs from, Std
.code = 0,
.publics_num = 0};
Offsets offsets = initial_offsets();
for (size_t i = 0; i < bytefiles.size(); ++i) {
rewrite_code_with_offsets(bytefiles[i], offsets);
subst_in_code(bytefiles[i], publics, builtins_map);
@ -342,18 +345,15 @@ MergeResult merge_files(std::vector<Bytefile *> &&bytefiles) {
bytefiles[i]->stringtab_size);
memcpy(result->code_ptr + offsets.code, bytefiles[i]->code_ptr,
bytefiles[i]->code_size);
// memcpy((char *)result->public_ptr + publics_offset,
// (char *)bytefiles[i]->public_ptr,
// calc_publics_size(
// bytefiles[i]->public_symbols_number)); // TODO: recalc
// publics:
// // offsets, strings
memcpy((char *)result->public_ptr + publics_offset,
(char *)bytefiles[i]->public_ptr,
calc_publics_size(bytefiles[i]->public_symbols_number));
// update offsets
offsets.strings += bytefiles[i]->stringtab_size;
offsets.globals += bytefiles[i]->global_area_size;
offsets.code += bytefiles[i]->code_size;
// offsets.publics_num += bytefiles[i]->public_symbols_number;
offsets.publics_num += bytefiles[i]->public_symbols_number;
free(bytefiles[i]);
}
@ -375,7 +375,7 @@ MergeResult merge_files(std::vector<Bytefile *> &&bytefiles) {
// ---
Bytefile *path_mod_load(const char *name, std::filesystem::path &&path) {
Bytefile *path_mod_load(std::filesystem::path &&path) {
return read_file(path.c_str());
}
@ -388,12 +388,12 @@ Bytefile *mod_load(const char *name) {
std::string full_name = std::string{name} + ".bc";
if (std::filesystem::exists(full_name)) {
return path_mod_load(name, full_name);
return path_mod_load(full_name);
}
for (const auto &dir_path : search_paths) {
auto path = dir_path / full_name;
if (std::filesystem::exists(path)) {
return path_mod_load(name, std::move(path));
return path_mod_load(std::move(path));
}
}
@ -405,16 +405,10 @@ Bytefile *mod_load(const char *name) {
void mod_load_rec(Bytefile *mod,
std::unordered_map<std::string, Bytefile *> &loaded,
std::vector<Bytefile *> &loaded_ord) {
#ifdef DEBUG_VERSION
printf("- run mod rec, %i imports\n", mod->imports_number);
#endif
for (size_t i = 0; i < mod->imports_number; ++i) {
const char *import_str = get_import_safe(mod, i);
if (loaded.count(import_str) == 0 &&
strcmp(import_str, "Std") != 0) { // not loaded
#ifdef DEBUG_VERSION
printf("- mod load <%s>\n", import_str);
#endif
Bytefile *import_mod = mod_load(import_str);
if (import_mod == NULL) {
failure("module <%s> not found\n", import_str);
@ -434,13 +428,7 @@ MergeResult load_with_imports(Bytefile *root, bool do_verification) {
MergeResult result = merge_files(std::move(loaded_ord));
if (do_verification) {
#ifdef DEBUG_VERSION
printf("main offsets count: %zu\n", result.main_offsets.size());
#endif
analyze(result.bf, std::move(result.main_offsets));
#ifdef DEBUG_VERSION
std::cout << "verification done" << std::endl;
#endif
}
return result;
}
@ -540,9 +528,7 @@ BUILTIN id_by_builtin(const char *name) {
special assembly functions in `printf.S`. We additionally pass them the amount
of arguments to unbox using register r11. */
void run_stdlib_func(BUILTIN id, size_t args_count) {
// std::cout << "RUN BUILTIN: " << id << '\n'; // TODO: TMP
void *ret = NULL;
// TODO: deal with right pointers, etc.
switch (id) {
case BUILTIN_Luppercase:
ret = (void *)Luppercase(*s_nth(0));

View file

@ -31,7 +31,6 @@ void init_state(struct State* s, void** stack) {
__gc_stack_top = __gc_stack_bottom;
#ifdef DEBUG_VERSION
print_stack(s);
printf("- state init done\n");
#endif
}
@ -52,7 +51,6 @@ void prepare_state(Bytefile* bf, struct State* s) {
s->instr_ip = s->ip;
#ifdef DEBUG_VERSION
print_stack(s);
printf("- mod state init done\n");
#endif
}
@ -62,7 +60,6 @@ void push_globals(struct State *s) {
s_pushn_nil(s->bf->global_area_size);
#ifdef DEBUG_VERSION
print_stack(s);
printf("- state globals init done\n");
#endif
}