modules: 'rebase' from byterun_with_modules, initial impl, without verification

This commit is contained in:
ProgramSnail 2025-01-08 23:52:39 +03:00
parent 73d3fbc388
commit eb1ddfa447
14 changed files with 420 additions and 115 deletions

View file

@ -315,6 +315,9 @@ void analyze(Bytefile *bf) {
break;
case Cmd::LINE:
break;
case Cmd::CALLF: // FIXME TODO
ip_failure(saved_current_ip, bf, "CALLF analysis is not implemented yet");
break;
case Cmd::PATT:
--current_stack_depth;
if (l == CMD_PATT_STR) {

View file

@ -6,7 +6,9 @@
extern "C" {
#include "../../runtime/runtime.h"
#include "interpreter.h"
#include "module_manager.h"
#include "parser.h"
#include "types.h"
#include "utils.h"
}
@ -40,9 +42,6 @@ int main(int argc, char **argv) {
}
Bytefile *f = read_file(argv[2]);
#ifdef DEBUG_VERSION
dump_file(stdout, f);
#endif
if (do_print) {
print_file(*f, std::cout);
}
@ -50,12 +49,19 @@ int main(int argc, char **argv) {
analyze(f);
}
if (do_interpretation) {
run(f, argc - 2, argv + 2);
// TODO FIXME: add all loaded modules verification
size_t stack[STACK_SIZE];
run_init(stack);
uint main_mod_id = mod_add(f);
run_mod_rec(main_mod_id, argc - 1, argv + 1);
}
// TODO: remove, before modules
// // NOTE: not used for now
// // free(f->global_ptr);
free(f);
// free(f);
return 0;
}

View file

@ -3,6 +3,7 @@
#include "../../runtime/gc.h"
#include "../../runtime/runtime.h"
#include "module_manager.h"
#include "runtime_externs.h"
#include "stack.h"
#include "types.h"
@ -53,17 +54,11 @@ static inline const char *ip_read_string(char **ip) {
const size_t BUFFER_SIZE = 1000;
void run(Bytefile *bf, int argc, char **argv) {
size_t stack[STACK_SIZE];
void *buffer[BUFFER_SIZE];
construct_state(bf, &s, (void **)stack);
/* Prepares the global interpreter state `s` to use `stack` as its
 * stack storage; must be called before any module is run. */
void run_init(size_t *stack) {
  void **stack_storage = (void **)stack;
  init_state(&s, stack_storage);
}
#ifdef DEBUG_VERSION
printf("--- interpreter run ---\n");
#endif
// argc, argv
{
void run_prepare_exec(int argc, char **argv) {
s_push_i(BOX(argc));
for (size_t i = 0; i < argc; ++i) {
s_push(Bstring((aint *)&argv[argc - i - 1]));
@ -72,21 +67,50 @@ void run(Bytefile *bf, int argc, char **argv) {
void *argv_elem = s_pop();
s_popn(argc);
s_push(argv_elem);
}
// TODO: use unsafe, move checks to verifier (?)
/*
 * Runs module `mod_id` after running every import of it that has not been
 * loaded yet.
 *
 * For each import that is neither "Std" nor already known to the module
 * manager, the module is loaded with mod_load() and executed recursively
 * before the importing module itself. `failure` is reported when an import
 * cannot be found.
 *
 * argc/argv are forwarded unchanged to every module that is run.
 */
void run_mod_rec(uint mod_id, int argc, char **argv) {
  Bytefile* mod = mod_get(mod_id); // TODO: pass as param ??

  for (size_t i = 0; i < mod->imports_number; ++i) {
    const char *import_name = get_import_safe(mod, i);

    // already loaded modules and "Std" are not loaded / run here
    if (find_mod_loaded(import_name) >= 0 || strcmp(import_name, "Std") == 0) {
      continue;
    }

    int32_t import_mod = mod_load(import_name);
    if (import_mod < 0) {
      failure("module %s not found\n", import_name);
    }

    // Recurse into the module that was just loaded. Recursing into `mod_id`
    // would skip the import (it is now marked as loaded) and execute the
    // importing module one extra time per import.
    run_mod_rec((uint)import_mod, argc, argv);
  }

  init_mod_state(mod_id, &s);
  init_mod_state_globals(&s);
  run_prepare_exec(argc, argv); // args for module main
  run_mod(mod_id, argc, argv);
}
void run_mod(uint mod_id, int argc, char **argv) {
#ifdef DEBUG_VERSION
printf("- loop start\n");
printf("--- module init state ---\n");
#endif
init_mod_state(mod_id, &s);
void *buffer[BUFFER_SIZE];
#ifdef DEBUG_VERSION
printf("--- module run begin ---\n");
#endif
do {
bool call_happened = false;
#ifndef WITH_CHECK
if (s.ip >= bf->code_ptr + bf->code_size) {
if (s.ip >= s.bf->code_ptr + s.bf->code_size) {
s_failure(&s, "instruction pointer is out of range (>= size)");
}
if (s.ip < bf->code_ptr) {
if (s.ip < s.bf->code_ptr) {
s_failure(&s, "instruction pointer is out of range (< 0)");
}
#endif
@ -94,9 +118,9 @@ void run(Bytefile *bf, int argc, char **argv) {
s.instr_ip = s.ip;
uint8_t x = ip_read_byte(&s.ip), h = (x & 0xF0) >> 4, l = x & 0x0F;
// #ifdef DEBUG_VERSION
printf("0x%.8x: %s\n", s.ip - bf->code_ptr - 1, read_cmd(s.ip - 1, s.bf));
// #endif
#ifdef DEBUG_VERSION
printf("0x%.8x: %s\n", s.ip - s.bf->code_ptr - 1, read_cmd(s.ip - 1, s.bf));
#endif
switch (h) {
case CMD_EXIT:
@ -193,11 +217,11 @@ void run(Bytefile *bf, int argc, char **argv) {
uint jmp_p = ip_read_int(&s.ip);
#ifndef WITH_CHECK
if (jmp_p >= bf->code_size) {
if (jmp_p >= s.bf->code_size) {
s_failure(&s, "jump out of file");
}
#endif
s.ip = bf->code_ptr + jmp_p;
s.ip = s.bf->code_ptr + jmp_p;
break;
}
@ -270,12 +294,12 @@ void run(Bytefile *bf, int argc, char **argv) {
uint jmp_p = ip_read_int(&s.ip);
#ifndef WITH_CHECK
if (jmp_p >= bf->code_size) {
if (jmp_p >= s.bf->code_size) {
s_failure(&s, "jump out of file");
}
#endif
if (UNBOX(s_pop_i()) == 0) {
s.ip = bf->code_ptr + jmp_p;
s.ip = s.bf->code_ptr + jmp_p;
}
break;
}
@ -284,12 +308,12 @@ void run(Bytefile *bf, int argc, char **argv) {
uint jmp_p = ip_read_int(&s.ip);
#ifndef WITH_CHECK
if (jmp_p >= bf->code_size) {
if (jmp_p >= s.bf->code_size) {
s_failure(&s, "jump out of file");
}
#endif
if (UNBOX(s_pop_i()) != 0) {
s.ip = bf->code_ptr + jmp_p;
s.ip = s.bf->code_ptr + jmp_p;
}
break;
}
@ -307,8 +331,8 @@ void run(Bytefile *bf, int argc, char **argv) {
s_failure(&s, "begin should only be called after call");
}
#endif
s_enter_f(s.call_ip /*ip from call*/, s.is_closure_call, args_sz,
locals_sz);
s_enter_f(s.call_ip /*ip from call*/, s.call_module_id,
s.is_closure_call, args_sz, locals_sz);
#ifndef WITH_CHECK
if ((void **)__gc_stack_top + (aint)max_additional_stack_sz - 1 <= s.stack) {
s_failure(&s, "stack owerflow");
@ -331,8 +355,8 @@ void run(Bytefile *bf, int argc, char **argv) {
s_failure(&s, "begin should only be called after call");
}
#endif
s_enter_f(s.call_ip /*ip from call*/, s.is_closure_call, args_sz,
locals_sz);
s_enter_f(s.call_ip /*ip from call*/, s.call_module_id,
s.is_closure_call, args_sz, locals_sz);
#ifdef WITH_CHECK
if ((void **)__gc_stack_top + (aint)max_additional_stack_sz - 1 <= s.stack) {
s_failure(&s, "stack owerflow");
@ -354,11 +378,11 @@ void run(Bytefile *bf, int argc, char **argv) {
s_push(*var_ptr);
}
#ifndef WITH_CHECK
if (call_offset >= bf->code_size) {
if (call_offset >= s.bf->code_size) {
s_failure(&s, "jump out of file");
}
#endif
s_push(bf->code_ptr + call_offset);
s_push(s.bf->code_ptr + call_offset);
void *closure = Bclosure((aint *)__gc_stack_top, BOX(args_count));
// printf("args is %li, count is %li\n", args_count, get_len(TO_DATA(closure)));
@ -374,6 +398,7 @@ void run(Bytefile *bf, int argc, char **argv) {
call_happened = true;
s.is_closure_call = true;
s.call_ip = s.ip;
s.call_module_id = s.current_module_id;
s.ip = (char*)Belem(*s_nth(args_count), BOX(0)); // use offset instead ??
break;
@ -386,13 +411,14 @@ void run(Bytefile *bf, int argc, char **argv) {
call_happened = true;
s.is_closure_call = false;
s.call_ip = s.ip;
s.call_module_id = s.current_module_id;
#ifndef WITH_CHECK
if (call_p >= bf->code_size) {
if (call_p >= s.bf->code_size) {
s_failure(&s, "jump out of file");
}
#endif
s.ip = bf->code_ptr + call_p;
s.ip = s.bf->code_ptr + call_p;
break;
}
@ -425,6 +451,31 @@ void run(Bytefile *bf, int argc, char **argv) {
// maybe some metainfo should be collected
break;
case CMD_CTRL_CALLF: { // CALLF %s %d // call external function
const char *call_func_name = ip_read_string(&s.ip);
ip_read_int(&s.ip); // args count
// TODO: jump to other module, save ret module
struct ModSearchResult func = mod_search_pub_symbol(call_func_name);
if (func.mod_file == NULL) {
s_failure(&s, "external function not found");
}
call_happened = true;
s.is_closure_call = false;
s.call_ip = s.ip;
s.call_module_id = s.current_module_id;
s.current_module_id = func.mod_id;
s.bf = func.mod_file;
if (func.symbol_offset >= s.bf->code_size) {
s_failure(&s, "jump out of file");
}
s.ip = s.bf->code_ptr + func.symbol_offset;
break;
}
default:
s_failure(&s, "invalid opcode"); // %d-%d\n", h, l);
}
@ -493,7 +544,7 @@ void run(Bytefile *bf, int argc, char **argv) {
// s_rotate_n(elem_count);
void *array =
Barray((aint *)opr_buffer,
BOX(elem_count)); // NOTE: not shure if elems should be
BOX(elem_count)); // NOTE: not sure if elems should be
// added
// void *array = Barray((aint *)s_peek(), BOX(elem_count));
@ -513,6 +564,7 @@ void run(Bytefile *bf, int argc, char **argv) {
if (!call_happened) {
s.is_closure_call = false;
s.call_ip = NULL;
s.call_module_id = 0;
}
if (s.fp == NULL) {
@ -524,7 +576,8 @@ void run(Bytefile *bf, int argc, char **argv) {
} while (1);
stop:;
#ifdef DEBUG_VERSION
printf("--- run end ---\n");
printf("--- module run end ---\n");
#endif
cleanup_state(&s);
}
/* Releases whatever cleanup_state() tears down for the global
 * interpreter state `s`. */
void run_cleanup() {
  cleanup_state(&s);
}

View file

@ -0,0 +1,113 @@
extern "C" {
#include "module_manager.h"
#include "utils.h"
}
#include "parser.hpp"
#include <filesystem>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>
// Location of a public symbol: which registered module exports it and where.
struct ModSymbolPos {
// id of the exporting module (its index in ModuleManager::modules)
uint32_t mod_id;
// value reported by get_public_offset_safe() for the symbol
size_t offset;
};
// Registry of all bytecode modules known to the interpreter.
struct ModuleManager {
// module name -> module id, filled only for modules registered with a name
std::unordered_map<std::string, uint32_t> loaded_modules;
// public symbol name -> module / offset that provides it
std::unordered_map<std::string, ModSymbolPos> public_symbols_mods;
// registered modules; a module id is an index into this vector
std::vector<Bytefile *> modules;
// extra directories mod_load() probes for "<name>.bc"
std::vector<std::filesystem::path> search_paths;
};
// single file-local registry instance used by all functions below
static ModuleManager manager;
// Registers `module` in the manager and returns the id assigned to it.
//
// Every public symbol of the module is recorded in the global symbol table;
// a symbol name that is already present is reported through `failure`.
// When `name` is provided, the module also becomes discoverable by that name.
uint32_t mod_add_impl(Bytefile *module,
                      std::optional<const char *> name = std::nullopt) {
  const uint32_t new_id = manager.modules.size();
  manager.modules.push_back(module);

  for (size_t sym = 0; sym < module->public_symbols_number; ++sym) {
    std::string symbol_name{get_public_name_safe(module, sym)};
    ModSymbolPos position{.mod_id = new_id,
                          .offset = get_public_offset_safe(module, sym)};

    const bool is_new =
        manager.public_symbols_mods.emplace(std::move(symbol_name), position)
            .second;
    if (is_new) {
      continue;
    }
    failure("public symbol loaded more then once\n");
  }

  if (name.has_value()) {
    manager.loaded_modules.emplace(name.value(), new_id);
  }

  return new_id;
}
uint32_t path_mod_load(const char *name, std::filesystem::path &&path) {
Bytefile *module = read_file(path.c_str());
return mod_add_impl(module, name);
}
extern "C" {
// Appends `path` to the list of directories probed by mod_load().
void mod_add_search_path(const char *path) {
  manager.search_paths.push_back(std::filesystem::path{path});
}
// Returns the module registered under `id`.
//
// Valid ids are 0 .. modules.size() - 1; any other id is reported through
// `failure`. The check uses `>=` because `id == modules.size()` is already
// one past the last registered module.
Bytefile *mod_get(uint32_t id) {
  if (id >= manager.modules.size()) {
    failure("module id is out of range\n");
  }
  return manager.modules[id];
}
// Looks up a module by name among the modules registered with a name.
// Returns its id, or -1 when no such module has been loaded.
int32_t find_mod_loaded(const char *name) {
  const auto found = manager.loaded_modules.find(name);
  const bool is_loaded = found != manager.loaded_modules.end();
  return is_loaded ? static_cast<int32_t>(found->second) : -1;
}
// Loads module `name` from "<name>.bc" and returns its id.
//
// Lookup order: already loaded modules, then the path relative to the current
// directory, then every registered search path in insertion order.
// Returns -1 when no matching file exists.
int32_t mod_load(const char *name) {
  // module already loaded
  if (const auto known = manager.loaded_modules.find(name);
      known != manager.loaded_modules.end()) {
    return known->second;
  }

  const std::string file_name = std::string{name} + ".bc";

  if (std::filesystem::exists(file_name)) {
    return path_mod_load(name, std::filesystem::path{file_name});
  }

  for (const auto &search_dir : manager.search_paths) {
    auto candidate = search_dir / file_name;
    if (!std::filesystem::exists(candidate)) {
      continue;
    }
    return path_mod_load(name, std::move(candidate));
  }

  return -1;
}
// Registers an already parsed module without a name (it cannot be found
// by find_mod_loaded / mod_load afterwards) and returns its id.
uint32_t mod_add(Bytefile *module) {
  return mod_add_impl(module, std::nullopt);
}
// Resolves a public symbol by name.
// On success the result holds the symbol offset, the id of the exporting
// module and the module itself; when the symbol is unknown, all fields are
// zero and `mod_file` is NULL.
ModSearchResult mod_search_pub_symbol(const char *name) {
  ModSearchResult result = {.symbol_offset = 0, .mod_id = 0, .mod_file = NULL};

  const auto entry = manager.public_symbols_mods.find(name);
  if (entry != manager.public_symbols_mods.end()) {
    const ModSymbolPos &position = entry->second;
    result.symbol_offset = position.offset;
    result.mod_id = position.mod_id;
    result.mod_file = mod_get(position.mod_id);
  }

  return result;
}
}

View file

@ -52,7 +52,7 @@ Bytefile *read_file(const char *fname) {
}
long size = ftell(f);
long additional_size = sizeof(void *) * 4 + sizeof(int);
long additional_size = sizeof(void *) * 5 + sizeof(int);
file = (Bytefile *)malloc(size +
additional_size); // file itself + additional data
@ -71,10 +71,14 @@ Bytefile *read_file(const char *fname) {
fclose(f);
long imports_size = file->imports_number * sizeof(int);
long public_symbols_size = file->public_symbols_number * 2 * sizeof(int);
if (file->buffer + public_symbols_size >= file_end) {
long strings_buffer_offset = public_symbols_size + imports_size;
if (file->buffer + strings_buffer_offset >= file_end) {
failure("public symbols are out of the file size\n");
}
file->string_ptr =
&file->buffer[strings_buffer_offset]; // TODO: check that should be there
if (file->string_ptr + file->stringtab_size > file_end) {
failure("strings table is out of the file size\n");
}
@ -87,14 +91,13 @@ Bytefile *read_file(const char *fname) {
failure("file zones sizes should be >= 0\n");
}
file->string_ptr = &file->buffer[public_symbols_size];
file->public_ptr = (int *)file->buffer;
file->code_ptr = &file->string_ptr[file->stringtab_size];
// NOTE: not used for now
// file->global_ptr = (int *)calloc(file->global_area_size, sizeof(int));
file->global_ptr = nullptr;
file->imports_ptr = (int *)file->buffer;
file->public_ptr = (int *)(file->buffer + imports_size);
// is allocated on module run on stack
file->global_ptr = NULL;
// file->global_ptr = (int*) calloc (file->global_area_size, sizeof (int));
file->code_size = size - public_symbols_size - file->stringtab_size;
file->code_size = size - strings_buffer_offset - file->stringtab_size;
return file;
}
@ -202,6 +205,8 @@ const char *command_name(Cmd cmd, int8_t l) {
return "FAIL";
case Cmd::LINE:
return "LINE";
case Cmd::CALLF:
return "CALLF";
case Cmd::PATT:
if (l >= sizeof(pats) / sizeof(char *)) {
return "_UNDEF_PATT_";
@ -520,6 +525,11 @@ std::pair<Cmd, uint8_t> parse_command_impl(char **ip, const Bytefile &bf,
read_print_cmd_seq_opt<do_read_args, use_out, ArgT::INT>(cmd, l, ip, bf,
out);
break;
case CMD_CTRL_CALLF: // CALLF %s %d
cmd = Cmd::CALLF;
read_print_cmd_seq_opt<do_read_args, use_out, ArgT::STR, ArgT::INT>(
cmd, l, ip, bf, out);
break;
default:
failure("invalid opcode");
@ -591,12 +601,32 @@ bool is_command_name(char *ip, const Bytefile *bf, Cmd cmd) {
return parse_command_name(&ip, bf).first == cmd;
}
void print_file(const Bytefile &bf, std::ostream &out) {
// Prints the header information of a bytecode file to `out`: table sizes,
// the list of imported module names and the list of public symbols
// (hexadecimal offset, then name), one entry per line.
void print_file_info(const Bytefile &bf, std::ostream &out) {
  out << "String table size : " << bf.stringtab_size << '\n';
  out << "Global area size : " << bf.global_area_size << '\n';
  out << "Number of imports : " << bf.imports_number << '\n';
  out << "Number of public symbols: " << bf.public_symbols_number << '\n';

  out << "Imports :\n";
  for (size_t i = 0; i < bf.imports_number; i++) {
    // streams have no printf-style placeholders: emit the name itself
    out << " " << get_import_safe(&bf, i) << '\n';
  }

  out << "Public symbols :\n";
  for (size_t i = 0; i < bf.public_symbols_number; i++) {
    // offset is zero-padded hex; switch back to decimal before the name so
    // later numeric output is not affected
    out << " " << std::setfill('0') << std::setw(8) << std::hex
        << get_public_offset_safe(&bf, i) << ": " << std::dec
        << get_public_name_safe(&bf, i) << '\n';
  }
}
void print_file_code(const Bytefile &bf, std::ostream &out) {
char *ip = bf.code_ptr;
while (true) {
out << std::setfill('0') << std::setw(8) << std::hex << ip - bf.code_ptr
<< ": " << std::dec;
out << " " << std::setfill('0') << std::setw(8) << std::hex
<< ip - bf.code_ptr << ": " << std::dec;
const auto [cmd, l] = parse_command(&ip, &bf, out);
out << '\n';
@ -606,6 +636,14 @@ void print_file(const Bytefile &bf, std::ostream &out) {
}
}
// Dumps a whole bytecode file to `out`: the header information first,
// then a "Code:" heading followed by the code listing.
void print_file(const Bytefile &bf, std::ostream &out) {
  print_file_info(bf, out);

  out << "Code:" << '\n';
  print_file_code(bf, out);
}
extern "C" {
const char *read_cmd(char *ip, const Bytefile *bf) {
const auto [cmd, l] = parse_command_impl<false, false>(&ip, *bf, std::clog);

View file

@ -11,12 +11,14 @@ extern size_t __gc_stack_top, __gc_stack_bottom;
// --- State ---
static void init_state(Bytefile *bf, struct State* s, void** stack) {
void init_state(struct State* s, void** stack) {
s->stack = stack;
s->bf = bf;
s->bf = NULL;
s->is_closure_call = false;
s->ip = bf->code_ptr;
s->instr_ip = bf->code_ptr;
s->current_module_id = 0;
s->call_module_id = 0;
s->ip = s->bf->code_ptr;
s->instr_ip = s->bf->code_ptr;
s->call_ip = NULL;
s->current_line = 0;
@ -24,26 +26,48 @@ static void init_state(Bytefile *bf, struct State* s, void** stack) {
s->stack[i] = NULL;
}
// printf("%p:%zu - %zu", s->stack, (size_t)s->stack, (size_t)s->stack & 0xF);
// s->sp = s->stack + STACK_SIZE; // [top -> bottom] stack
s->fp = NULL;
}
void construct_state(Bytefile *bf, struct State* s, void** stack) {
__init();
init_state(bf, s, stack);
__gc_stack_bottom = (size_t)(s->stack + STACK_SIZE);
__gc_stack_top = __gc_stack_bottom;
s_pushn_nil(bf->global_area_size);
#ifdef DEBUG_VERSION
print_stack(s);
printf("- state init done\n");
#endif
}
/*
 * Switches the interpreter state `s` to module `mod_id` and resets all
 * per-execution bookkeeping, so the module starts from a clean state.
 */
void init_mod_state(uint mod_id, struct State* s) {
  // init module data
  s->bf = mod_get(mod_id);
  s->current_module_id = mod_id;

  // execution starts at the beginning of the selected module's code
  s->ip = s->bf->code_ptr;
  s->instr_ip = s->bf->code_ptr;

  // clearup from previous executions
  // (current_module_id is intentionally NOT reset here: it must keep the
  // id of the module selected above)
  s->is_closure_call = false;
  s->call_module_id = 0;
  s->call_ip = NULL;
  s->current_line = 0;
  s->fp = NULL;

#ifdef DEBUG_VERSION
  print_stack(s);
  printf("- mod state init done\n");
#endif
}
void init_mod_state_globals(struct State *s) {
s_pushn_nil(s->bf->global_area_size);
s->bf->global_ptr = (void*)__gc_stack_top;
#ifdef DEBUG_VERSION
print_stack(s);
printf("- state globals init done\n");
#endif
}
static void destruct_state(struct State* state) {
// free(state->stack);