From 19c99bc7e584b6cf06aa370288a9c656f980e0ad Mon Sep 17 00:00:00 2001 From: ProgramSnail Date: Sat, 11 Jan 2025 23:51:50 +0300 Subject: [PATCH] modules: fixes to working condition --- byterun/include/analyzer.hpp | 2 +- byterun/include/interpreter.h | 3 +- byterun/include/module_manager.h | 7 +- byterun/include/stack.h | 10 +-- byterun/include/types.h | 17 +++-- byterun/include/utils.h | 8 +-- byterun/performance_check.sh | 22 +++++-- byterun/regression_check.sh | 2 +- byterun/src/analyzer.cpp | 109 ++++++++++++++++++------------- byterun/src/cli.cpp | 37 +++++++---- byterun/src/compiler.cpp | 1 + byterun/src/interpreter.c | 21 ++++-- byterun/src/module_manager.cpp | 43 ++++++++---- byterun/src/parser.cpp | 22 +++---- byterun/src/types.c | 9 ++- src/SM.ml | 5 +- 16 files changed, 197 insertions(+), 121 deletions(-) create mode 100644 byterun/src/compiler.cpp diff --git a/byterun/include/analyzer.hpp b/byterun/include/analyzer.hpp index 5f9049f1b..9a2b533a5 100644 --- a/byterun/include/analyzer.hpp +++ b/byterun/include/analyzer.hpp @@ -4,4 +4,4 @@ extern "C" { #include "utils.h" } -void analyze(Bytefile *bf); +void analyze(uint32_t mod_id); diff --git a/byterun/include/interpreter.h b/byterun/include/interpreter.h index a3b934239..d270b9f72 100644 --- a/byterun/include/interpreter.h +++ b/byterun/include/interpreter.h @@ -1,12 +1,13 @@ #pragma once #include "utils.h" +#include // void run(Bytefile *bf, int argc, char **argv); void run_init(size_t *stack); -void run_mod_rec(uint mod_id, int argc, char **argv); +void run_mod_rec(uint mod_id, int argc, char **argv, bool do_verification); void run_prepare_exec(int argc, char **argv); diff --git a/byterun/include/module_manager.h b/byterun/include/module_manager.h index f53be3333..b76b2518c 100644 --- a/byterun/include/module_manager.h +++ b/byterun/include/module_manager.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include "utils.h" @@ -12,12 +13,14 @@ struct ModSearchResult { void mod_add_search_path(const char *path); +const char *mod_get_name(uint32_t id); + Bytefile *mod_get(uint32_t id); int32_t find_mod_loaded(const char *name); // < 0 => not found -int32_t mod_load(const char *name); // < 0 => not found +int32_t mod_load(const char *name, bool do_verification); // < 0 => not found -uint32_t mod_add(Bytefile *module); +uint32_t mod_add(Bytefile *module, bool do_verification); struct ModSearchResult mod_search_pub_symbol(const char *name); diff --git a/byterun/include/stack.h b/byterun/include/stack.h index 2c541b9b9..4bf965ece 100644 --- a/byterun/include/stack.h +++ b/byterun/include/stack.h @@ -162,7 +162,6 @@ static inline void s_enter_f(char *rp, aint ret_module_id, bool is_closure_call, printf("-> %i locals sz\n", locals_sz); #endif - // TODO: move checks to BEGIN/CBEGIN // check that params count is valid if ((void **)__gc_stack_top + (aint)args_sz - (is_closure_call ? 0 : 1) >= s_top()) { @@ -231,12 +230,13 @@ static inline void s_exit_f() { s.current_module_id = UNBOX(frame.ret_module_box); s.bf = mod_get(s.current_module_id); - // TODO: return to ret_module } -static inline void print_stack() { - printf("stack (%i) is\n[", s.stack + STACK_SIZE - (void **)__gc_stack_top); - for (void **x = s.stack + STACK_SIZE - 1; x >= (void **)__gc_stack_top; --x) { +static inline void print_stack(struct State *state) { + printf("stack (%li) is\n[", + state->stack + STACK_SIZE - (void **)__gc_stack_top); + for (void **x = state->stack + STACK_SIZE - 1; x >= (void **)__gc_stack_top; + --x) { printf("%li ", (long)UNBOX(*x)); } printf("]\n"); diff --git a/byterun/include/types.h b/byterun/include/types.h index 43d8b4dd7..1d89d0ff3 100644 --- a/byterun/include/types.h +++ b/byterun/include/types.h @@ -2,6 +2,7 @@ #include "../../runtime/runtime.h" #include "../../runtime/runtime_common.h" +#include "module_manager.h" #include "parser.h" #include #include @@ -74,18 +75,20 @@ void init_mod_state(uint mod_id, struct State *s); void init_mod_state_globals(struct State *s); void cleanup_state(struct State *state); -// TODO: print current mod id static inline void s_failure(struct State *s, const char *msg) { - exec_failure(read_cmd(s->instr_ip, s->bf), s->current_line, - s->instr_ip - s->bf->code_ptr, msg); + exec_failure(read_cmd(s->instr_ip, s->bf), mod_get_name(s->current_module_id), + s->current_line, s->instr_ip - s->bf->code_ptr, msg); } -static inline void ip_failure(char *ip, Bytefile *bf, const char *msg) { - exec_failure(read_cmd(ip, bf), 0, ip - bf->code_ptr, msg); +static inline void ip_failure(char *ip, uint32_t mod_id, const char *msg) { + Bytefile *bf = mod_get(mod_id); + exec_failure(read_cmd(ip, bf), mod_get_name(mod_id), 0, ip - bf->code_ptr, + msg); } -static inline void ip_safe_failure(char *ip, Bytefile *bf, const char *msg) { - exec_failure("_UNDEF_", 0, ip - bf->code_ptr, msg); +static inline void ip_safe_failure(char *ip, uint32_t mod_id, const char *msg) { + Bytefile *bf = mod_get(mod_id); + exec_failure("_UNDEF_", mod_get_name(mod_id), 0, ip - bf->code_ptr, msg); } // ------ VarCategory ------ diff --git a/byterun/include/utils.h b/byterun/include/utils.h index 82ab95dd2..bb951075d 100644 --- a/byterun/include/utils.h +++ b/byterun/include/utils.h @@ -22,10 +22,10 @@ typedef struct { char buffer[0]; } Bytefile; -static inline void exec_failure(const char *cmd, int line, aint offset, - const char *msg) { - failure("*** RUNTIME ERROR: %i(0x%.8x):%s error: %s\n", line, offset, cmd, - msg); +static inline void exec_failure(const char *cmd, const char *module_name, + int line, aint offset, const char *msg) { + failure("*** RUNTIME ERROR: %s:%i(0x%.8x):%s error: %s\n", module_name, line, + offset, cmd, msg); } // --- unsafe versions diff --git a/byterun/performance_check.sh b/byterun/performance_check.sh index 2305f9312..38e0ef265 100755 --- a/byterun/performance_check.sh +++ b/byterun/performance_check.sh @@ -2,19 +2,26 @@ dune build > /dev/null -echo "Interpreter:" -time echo '0' | lamac -i ../performance/Sort.lama > /dev/null +compiler=../_build/default/src/Driver.exe + +echo "Used compiler path:" +echo $compiler + +# echo "Interpreter:" +# time echo '0' | $compiler -i ../performance/Sort.lama echo "Stack Machine:" -time echo '0' | lamac -s ../performance/Sort.lama > /dev/null +time echo '0' | $compiler -s ../performance/Sort.lama > /dev/null - -lamac -b ../performance/Sort.lama > /dev/null +$compiler -b ../performance/Sort.lama # ./byterun.exe -p Sort.bc -echo "Old Byterun:" -time ./old_byterun.exe -i Sort.bc > /dev/null +# echo "Old Byterun:" +# time ./old_byterun.exe -i Sort.bc > /dev/null + +echo "Code:" +time ./byterun.exe -p Sort.bc > /dev/null echo "Byterun:" time ./byterun.exe -vi Sort.bc > /dev/null @@ -27,3 +34,4 @@ time ./byterun.exe -i Sort.bc > /dev/null rm Sort.* rm *.o +rm *.a diff --git a/byterun/regression_check.sh b/byterun/regression_check.sh index a7dd02d4c..f7c8f9da1 100755 --- a/byterun/regression_check.sh +++ b/byterun/regression_check.sh @@ -7,7 +7,7 @@ suffix=".lama" for test in ../regression/*.lama; do echo $test - lamac -b $test > /dev/null + ../_build/default/src/Driver.exe -b $test > /dev/null test_file="${test%.*}" echo $test_file cat $test_file.input | ./byterun.exe -vi test*.bc > /dev/null diff --git a/byterun/src/analyzer.cpp b/byterun/src/analyzer.cpp index c7fa7d934..98ed3ae35 100644 --- a/byterun/src/analyzer.cpp +++ b/byterun/src/analyzer.cpp @@ -9,7 +9,9 @@ extern "C" { #include -void analyze(Bytefile *bf) { +void analyze(uint32_t mod_id) { + Bytefile *bf = mod_get(mod_id); + static constexpr const int NOT_VISITED = -1; std::vector visited(bf->code_size, NOT_VISITED); // store stack depth @@ -28,56 +30,58 @@ void analyze(Bytefile *bf) { char *current_ip = ip; char *saved_current_ip = current_ip; - auto const jmp_to_visit_push = [&saved_current_ip, &bf, &visited, + auto const jmp_to_visit_push = [&saved_current_ip, mod_id, &visited, ¤t_stack_depth, &to_visit_jmp](size_t offset) { if (visited[offset] == NOT_VISITED) { visited[offset] = current_stack_depth; to_visit_jmp.push_back(offset); } else if (visited[offset] != current_stack_depth) { - ip_failure(saved_current_ip, bf, + ip_failure(saved_current_ip, mod_id, "different stack depth on same point is not allowed"); } }; - auto const func_to_visit_push = [&saved_current_ip, &bf, &visited, + auto const func_to_visit_push = [&saved_current_ip, mod_id, &visited, ¤t_stack_depth, &to_visit_func](size_t offset) { if (visited[offset] == NOT_VISITED) { visited[offset] = 0; to_visit_func.push_back(offset); } else if (visited[offset] != 0) { - ip_failure(saved_current_ip, bf, + ip_failure(saved_current_ip, mod_id, "different stack depth on same point is not allowed"); } }; - auto const check_correct_var = [&saved_current_ip, &bf, &globals_count, + auto const check_correct_var = [&saved_current_ip, mod_id, &globals_count, ¤t_locals_count, ¤t_args_count, &is_in_closure](uint8_t l, uint id) { if (l > 3) { - ip_failure(saved_current_ip, bf, "unexpected variable category"); + ip_failure(saved_current_ip, mod_id, "unexpected variable category"); } VarCategory category = to_var_category(l); switch (category) { case VAR_GLOBAL: if (id >= globals_count) { - ip_failure(saved_current_ip, bf, "global var index is out of range"); + ip_failure(saved_current_ip, mod_id, + "global var index is out of range"); } break; case VAR_LOCAL: if (id >= current_locals_count) { - ip_failure(saved_current_ip, bf, "local var index is out of range"); + ip_failure(saved_current_ip, mod_id, "local var index is out of range"); } break; case VAR_ARGUMENT: if (id >= current_args_count) { - ip_failure(saved_current_ip, bf, "argument var index is out of range"); + ip_failure(saved_current_ip, mod_id, + "argument var index is out of range"); } break; case VAR_CLOSURE: if (!is_in_closure) { - ip_failure(saved_current_ip, bf, + ip_failure(saved_current_ip, mod_id, "can't access closure vars outside of closure"); } // NOTE: impossible to properly check bounds there @@ -118,11 +122,12 @@ void analyze(Bytefile *bf) { } if (ip >= bf->code_ptr + bf->code_size) { - ip_safe_failure(ip, bf, "instruction pointer is out of range (>= size)"); + ip_safe_failure(ip, mod_id, + "instruction pointer is out of range (>= size)"); } if (ip < bf->code_ptr) { - ip_safe_failure(ip, bf, "instruction pointer is out of range (< 0)"); + ip_safe_failure(ip, mod_id, "instruction pointer is out of range (< 0)"); } current_ip = ip; @@ -136,11 +141,12 @@ void analyze(Bytefile *bf) { if (current_begin_counter == nullptr && cmd != Cmd::BEGIN && cmd != Cmd::CBEGIN) { - ip_failure(saved_current_ip, bf, "function does not start with begin"); + ip_failure(saved_current_ip, mod_id, + "function does not start with begin"); } if (visited[current_ip - bf->code_ptr] == NOT_VISITED) { - ip_failure(saved_current_ip, bf, "not visited command"); + ip_failure(saved_current_ip, mod_id, "not visited command"); } current_stack_depth = visited[current_ip - bf->code_ptr]; @@ -156,7 +162,7 @@ void analyze(Bytefile *bf) { case Cmd::BINOP: current_stack_depth -= 2; if (current_stack_depth < 0) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); } ++current_stack_depth; break; @@ -170,21 +176,21 @@ void analyze(Bytefile *bf) { ip_read_string_unsafe(¤t_ip, bf); current_stack_depth -= ip_read_int_unsafe(¤t_ip); if (current_stack_depth < 0) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); } ++current_stack_depth; break; case Cmd::STI: current_stack_depth -= 2; if (current_stack_depth < 0) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); } ++current_stack_depth; break; case Cmd::STA: current_stack_depth -= 3; if (current_stack_depth < 0) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); } ++current_stack_depth; break; @@ -202,19 +208,19 @@ void analyze(Bytefile *bf) { break; case Cmd::DUP: if (current_stack_depth < 1) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); } ++current_stack_depth; break; case Cmd::SWAP: if (current_stack_depth < 2) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); } break; case Cmd::ELEM: current_stack_depth -= 2; if (current_stack_depth < 0) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); } ++current_stack_depth; break; @@ -229,7 +235,7 @@ void analyze(Bytefile *bf) { case Cmd::ST: check_correct_var(l, ip_read_int_unsafe(¤t_ip)); if (current_stack_depth < 1) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); } break; case Cmd::CJMPz: @@ -239,13 +245,13 @@ void analyze(Bytefile *bf) { case Cmd::BEGIN: case Cmd::CBEGIN: if (current_begin_counter != nullptr) { - ip_failure(saved_current_ip, bf, "unexpected function beginning"); + ip_failure(saved_current_ip, mod_id, "unexpected function beginning"); } current_args_count = ip_read_int_unsafe(¤t_ip); current_begin_counter = (uint16_t *)(current_ip + sizeof(uint16_t)); current_locals_count = ip_read_int_unsafe(¤t_ip); if (current_locals_count >= std::numeric_limits::max()) { - ip_failure(saved_current_ip, bf, "too many locals in functions"); + ip_failure(saved_current_ip, mod_id, "too many locals in functions"); } (*(uint16_t *)(current_ip - sizeof(uint16_t))) = current_locals_count; *current_begin_counter = 0; @@ -263,19 +269,20 @@ void analyze(Bytefile *bf) { ++current_stack_depth; // if (closure_offset >= bf->code_size) { - // ip_failure(saved_current_ip, bf, "jump/call out of file"); + // ip_failure(saved_current_ip, mod_id, "jump/call out of file"); // } // NOTE: is not always true // if (!is_command_name(bf->code_ptr + closure_offset, bf, Cmd::CBEGIN)) { - // ip_failure(saved_current_ip, bf, "closure should point to cbegin"); + // ip_failure(saved_current_ip, mod_id, "closure should point to + // cbegin"); // } } break; case Cmd::CALLC: { uint args_count = ip_read_int_unsafe(¤t_ip); current_stack_depth -= args_count + 1; // + closure itself if (current_stack_depth < 0) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); } ++current_stack_depth; // NOTE: can't check args == cbegin args @@ -285,29 +292,29 @@ void analyze(Bytefile *bf) { uint args_count = ip_read_int_unsafe(¤t_ip); current_stack_depth -= args_count; if (current_stack_depth < 0) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); } ++current_stack_depth; if (call_offset >= bf->code_size) { - ip_failure(saved_current_ip, bf, "jump/call out of file"); + ip_failure(saved_current_ip, mod_id, "jump/call out of file"); } if (!is_command_name(bf->code_ptr + call_offset, bf, Cmd::BEGIN)) { - ip_failure(saved_current_ip, bf, "call should point to begin"); + ip_failure(saved_current_ip, mod_id, "call should point to begin"); } if (args_count != *(uint *)(bf->code_ptr + call_offset + 1)) { - ip_failure(saved_current_ip, bf, "wrong call argument count"); + ip_failure(saved_current_ip, mod_id, "wrong call argument count"); } } break; case Cmd::TAG: if (current_stack_depth < 1) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); } break; case Cmd::ARRAY: if (current_stack_depth < 1) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); } break; case Cmd::FAIL: @@ -315,16 +322,25 @@ void analyze(Bytefile *bf) { break; case Cmd::LINE: break; - case Cmd::CALLF: // FIXME TODO - ip_failure(saved_current_ip, bf, "CALLF analysis is not implemented yet"); - break; + case Cmd::CALLF: { + // TODO: find link to real function and replace call (need to save all + // modules in one space) + + ip_read_int_unsafe(¤t_ip); // function name (str) + uint args_count = ip_read_int_unsafe(¤t_ip); + current_stack_depth -= args_count; + if (current_stack_depth < 0) { + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); + } + ++current_stack_depth; + } break; case Cmd::PATT: --current_stack_depth; if (l == CMD_PATT_STR) { --current_stack_depth; } if (current_stack_depth < 0) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); } ++current_stack_depth; break; @@ -335,31 +351,32 @@ void analyze(Bytefile *bf) { case Cmd::Llength: case Cmd::Lstring: if (current_stack_depth < 1) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); } break; case Cmd::Barray: current_stack_depth -= ip_read_int_unsafe(¤t_ip); // elem count if (current_stack_depth < 0) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); } ++current_stack_depth; break; case Cmd::EXIT: - ip_failure(saved_current_ip, bf, + ip_failure(saved_current_ip, mod_id, "exit should be unreachable"); // NOTE: not sure break; case Cmd::_UNDEF_: - ip_failure(saved_current_ip, bf, "undefined command"); + ip_failure(saved_current_ip, mod_id, "undefined command"); break; } if (current_begin_counter == nullptr) { - ip_failure(saved_current_ip, bf, "function does not start with begin"); + ip_failure(saved_current_ip, mod_id, + "function does not start with begin"); } if (current_stack_depth < 0) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); + ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); } *current_begin_counter = @@ -386,7 +403,7 @@ void analyze(Bytefile *bf) { uint jmp_p = ip_read_int_unsafe(¤t_ip); if (jmp_p >= bf->code_size) { // NOTE: maybe also should check that > begin (?) - ip_failure(saved_current_ip, bf, "jump/call out of file"); + ip_failure(saved_current_ip, mod_id, "jump/call out of file"); } if (is_call) { func_to_visit_push(jmp_p); @@ -397,7 +414,7 @@ void analyze(Bytefile *bf) { } case Cmd::_UNDEF_: - ip_failure(saved_current_ip, bf, "undefined command"); + ip_failure(saved_current_ip, mod_id, "undefined command"); break; default: diff --git a/byterun/src/cli.cpp b/byterun/src/cli.cpp index cbf80c97f..c4d17ce39 100644 --- a/byterun/src/cli.cpp +++ b/byterun/src/cli.cpp @@ -41,27 +41,40 @@ int main(int argc, char **argv) { failure("no file name provided"); } +#ifdef DEBUG_VERSION + std::cerr << "- read code file" << std::endl; +#endif + Bytefile *f = read_file(argv[2]); if (do_print) { +#ifdef DEBUG_VERSION + std::cerr << "- print code file" << std::endl; +#endif + print_file(*f, std::cout); } - if (do_verification) { - analyze(f); - } - if (do_interpretation) { - // TODO FIXME: add all loaded modules verification + if (do_verification || do_interpretation) { +#ifdef DEBUG_VERSION + std::cerr << "- init stack" << std::endl; +#endif + size_t stack[STACK_SIZE]; run_init(stack); - uint main_mod_id = mod_add(f); +#ifdef DEBUG_VERSION + std::cerr << "- add main module" << std::endl; +#endif - run_mod_rec(main_mod_id, argc - 1, argv + 1); + uint main_mod_id = mod_add(f, do_verification); + + if (do_interpretation) { +#ifdef DEBUG_VERSION + std::cerr << "- start interpretation" << std::endl; +#endif + + run_mod_rec(main_mod_id, argc - 2, argv + 2, do_verification); + } } - // TODO: remove, before modules - // // NOTE: not used for now - // // free(f->global_ptr); - // free(f); - return 0; } diff --git a/byterun/src/compiler.cpp b/byterun/src/compiler.cpp new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/byterun/src/compiler.cpp @@ -0,0 +1 @@ + diff --git a/byterun/src/interpreter.c b/byterun/src/interpreter.c index 89dfeb8a8..9a39a714e 100644 --- a/byterun/src/interpreter.c +++ b/byterun/src/interpreter.c @@ -60,25 +60,35 @@ void run_init(size_t *stack) { void run_prepare_exec(int argc, char **argv) { s_push_i(BOX(argc)); +#ifdef DEBUG_VERSION + printf("- argc: %i\n", argc); +#endif for (size_t i = 0; i < argc; ++i) { +#ifdef DEBUG_VERSION + printf("- arg: %s\n", argv[argc - i - 1]); +#endif s_push(Bstring((aint *)&argv[argc - i - 1])); } s_push(Barray((aint *)s_peek(), argc)); void *argv_elem = s_pop(); s_popn(argc); s_push(argv_elem); + +#ifdef DEBUG_VERSION + print_stack(s); + printf("- state init done\n"); +#endif } -// TODO: use unsafe, move checks to verifier (?) -void run_mod_rec(uint mod_id, int argc, char **argv) { - Bytefile* mod = mod_get(mod_id); // TODO: pass as param ?? +void run_mod_rec(uint mod_id, int argc, char **argv, bool do_verification) { + Bytefile* mod = mod_get(mod_id); for (size_t i = 0; i < mod->imports_number; ++i) { if (find_mod_loaded(get_import_safe(mod, i)) < 0 && strcmp(get_import_safe(mod, i), "Std") != 0) { // not loaded - int32_t import_mod = mod_load(get_import_safe(mod, i)); + int32_t import_mod = mod_load(get_import_safe(mod, i), do_verification); if (import_mod < 0) { failure("module %s not found\n", get_import_safe(mod, i)); } - run_mod_rec(mod_id, argc, argv); + run_mod_rec(mod_id, argc, argv, do_verification); } } @@ -455,7 +465,6 @@ void run_mod(uint mod_id, int argc, char **argv) { const char *call_func_name = ip_read_string(&s.ip); ip_read_int(&s.ip); // args count - // TODO: jump to other module, save ret module struct ModSearchResult func = mod_search_pub_symbol(call_func_name); if (func.mod_file == NULL) { s_failure(&s, "external function not found"); diff --git a/byterun/src/module_manager.cpp b/byterun/src/module_manager.cpp index 44004e2e3..f3ee5a88a 100644 --- a/byterun/src/module_manager.cpp +++ b/byterun/src/module_manager.cpp @@ -3,6 +3,7 @@ extern "C" { #include "utils.h" } +#include "analyzer.hpp" #include "parser.hpp" #include @@ -16,24 +17,29 @@ struct ModSymbolPos { size_t offset; }; +struct Module { + std::string name; + Bytefile *bf; +}; + struct ModuleManager { std::unordered_map loaded_modules; std::unordered_map public_symbols_mods; - std::vector modules; + std::vector modules; std::vector search_paths; }; static ModuleManager manager; -uint32_t mod_add_impl(Bytefile *module, +uint32_t mod_add_impl(Bytefile *bf, bool do_verification, std::optional name = std::nullopt) { uint32_t id = manager.modules.size(); - manager.modules.push_back(module); - for (size_t i = 0; i < module->public_symbols_number; ++i) { + manager.modules.push_back({.name = name ? *name : "", .bf = bf}); + for (size_t i = 0; i < bf->public_symbols_number; ++i) { if (!manager.public_symbols_mods .insert({ - get_public_name_safe(module, i), - {.mod_id = id, .offset = get_public_offset_safe(module, i)}, + get_public_name_safe(bf, i), + {.mod_id = id, .offset = get_public_offset_safe(bf, i)}, }) .second) { failure("public symbol loaded more then once\n"); @@ -42,10 +48,14 @@ uint32_t mod_add_impl(Bytefile *module, if (name) { manager.loaded_modules.insert({*name, id}); } + if (do_verification) { + analyze(id); + } return id; } -uint32_t path_mod_load(const char *name, std::filesystem::path &&path) { +uint32_t path_mod_load(const char *name, std::filesystem::path &&path, + bool do_verification) { Bytefile *module = read_file(path.c_str()); return mod_add_impl(module, name); } @@ -55,11 +65,18 @@ void mod_add_search_path(const char *path) { manager.search_paths.emplace_back(path); } +const char *mod_get_name(uint32_t id) { + if (id > manager.modules.size()) { + failure("module id is out of range\n"); + } + return manager.modules[id].name.c_str(); +} + Bytefile *mod_get(uint32_t id) { if (id > manager.modules.size()) { failure("module id is out of range\n"); } - return manager.modules[id]; + return manager.modules[id].bf; } int32_t find_mod_loaded(const char *name) { @@ -73,7 +90,7 @@ int32_t find_mod_loaded(const char *name) { return -1; } -int32_t mod_load(const char *name) { +int32_t mod_load(const char *name, bool do_verification) { std::string full_name = std::string{name} + ".bc"; auto it = manager.loaded_modules.find(name); @@ -84,19 +101,21 @@ int32_t mod_load(const char *name) { } if (std::filesystem::exists(full_name)) { - return path_mod_load(name, full_name); + return path_mod_load(name, full_name, do_verification); } for (const auto &dir_path : manager.search_paths) { auto path = dir_path / full_name; if (std::filesystem::exists(path)) { - return path_mod_load(name, std::move(path)); + return path_mod_load(name, std::move(path), do_verification); } } return -1; } -uint32_t mod_add(Bytefile *module) { return mod_add_impl(module); } +uint32_t mod_add(Bytefile *module, bool do_verification) { + return mod_add_impl(module, do_verification); +} ModSearchResult mod_search_pub_symbol(const char *name) { auto it = manager.public_symbols_mods.find(name); diff --git a/byterun/src/parser.cpp b/byterun/src/parser.cpp index f5ed6a425..67ebf13be 100644 --- a/byterun/src/parser.cpp +++ b/byterun/src/parser.cpp @@ -77,8 +77,7 @@ Bytefile *read_file(const char *fname) { if (file->buffer + strings_buffer_offset >= file_end) { failure("public symbols are out of the file size\n"); } - file->string_ptr = - &file->buffer[strings_buffer_offset]; // TODO: check that should be there + file->string_ptr = file->buffer + strings_buffer_offset; if (file->string_ptr + file->stringtab_size > file_end) { failure("strings table is out of the file size\n"); } @@ -86,6 +85,8 @@ Bytefile *read_file(const char *fname) { // file->string_ptr[file->stringtab_size - 1] != 0) { // failure("strings table is not zero-ended\n"); // } + file->code_size = size - strings_buffer_offset - file->stringtab_size; + if (file->code_size < 0 || public_symbols_size < 0 || file->stringtab_size < 0) { failure("file zones sizes should be >= 0\n"); @@ -93,12 +94,10 @@ Bytefile *read_file(const char *fname) { file->imports_ptr = (int *)file->buffer; file->public_ptr = (int *)(file->buffer + imports_size); - // is allocated on module run on stack - file->global_ptr = NULL; + file->global_ptr = NULL; // is allocated on module run on stack + file->code_ptr = file->string_ptr + file->stringtab_size; // file->global_ptr = (int*) calloc (file->global_area_size, sizeof (int)); - file->code_size = size - strings_buffer_offset - file->stringtab_size; - return file; } @@ -606,18 +605,17 @@ void print_file_info(const Bytefile &bf, std::ostream &out) { out << "Global area size : " << bf.global_area_size << '\n'; out << "Number of imports : " << bf.imports_number << '\n'; out << "Number of public symbols: " << bf.public_symbols_number << '\n'; - out << "Imports :\n"; + out << "Imports :\n"; for (size_t i = 0; i < bf.imports_number; i++) { - out << " %s\n" << get_import_safe(&bf, i); + out << " " << get_import_safe(&bf, i) << '\n'; } out << "Public symbols :\n"; - for (size_t i = 0; i < bf.public_symbols_number; i++) { out << " " << std::setfill('0') << std::setw(8) << std::hex << get_public_offset_safe(&bf, i) << ": " << std::dec - << get_public_name_safe(&bf, i); + << get_public_name_safe(&bf, i) << '\n'; } } @@ -628,7 +626,7 @@ void print_file_code(const Bytefile &bf, std::ostream &out) { out << " " << std::setfill('0') << std::setw(8) << std::hex << ip - bf.code_ptr << ": " << std::dec; const auto [cmd, l] = parse_command(&ip, &bf, out); - out << '\n'; + out << std::endl; if (cmd == Cmd::EXIT) { break; @@ -640,8 +638,8 @@ void print_file(const Bytefile &bf, std::ostream &out) { print_file_info(bf, out); out << "Code:\n"; - print_file_code(bf, out); + out << "code end\n"; } extern "C" { diff --git a/byterun/src/types.c b/byterun/src/types.c index 5fda5817e..16e379ef5 100644 --- a/byterun/src/types.c +++ b/byterun/src/types.c @@ -12,13 +12,15 @@ extern size_t __gc_stack_top, __gc_stack_bottom; // --- State --- void init_state(struct State* s, void** stack) { + __init(); + s->stack = stack; s->bf = NULL; s->is_closure_call = false; s->current_module_id = 0; s->call_module_id = 0; - s->ip = s->bf->code_ptr; - s->instr_ip = s->bf->code_ptr; + s->ip = NULL; //s->bf->code_ptr; + s->instr_ip = NULL; //s->bf->code_ptr; s->call_ip = NULL; s->current_line = 0; @@ -52,6 +54,9 @@ void init_mod_state(uint mod_id, struct State* s) { s->fp = NULL; + s->ip = s->bf->code_ptr; + s->instr_ip = s->bf->code_ptr; + #ifdef DEBUG_VERSION print_stack(s); printf("- mod state init done\n"); diff --git a/src/SM.ml b/src/SM.ml index 126f7d229..4f5a8fa02 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -307,7 +307,7 @@ module ByteCode = struct (* 0x56 l:32 n:32 *) | CALL (fn, n, _) -> (add_bytes [ (5 * 16) + 6 ]; - (* TODO: 1 -> sizeof byte ?? *) + (* 1 = sizeof byte *) add_func_fixup (Buffer.length code - 1) fn; add_ints [ 0; n ]) (* 0x57 s:32 n:32 *) @@ -333,7 +333,6 @@ module ByteCode = struct (* TODO: put externs in string table (?), or check that all external funtions are externs *) | PUBLIC s -> add_public s | IMPORT s -> add_import s - (* TODO: add imports table before publics *) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__) @@ -359,7 +358,7 @@ module ByteCode = struct with Not_found -> failwith (Printf.sprintf "ERROR: undefined label '%s'" l))) !fixups; - let imports = (* TODO: check *) + let imports = List.map (fun l -> (Int32.of_int @@ StringTab.add st l)) @@ S.elements !imports