From 58f0bfc0b9926b4e9b84b986410f33b79897d0dc Mon Sep 17 00:00:00 2001 From: ProgramSnail Date: Mon, 20 Jan 2025 23:13:42 +0300 Subject: [PATCH] interpreter: migration to universial stdlib support (without varargs yet) --- byterun/include/module_manager.h | 2 + byterun/include/parser.hpp | 11 +-- byterun/include/types.h | 16 ++--- byterun/performance_check.sh | 1 - byterun/src/analyzer.cpp | 37 +++++----- byterun/src/interpreter.c | 112 ++++++++++++++++++++----------- byterun/src/module_manager.cpp | 12 +++- byterun/src/parser.cpp | 77 +++++++++++---------- src/SM.ml | 14 ++-- 9 files changed, 163 insertions(+), 119 deletions(-) diff --git a/byterun/include/module_manager.h b/byterun/include/module_manager.h index b76b2518c..0f2f141c3 100644 --- a/byterun/include/module_manager.h +++ b/byterun/include/module_manager.h @@ -24,3 +24,5 @@ int32_t mod_load(const char *name, bool do_verification); // < 0 => not found uint32_t mod_add(Bytefile *module, bool do_verification); struct ModSearchResult mod_search_pub_symbol(const char *name); + +bool run_stdlib_func(const char *name, size_t args_count); diff --git a/byterun/include/parser.hpp b/byterun/include/parser.hpp index afefe0029..0ca640a8b 100644 --- a/byterun/include/parser.hpp +++ b/byterun/include/parser.hpp @@ -37,11 +37,12 @@ enum class Cmd : int8_t { LINE, CALLF, PATT, - Lread, - Lwrite, - Llength, - Lstring, - Barray, + // NOTE: no longer used + // Lread, + // Lwrite, + // Llength, + // Lstring, + // Barray, EXIT, _UNDEF_, }; diff --git a/byterun/include/types.h b/byterun/include/types.h index 1d89d0ff3..789442ee5 100644 --- a/byterun/include/types.h +++ b/byterun/include/types.h @@ -117,7 +117,7 @@ enum CMD_TOPLVL { CMD_ST, CMD_CTRL, CMD_PATT, - CMD_BUILTIN, + // CMD_BUILTIN, // NOTE: no longer used CMD_EXIT = 15, }; @@ -177,10 +177,10 @@ enum CMD_PATTS { CMD_PATT_FUN_TAG, }; -enum CMD_BUILTINS { - CMD_BUILTIN_Lread = 0, - CMD_BUILTIN_Lwrite, - CMD_BUILTIN_Llength, - CMD_BUILTIN_Lstring, - CMD_BUILTIN_Barray, 
-}; +// enum CMD_BUILTINS { // NOTE: no longer used +// CMD_BUILTIN_Lread = 0, +// CMD_BUILTIN_Lwrite, +// CMD_BUILTIN_Llength, +// CMD_BUILTIN_Lstring, +// CMD_BUILTIN_Barray, +// }; diff --git a/byterun/performance_check.sh b/byterun/performance_check.sh index 38e0ef265..fc12a2046 100755 --- a/byterun/performance_check.sh +++ b/byterun/performance_check.sh @@ -34,4 +34,3 @@ time ./byterun.exe -i Sort.bc > /dev/null rm Sort.* rm *.o -rm *.a diff --git a/byterun/src/analyzer.cpp b/byterun/src/analyzer.cpp index 98ed3ae35..77256ed72 100644 --- a/byterun/src/analyzer.cpp +++ b/byterun/src/analyzer.cpp @@ -324,7 +324,7 @@ void analyze(uint32_t mod_id) { break; case Cmd::CALLF: { // TODO: find link to real function and replace call (need to save all - // modules in one space) + // modules in one space) <- optimization ip_read_int_unsafe(&current_ip); // function name (str) uint args_count = ip_read_int_unsafe(&current_ip); @@ -344,23 +344,24 @@ void analyze(uint32_t mod_id) { } ++current_stack_depth; break; - case Cmd::Lread: - ++current_stack_depth; - break; - case Cmd::Lwrite: - case Cmd::Llength: - case Cmd::Lstring: - if (current_stack_depth < 1) { - ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); - } - break; - case Cmd::Barray: - current_stack_depth -= ip_read_int_unsafe(&current_ip); // elem count - if (current_stack_depth < 0) { - ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); - } - ++current_stack_depth; - break; + // NOTE: no longer used + // case Cmd::Lread: + // ++current_stack_depth; + // break; + // case Cmd::Lwrite: + // case Cmd::Llength: + // case Cmd::Lstring: + // if (current_stack_depth < 1) { + // ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); + // } + // break; + // case Cmd::Barray: + // current_stack_depth -= ip_read_int_unsafe(&current_ip); // elem count + // if (current_stack_depth < 0) { + // ip_failure(saved_current_ip, mod_id, "not enough elements in stack"); + // } + // ++current_stack_depth; + 
// break; case Cmd::EXIT: ip_failure(saved_current_ip, mod_id, "exit should be unreachable"); // NOTE: not sure diff --git a/byterun/src/interpreter.c b/byterun/src/interpreter.c index 9a39a714e..27bae6d11 100644 --- a/byterun/src/interpreter.c +++ b/byterun/src/interpreter.c @@ -99,6 +99,27 @@ void run_mod_rec(uint mod_id, int argc, char **argv, bool do_verification) { run_mod(mod_id, argc, argv); } +static inline void call_Barray(size_t elem_count, char** ip, void** buffer) { + // size_t elem_count = ip_read_int(ip); + + void **opr_buffer = (void**)(elem_count > BUFFER_SIZE + ? alloc(elem_count * sizeof(void *)) + : buffer); + for (size_t i = 0; i < elem_count; ++i) { + opr_buffer[elem_count - i - 1] = s_pop(); + } + + // s_rotate_n(elem_count); + + // NOTE: not sure if elems should be added + void *array = + Barray((aint *)opr_buffer, + BOX(elem_count)); + + // void *array = Barray((aint *)s_peek(), BOX(elem_count)); + s_push(array); +} + void run_mod(uint mod_id, int argc, char **argv) { #ifdef DEBUG_VERSION printf("--- module init state ---\n"); @@ -463,11 +484,21 @@ void run_mod(uint mod_id, int argc, char **argv) { case CMD_CTRL_CALLF: { // CALLF %s %d // call external function const char *call_func_name = ip_read_string(&s.ip); - ip_read_int(&s.ip); // args count + size_t args_count = ip_read_int(&s.ip); // args count + + if (run_stdlib_func(call_func_name, args_count)) { + // case of stdlib function + break; + } + + if (strcmp(call_func_name, ".array") == 0) { + call_Barray(args_count, &s.ip, buffer); + break; + } struct ModSearchResult func = mod_search_pub_symbol(call_func_name); if (func.mod_file == NULL) { - s_failure(&s, "external function not found"); + failure("RUNTIME:ERROR: external function <%s> with <%zu> args not found\n", call_func_name, args_count); } call_happened = true; @@ -519,52 +550,53 @@ void run_mod(uint mod_id, int argc, char **argv) { } break; - case CMD_BUILTIN: { - switch (l) { - case CMD_BUILTIN_Lread: // CALL Lread - 
s_push_i(Lread()); - break; + // NOTE: no longer used + // case CMD_BUILTIN: { + // switch (l) { + // case CMD_BUILTIN_Lread: // CALL Lread + // s_push_i(Lread()); + // break; - case CMD_BUILTIN_Lwrite: // CALL Lwrite - Lwrite(*s_peek_i()); - break; + // case CMD_BUILTIN_Lwrite: // CALL Lwrite + // Lwrite(*s_peek_i()); + // break; - case CMD_BUILTIN_Llength: // CALL Llength - s_push_i(Llength(s_pop())); - break; + // case CMD_BUILTIN_Llength: // CALL Llength + // s_push_i(Llength(s_pop())); + // break; - case CMD_BUILTIN_Lstring: { // CALL Lstring - void *val = s_pop(); - void *str = Lstring((aint *)&val); - s_push(str); - break; - } + // case CMD_BUILTIN_Lstring: { // CALL Lstring + // void *val = s_pop(); + // void *str = Lstring((aint *)&val); + // s_push(str); + // break; + // } - case CMD_BUILTIN_Barray: { // CALL Barray %d - size_t elem_count = ip_read_int(&s.ip); + // case CMD_BUILTIN_Barray: { // CALL Barray %d + // size_t elem_count = ip_read_int(&s.ip); - void **opr_buffer = (void**)(elem_count > BUFFER_SIZE - ? alloc(elem_count * sizeof(void *)) - : buffer); - for (size_t i = 0; i < elem_count; ++i) { - opr_buffer[elem_count - i - 1] = s_pop(); - } + // void **opr_buffer = (void**)(elem_count > BUFFER_SIZE + // ? 
alloc(elem_count * sizeof(void *)) + // : buffer); + // for (size_t i = 0; i < elem_count; ++i) { + // opr_buffer[elem_count - i - 1] = s_pop(); + // } - // s_rotate_n(elem_count); - void *array = - Barray((aint *)opr_buffer, - BOX(elem_count)); // NOTE: not sure if elems should be - // added + // // s_rotate_n(elem_count); + // void *array = + // Barray((aint *)opr_buffer, + // BOX(elem_count)); // NOTE: not sure if elems should be + // // added - // void *array = Barray((aint *)s_peek(), BOX(elem_count)); - s_push(array); - break; - } + // // void *array = Barray((aint *)s_peek(), BOX(elem_count)); + // s_push(array); + // break; + // } - default: - s_failure(&s, "invalid opcode"); // %d-%d\n", h, l); - } - } break; + // default: + // s_failure(&s, "invalid opcode"); // %d-%d\n", h, l); + // } + // } break; default: s_failure(&s, "invalid opcode"); // %d-%d\n", h, l); diff --git a/byterun/src/module_manager.cpp b/byterun/src/module_manager.cpp index eb32c35f6..0d2189885 100644 --- a/byterun/src/module_manager.cpp +++ b/byterun/src/module_manager.cpp @@ -154,7 +154,7 @@ struct StdFunc { bool is_args = false; // one var for all args bool is_vararg = false; }; -bool run_stdlib_func(const char *name) { +bool run_stdlib_func(const char *name, size_t args_count) { static const std::unordered_map std_func = { {"Luppercase", {.ptr = (void (*)()) & Luppercase, .args_count = 1}}, {"Llowercase", {.ptr = (void (*)()) & Llowercase, .args_count = 1}}, @@ -217,8 +217,14 @@ bool run_stdlib_func(const char *name) { return false; } - // TODO: stack safity check - // TODO: add stdlib func stack check to verification step + // TODO: move to bytecode verifier + if ((!it->second.is_vararg && it->second.args_count != args_count) || + it->second.args_count > args_count) { + failure("RUNTIME ERROR: stdlib function <%s> argument count <%zu> is not " + "expected (expected is <%s%zu>)\n", + name, it->second.args_count, it->second.is_vararg ? 
">=" : "=", + args_count); + } // TODO: work with varargs if (it->second.is_vararg) { diff --git a/byterun/src/parser.cpp b/byterun/src/parser.cpp index 858a1b430..b8c77b3d0 100644 --- a/byterun/src/parser.cpp +++ b/byterun/src/parser.cpp @@ -219,16 +219,17 @@ const char *command_name(Cmd cmd, int8_t l) { return "_UNDEF_PATT_"; } return pats[l]; - case Cmd::Lread: - return "CALL\tLread"; - case Cmd::Lwrite: - return "CALL\tLwrite"; - case Cmd::Llength: - return "CALL\tLlength"; - case Cmd::Lstring: - return "CALL\tLstring"; - case Cmd::Barray: - return "CALL\tBarray\t%d"; + // NOTE: no longer used + // case Cmd::Lread: + // return "CALL\tLread"; + // case Cmd::Lwrite: + // return "CALL\tLwrite"; + // case Cmd::Llength: + // return "CALL\tLlength"; + // case Cmd::Lstring: + // return "CALL\tLstring"; + // case Cmd::Barray: + // return "CALL\tBarray\t%d"; case Cmd::_UNDEF_: return "_UNDEF_"; } @@ -552,35 +553,37 @@ std::pair parse_command_impl(char **ip, const Bytefile &bf, read_print_cmd_seq_opt(cmd, l, ip, bf, out); break; - case CMD_BUILTIN: { - switch (l) { - case CMD_BUILTIN_Lread: // CALL Lread - cmd = Cmd::Lread; - read_print_cmd_seq_opt(cmd, l, ip, bf, out); - break; - case CMD_BUILTIN_Lwrite: // CALL Lwrite - cmd = Cmd::Lwrite; - read_print_cmd_seq_opt(cmd, l, ip, bf, out); - break; - case CMD_BUILTIN_Llength: // CALL Llength - cmd = Cmd::Llength; - read_print_cmd_seq_opt(cmd, l, ip, bf, out); - break; - case CMD_BUILTIN_Lstring: // CALL Lstring - cmd = Cmd::Lstring; - read_print_cmd_seq_opt(cmd, l, ip, bf, out); - break; + // NOTE: no longer used + // case CMD_BUILTIN: { + // switch (l) { + // case CMD_BUILTIN_Lread: // CALL Lread + // cmd = Cmd::Lread; + // read_print_cmd_seq_opt(cmd, l, ip, bf, out); + // break; + // case CMD_BUILTIN_Lwrite: // CALL Lwrite + // cmd = Cmd::Lwrite; + // read_print_cmd_seq_opt(cmd, l, ip, bf, out); + // break; + // case CMD_BUILTIN_Llength: // CALL Llength + // cmd = Cmd::Llength; + // read_print_cmd_seq_opt(cmd, l, ip, bf, 
out); + // break; + // case CMD_BUILTIN_Lstring: // CALL Lstring + // cmd = Cmd::Lstring; + // read_print_cmd_seq_opt(cmd, l, ip, bf, out); + // break; - case CMD_BUILTIN_Barray: // CALL Barray %d - cmd = Cmd::Barray; - read_print_cmd_seq_opt(cmd, l, ip, bf, - out); - break; + // case CMD_BUILTIN_Barray: // CALL Barray %d + // cmd = Cmd::Barray; + // read_print_cmd_seq_opt(cmd, l, ip, + // bf, + // out); + // break; - default: - failure("invalid opcode"); - } - } break; + // default: + // failure("invalid opcode"); + // } + // } break; default: failure("invalid opcode"); diff --git a/src/SM.ml b/src/SM.ml index 4f5a8fa02..be3d736a4 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -275,17 +275,17 @@ module ByteCode = struct add_fixup s; add_ints [ 0 ] (* 0x70 *) - | CALL (f, _, _) when f = labeled "read" -> add_bytes [ (7 * 16) + 0 ] + (* | CALL (f, _, _) when f = labeled "read" -> add_bytes [ (7 * 16) + 0 ] *) (* 0x71 *) - | CALL (f, _, _) when f = labeled "write" -> add_bytes [ (7 * 16) + 1 ] + (* | CALL (f, _, _) when f = labeled "write" -> add_bytes [ (7 * 16) + 1 ] *) (* 0x72 *) - | CALL (f, _, _) when f = labeled "length" -> add_bytes [ (7 * 16) + 2 ] + (* | CALL (f, _, _) when f = labeled "length" -> add_bytes [ (7 * 16) + 2 ] *) (* 0x73 *) - | CALL (f, _, _) when f = labeled "string" -> add_bytes [ (7 * 16) + 3 ] + (* | CALL (f, _, _) when f = labeled "string" -> add_bytes [ (7 * 16) + 3 ] *) (* 0x74 *) - | CALL (".array", n, _) -> - add_bytes [ (7 * 16) + 4 ]; - add_ints [ n ] + (* | CALL (".array", n, _) -> *) + (* add_bytes [ (7 * 16) + 4 ]; *) + (* add_ints [ n ] *) (* 0x52 n:32 n:32 *) | BEGIN (_, a, l, [], _, _) -> add_bytes [ (5 * 16) + 2 ];