From 43088ec9f9fb45119db6570227b67e8539d2d49c Mon Sep 17 00:00:00 2001 From: ProgramSnail Date: Sun, 30 Mar 2025 09:34:50 +0300 Subject: [PATCH] stdlib tester, fixes, switch to builtins as pseudo functions (use call), remove negative closure offset possibility --- byterun/.gitignore | 1 + byterun/include/module_manager.h | 82 +++++++------- byterun/include/stack.h | 8 ++ byterun/include/types.h | 2 +- byterun/regression_check.sh | 1 + byterun/src/analyzer.cpp | 63 +++++++---- byterun/src/interpreter.c | 61 ++++++----- byterun/src/module_manager.cpp | 168 +++++++++++++++++++++++------ byterun/src/parser.cpp | 28 +++-- byterun/stdlib_regression_check.sh | 38 +++++++ 10 files changed, 321 insertions(+), 131 deletions(-) create mode 100755 byterun/stdlib_regression_check.sh diff --git a/byterun/.gitignore b/byterun/.gitignore index db2a15b28..f13038d58 100644 --- a/byterun/.gitignore +++ b/byterun/.gitignore @@ -11,5 +11,6 @@ compile_commands.json .cache/ *.a *.o +*.bc test** diff --git a/byterun/include/module_manager.h b/byterun/include/module_manager.h index d6e31d036..00452b9e9 100644 --- a/byterun/include/module_manager.h +++ b/byterun/include/module_manager.h @@ -11,47 +11,47 @@ Bytefile *run_with_imports(Bytefile *root, int argc, char **argv, // --- enum BUILTIN : uint { - BUILTIN_Luppercase, - BUILTIN_Llowercase, - BUILTIN_Lassert, - BUILTIN_Lstring, - BUILTIN_Llength, - BUILTIN_LstringInt, - BUILTIN_Lread, - BUILTIN_Lwrite, - BUILTIN_LmakeArray, - BUILTIN_LmakeString, - BUILTIN_Lstringcat, - BUILTIN_LmatchSubString, - BUILTIN_Lsprintf, - BUILTIN_Lsubstring, - BUILTIN_Li__Infix_4343, // ++ - BUILTIN_Lclone, - BUILTIN_Lhash, - BUILTIN_LtagHash, - BUILTIN_Lcompare, - BUILTIN_LflatCompare, - BUILTIN_Lfst, - BUILTIN_Lsnd, - BUILTIN_Lhd, - BUILTIN_Ltl, - BUILTIN_Lprintf, - BUILTIN_LreadLine, - BUILTIN_Lfopen, - BUILTIN_Lfclose, - BUILTIN_Lfread, - BUILTIN_Lfwrite, - BUILTIN_Lfexists, - BUILTIN_Lfprintf, - BUILTIN_Lregexp, - BUILTIN_LregexpMatch, - BUILTIN_Lfailure, 
- BUILTIN_Lsystem, - BUILTIN_LgetEnv, - BUILTIN_Lrandom, - BUILTIN_Ltime, - BUILTIN_Barray, // can't be run with run_stdlib_func - BUILTIN_NONE, + BUILTIN_Luppercase, // 0 + BUILTIN_Llowercase, // 1 + BUILTIN_Lassert, // 2 + BUILTIN_Lstring, // 3 + BUILTIN_Llength, // 4 + BUILTIN_LstringInt, // 5 + BUILTIN_Lread, // 6 + BUILTIN_Lwrite, // 7 + BUILTIN_LmakeArray, // 8 + BUILTIN_LmakeString, // 9 + BUILTIN_Lstringcat, // 10 + BUILTIN_LmatchSubString, // 11 + BUILTIN_Lsprintf, // 12 + BUILTIN_Lsubstring, // 13 + BUILTIN_Li__Infix_4343, // 14 // ++ + BUILTIN_Lclone, // 15 + BUILTIN_Lhash, // 16 + BUILTIN_LtagHash, // 17 + BUILTIN_Lcompare, // 18 + BUILTIN_LflatCompare, // 19 + BUILTIN_Lfst, // 20 + BUILTIN_Lsnd, // 21 + BUILTIN_Lhd, // 22 + BUILTIN_Ltl, // 23 + BUILTIN_Lprintf, // 24 + BUILTIN_LreadLine, // 25 + BUILTIN_Lfopen, // 26 + BUILTIN_Lfclose, // 27 + BUILTIN_Lfread, // 28 + BUILTIN_Lfwrite, // 29 + BUILTIN_Lfexists, // 30 + BUILTIN_Lfprintf, // 31 + BUILTIN_Lregexp, // 32 + BUILTIN_LregexpMatch, // 33 + BUILTIN_Lfailure, // 34 + BUILTIN_Lsystem, // 35 + BUILTIN_LgetEnv, // 36 + BUILTIN_Lrandom, // 37 + BUILTIN_Ltime, // 38 + BUILTIN_Barray, // 39 // can't be run with run_stdlib_func + BUILTIN_NONE, // 40 }; enum BUILTIN id_by_builtin(const char *name); diff --git a/byterun/include/stack.h b/byterun/include/stack.h index 8cdf3fd3f..9ffe4a794 100644 --- a/byterun/include/stack.h +++ b/byterun/include/stack.h @@ -125,6 +125,14 @@ static inline void s_popn(size_t n) { // ------ complex operations ------ +static inline void s_swap_tops() { + // NOTE: can be optimized + void *x = s_pop(); + void *y = s_pop(); + s_push(x); + s_push(y); +} + // for some reason does not work in sexp constructor, probably connected with gc // behaviour static inline void s_put_nth(size_t n, void *val) { diff --git a/byterun/include/types.h b/byterun/include/types.h index 53315f8c5..0e992f87f 100644 --- a/byterun/include/types.h +++ b/byterun/include/types.h @@ -16,7 +16,7 @@ // 
CLOJURE_T = CLOSURE_TAG, // }; -#define STACK_SIZE 128 * 1024 +#define STACK_SIZE 512 * 1024 static const size_t MAX_ARRAY_SIZE = 0x11111110; diff --git a/byterun/regression_check.sh b/byterun/regression_check.sh index d08df3e5f..2c4d4562a 100755 --- a/byterun/regression_check.sh +++ b/byterun/regression_check.sh @@ -17,6 +17,7 @@ for test in ../regression/*.lama; do echo $test_file # cat $test_file.input | ./byterun.exe -p test*.bc > test.bc.code # cat $test_file.input | ./byterun.exe -p test*.bc + # cat $test_file.input | ./byterun.exe -vi test*.bc cat $test_file.input | ./byterun.exe -vi test*.bc > test.log sed '1d;s/^..//' $test_file.t > test_orig.log diff test.log test_orig.log diff --git a/byterun/src/analyzer.cpp b/byterun/src/analyzer.cpp index 8617c6f57..ead3135de 100644 --- a/byterun/src/analyzer.cpp +++ b/byterun/src/analyzer.cpp @@ -13,9 +13,11 @@ void analyze(Bytefile *bf, std::vector &&add_publics) { static constexpr const int NOT_VISITED = -1; std::vector visited(bf->code_size, NOT_VISITED); // store stack depth - std::vector to_visit_func = std::move(add_publics); + std::vector to_visit_func; std::vector to_visit_jmp; + uint16_t mock_builtin_begin_counter = 0; + int current_stack_depth = 0; const uint globals_count = bf->global_area_size; uint current_locals_count = 0; @@ -84,6 +86,10 @@ void analyze(Bytefile *bf, std::vector &&add_publics) { } }; + for (const auto &add_public : add_publics) { + func_to_visit_push(add_public); + } + // add publics to_visit_func.reserve(bf->public_symbols_number + to_visit_func.size()); for (size_t i = 0; i < bf->public_symbols_number; ++i) { @@ -136,8 +142,9 @@ void analyze(Bytefile *bf, std::vector &&add_publics) { #endif if (current_begin_counter == nullptr && cmd != Cmd::BEGIN && - cmd != Cmd::CBEGIN) { - ip_failure(saved_current_ip, bf, "function does not start with begin"); + cmd != Cmd::CBEGIN && cmd != Cmd::BUILTIN) { + ip_failure(saved_current_ip, bf, + "function does not start with begin and is not 
builtin"); } if (visited[current_ip - bf->code_ptr] == NOT_VISITED) { @@ -297,11 +304,18 @@ void analyze(Bytefile *bf, std::vector &&add_publics) { ip_failure(saved_current_ip, bf, "jump/call out of file"); } - if (!is_command_name(bf->code_ptr + call_offset, bf, Cmd::BEGIN)) { - ip_failure(saved_current_ip, bf, "call should point to begin"); - } - if (args_count != *(uint *)(bf->code_ptr + call_offset + 1)) { - ip_failure(saved_current_ip, bf, "wrong call argument count"); + if (is_command_name(bf->code_ptr + call_offset, bf, Cmd::BUILTIN)) { + if (args_count != + *(uint *)(bf->code_ptr + call_offset + 1 + sizeof(uint32_t))) { + ip_failure(saved_current_ip, bf, "wrong builtin call argument count"); + } + } else if (is_command_name(bf->code_ptr + call_offset, bf, Cmd::BEGIN)) { + if (args_count != *(uint *)(bf->code_ptr + call_offset + 1)) { + ip_failure(saved_current_ip, bf, "wrong call argument count"); + } + } else { + ip_failure(saved_current_ip, bf, + "call should point to begin or builtin"); } } break; case Cmd::TAG: @@ -329,12 +343,21 @@ void analyze(Bytefile *bf, std::vector &&add_publics) { ip_failure(saved_current_ip, bf, "undefined builtin id"); } - uint args_count = ip_read_int_unsafe(&current_ip); - current_stack_depth -= args_count; - if (current_stack_depth < 0) { - ip_failure(saved_current_ip, bf, "not enough elements in stack"); - } - ++current_stack_depth; + // set mock counter to behave similary to begin + current_begin_counter = &mock_builtin_begin_counter; + *current_begin_counter = 0; + // add end to behave like end + ++func_end_found; + + /*uint args_count = */ ip_read_int_unsafe(&current_ip); + + // NOTE: done in corresponding CALL/CALLC + // TODO: no stack edit required then (?) 
+ // current_stack_depth -= args_count; + // if (current_stack_depth < 0) { + // ip_failure(saved_current_ip, bf, "not enough elements in stack"); + // } + // ++current_stack_depth; } break; case Cmd::PATT: --current_stack_depth; @@ -374,7 +397,8 @@ void analyze(Bytefile *bf, std::vector &&add_publics) { } if (current_begin_counter == nullptr) { - ip_failure(saved_current_ip, bf, "function does not start with begin"); + ip_failure(saved_current_ip, bf, + "function does not start with begin and is not builtin"); } if (current_stack_depth < 0) { @@ -392,6 +416,7 @@ void analyze(Bytefile *bf, std::vector &&add_publics) { case Cmd::EXIT: case Cmd::END: case Cmd::FAIL: + case Cmd::BUILTIN: // pseudo function without begin and end break; case Cmd::CJMPz: @@ -402,15 +427,15 @@ void analyze(Bytefile *bf, std::vector &&add_publics) { case Cmd::JMP: { bool is_call = (cmd == Cmd::CLOSURE || cmd == Cmd::CALL); - uint jmp_p = ip_read_int_unsafe(&current_ip); - if ((int)jmp_p >= bf->code_size) { + aint jmp_offset = ip_read_int_unsafe(&current_ip); + if (jmp_offset < 0 || jmp_offset >= bf->code_size) { // NOTE: maybe also should check that > begin (?) 
ip_failure(saved_current_ip, bf, "jump/call out of file"); } if (is_call) { - func_to_visit_push(jmp_p); + func_to_visit_push(jmp_offset); } else { - jmp_to_visit_push(jmp_p); + jmp_to_visit_push(jmp_offset); } break; } diff --git a/byterun/src/interpreter.c b/byterun/src/interpreter.c index 28770bbd6..79ca676f4 100644 --- a/byterun/src/interpreter.c +++ b/byterun/src/interpreter.c @@ -104,6 +104,23 @@ static inline void call_Barray(size_t elem_count) { s_push(array); } +void call_builtin(uint builtin_id, uint args_count) { +#ifdef DEBUG_VERSION + printf("builtin id: %zu\n", builtin_id); +#endif +#ifndef WITH_CHECK + if (builtin_id >= BUILTIN_NONE) { + s_failure(&s, "invalid builtin"); + } +#endif + + if (builtin_id == BUILTIN_Barray) { + call_Barray(args_count); + } else { + run_stdlib_func(builtin_id, args_count); + } +} + void run_main(Bytefile* bf, int argc, char **argv) { #ifdef DEBUG_VERSION printf("--- init state ---\n"); @@ -247,12 +264,8 @@ void run_main(Bytefile* bf, int argc, char **argv) { } case CMD_BASIC_SWAP: // SWAP - { - void *x = s_pop(); - void *y = s_pop(); - s_push(y); - s_push(x); - } break; + s_swap_tops(); + break; case CMD_BASIC_ELEM: // ELEM { @@ -373,11 +386,17 @@ void run_main(Bytefile* bf, int argc, char **argv) { s_push(*var_ptr); } #ifndef WITH_CHECK + // check correct offset if (call_offset >= s.bf->code_size) { s_failure(&s, "jump out of file"); } + // or correct builtin // TODO: add this check to analyzer + if (call_offset < 0 && call_offset + 1 <= -BUILTIN_NONE) { + s_failure(&s, "closure"); + } #endif - s_push(s.bf->code_ptr + call_offset); + // NOTE: call_offset < 0 => deal with closure of builtin function + s_push_i(BOX(call_offset)); void *closure = Bclosure((aint *)__gc_stack_top, BOX(args_count)); // printf("args is %li, count is %li\n", args_count, get_len(TO_DATA(closure))); @@ -390,11 +409,11 @@ void run_main(Bytefile* bf, int argc, char **argv) { case CMD_CTRL_CALLC: { // CALLC %d // call clojure aint args_count = 
ip_read_int(&s.ip); // args count + aint closure_offset = UNBOX((char*)Belem(*s_nth(args_count), BOX(0))); call_happened = true; s.is_closure_call = true; s.call_ip = s.ip; - - s.ip = (char*)Belem(*s_nth(args_count), BOX(0)); // use offset instead ?? + s.ip = s.bf->code_ptr + closure_offset; break; } @@ -435,6 +454,7 @@ void run_main(Bytefile* bf, int argc, char **argv) { case CMD_CTRL_FAIL: { // FAIL %d %d int line = ip_read_int(&s.ip); int col = ip_read_int(&s.ip); + print_stack(&s); Bmatch_failure(s_pop(), argv[0], BOX(line), BOX(col)); break; } @@ -447,24 +467,10 @@ void run_main(Bytefile* bf, int argc, char **argv) { case CMD_CTRL_BUILTIN: { // BUILTIN %d %d // call builtin size_t builtin_id = ip_read_int(&s.ip); size_t args_count = ip_read_int(&s.ip); // args count - -#ifdef DEBUG_VERSION - printf("builtin id: %zu\n", builtin_id); -#endif -#ifndef WITH_CHECK - if (builtin_id >= BUILTIN_NONE) { - s_failure(&s, "invalid builtin"); - } -#endif - - if (builtin_id == BUILTIN_Barray) { - call_Barray(args_count); - } else { - run_stdlib_func(builtin_id, args_count); - } + call_builtin(builtin_id, args_count); + s.ip = s.call_ip; // TODO: check break; - } - + } default: s_failure(&s, "interpreter: ctrl, invalid opcode"); // %d-%d\n", h, l); } @@ -551,6 +557,7 @@ void run_main(Bytefile* bf, int argc, char **argv) { s_failure(&s, "invalid opcode"); // %d-%d\n", h, l); } + // NOTE: do not clear for now, assume that call are correct if (!call_happened) { s.is_closure_call = false; s.call_ip = NULL; @@ -565,7 +572,7 @@ void run_main(Bytefile* bf, int argc, char **argv) { } while (1); stop:; #ifdef DEBUG_VERSION - printf("--- module run end ---\n"); + printf("--- run end ---\n"); #endif } diff --git a/byterun/src/module_manager.cpp b/byterun/src/module_manager.cpp index 571c1d0e7..c3ca2b1e8 100644 --- a/byterun/src/module_manager.cpp +++ b/byterun/src/module_manager.cpp @@ -27,7 +27,7 @@ template requires(N != 0) void call_func(void (*f)(), Args... 
args) { void *arg = s_pop(); - call_func(f, args..., arg); + call_func(f, arg, args...); // TODO: check that arg is added on the right position } @@ -84,8 +84,10 @@ void rewrite_code_with_offsets(Bytefile *bytefile, const Offsets &offsets) { ip_read_int_unsafe(&read_ip) + offsets.code); break; case Cmd::CLOSURE: { + aint offset = ip_read_int_unsafe(&read_ip); + // NOTE: do not modify offset for builtin's closures ip_write_int_unsafe(write_ip, - ip_read_int_unsafe(&read_ip) + offsets.code); + offset < 0 ? offset : offset + offsets.code); size_t args_count = ip_read_int_unsafe(&read_ip); for (size_t i = 0; i < args_count; ++i) { uint8_t arg_type = ip_read_byte_unsafe(&read_ip); @@ -111,8 +113,33 @@ void rewrite_code_with_offsets(Bytefile *bytefile, const Offsets &offsets) { } } -void subst_in_code(Bytefile *bytefile, - const std::unordered_map &publics) { +struct BuiltinSubst { + BUILTIN id; + uint32_t args_count; + + auto operator<=>(const BuiltinSubst &) const = default; + bool operator==(const BuiltinSubst &) const = default; +}; + +void print_subst_to_bytes(BuiltinSubst subst, char **loc) { + static constexpr const uint8_t builtin_cmd = + ((CMD_CTRL << 4) | CMD_CTRL_BUILTIN); + + **(uint8_t **)loc = builtin_cmd; + *loc += sizeof(uint8_t); + **(uint32_t **)loc = subst.id; + *loc += sizeof(int32_t); + **(uint32_t **)loc = subst.args_count; + *loc += sizeof(int32_t); +} + +using BuiltinSubstMap = std::map; +// std::vector /*subst offsets*/>; + +// TODO: shared iteration over substs in functions +void add_subst_builtin_offsets(BuiltinSubstMap &subst_map, size_t code_offset, + const Bytefile *bytefile) { for (size_t i = 0; i < bytefile->substs_area_size; ++i) { if (i + sizeof(uint32_t) >= bytefile->substs_area_size) { failure("substitution %zu offset is out of area\n", i); @@ -124,7 +151,7 @@ void subst_in_code(Bytefile *bytefile, i += strlen(name); #ifdef DEBUG_VERSION - printf("subst: offset %u, name %s\n", offset, name); + printf("subst: offset 0x%.8x, name %s\n", 
offset, name); #endif if (i > bytefile->substs_area_size) { @@ -135,16 +162,61 @@ void subst_in_code(Bytefile *bytefile, // NOTE: address is first argument of the call if (builtin != BUILTIN_NONE) { - uint8_t cmd = ((CMD_CTRL << 4) | CMD_CTRL_BUILTIN); + char *ip = bytefile->code_ptr + offset; + ip_read_int_unsafe(&ip); // read ptr placeholder + uint32_t args_count = ip_read_int_unsafe(&ip); // read args count + subst_map[{builtin, args_count}] = 0; // .push_back(offset + code_offset); + } + } +} + +// NOTE: unmanaged memory allocated +std::pair gen_builtins(size_t code_offset, + BuiltinSubstMap &subst_map) { + size_t code_size = + subst_map.size() * /*size of builtin command*/ (1 + 2 * sizeof(uint32_t)); + char *code = (char *)malloc(code_size); + + char *code_it = code; + for (auto &subst : subst_map) { + subst.second = code_it - code + code_offset; + print_subst_to_bytes(subst.first, &code_it); + } + + return {code, code_size}; +} + +void subst_in_code(Bytefile *bytefile, + const std::unordered_map &publics, + const BuiltinSubstMap &builtins) { + for (size_t i = 0; i < bytefile->substs_area_size; ++i) { + if (i + sizeof(uint32_t) >= bytefile->substs_area_size) { + failure("substitution %zu offset is out of area\n", i); + } + + uint32_t offset = *(uint32_t *)(bytefile->substs_ptr + i); + i += sizeof(uint32_t); + const char *name = bytefile->substs_ptr + i; + i += strlen(name); + #ifdef DEBUG_VERSION - printf("set builtin %i, offset %i, cmd %u = (%u << 4) | %u, h = %u, l = " - "%u\n", - builtin, offset, cmd, CMD_CTRL, CMD_CTRL_BUILTIN, - (cmd & 0xF0) >> 4, cmd & 0x0F); + printf("subst: offset 0x%.8x, name %s\n", offset, name); #endif - *(uint8_t *)(bytefile->code_ptr + offset - 1) = - cmd; // set BUILTIN command - *(uint32_t *)(bytefile->code_ptr + offset) = builtin; + + if (i > bytefile->substs_area_size) { + failure("substitution %zu name is out of area\n", i); + } + + BUILTIN builtin_id = id_by_builtin(name); + + // NOTE: address is first argument of the call 
and closure, args count is + // second argument + if (builtin_id != BUILTIN_NONE) { + uint32_t *val_ptr = (uint32_t *)(bytefile->code_ptr + offset); + uint32_t args_count = + *(uint32_t *)(bytefile->code_ptr + offset + sizeof(uint32_t)); + + *val_ptr = builtins.at({.id = builtin_id, .args_count = args_count}); continue; } @@ -164,7 +236,7 @@ Offsets calc_merge_sizes(const std::vector &bytefiles) { sizes.strings += bytefiles[i]->stringtab_size; sizes.globals += bytefiles[i]->global_area_size; sizes.code += bytefiles[i]->code_size; - sizes.publics_num += bytefiles[i]->public_symbols_number; + // sizes.publics_num += bytefiles[i]->public_symbols_number; } return sizes; } @@ -177,6 +249,20 @@ struct MergeResult { MergeResult merge_files(std::vector &&bytefiles) { Offsets sizes = calc_merge_sizes(bytefiles); size_t public_symbols_size = calc_publics_size(sizes.publics_num); + + // find all builtin variations ad extract them + BuiltinSubstMap builtins_map; + { + size_t code_offset = 0; + for (size_t i = 0; i < bytefiles.size(); ++i) { + add_subst_builtin_offsets(builtins_map, code_offset, bytefiles[i]); + code_offset += bytefiles[i]->code_size; + } + } + auto [builtins_code, builtins_code_size] = + gen_builtins(sizes.code, builtins_map); + sizes.code += builtins_code_size; + Bytefile *result = (Bytefile *)malloc(sizeof(Bytefile) + sizes.strings + sizes.code + public_symbols_size); // globals are on the stack @@ -190,6 +276,7 @@ MergeResult merge_files(std::vector &&bytefiles) { for (size_t i = 0; i < bytefiles.size(); ++i) { #ifdef DEBUG_VERSION printf("bytefile <%zu>\n", i); + #endif for (size_t j = 0; j < bytefiles[i]->public_symbols_number; ++j) { #ifdef DEBUG_VERSION @@ -218,7 +305,8 @@ MergeResult merge_files(std::vector &&bytefiles) { result->global_area_size = sizes.globals; result->substs_area_size = 0; result->imports_number = 0; - result->public_symbols_number = sizes.publics_num; + result->public_symbols_number = + 0; // sizes.publics_num; // TODO: correctly 
set and update publics result->main_offset = 0; // TODO: save al main offsets in some way (?) result->public_ptr = (int *)result->buffer; @@ -235,7 +323,7 @@ MergeResult merge_files(std::vector &&bytefiles) { // REMOVE printf("rewrite offsets %zu\n", i); rewrite_code_with_offsets(bytefiles[i], offsets); // REMOVE printf("subst in code %zu\n", i); - subst_in_code(bytefiles[i], publics); + subst_in_code(bytefiles[i], publics, builtins_map); size_t publics_offset = calc_publics_size(offsets.publics_num); @@ -244,22 +332,31 @@ MergeResult merge_files(std::vector &&bytefiles) { bytefiles[i]->stringtab_size); memcpy(result->code_ptr + offsets.code, bytefiles[i]->code_ptr, bytefiles[i]->code_size); - memcpy((char *)result->public_ptr + publics_offset, - (char *)bytefiles[i]->public_ptr, - calc_publics_size( - bytefiles[i]->public_symbols_number)); // TODO: recalc publics: - // offsets, strings + // memcpy((char *)result->public_ptr + publics_offset, + // (char *)bytefiles[i]->public_ptr, + // calc_publics_size( + // bytefiles[i]->public_symbols_number)); // TODO: recalc + // publics: + // // offsets, strings // update offsets offsets.strings += bytefiles[i]->stringtab_size; offsets.globals += bytefiles[i]->global_area_size; offsets.code += bytefiles[i]->code_size; - offsets.publics_num += bytefiles[i]->public_symbols_number; + // offsets.publics_num += bytefiles[i]->public_symbols_number; free(bytefiles[i]); } + memcpy(result->code_ptr + offsets.code, builtins_code, builtins_code_size); + free(builtins_code); + #ifdef DEBUG_VERSION + std::cout << "main offsets:\n"; + for (const auto &offset : main_offsets) { + std::cout << offset << '\n'; + } + std::cout << "- merged file:\n"; print_file(*result, std::cout); #endif @@ -272,7 +369,9 @@ Bytefile *path_mod_load(const char *name, std::filesystem::path &&path) { #ifdef DEBUG_VERSION std::cout << "- module path load '" << name << "'\n"; #endif - return read_file(path.c_str()); + Bytefile *file = read_file(path.c_str()); + 
return file; + // return read_file(path.c_str()); } static std::vector search_paths; @@ -354,7 +453,10 @@ MergeResult load_with_imports(Bytefile *root, bool do_verification) { #ifdef DEBUG_VERSION printf("main offsets count: %zu\n", result.main_offsets.size()); #endif - analyze(result.bf /*, std::move(result.main_offsets)*/); + analyze(result.bf, std::move(result.main_offsets)); +#ifdef DEBUG_VERSION + std::cout << "verification done" << std::endl; +#endif } return result; } @@ -390,6 +492,7 @@ struct StdFunc { bool is_vararg = false; }; +// TODO: FIXME: add kind, binops BUILTIN id_by_builtin(const char *name) { static const std::unordered_map std_func = { {"Luppercase", BUILTIN_Luppercase}, @@ -440,6 +543,7 @@ BUILTIN id_by_builtin(const char *name) { } void run_stdlib_func(BUILTIN id, size_t args_count) { + // std::cout << "RUN BUILTIN: " << id << '\n'; // TODO: TMP void *ret = NULL; // TODO: deal with right pointers, etc. switch (id) { @@ -498,8 +602,8 @@ void run_stdlib_func(BUILTIN id, size_t args_count) { s_push(ret); break; case BUILTIN_LmatchSubString: - ret = (void *)LmatchSubString((char *)*s_nth(0), (char *)*s_nth(1), - *s_nth_i(2)); + ret = (void *)LmatchSubString((char *)*s_nth(2), (char *)*s_nth(1), + *s_nth_i(0)); s_popn(3); s_push(ret); break; @@ -533,12 +637,12 @@ void run_stdlib_func(BUILTIN id, size_t args_count) { s_push(ret); break; case BUILTIN_Lcompare: - ret = (void *)Lcompare(*s_nth(0), *s_nth(1)); + ret = (void *)Lcompare(*s_nth(1), *s_nth(0)); s_popn(2); s_push(ret); break; case BUILTIN_LflatCompare: - ret = (void *)LflatCompare(*s_nth(0), *s_nth(1)); + ret = (void *)LflatCompare(*s_nth(1), *s_nth(0)); s_popn(2); s_push(ret); break; @@ -571,7 +675,7 @@ void run_stdlib_func(BUILTIN id, size_t args_count) { call_anyarg_func<20>((void (*)()) & Lprintf, args_count); break; case BUILTIN_Lfopen: - ret = (void *)Lfopen((char *)*s_nth(0), (char *)*s_nth(1)); + ret = (void *)Lfopen((char *)*s_nth(1), (char *)*s_nth(0)); s_popn(2); s_push(ret); 
break; @@ -586,7 +690,7 @@ void run_stdlib_func(BUILTIN id, size_t args_count) { s_push(ret); break; case BUILTIN_Lfwrite: - /*ret = (void *)*/ Lfwrite((char *)*s_nth(0), (char *)*s_nth(1)); + /*ret = (void *)*/ Lfwrite((char *)*s_nth(1), (char *)*s_nth(0)); s_popn(2); // s_push(ret); // NOTE: ?? break; @@ -605,8 +709,8 @@ void run_stdlib_func(BUILTIN id, size_t args_count) { s_push(ret); break; case BUILTIN_LregexpMatch: - ret = (void *)LregexpMatch((struct re_pattern_buffer *)*s_nth(0), - (char *)*s_nth(1), *s_nth_i(2)); + ret = (void *)LregexpMatch((struct re_pattern_buffer *)*s_nth(2), + (char *)*s_nth(1), *s_nth_i(0)); s_popn(2); s_push(ret); break; diff --git a/byterun/src/parser.cpp b/byterun/src/parser.cpp index 50ec7f3c2..28598adac 100644 --- a/byterun/src/parser.cpp +++ b/byterun/src/parser.cpp @@ -272,9 +272,9 @@ template static inline void print_space(std::ostream &out) { template requires(arg == ArgT::INT) -static inline uint read_print_val(char **ip, const Bytefile &bf, +static inline aint read_print_val(char **ip, const Bytefile &bf, std::ostream &out) { - uint val = ip_read_int_safe(ip, &bf); + aint val = ip_read_int_safe(ip, &bf); if constexpr (use_out) { out << val; } @@ -283,11 +283,16 @@ static inline uint read_print_val(char **ip, const Bytefile &bf, template requires(arg == ArgT::OFFSET) -static inline uint read_print_val(char **ip, const Bytefile &bf, +static inline aint read_print_val(char **ip, const Bytefile &bf, std::ostream &out) { - uint val = ip_read_int_safe(ip, &bf); + aint val = ip_read_int_safe(ip, &bf); if constexpr (use_out) { - out << val; + // NOTE: < 0 for builtin closures + if (val >= 0) { + out << std::hex << val << std::dec; + } else { + out << val; + } } return val; } @@ -500,7 +505,8 @@ std::pair parse_command_impl(char **ip, const Bytefile &bf, read_print_cmd_seq_opt(cmd, l, ip, bf, out); print_space(out); if constexpr (do_read_args) { - size_t call_p = read_print_val(ip, bf, out); + /*aint call_offset 
=*/read_print_val(ip, bf, + out); print_space(out); size_t args_count = read_print_val(ip, bf, out); for (size_t i = 0; i < args_count; i++) { @@ -662,16 +668,16 @@ void print_file_info(const Bytefile &bf, std::ostream &out) { void print_file_code(const Bytefile &bf, std::ostream &out) { char *ip = bf.code_ptr; - while (true) { + while (ip - bf.code_ptr < bf.code_size) { out << " " << std::setfill('0') << std::setw(8) << std::hex << ip - bf.code_ptr << ": " << std::dec; const auto [cmd, l] = parse_command(&ip, &bf, out); out << std::endl; - if (cmd == Cmd::EXIT) { - std::cout << "> EXIT" << std::endl; - break; - } + // if (cmd == Cmd::EXIT) { + // std::cout << "> EXIT" << std::endl; + // break; + // } } } diff --git a/byterun/stdlib_regression_check.sh b/byterun/stdlib_regression_check.sh new file mode 100755 index 000000000..18eaf47b5 --- /dev/null +++ b/byterun/stdlib_regression_check.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +dune build + +prefix="../stdlib/regression/" +suffix=".lama" + +compiler=../_build/default/src/Driver.exe + +echo "Used compiler path:" +echo $compiler + +echo "Build modules:" +for mod in ../stdlib/*.lama; do + echo $mod + $compiler -b $mod -I ../stdlib/ +done + +echo "Run tests:" +for test in ../stdlib/regression/*01.lama; do + echo $test + $compiler -b $test -I ../stdlib/ > /dev/null + test_file="${test%.*}" + echo $test_file + # cat $test_file.input | ./byterun.exe -p test*.bc > test.bc.code + # cat $test_file.input | ./byterun.exe -p test*.bc + echo "" | ./byterun.exe -vi test*.bc + echo "" | ./byterun.exe -vi test*.bc > test.log + # sed '1d;s/^..//' $test_file.t > test_orig.log + # diff test.log test_orig.log + + rm test*.bc + # rm test.log test_orig.log + echo "done" +done + +rm *.bc +rm *.o