From 014f249b16ff8da2e2a0c2bffa9f4970b4bbb56c Mon Sep 17 00:00:00 2001 From: ProgramSnail Date: Sun, 15 Dec 2024 00:54:48 +0300 Subject: [PATCH] part of analyzer, interpreter closure processing fix --- byterun/include/analyzer.hpp | 2 +- byterun/include/parser.hpp | 5 +- byterun/include/stack.h | 2 - byterun/src/analyzer.cpp | 304 +++++++++++++++++++++++++++++++---- byterun/src/cli.cpp | 28 +++- byterun/src/interpreter.c | 8 +- byterun/src/parser.cpp | 10 +- 7 files changed, 316 insertions(+), 43 deletions(-) diff --git a/byterun/include/analyzer.hpp b/byterun/include/analyzer.hpp index 249ea679e..5f9049f1b 100644 --- a/byterun/include/analyzer.hpp +++ b/byterun/include/analyzer.hpp @@ -4,4 +4,4 @@ extern "C" { #include "utils.h" } -void analyze(const Bytefile &bf); +void analyze(Bytefile *bf); diff --git a/byterun/include/parser.hpp b/byterun/include/parser.hpp index 969b4f1eb..f8288ef07 100644 --- a/byterun/include/parser.hpp +++ b/byterun/include/parser.hpp @@ -67,5 +67,6 @@ static inline const char *ip_read_string(char **ip, const Bytefile &bf) { return get_string(&bf, ip_read_int(ip, bf)); } -Cmd parse_command(char **ip, const Bytefile &bf); -Cmd parse_command(char **ip, const Bytefile &bf, std::ostream &out); +std::pair parse_command(char **ip, const Bytefile &bf); +std::pair parse_command(char **ip, const Bytefile &bf, + std::ostream &out); diff --git a/byterun/include/stack.h b/byterun/include/stack.h index b58de8ac5..b7f616683 100644 --- a/byterun/include/stack.h +++ b/byterun/include/stack.h @@ -273,8 +273,6 @@ static inline void **var_by_category(enum VarCategory category, size_t id) { "can't read arguments: too big id"); //, %i >= %ul", id, count); } return (void **)d->contents + id; // order is not important - break; - break; } return var; diff --git a/byterun/src/analyzer.cpp b/byterun/src/analyzer.cpp index 7ffd47cb8..ff9dfb593 100644 --- a/byterun/src/analyzer.cpp +++ b/byterun/src/analyzer.cpp @@ -1,78 +1,324 @@ #include "analyzer.hpp" #include 
"parser.hpp" +extern "C" { +#include "types.h" +} + #include // TODO -void analyze(const Bytefile &bf) { - std::vector to_visit; - std::vector visited(bf.code_size, false); - std::vector control_flow_in(bf.code_size, false); +void analyze(Bytefile *bf) { + static constexpr const int NOT_VISITED = -1; + std::vector visited(bf->code_size, NOT_VISITED); // store stack depth + std::vector control_flow_in(bf->code_size, false); // store - // TODO: 2 control flow sets, for functions and for control flow inside - // functions - // + current stack depth - // + ast begin pos + std::vector to_visit_func; + std::vector to_visit_jmp; - auto const to_visit_push = [&visited, &to_visit](size_t offset) { - if (!visited[offset]) { + int current_stack_depth = 0; + const uint globals_count = bf->global_area_size; + uint current_locals_count = 0; + uint current_args_count = 0; + uint16_t *current_begin_counter = nullptr; + + auto const to_visit_push = [&visited, &current_stack_depth](size_t offset, + auto &to_visit) { + if (visited[offset] == NOT_VISITED) { visited[offset] = true; to_visit.push_back(offset); + } else if (visited[offset] != current_stack_depth) { + // TODO: is this condition same for calls? 
+ failure("different stack depth on same point is not allowed"); } }; - auto const control_push = [&to_visit_push, &control_flow_in](size_t offset) { + auto const control_push = [&to_visit_push, &control_flow_in](size_t offset, + auto &to_visit) { control_flow_in[offset] = true; - to_visit_push(offset); + to_visit_push(offset, to_visit); + }; + + auto const check_correct_var = [&globals_count, &current_locals_count, + &current_args_count](uint8_t l, uint id) { + if (l > 3) { + failure("unexpected variable category"); + } + VarCategory category = to_var_category(l); + switch (category) { + case VAR_GLOBAL: + if (id >= globals_count) { + failure("global var index is out of range"); + } + break; + case VAR_LOCAL: + if (id >= current_locals_count) { + failure("local var index is out of range"); + } + break; + case VAR_ARGUMENT: + if (id >= current_args_count) { + failure("argument var index is out of range"); + } + break; + case VAR_CLOSURE: + // NOTE: impossible to properly check there (?) + break; + } }; // add publics - to_visit.reserve(bf.public_symbols_number); - for (size_t i = 0; i < bf.public_symbols_number; ++i) { - control_push(get_public_offset(&bf, i)); + to_visit_func.reserve(bf->public_symbols_number); + for (size_t i = 0; i < bf->public_symbols_number; ++i) { + control_push(get_public_offset(bf, i), to_visit_func); } - if (to_visit.size() == 0) { + if (to_visit_func.size() == 0) { failure("no public symbols detected"); } - while (!to_visit.empty()) { - char *ip = bf.code_ptr + to_visit.back(); - to_visit.pop_back(); + while (true) { + char *ip = bf->code_ptr; + if (current_begin_counter == nullptr) { + if (to_visit_func.empty()) { + break; + } + ip += to_visit_func.back(); + to_visit_func.pop_back(); + current_stack_depth = 0; + } else { + if (to_visit_jmp.empty()) { + current_begin_counter = nullptr; + continue; + } + ip += to_visit_jmp.back(); + to_visit_jmp.pop_back(); + } - if (ip >= bf.code_ptr + bf.code_size) { + if (ip >= bf->code_ptr + bf->code_size) { 
failure("instruction pointer is out of range (>= size)"); } - if (ip < bf.code_ptr) { + if (ip < bf->code_ptr) { failure("instruction pointer is out of range (< 0)"); } char *current_ip = ip; #ifdef DEBUG_VERSION - printf("0x%.8lx \n", current_ip - bf.code_ptr - 1); + printf("0x%.8lx \n", current_ip - bf->code_ptr - 1); #endif - Cmd cmd = parse_command(&ip, bf); + const auto [cmd, l] = parse_command(&ip, *bf); + + if (current_begin_counter == nullptr && cmd != Cmd::BEGIN && + cmd != Cmd::CBEGIN) { + failure("function does not start with begin"); + } + + visited[current_ip - bf->code_ptr] = current_stack_depth; ++current_ip; // skip command byte + char *const saved_current_ip = current_ip; + + size_t extra_stack_during_opr = 0; + // change stack depth + switch (cmd) { + case Cmd::BINOP: + current_stack_depth -= 2; + break; + case Cmd::CONST: + ++current_stack_depth; + break; + case Cmd::STRING: + ++current_stack_depth; + break; + case Cmd::SEXP: + ip_read_string(&current_ip, *bf); + current_stack_depth -= ip_read_int(&current_ip, *bf); + break; + case Cmd::STI: + current_stack_depth -= 2; + if (current_stack_depth < 0) { + failure("not enough elements in stack"); + } + ++current_stack_depth; + break; + case Cmd::STA: + current_stack_depth -= 3; + if (current_stack_depth < 0) { + failure("not enough elements in stack"); + } + ++current_stack_depth; + break; + case Cmd::JMP: + break; + case Cmd::END: + --current_stack_depth; + break; + case Cmd::RET: + --current_stack_depth; + break; + case Cmd::DROP: + --current_stack_depth; + break; + case Cmd::DUP: + ++current_stack_depth; + break; + case Cmd::SWAP: + if (current_stack_depth < 2) { + failure("not enough elements in stack"); + } + break; + case Cmd::ELEM: + current_stack_depth -= 2; + if (current_stack_depth < 0) { + failure("not enough elements in stack"); + } + ++current_stack_depth; + break; + case Cmd::LD: + check_correct_var(l, ip_read_int(&current_ip, *bf)); + ++current_stack_depth; + break; + case Cmd::LDA: + 
check_correct_var(l, ip_read_int(&current_ip, *bf)); + ++current_stack_depth; + break; + case Cmd::ST: + check_correct_var(l, ip_read_int(&current_ip, *bf)); + if (current_stack_depth < 1) { + failure("not enough elements in stack"); + } + break; + case Cmd::CJMPz: + case Cmd::CJMPnz: + --current_stack_depth; + break; + case Cmd::BEGIN: + case Cmd::CBEGIN: + // TODO: remember & check needed args count + if (current_begin_counter != nullptr) { + failure("unexpected function beginning"); + } + current_begin_counter = (uint16_t *)(current_ip + sizeof(uint16_t)); + *current_begin_counter = 0; + current_args_count = ip_read_int(&current_ip, *bf); // TODO: read int16 ?? + current_locals_count = ip_read_int(&current_ip, *bf); + break; + case Cmd::CLOSURE: { + // // TODO: checks ?? + ip_read_int(&ip, *bf); // offset + size_t args_count = ip_read_int(&current_ip, *bf); // args count + extra_stack_during_opr = args_count; + for (aint i = 0; i < args_count; i++) { + aint arg_type = ip_read_byte(&current_ip, *bf); + aint arg_id = ip_read_int(&current_ip, *bf); + check_correct_var(arg_type, arg_id); + } + ++current_stack_depth; + } break; + case Cmd::CALLC: { + current_stack_depth -= + ip_read_int(&current_ip, *bf) + 1; // + closure itself + if (current_stack_depth < 0) { + failure("not enough elements in stack"); + } + ++current_stack_depth; + // TODO: check args == cbegin args (?) 
+ } break; + case Cmd::CALL: { + ip_read_int(&current_ip, *bf); // call offset + current_stack_depth -= ip_read_int(&current_ip, *bf); + if (current_stack_depth < 0) { + failure("not enough elements in stack"); + } + ++current_stack_depth; + // TODO: check args == begin args + } break; + case Cmd::TAG: + if (current_stack_depth < 1) { + failure("not enough elements in stack"); + } + break; + case Cmd::ARRAY: + if (current_stack_depth < 1) { + failure("not enough elements in stack"); + } + break; + case Cmd::FAIL: + --current_stack_depth; + break; + case Cmd::LINE: + break; + case Cmd::PATT: + --current_stack_depth; + if (l == 0) { // to arg for '=str' // FIXME: magic const + --current_stack_depth; + } + if (current_stack_depth < 0) { + failure("not enough elements in stack"); + } + ++current_stack_depth; + break; + case Cmd::Lread: + ++current_stack_depth; + break; + case Cmd::Lwrite: + case Cmd::Llength: + case Cmd::Lstring: + if (current_stack_depth < 1) { + failure("not enough elements in stack"); + } + break; + case Cmd::Barray: + current_stack_depth -= ip_read_int(&current_ip, *bf); + if (current_stack_depth < 0) { + failure("not enough elements in stack"); + } + ++current_stack_depth; + break; + case Cmd::EXIT: + failure("exit should be unreachable"); // NOTE: not sure + break; + case Cmd::_UNDEF_: + failure("undefined command"); + break; + } + + if (current_begin_counter == nullptr) { + failure("function does not start with begin"); + } + + if (current_stack_depth < 0) { + failure("not enough elements in stack"); + } + + *current_begin_counter = + std::max(*current_begin_counter, + (uint16_t)(current_stack_depth + extra_stack_during_opr)); + + current_ip = saved_current_ip; + // do jumps switch (cmd) { case Cmd::EXIT: case Cmd::END: + case Cmd::FAIL: break; case Cmd::CJMPz: case Cmd::CJMPnz: case Cmd::CLOSURE: case Cmd::CALL: - to_visit_push(ip - bf.code_ptr); + to_visit_push(ip - bf->code_ptr, to_visit_jmp); case Cmd::JMP: { - uint jmp_p = ip_read_int(&current_ip, bf); - if (jmp_p 
>= bf.code_size) { - failure("jump/call out of file"); + bool is_call = (cmd == Cmd::CLOSURE || cmd == Cmd::CALL); + + uint jmp_p = ip_read_int(&current_ip, *bf); + if (jmp_p >= bf->code_size) { // TODO: check that > begin (?) + failure("jump out of file"); } - control_push(jmp_p); + control_push(jmp_p, is_call ? to_visit_func : to_visit_jmp); break; } @@ -81,7 +327,7 @@ void analyze(const Bytefile &bf) { break; default: - to_visit_push(ip - bf.code_ptr); + to_visit_push(ip - bf->code_ptr, to_visit_jmp); break; } } diff --git a/byterun/src/cli.cpp b/byterun/src/cli.cpp index f13463f15..4160c609d 100644 --- a/byterun/src/cli.cpp +++ b/byterun/src/cli.cpp @@ -5,16 +5,40 @@ extern "C" { #include "utils.h" } +#include "analyzer.hpp" + int main(int argc, char **argv) { if (argc < 2) { + failure("no execution option"); + } + + bool do_verification = false; + bool do_interpretation = false; + if (strcmp(argv[1], "-vi") == 0) { + do_verification = true; + do_interpretation = true; + } else if (strcmp(argv[1], "-i") == 0) { + do_interpretation = true; + } else if (strcmp(argv[1], "-v") == 0) { + do_verification = true; + } else { + failure("wrong execution option (acceptable options - '-i', '-v', '-vi')"); + } + + if (argc < 3) { failure("no file name provided"); } - Bytefile *f = read_file(argv[1]); + Bytefile *f = read_file(argv[2]); // #ifdef DEBUG_VERSION // dump_file (stdout, f); // #endif - run(f, argc - 1, argv + 1); + if (do_verification) { + analyze(f); + } + if (do_interpretation) { // TODO: switch between enabled/disabled verification + run(f, argc - 2, argv + 2); + } free(f->global_ptr); free(f); diff --git a/byterun/src/interpreter.c b/byterun/src/interpreter.c index a098d74a2..0afb646b8 100644 --- a/byterun/src/interpreter.c +++ b/byterun/src/interpreter.c @@ -267,6 +267,7 @@ void run(Bytefile *bf, int argc, char **argv) { break; } + // TODO: read req stack from second part or args case CMD_CTRL_BEGIN: { // BEGIN %d %d // function begin int args_sz = 
ip_read_int(&s.ip); int locals_sz = ip_read_int(&s.ip); @@ -278,6 +279,7 @@ void run(Bytefile *bf, int argc, char **argv) { break; } + // TODO: read req stack from second part of args case CMD_CTRL_CBEGIN: { // CBEGIN %d %d // NOTE: example not found, no checks done int args_sz = ip_read_int(&s.ip); @@ -295,11 +297,11 @@ void run(Bytefile *bf, int argc, char **argv) { aint call_offset = ip_read_int(&s.ip); aint args_count = ip_read_int(&s.ip); for (aint i = 0; i < args_count; i++) { - aint arg_type = ip_read_byte(&s.ip); - aint arg_id = ip_read_int(&s.ip); + uint8_t arg_type = ip_read_byte(&s.ip); + auint arg_id = ip_read_int(&s.ip); void **var_ptr = - var_by_category(to_var_category(l), ip_read_int(&s.ip)); + var_by_category(to_var_category(arg_type), arg_id); s_push(*var_ptr); } if (call_offset >= bf->code_size) { diff --git a/byterun/src/parser.cpp b/byterun/src/parser.cpp index 5274d8f11..3a449ce8b 100644 --- a/byterun/src/parser.cpp +++ b/byterun/src/parser.cpp @@ -320,7 +320,8 @@ static inline void read_print_cmd_seq(Cmd cmd, uint8_t l, char **ip, } template -Cmd parse_command_impl(char **ip, const Bytefile &bf, std::ostream &out) { +std::pair parse_command_impl(char **ip, const Bytefile &bf, + std::ostream &out) { static const char *const ops[] = { #define OP_TO_STR(id, op) "op", FORALL_BINOP(OP_TO_STR) @@ -562,13 +563,14 @@ Cmd parse_command_impl(char **ip, const Bytefile &bf, std::ostream &out) { #ifdef DEBUG_VERSION std::cout << command_name(cmd, l) << '\n'; #endif - return cmd; + return {cmd, l}; } -Cmd parse_command(char **ip, const Bytefile &bf) { +std::pair parse_command(char **ip, const Bytefile &bf) { return parse_command_impl(ip, bf, std::clog); } -Cmd parse_command(char **ip, const Bytefile &bf, std::ostream &out) { +std::pair parse_command(char **ip, const Bytefile &bf, + std::ostream &out) { return parse_command_impl(ip, bf, out); }