fix interpreter and analyzer with new algorithm (with bugs)

This commit is contained in:
ProgramSnail 2025-03-02 15:05:09 +03:00
parent 343a21ee2d
commit 58c9fd77c2
21 changed files with 3489 additions and 559 deletions

View file

@ -9,13 +9,11 @@ extern "C" {
#include <vector>
void analyze(uint32_t mod_id) {
Bytefile *bf = mod_get(mod_id);
void analyze(Bytefile *bf, std::vector<size_t> &&add_publics) {
static constexpr const int NOT_VISITED = -1;
std::vector<int> visited(bf->code_size, NOT_VISITED); // store stack depth
std::vector<size_t> to_visit_func;
std::vector<size_t> to_visit_func = std::move(add_publics);
std::vector<size_t> to_visit_jmp;
int current_stack_depth = 0;
@ -30,57 +28,55 @@ void analyze(uint32_t mod_id) {
char *current_ip = ip;
char *saved_current_ip = current_ip;
auto const jmp_to_visit_push = [&saved_current_ip, mod_id, &visited,
auto const jmp_to_visit_push = [&saved_current_ip, bf, &visited,
&current_stack_depth,
&to_visit_jmp](size_t offset) {
if (visited[offset] == NOT_VISITED) {
visited[offset] = current_stack_depth;
to_visit_jmp.push_back(offset);
} else if (visited[offset] != current_stack_depth) {
ip_failure(saved_current_ip, mod_id,
ip_failure(saved_current_ip, bf,
"different stack depth on same point is not allowed");
}
};
auto const func_to_visit_push = [&saved_current_ip, mod_id, &visited,
auto const func_to_visit_push = [&saved_current_ip, bf, &visited,
&to_visit_func](size_t offset) {
if (visited[offset] == NOT_VISITED) {
visited[offset] = 0;
to_visit_func.push_back(offset);
} else if (visited[offset] != 0) {
ip_failure(saved_current_ip, mod_id,
ip_failure(saved_current_ip, bf,
"different stack depth on same point is not allowed");
}
};
auto const check_correct_var = [&saved_current_ip, mod_id, &globals_count,
auto const check_correct_var = [&saved_current_ip, bf, &globals_count,
&current_locals_count, &current_args_count,
&is_in_closure](uint8_t l, uint id) {
if (l > 3) {
ip_failure(saved_current_ip, mod_id, "unexpected variable category");
ip_failure(saved_current_ip, bf, "unexpected variable category");
}
VarCategory category = to_var_category(l);
switch (category) {
case VAR_GLOBAL:
if (id >= globals_count) {
ip_failure(saved_current_ip, mod_id,
"global var index is out of range");
ip_failure(saved_current_ip, bf, "global var index is out of range");
}
break;
case VAR_LOCAL:
if (id >= current_locals_count) {
ip_failure(saved_current_ip, mod_id, "local var index is out of range");
ip_failure(saved_current_ip, bf, "local var index is out of range");
}
break;
case VAR_ARGUMENT:
if (id >= current_args_count) {
ip_failure(saved_current_ip, mod_id,
"argument var index is out of range");
ip_failure(saved_current_ip, bf, "argument var index is out of range");
}
break;
case VAR_CLOSURE:
if (!is_in_closure) {
ip_failure(saved_current_ip, mod_id,
ip_failure(saved_current_ip, bf,
"can't access closure vars outside of closure");
}
// NOTE: impossible to properly check bounds there
@ -89,13 +85,13 @@ void analyze(uint32_t mod_id) {
};
// add publics
to_visit_func.reserve(bf->public_symbols_number);
to_visit_func.reserve(bf->public_symbols_number + to_visit_func.size());
for (size_t i = 0; i < bf->public_symbols_number; ++i) {
func_to_visit_push(get_public_offset_safe(bf, i));
}
if (to_visit_func.size() == 0) {
failure("no public symbols detected");
failure("no public symbols detected\n");
}
while (true) {
@ -112,7 +108,8 @@ void analyze(uint32_t mod_id) {
if (to_visit_jmp.empty()) {
current_begin_counter = nullptr;
if (func_end_found != 1) {
failure("each function should have exactly one end");
failure("each function should have exactly one end (%zu found)\n",
func_end_found);
}
continue;
}
@ -121,12 +118,11 @@ void analyze(uint32_t mod_id) {
}
if (ip >= bf->code_ptr + bf->code_size) {
ip_safe_failure(ip, mod_id,
"instruction pointer is out of range (>= size)");
ip_safe_failure(ip, bf, "instruction pointer is out of range (>= size)");
}
if (ip < bf->code_ptr) {
ip_safe_failure(ip, mod_id, "instruction pointer is out of range (< 0)");
ip_safe_failure(ip, bf, "instruction pointer is out of range (< 0)");
}
current_ip = ip;
@ -134,18 +130,18 @@ void analyze(uint32_t mod_id) {
#ifdef DEBUG_VERSION
const auto [cmd, l] = parse_command(&ip, bf, std::cout);
std::cout << '\n';
#else
const auto [cmd, l] = parse_command(&ip, bf);
#endif
if (current_begin_counter == nullptr && cmd != Cmd::BEGIN &&
cmd != Cmd::CBEGIN) {
ip_failure(saved_current_ip, mod_id,
"function does not start with begin");
ip_failure(saved_current_ip, bf, "function does not start with begin");
}
if (visited[current_ip - bf->code_ptr] == NOT_VISITED) {
ip_failure(saved_current_ip, mod_id, "not visited command");
ip_failure(saved_current_ip, bf, "not visited command");
}
current_stack_depth = visited[current_ip - bf->code_ptr];
@ -161,7 +157,7 @@ void analyze(uint32_t mod_id) {
case Cmd::BINOP:
current_stack_depth -= 2;
if (current_stack_depth < 0) {
ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
ip_failure(saved_current_ip, bf, "not enough elements in stack");
}
++current_stack_depth;
break;
@ -175,21 +171,21 @@ void analyze(uint32_t mod_id) {
ip_read_string_unsafe(&current_ip, bf);
current_stack_depth -= ip_read_int_unsafe(&current_ip);
if (current_stack_depth < 0) {
ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
ip_failure(saved_current_ip, bf, "not enough elements in stack");
}
++current_stack_depth;
break;
case Cmd::STI:
current_stack_depth -= 2;
if (current_stack_depth < 0) {
ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
ip_failure(saved_current_ip, bf, "not enough elements in stack");
}
++current_stack_depth;
break;
case Cmd::STA:
current_stack_depth -= 3;
if (current_stack_depth < 0) {
ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
ip_failure(saved_current_ip, bf, "not enough elements in stack");
}
++current_stack_depth;
break;
@ -207,19 +203,19 @@ void analyze(uint32_t mod_id) {
break;
case Cmd::DUP:
if (current_stack_depth < 1) {
ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
ip_failure(saved_current_ip, bf, "not enough elements in stack");
}
++current_stack_depth;
break;
case Cmd::SWAP:
if (current_stack_depth < 2) {
ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
ip_failure(saved_current_ip, bf, "not enough elements in stack");
}
break;
case Cmd::ELEM:
current_stack_depth -= 2;
if (current_stack_depth < 0) {
ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
ip_failure(saved_current_ip, bf, "not enough elements in stack");
}
++current_stack_depth;
break;
@ -234,7 +230,7 @@ void analyze(uint32_t mod_id) {
case Cmd::ST:
check_correct_var(l, ip_read_int_unsafe(&current_ip));
if (current_stack_depth < 1) {
ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
ip_failure(saved_current_ip, bf, "not enough elements in stack");
}
break;
case Cmd::CJMPz:
@ -244,13 +240,14 @@ void analyze(uint32_t mod_id) {
case Cmd::BEGIN:
case Cmd::CBEGIN:
if (current_begin_counter != nullptr) {
ip_failure(saved_current_ip, mod_id, "unexpected function beginning");
ip_failure(saved_current_ip, bf, "unexpected function beginning");
}
current_args_count = ip_read_int_unsafe(&current_ip);
current_begin_counter = (uint16_t *)(current_ip + sizeof(uint16_t));
current_locals_count = ip_read_int_unsafe(&current_ip);
if (current_locals_count >= std::numeric_limits<uint16_t>::max()) {
ip_failure(saved_current_ip, mod_id, "too many locals in functions");
std::cerr << current_locals_count << " locals" << '\n';
ip_failure(saved_current_ip, bf, "too many locals in functions");
}
(*(uint16_t *)(current_ip - sizeof(uint16_t))) = current_locals_count;
*current_begin_counter = 0;
@ -269,12 +266,12 @@ void analyze(uint32_t mod_id) {
++current_stack_depth;
// if (closure_offset >= bf->code_size) {
// ip_failure(saved_current_ip, mod_id, "jump/call out of file");
// ip_failure(saved_current_ip, bf, "jump/call out of file");
// }
// NOTE: is not always true
// if (!is_command_name(bf->code_ptr + closure_offset, bf, Cmd::CBEGIN)) {
// ip_failure(saved_current_ip, mod_id, "closure should point to
// ip_failure(saved_current_ip, bf, "closure should point to
// cbegin");
// }
} break;
@ -282,7 +279,7 @@ void analyze(uint32_t mod_id) {
uint args_count = ip_read_int_unsafe(&current_ip);
current_stack_depth -= args_count + 1; // + closure itself
if (current_stack_depth < 0) {
ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
ip_failure(saved_current_ip, bf, "not enough elements in stack");
}
++current_stack_depth;
// NOTE: can't check args == cbegin args
@ -292,29 +289,29 @@ void analyze(uint32_t mod_id) {
uint args_count = ip_read_int_unsafe(&current_ip);
current_stack_depth -= args_count;
if (current_stack_depth < 0) {
ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
ip_failure(saved_current_ip, bf, "not enough elements in stack");
}
++current_stack_depth;
if ((int)call_offset >= bf->code_size) {
ip_failure(saved_current_ip, mod_id, "jump/call out of file");
ip_failure(saved_current_ip, bf, "jump/call out of file");
}
if (!is_command_name(bf->code_ptr + call_offset, bf, Cmd::BEGIN)) {
ip_failure(saved_current_ip, mod_id, "call should point to begin");
ip_failure(saved_current_ip, bf, "call should point to begin");
}
if (args_count != *(uint *)(bf->code_ptr + call_offset + 1)) {
ip_failure(saved_current_ip, mod_id, "wrong call argument count");
ip_failure(saved_current_ip, bf, "wrong call argument count");
}
} break;
case Cmd::TAG:
if (current_stack_depth < 1) {
ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
ip_failure(saved_current_ip, bf, "not enough elements in stack");
}
break;
case Cmd::ARRAY:
if (current_stack_depth < 1) {
ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
ip_failure(saved_current_ip, bf, "not enough elements in stack");
}
break;
case Cmd::FAIL:
@ -322,25 +319,31 @@ void analyze(uint32_t mod_id) {
break;
case Cmd::LINE:
break;
// case Cmd::CALLF: {
// // TODO: find link to real function and replace call (need to save all
// // modules in one space) <- optimization
case Cmd::BUILTIN: {
std::cout << "builtin\n";
// TODO: find link to real function and replace call (need to save all
// modules in one space) <- optimization
// ip_read_int_unsafe(&current_ip); // function name (str)
// uint args_count = ip_read_int_unsafe(&current_ip);
// current_stack_depth -= args_count;
// if (current_stack_depth < 0) {
// ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
// }
// ++current_stack_depth;
// } break;
size_t id = ip_read_int_unsafe(&current_ip); // builtin id
if (id >= BUILTIN_NONE) {
ip_failure(saved_current_ip, bf, "undefined builtin id");
}
uint args_count = ip_read_int_unsafe(&current_ip);
current_stack_depth -= args_count;
if (current_stack_depth < 0) {
ip_failure(saved_current_ip, bf, "not enough elements in stack");
}
++current_stack_depth;
} break;
case Cmd::PATT:
--current_stack_depth;
if (l == CMD_PATT_STR) {
--current_stack_depth;
}
if (current_stack_depth < 0) {
ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
ip_failure(saved_current_ip, bf, "not enough elements in stack");
}
++current_stack_depth;
break;
@ -352,32 +355,31 @@ void analyze(uint32_t mod_id) {
// case Cmd::Llength:
// case Cmd::Lstring:
// if (current_stack_depth < 1) {
// ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
// ip_failure(saved_current_ip, bf, "not enough elements in stack");
// }
// break;
// case Cmd::Barray:
// current_stack_depth -= ip_read_int_unsafe(&current_ip); // elem count
// if (current_stack_depth < 0) {
// ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
// ip_failure(saved_current_ip, bf, "not enough elements in stack");
// }
// ++current_stack_depth;
// break;
case Cmd::EXIT:
ip_failure(saved_current_ip, mod_id,
ip_failure(saved_current_ip, bf,
"exit should be unreachable"); // NOTE: not sure
break;
case Cmd::_UNDEF_:
ip_failure(saved_current_ip, mod_id, "undefined command");
ip_failure(saved_current_ip, bf, "undefined command");
break;
}
if (current_begin_counter == nullptr) {
ip_failure(saved_current_ip, mod_id,
"function does not start with begin");
ip_failure(saved_current_ip, bf, "function does not start with begin");
}
if (current_stack_depth < 0) {
ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
ip_failure(saved_current_ip, bf, "not enough elements in stack");
}
*current_begin_counter =
@ -404,7 +406,7 @@ void analyze(uint32_t mod_id) {
uint jmp_p = ip_read_int_unsafe(&current_ip);
if ((int)jmp_p >= bf->code_size) {
// NOTE: maybe also should check that > begin (?)
ip_failure(saved_current_ip, mod_id, "jump/call out of file");
ip_failure(saved_current_ip, bf, "jump/call out of file");
}
if (is_call) {
func_to_visit_push(jmp_p);
@ -415,7 +417,7 @@ void analyze(uint32_t mod_id) {
}
case Cmd::_UNDEF_:
ip_failure(saved_current_ip, mod_id, "undefined command");
ip_failure(saved_current_ip, bf, "undefined command");
break;
default:

View file

@ -42,42 +42,32 @@ int main(int argc, char **argv) {
}
#ifdef DEBUG_VERSION
std::cerr << "- read code file" << std::endl;
std::cout << "- read code file" << std::endl;
#endif
Bytefile *f = read_file(argv[2]);
if (do_print) {
#ifdef DEBUG_VERSION
std::cerr << "- print code file" << std::endl;
std::cout << "- print code file" << std::endl;
#endif
print_file(*f, std::cout);
free(f);
}
if (do_verification || do_interpretation) {
#ifdef DEBUG_VERSION
std::cerr << "- init stack" << std::endl;
std::cout << "- init stack" << std::endl;
#endif
size_t stack[STACK_SIZE];
run_init(stack);
#ifdef DEBUG_VERSION
std::cerr << "- add main module" << std::endl;
std::cout << "- run with imports" << std::endl;
#endif
uint main_mod_id = mod_add(f, do_verification);
if (do_interpretation) {
#ifdef DEBUG_VERSION
std::cerr << "- start interpretation" << std::endl;
#endif
run_mod_rec(main_mod_id, argc - 2, argv + 2, do_verification);
}
f = run_with_imports(f, argc - 2, argv + 2, do_verification);
}
mod_cleanup();
free(f);
return 0;
}

View file

@ -328,7 +328,7 @@ struct ArgumentLocation {
};
/* We need to know the word size to calculate offsets correctly */
constexpr auto word_size = 8;
constexpr size_t word_size = 8;
const Register::T &as_register(const Opnd &opnd) {
return std::visit(
@ -1278,7 +1278,7 @@ std::vector<Instr> compile_binop(Env &env, Opr op) {
switch (op) {
case Opr::DIV:
return with_rdx([&x, &y](const auto &rdx) -> std::vector<Instr> {
return {Mov{y, rax}, Sar1{rax}, Binop{"^", rdx, rdx},
return {Mov{y, rax}, Sar1{rax}, Binop{Opr::XOR, rdx, rdx},
Cltd{}, Sar1{x}, IDiv{x},
Sal1{rax}, Or1{rax}, Mov{rax, y}};
});
@ -1301,7 +1301,7 @@ std::vector<Instr> compile_binop(Env &env, Opr op) {
return {
Binop{Opr::XOR, rax, rax},
Mov{x, extra},
Binop{"cmp", extra, y},
Binop{Opr::CMP, extra, y},
Set{suffix(op), Registers::rax},
Sal1{rax},
Or1{rax},

View file

@ -58,7 +58,7 @@ void run_init(size_t *stack) {
init_state(&s, (void**)stack);
}
void run_prepare_exec(int argc, char **argv) {
void set_argc_argv(int argc, char **argv) {
s_push_i(BOX(argc));
#ifdef DEBUG_VERSION
printf("- argc: %i\n", argc);
@ -80,38 +80,7 @@ void run_prepare_exec(int argc, char **argv) {
#endif
}
void run_mod_rec(uint mod_id, int argc, char **argv, bool do_verification) {
Bytefile* mod = mod_get(mod_id);
#ifdef DEBUG_VERSION
printf("- run mod rec, %i imports\n", mod->imports_number);
#endif
for (size_t i = 0; i < mod->imports_number; ++i) {
const char* import_str = get_import_safe(mod, i);
if (find_mod_loaded(import_str) < 0 && strcmp(import_str, "Std") != 0) { // not loaded
#ifdef DEBUG_VERSION
printf("- mod load <%s>\n", import_str);
#endif
int32_t import_mod = mod_load(import_str, do_verification);
if (import_mod < 0) {
failure("module <%s> not found\n", import_str);
}
#ifdef DEBUG_VERSION
printf("- mod run <%s>\n", import_str);
#endif
run_mod_rec(import_mod, argc, argv, do_verification);
}
}
init_mod_state(mod_id, &s);
init_mod_state_globals(&s);
run_prepare_exec(argc, argv); // args for module main
run_mod(mod_id, argc, argv);
cleanup_state(&s);
}
static inline void call_Barray(size_t elem_count, char** ip, void** buffer) {
void call_Barray(size_t elem_count, char** ip, void** buffer) {
// size_t elem_count = ip_read_int(ip);
bool use_new_buffer = (elem_count > BUFFER_SIZE);
@ -149,17 +118,17 @@ static inline void call_Barray(size_t elem_count, char** ip, void** buffer) {
}
}
void run_mod(uint mod_id, int argc, char **argv) {
void run_main(Bytefile* bf, int argc, char **argv) {
#ifdef DEBUG_VERSION
printf("--- module init state ---\n");
printf("--- init state ---\n");
#endif
init_mod_state(mod_id, &s);
prepare_state(bf, &s);
void *buffer[BUFFER_SIZE];
#ifdef DEBUG_VERSION
printf("--- module run begin ---\n");
printf("--- run begin ---\n");
#endif
do {
@ -178,9 +147,9 @@ void run_mod(uint mod_id, int argc, char **argv) {
s.instr_ip = s.ip;
uint8_t x = ip_read_byte(&s.ip), h = (x & 0xF0) >> 4, l = x & 0x0F;
#ifdef DEBUG_VERSION
// #ifdef DEBUG_VERSION
printf("0x%.8x: %s\n", s.ip - s.bf->code_ptr - 1, read_cmd(s.ip - 1, s.bf));
#endif
// #endif
switch (h) {
case CMD_EXIT:
@ -207,7 +176,7 @@ void run_mod(uint mod_id, int argc, char **argv) {
#undef BINOP_OPR
default:
s_failure(&s, "invalid opcode"); // %d-%d\n", h, l);
s_failure(&s, "interpreter: invalid opcode"); // %d-%d\n", h, l);
break;
}
}
@ -343,7 +312,7 @@ void run_mod(uint mod_id, int argc, char **argv) {
} break;
default:
s_failure(&s, "invalid opcode"); // %d-%d\n", h, l);
s_failure(&s, "interpreter: basic, invalid opcode"); // %d-%d\n", h, l);
}
break;
@ -407,7 +376,7 @@ void run_mod(uint mod_id, int argc, char **argv) {
s_failure(&s, "begin should only be called after call");
}
#endif
s_enter_f(s.call_ip /*ip from call*/, s.call_module_id,
s_enter_f(s.call_ip /*ip from call*/,
s.is_closure_call, args_sz, locals_sz);
#ifndef WITH_CHECK
if ((void **)__gc_stack_top + (aint)max_additional_stack_sz - 1 <= s.stack) {
@ -431,7 +400,7 @@ void run_mod(uint mod_id, int argc, char **argv) {
s_failure(&s, "begin should only be called after call");
}
#endif
s_enter_f(s.call_ip /*ip from call*/, s.call_module_id,
s_enter_f(s.call_ip /*ip from call*/,
s.is_closure_call, args_sz, locals_sz);
#ifdef WITH_CHECK
if ((void **)__gc_stack_top + (aint)max_additional_stack_sz - 1 <= s.stack) {
@ -474,7 +443,6 @@ void run_mod(uint mod_id, int argc, char **argv) {
call_happened = true;
s.is_closure_call = true;
s.call_ip = s.ip;
s.call_module_id = s.current_module_id;
s.ip = (char*)Belem(*s_nth(args_count), BOX(0)); // use offset instead ??
break;
@ -487,7 +455,6 @@ void run_mod(uint mod_id, int argc, char **argv) {
call_happened = true;
s.is_closure_call = false;
s.call_ip = s.ip;
s.call_module_id = s.current_module_id;
#ifndef WITH_CHECK
if (call_p >= s.bf->code_size) {
@ -527,42 +494,29 @@ void run_mod(uint mod_id, int argc, char **argv) {
// maybe some metainfo should be collected
break;
// case CMD_CTRL_CALLF: { // CALLF %s %d // call external function
// const char *call_func_name = ip_read_string(&s.ip);
// size_t args_count = ip_read_int(&s.ip); // args count
case CMD_CTRL_BUILTIN: { // BUILTIN %d %d // call builtin
size_t builtin_id = ip_read_int(&s.ip);
size_t args_count = ip_read_int(&s.ip); // args count
// if (run_stdlib_func(call_func_name, args_count)) {
// // case of stdlib function
// break;
// }
printf("builtin id: %zu\n", builtin_id);
// #ifndef WITH_CHECK
if (builtin_id >= BUILTIN_NONE) {
s_failure(&s, "invalid builtin");
}
// #endif
// if (strcmp(call_func_name, ".array") == 0) {
// call_Barray(args_count, &s.ip, buffer);
// break;
// }
// struct ModSearchResult func = mod_search_pub_symbol(call_func_name);
// if (func.mod_file == NULL) {
// failure("RUNTIME ERROR: external function <%s> with <%zu> args not found\n", call_func_name, args_count);
// }
// call_happened = true;
// s.is_closure_call = false;
// s.call_ip = s.ip;
// s.call_module_id = s.current_module_id;
// s.current_module_id = func.mod_id;
// s.bf = func.mod_file;
// if (func.symbol_offset >= s.bf->code_size) {
// s_failure(&s, "jump out of file");
// }
// s.ip = s.bf->code_ptr + func.symbol_offset;
// break;
// }
if (builtin_id == BUILTIN_Barray) {
call_Barray(args_count, &s.ip, buffer);
} else {
run_stdlib_func(builtin_id, args_count);
}
printf("builtin end\n");
fflush(stdout);
break;
}
default:
s_failure(&s, "invalid opcode"); // %d-%d\n", h, l);
s_failure(&s, "interpreter: ctrl, invalid opcode"); // %d-%d\n", h, l);
}
break;
@ -650,7 +604,6 @@ void run_mod(uint mod_id, int argc, char **argv) {
if (!call_happened) {
s.is_closure_call = false;
s.call_ip = NULL;
s.call_module_id = 0;
}
if (s.fp == NULL) {

View file

@ -1,4 +1,6 @@
#include <iostream>
extern "C" {
#include "interpreter.h"
#include "module_manager.h"
#include "runtime_externs.h"
#include "stack.h"
@ -9,6 +11,7 @@ extern "C" {
#include "parser.hpp"
#include <filesystem>
#include <map>
#include <optional>
#include <string>
#include <unordered_map>
@ -42,146 +45,336 @@ void call_anyarg_func(void (*f)(), size_t n) {
}
}
struct ModSymbolPos {
uint32_t mod_id;
size_t offset;
// ---
// Per-section totals used while merging bytefiles: calc_merge_sizes fills it
// with the sums over all inputs, and merge_files keeps a second instance as
// the running offsets of the input currently being copied.
struct Offsets {
// sum of stringtab_size; added to STRING operands by rewrite_code_with_offsets
size_t strings;
// sum of global_area_size; added to indices of global variables
size_t globals;
// sum of code_size; added to JMP/CJMPz/CJMPnz/CALL/CLOSURE targets
size_t code;
// sum of public_symbols_number; passed to calc_publics_size
size_t publics_num;
};
struct Module {
std::string name;
Bytefile *bf;
};
void rewrite_code_with_offsets(Bytefile *bytefile, const Offsets &offsets) {
// TODO: globals offsets
struct ModuleManager {
std::unordered_map<std::string, uint32_t> loaded_modules;
std::unordered_map<std::string, ModSymbolPos> public_symbols_mods;
std::vector<Module> modules;
std::vector<std::filesystem::path> search_paths;
};
char *ip = bytefile->code_ptr;
while (ip - bytefile->code_ptr < bytefile->code_size) {
char *instr_ip = ip;
const auto [cmd, l] = parse_command(&ip, bytefile);
static ModuleManager manager;
uint32_t mod_add_impl(Bytefile *bf, bool do_verification,
std::optional<const char *> name = std::nullopt) {
#ifdef DEBUG_VERSION
std::cerr << "- add module (impl) '" << std::string{name ? *name : ""}
<< "'\n";
#endif
uint32_t id = manager.modules.size();
manager.modules.push_back({.name = name ? *name : "", .bf = bf});
for (size_t i = 0; i < bf->public_symbols_number; ++i) {
const char *public_name = get_public_name_safe(bf, i);
#ifdef DEBUG_VERSION
std::cerr << "- load public " << public_name << "\n";
#endif
size_t public_offset = get_public_offset_safe(bf, i);
if (strcmp(public_name, "main") == 0) {
bf->main_offset = public_offset;
} else if (!manager.public_symbols_mods
.insert(
{public_name, {.mod_id = id, .offset = public_offset}})
.second) {
failure("public symbol '%s' loaded more then once\n",
get_public_name_safe(bf, i));
char *read_ip = instr_ip + 1;
char *write_ip = instr_ip + 1;
switch (cmd) {
case Cmd::STRING:
ip_write_int_unsafe(write_ip,
ip_read_int_unsafe(&read_ip) + offsets.strings);
break;
case Cmd::JMP:
case Cmd::CJMPnz:
case Cmd::CJMPz:
case Cmd::CALL:
ip_write_int_unsafe(write_ip,
ip_read_int_unsafe(&read_ip) + offsets.code);
break;
case Cmd::CLOSURE: {
ip_write_int_unsafe(write_ip,
ip_read_int_unsafe(&read_ip) + offsets.code);
size_t args_count = ip_read_int_unsafe(&read_ip);
for (size_t i = 0; i < args_count; ++i) {
uint8_t arg_type = ip_read_byte_unsafe(&read_ip);
if (to_var_category(arg_type) == VAR_GLOBAL) {
write_ip = read_ip;
ip_write_int_unsafe(write_ip,
ip_read_int_unsafe(&read_ip) + offsets.globals);
}
}
break;
}
case Cmd::LD:
case Cmd::ST:
case Cmd::STA:
if (to_var_category(l) == VAR_GLOBAL) {
ip_write_int_unsafe(write_ip,
ip_read_int_unsafe(&read_ip) + offsets.globals);
}
break;
default:
break;
}
}
if (name) {
manager.loaded_modules.insert({*name, id});
}
if (do_verification) {
analyze(id);
}
return id;
}
uint32_t path_mod_load(const char *name, std::filesystem::path &&path,
bool do_verification) {
void subst_in_code(Bytefile *bytefile,
const std::unordered_map<std::string, size_t> &publics) {
for (size_t i = 0; i < bytefile->substs_area_size; ++i) {
if (i + sizeof(uint32_t) >= bytefile->substs_area_size) {
failure("substitution %zu offset is out of area\n", i);
}
uint32_t offset = *(uint32_t *)(bytefile->substs_ptr + i);
i += sizeof(uint32_t);
const char *name = bytefile->substs_ptr + i;
i += strlen(name);
#ifdef DEBUG_VERSION
std::cerr << "- module path load '" << name << "'\n";
printf("subst: offset %u, name %s\n", offset, name);
#endif
Bytefile *module = read_file(path.c_str());
return mod_add_impl(module, do_verification, name);
if (i > bytefile->substs_area_size) {
failure("substitution %zu name is out of area\n", i);
}
BUILTIN builtin = id_by_builtin(name);
// NOTE: address is first argument of the call
if (builtin != BUILTIN_NONE) {
uint8_t cmd = ((CMD_CTRL << 4) | CMD_CTRL_BUILTIN);
#ifdef DEBUG_VERSION
printf("set builtin %i, offset %i, cmd %u = (%u << 4) | %u, h = %u, l = "
"%u\n",
builtin, offset, cmd, CMD_CTRL, CMD_CTRL_BUILTIN,
(cmd & 0xF0) >> 4, cmd & 0x0F);
#endif
*(uint8_t *)(bytefile->code_ptr + offset - 1) =
cmd; // set BUILTIN command
*(uint32_t *)(bytefile->code_ptr + offset) = builtin;
continue;
}
const auto it = publics.find(name);
if (it == publics.end()) {
failure("public name for substitution is not found: <%s>\n", name);
}
*(uint32_t *)(bytefile->code_ptr + offset) = it->second;
// TODO: check: +4 to match ?
}
}
// Sums the string table size, global area size, code size and number of
// public symbols over every bytefile in `bytefiles`.
Offsets calc_merge_sizes(const std::vector<Bytefile *> &bytefiles) {
  Offsets total{.strings = 0, .globals = 0, .code = 0, .publics_num = 0};
  for (const Bytefile *file : bytefiles) {
    total.strings += file->stringtab_size;
    total.globals += file->global_area_size;
    total.code += file->code_size;
    total.publics_num += file->public_symbols_number;
  }
  return total;
}
// Value returned by merge_files.
struct MergeResult {
// the merged bytefile (allocated with malloc inside merge_files)
Bytefile *bf;
// offsets, in the merged code, of every public symbol named "main"
std::vector<size_t> main_offsets;
};
// Merges `bytefiles` into one newly malloc'ed bytefile.
//
// The inputs are processed in the given order. For each one the code is first
// patched in place (rewrite_code_with_offsets, then subst_in_code) and then
// its string table, code and public table are appended to the result.
// Globals get no storage in the result: only their total size is recorded.
//
// Every input bytefile is released with free(), so the caller must not use
// any of the passed pointers afterwards.
//
// Returns the merged bytefile together with the merged-code offsets of all
// public symbols named "main". Calls failure() if the allocation fails or if
// a public name other than "main" is defined more than once.
MergeResult merge_files(std::vector<Bytefile *> &&bytefiles) {
  const Offsets sizes = calc_merge_sizes(bytefiles);
  const size_t public_symbols_size = calc_publics_size(sizes.publics_num);

  // globals are on the stack, so only publics + strings + code are stored
  Bytefile *result = (Bytefile *)malloc(sizeof(Bytefile) + sizes.strings +
                                        sizes.code + public_symbols_size);
  if (result == NULL) {
    failure("unable to allocate memory for merged bytefile\n");
  }

  // collect publics
  // TODO: add publics + update name offsets too (?)
  std::unordered_map<std::string, size_t> publics;
  std::vector<size_t> main_offsets;
  {
    size_t code_offset = 0;
    for (size_t i = 0; i < bytefiles.size(); ++i) {
#ifdef DEBUG_VERSION
      printf("bytefile <%zu>\n", i);
#endif
      for (size_t j = 0; j < bytefiles[i]->public_symbols_number; ++j) {
#ifdef DEBUG_VERSION
        printf("symbol <%zu>:<%zu>\n", i, j);
#endif
        const char *name = get_public_name_unsafe(bytefiles[i], j);
        // offset of the symbol inside the merged code segment
        size_t offset = get_public_offset_unsafe(bytefiles[i], j) + code_offset;
#ifdef DEBUG_VERSION
        printf("symbol %s : %zu (code offset %zu)\n", name, offset,
               code_offset);
#endif
        if (strcmp(name, "main") == 0) {
          // every module may have its own main; they are all kept
          main_offsets.push_back(offset);
        } else if (!publics.insert({name, offset}).second) {
          failure("public name found more then once: %s\n", name);
        }
      }
      code_offset += bytefiles[i]->code_size;
    }
  }

  // init result
  result->code_size = sizes.code;
  result->stringtab_size = sizes.strings;
  result->global_area_size = sizes.globals;
  result->substs_area_size = 0;
  result->imports_number = 0;
  result->public_symbols_number = sizes.publics_num;
  result->main_offset = 0; // TODO: save all main offsets in some way (?)

  // buffer layout: [public table][string table][code]
  result->public_ptr = (int *)result->buffer;
  result->string_ptr = (char *)result->public_ptr + public_symbols_size;
  result->code_ptr = result->string_ptr + result->stringtab_size;
  result->imports_ptr = NULL;
  result->global_ptr = NULL;
  result->substs_ptr = NULL;

  // update & merge code segments
  Offsets offsets{.strings = 0, .globals = 0, .code = 0, .publics_num = 0};
  for (size_t i = 0; i < bytefiles.size(); ++i) {
    // patch the input in place before it is copied into the result
    rewrite_code_with_offsets(bytefiles[i], offsets);
    subst_in_code(bytefiles[i], publics);

    size_t publics_offset = calc_publics_size(offsets.publics_num);

    // copy data to merged file
    memcpy(result->string_ptr + offsets.strings, bytefiles[i]->string_ptr,
           bytefiles[i]->stringtab_size);
    memcpy(result->code_ptr + offsets.code, bytefiles[i]->code_ptr,
           bytefiles[i]->code_size);
    // TODO: recalc publics: offsets, strings (table is copied verbatim)
    memcpy((char *)result->public_ptr + publics_offset,
           (char *)bytefiles[i]->public_ptr,
           calc_publics_size(bytefiles[i]->public_symbols_number));

    // update offsets
    offsets.strings += bytefiles[i]->stringtab_size;
    offsets.globals += bytefiles[i]->global_area_size;
    offsets.code += bytefiles[i]->code_size;
    offsets.publics_num += bytefiles[i]->public_symbols_number;

    free(bytefiles[i]);
  }

#ifdef DEBUG_VERSION
  std::cout << "- merged file:\n";
  print_file(*result, std::cout);
#endif

  // move instead of copying the collected offsets into the result
  return {result, std::move(main_offsets)};
}
// ---
// Reads the bytefile located at `path` via read_file and returns its result.
// `name` is only used for the debug trace.
Bytefile *path_mod_load(const char *name, std::filesystem::path &&path) {
#ifdef DEBUG_VERSION
  std::cout << "- module path load '" << name << "'\n";
#endif
  Bytefile *loaded = read_file(path.c_str());
  return loaded;
}
static std::vector<std::filesystem::path> search_paths;
extern "C" {
void mod_cleanup() {
for (auto &mod : manager.modules) {
free(mod.bf);
}
}
// Appends `path` to the list of directories that mod_load searches.
void mod_add_search_path(const char *path) {
  search_paths.emplace_back(path);
}
void mod_add_search_path(const char *path) {
manager.search_paths.emplace_back(path);
}
const char *mod_get_name(uint32_t id) {
if (id > manager.modules.size()) {
failure("module id is out of range\n");
}
return manager.modules[id].name.c_str();
}
Bytefile *mod_get(uint32_t id) {
if (id > manager.modules.size()) {
failure("module id is out of range\n");
}
return manager.modules[id].bf;
}
int32_t find_mod_loaded(const char *name) {
auto it = manager.loaded_modules.find(name);
// module already loaded
if (it != manager.loaded_modules.end()) {
return it->second;
}
return -1;
}
int32_t mod_load(const char *name, bool do_verification) {
Bytefile *mod_load(const char *name) {
std::string full_name = std::string{name} + ".bc";
auto it = manager.loaded_modules.find(name);
// module already loaded
if (it != manager.loaded_modules.end()) {
return it->second;
}
if (std::filesystem::exists(full_name)) {
return path_mod_load(name, full_name, do_verification);
return path_mod_load(name, full_name);
}
for (const auto &dir_path : manager.search_paths) {
for (const auto &dir_path : search_paths) {
auto path = dir_path / full_name;
if (std::filesystem::exists(path)) {
return path_mod_load(name, std::move(path), do_verification);
return path_mod_load(name, std::move(path));
}
}
return -1;
return NULL;
}
uint32_t mod_add(Bytefile *module, bool do_verification) {
} // extern "C"
// uint32_t mod_add(Bytefile *module, bool do_verification) {
// #ifdef DEBUG_VERSION
// std::cout << "- add module, no name\n";
// #endif
// return mod_add_impl(module, do_verification);
// }
// ModSearchResult mod_search_pub_symbol(const char *name) {
// auto it = manager.public_symbols_mods.find(name);
// if (it == manager.public_symbols_mods.end()) {
// return {.symbol_offset = 0, .mod_id = 0, .mod_file = NULL};
// }
// return {
// .symbol_offset = it->second.offset,
// .mod_id = it->second.mod_id,
// .mod_file = mod_get(it->second.mod_id),
// };
// }
void mod_load_rec(Bytefile *mod,
std::unordered_map<std::string, Bytefile *> &loaded,
std::vector<Bytefile *> &loaded_ord) {
#ifdef DEBUG_VERSION
std::cerr << "- add module, no name\n";
printf("- run mod rec, %i imports\n", mod->imports_number);
#endif
return mod_add_impl(module, do_verification);
for (size_t i = 0; i < mod->imports_number; ++i) {
const char *import_str = get_import_safe(mod, i);
if (loaded.count(import_str) == 0 &&
strcmp(import_str, "Std") != 0) { // not loaded
#ifdef DEBUG_VERSION
printf("- mod load <%s>\n", import_str);
#endif
Bytefile *import_mod = mod_load(import_str); // TODO
if (import_mod == NULL) {
failure("module <%s> not found\n", import_str);
}
loaded.insert({import_str, import_mod});
mod_load_rec(import_mod, loaded, loaded_ord);
// loaded_ord.push_back(import_mod);
}
}
loaded_ord.push_back(mod);
}
ModSearchResult mod_search_pub_symbol(const char *name) {
auto it = manager.public_symbols_mods.find(name);
if (it == manager.public_symbols_mods.end()) {
return {.symbol_offset = 0, .mod_id = 0, .mod_file = NULL};
/// Loads `root` together with all of its imports, merges them into a single
/// bytefile and, when `do_verification` is set, runs the analyzer on the
/// merged result.
///
/// @return the merge result (merged bytefile plus collected main offsets).
MergeResult load_with_imports(Bytefile *root, bool do_verification) {
  std::unordered_map<std::string, Bytefile *> modules_by_name;
  std::vector<Bytefile *> load_order;
  mod_load_rec(root, modules_by_name, load_order);

  MergeResult merged = merge_files(std::move(load_order));
  if (!do_verification) {
    return merged;
  }

  // Printed unconditionally for now (the DEBUG_VERSION guard is disabled).
  printf("main offsets count: %zu\n", merged.main_offsets.size());
  // The main offsets are intentionally not handed over to the analyzer here;
  // they stay in `merged` for the caller.
  analyze(merged.bf /*, std::move(merged.main_offsets)*/);
  return merged;
}
extern "C" {
/// Loads `root` with all of its imports, merges them into one bytefile and
/// runs every collected module entry point in order.
///
/// @param root            entry module.
/// @param argc, argv      program arguments, re-installed before every main.
/// @param do_verification forwarded to `load_with_imports`.
/// @return the merged bytefile that was executed.
Bytefile *run_with_imports(Bytefile *root, int argc, char **argv,
                           bool do_verification) {
  MergeResult result = load_with_imports(root, do_verification);
  Bytefile *bf = result.bf;

  bf->main_offset = 0;
  prepare_state(bf, &s); // NOTE: for push_globals
  push_globals(&s);

  // Globals are pushed once above; each module main then runs against the
  // same interpreter state.
  for (const auto main_offset : result.main_offsets) {
    bf->main_offset = main_offset;
    set_argc_argv(argc, argv); // args for module main
    run_main(bf, argc, argv);
  }

  cleanup_state(&s);
  return bf;
}
} // extern "C"
struct StdFunc {
void (*ptr)();
@ -189,75 +382,128 @@ struct StdFunc {
bool is_args = false; // one var for all args
bool is_vararg = false;
};
bool run_stdlib_func(const char *name, size_t args_count) {
static const std::unordered_map<std::string, StdFunc> std_func = {
{"Luppercase", {.ptr = (void (*)()) & Luppercase, .args_count = 1}},
{"Llowercase", {.ptr = (void (*)()) & Llowercase, .args_count = 1}},
{"Lassert",
/// Maps a builtin's symbol name to its BUILTIN id.
///
/// @param name symbol name as it appears in bytecode (e.g. "Lwrite").
/// @return the matching id, or BUILTIN_NONE when the name is not a builtin.
BUILTIN id_by_builtin(const char *name) {
  static const std::unordered_map<std::string, BUILTIN> builtin_ids = {
      {"Luppercase", BUILTIN_Luppercase},
      {"Llowercase", BUILTIN_Llowercase},
      {"Lassert", BUILTIN_Lassert},
      {"Lstring", BUILTIN_Lstring},
      {"Llength", BUILTIN_Llength},
      {"LstringInt", BUILTIN_LstringInt},
      {"Lread", BUILTIN_Lread},
      {"Lwrite", BUILTIN_Lwrite},
      {"LmakeArray", BUILTIN_LmakeArray},
      {"LmakeString", BUILTIN_LmakeString},
      {"Lstringcat", BUILTIN_Lstringcat},
      {"LmatchSubString", BUILTIN_LmatchSubString},
      {"Lsprintf", BUILTIN_Lsprintf},
      {"Lsubstring", BUILTIN_Lsubstring},
      {"Li__Infix_4343", BUILTIN_Li__Infix_4343}, // ++
      {"Lclone", BUILTIN_Lclone},
      {"Lhash", BUILTIN_Lhash},
      {"LtagHash", BUILTIN_LtagHash},
      {"Lcompare", BUILTIN_Lcompare},
      {"LflatCompare", BUILTIN_LflatCompare},
      {"Lfst", BUILTIN_Lfst},
      {"Lsnd", BUILTIN_Lsnd},
      {"Lhd", BUILTIN_Lhd},
      {"Ltl", BUILTIN_Ltl},
      {"LreadLine", BUILTIN_LreadLine},
      {"Lprintf", BUILTIN_Lprintf},
      {"Lfopen", BUILTIN_Lfopen},
      {"Lfclose", BUILTIN_Lfclose},
      {"Lfread", BUILTIN_Lfread},
      {"Lfwrite", BUILTIN_Lfwrite},
      {"Lfexists", BUILTIN_Lfexists},
      {"Lfprintf", BUILTIN_Lfprintf},
      {"Lregexp", BUILTIN_Lregexp},
      {"LregexpMatch", BUILTIN_LregexpMatch},
      {"Lfailure", BUILTIN_Lfailure},
      {"Lsystem", BUILTIN_Lsystem},
      {"LgetEnv", BUILTIN_LgetEnv},
      {"Lrandom", BUILTIN_Lrandom},
      {"Ltime", BUILTIN_Ltime},
      {".array", BUILTIN_Barray},
  };

  const auto found = builtin_ids.find(name);
  if (found == builtin_ids.end()) {
    return BUILTIN_NONE;
  }
  return found->second;
}
void run_stdlib_func(BUILTIN id, size_t args_count) {
static const std::map<BUILTIN, StdFunc> std_func = {
{BUILTIN_Luppercase, {.ptr = (void (*)()) & Luppercase, .args_count = 1}},
{BUILTIN_Llowercase, {.ptr = (void (*)()) & Llowercase, .args_count = 1}},
{BUILTIN_Lassert,
{.ptr = (void (*)()) & Lassert, .args_count = 2, .is_vararg = true}},
{"Lstring",
{BUILTIN_Lstring,
{.ptr = (void (*)()) & Lstring, .args_count = 1, .is_args = true}},
{"Llength", {.ptr = (void (*)()) & Llength, .args_count = 1}},
{"LstringInt", {.ptr = (void (*)()) & LstringInt, .args_count = 1}},
{"Lread", {.ptr = (void (*)()) & Lread, .args_count = 0}},
{"Lwrite", {.ptr = (void (*)()) & Lwrite, .args_count = 1}},
{"LmakeArray", {.ptr = (void (*)()) & LmakeArray, .args_count = 1}},
{"LmakeString", {.ptr = (void (*)()) & LmakeString, .args_count = 1}},
{"Lstringcat",
{BUILTIN_Llength, {.ptr = (void (*)()) & Llength, .args_count = 1}},
{BUILTIN_LstringInt, {.ptr = (void (*)()) & LstringInt, .args_count = 1}},
{BUILTIN_Lread, {.ptr = (void (*)()) & Lread, .args_count = 0}},
{BUILTIN_Lwrite, {.ptr = (void (*)()) & Lwrite, .args_count = 1}},
{BUILTIN_LmakeArray, {.ptr = (void (*)()) & LmakeArray, .args_count = 1}},
{BUILTIN_LmakeString,
{.ptr = (void (*)()) & LmakeString, .args_count = 1}},
{BUILTIN_Lstringcat,
{.ptr = (void (*)()) & Lstringcat, .args_count = 1, .is_args = true}},
{"LmatchSubString",
{BUILTIN_LmatchSubString,
{.ptr = (void (*)()) & LmatchSubString, .args_count = 3}},
{"Lsprintf",
{BUILTIN_Lsprintf,
{.ptr = (void (*)()) & Lsprintf, .args_count = 1, .is_vararg = true}},
{"Lsubstring",
{BUILTIN_Lsubstring,
{.ptr = (void (*)()) & Lsubstring, .args_count = 3, .is_args = true}},
{"Li__Infix_4343",
{BUILTIN_Li__Infix_4343,
{.ptr = (void (*)()) & Li__Infix_4343,
.args_count = 2,
.is_args = true}}, // ++
{"Lclone",
{BUILTIN_Lclone,
{.ptr = (void (*)()) & Lclone, .args_count = 1, .is_args = true}},
{"Lhash", {.ptr = (void (*)()) & Lhash, .args_count = 1}},
{"LtagHash", {.ptr = (void (*)()) & LtagHash, .args_count = 1}},
{"Lcompare", {.ptr = (void (*)()) & Lcompare, .args_count = 2}},
{"LflatCompare", {.ptr = (void (*)()) & LflatCompare, .args_count = 2}},
{"Lfst", {.ptr = (void (*)()) & Lfst, .args_count = 1}},
{"Lsnd", {.ptr = (void (*)()) & Lsnd, .args_count = 1}},
{"Lhd", {.ptr = (void (*)()) & Lhd, .args_count = 1}},
{"Ltl", {.ptr = (void (*)()) & Ltl, .args_count = 1}},
{"LreadLine", {.ptr = (void (*)()) & LreadLine, .args_count = 0}},
{"Lprintf",
{BUILTIN_Lhash, {.ptr = (void (*)()) & Lhash, .args_count = 1}},
{BUILTIN_LtagHash, {.ptr = (void (*)()) & LtagHash, .args_count = 1}},
{BUILTIN_Lcompare, {.ptr = (void (*)()) & Lcompare, .args_count = 2}},
{BUILTIN_LflatCompare,
{.ptr = (void (*)()) & LflatCompare, .args_count = 2}},
{BUILTIN_Lfst, {.ptr = (void (*)()) & Lfst, .args_count = 1}},
{BUILTIN_Lsnd, {.ptr = (void (*)()) & Lsnd, .args_count = 1}},
{BUILTIN_Lhd, {.ptr = (void (*)()) & Lhd, .args_count = 1}},
{BUILTIN_Ltl, {.ptr = (void (*)()) & Ltl, .args_count = 1}},
{BUILTIN_LreadLine, {.ptr = (void (*)()) & LreadLine, .args_count = 0}},
{BUILTIN_Lprintf,
{.ptr = (void (*)()) & Lprintf, .args_count = 1, .is_vararg = true}},
{"Lfopen", {.ptr = (void (*)()) & Lfopen, .args_count = 2}},
{"Lfclose", {.ptr = (void (*)()) & Lfclose, .args_count = 1}},
{"Lfread", {.ptr = (void (*)()) & Lfread, .args_count = 1}},
{"Lfwrite", {.ptr = (void (*)()) & Lfwrite, .args_count = 2}},
{"Lfexists", {.ptr = (void (*)()) & Lfexists, .args_count = 1}},
{"Lfprintf",
{BUILTIN_Lfopen, {.ptr = (void (*)()) & Lfopen, .args_count = 2}},
{BUILTIN_Lfclose, {.ptr = (void (*)()) & Lfclose, .args_count = 1}},
{BUILTIN_Lfread, {.ptr = (void (*)()) & Lfread, .args_count = 1}},
{BUILTIN_Lfwrite, {.ptr = (void (*)()) & Lfwrite, .args_count = 2}},
{BUILTIN_Lfexists, {.ptr = (void (*)()) & Lfexists, .args_count = 1}},
{BUILTIN_Lfprintf,
{.ptr = (void (*)()) & Lfprintf, .args_count = 2, .is_vararg = true}},
{"Lregexp", {.ptr = (void (*)()) & Lregexp, .args_count = 1}},
{"LregexpMatch", {.ptr = (void (*)()) & LregexpMatch, .args_count = 3}},
{"Lfailure",
{BUILTIN_Lregexp, {.ptr = (void (*)()) & Lregexp, .args_count = 1}},
{BUILTIN_LregexpMatch,
{.ptr = (void (*)()) & LregexpMatch, .args_count = 3}},
{BUILTIN_Lfailure,
{.ptr = (void (*)()) & Lfailure, .args_count = 1, .is_vararg = true}},
{"Lsystem", {.ptr = (void (*)()) & Lsystem, .args_count = 1}},
{"LgetEnv", {.ptr = (void (*)()) & LgetEnv, .args_count = 1}},
{"Lrandom", {.ptr = (void (*)()) & Lrandom, .args_count = 1}},
{"Ltime", {.ptr = (void (*)()) & Ltime, .args_count = 0}},
{BUILTIN_Lsystem, {.ptr = (void (*)()) & Lsystem, .args_count = 1}},
{BUILTIN_LgetEnv, {.ptr = (void (*)()) & LgetEnv, .args_count = 1}},
{BUILTIN_Lrandom, {.ptr = (void (*)()) & Lrandom, .args_count = 1}},
{BUILTIN_Ltime, {.ptr = (void (*)()) & Ltime, .args_count = 0}},
};
// some functions do use on args pointer
const auto it = std_func.find(name);
const auto it = std_func.find(id);
if (it == std_func.end()) {
return false;
failure("RUNTIME ERROR: stdlib function <%u> not found\n", id);
}
// TODO: move to bytecode verifier
if ((!it->second.is_vararg && it->second.args_count != args_count) ||
it->second.args_count > args_count) {
failure("RUNTIME ERROR: stdlib function <%s> argument count <%zu> is not "
failure("RUNTIME ERROR: stdlib function <%u> argument count <%zu> is not "
"expected (expected is <%s%zu>)\n",
name, it->second.args_count, it->second.is_vararg ? ">=" : "=",
id, it->second.args_count, it->second.is_vararg ? ">=" : "=",
args_count);
}
@ -268,7 +514,4 @@ bool run_stdlib_func(const char *name, size_t args_count) {
} else {
call_anyarg_func<20>(it->second.ptr, args_count);
}
return true;
}
} // extern "C"

View file

@ -72,6 +72,8 @@ Bytefile *read_file(const char *fname) {
char *file_begin = (char *)file + additional_size;
char *file_end = file_begin + size;
size_t buffer_size = size + additional_size - sizeof(Bytefile);
if (file == 0) {
failure("unable to allocate memory to store file data\n");
}
@ -85,7 +87,7 @@ Bytefile *read_file(const char *fname) {
fclose(f);
size_t imports_size = file->imports_number * sizeof(int);
size_t public_symbols_size = file->public_symbols_number * 2 * sizeof(int);
size_t public_symbols_size = calc_publics_size(file->public_symbols_number);
size_t strings_buffer_offset = public_symbols_size + imports_size;
if (file->buffer + strings_buffer_offset >= file_end) {
@ -98,7 +100,7 @@ Bytefile *read_file(const char *fname) {
size_t substs_buffer_offset = strings_buffer_offset + file->stringtab_size;
file->substs_ptr = file->buffer + substs_buffer_offset;
if ((char *)file->substs_ptr + file->substs_area_size > file_end) {
if (file->substs_ptr + file->substs_area_size > file_end) {
failure("substitutions table is out of the file size\n");
}
@ -106,7 +108,9 @@ Bytefile *read_file(const char *fname) {
// file->string_ptr[file->stringtab_size - 1] != 0) {
// failure("strings table is not zero-ended\n");
// }
file->code_size = size - substs_buffer_offset - file->substs_area_size;
file->code_ptr = file->substs_ptr + file->substs_area_size;
// file->code_size = size - substs_buffer_offset - file->substs_area_size;
file->code_size = buffer_size - (file->code_ptr - file->buffer);
if (file->code_size < 0 || public_symbols_size < 0 ||
file->stringtab_size < 0) {
@ -116,144 +120,11 @@ Bytefile *read_file(const char *fname) {
file->imports_ptr = (int *)file->buffer;
file->public_ptr = (int *)(file->buffer + imports_size);
file->global_ptr = NULL; // is allocated on module run on stack
file->code_ptr = file->string_ptr + file->stringtab_size;
// file->global_ptr = (int*) calloc (file->global_area_size, sizeof (int));
return file;
}
// Per-segment byte counters used while merging bytefiles. The same struct is
// used both for total sizes and for running offsets of the module currently
// being merged.
struct Offsets {
size_t strings; // string table segment (accumulated from stringtab_size)
size_t globals; // globals segment (accumulated from global_area_size)
size_t code; // code segment (accumulated from code_size)
};
/// Walks the code segment of `bytefile` command by command and relocates the
/// int operand of position-dependent commands: STRING operands are shifted by
/// `offsets.strings`; JMP / CJMPnz / CJMPz / CLOSURE / CALL operands are
/// shifted by `offsets.code`. All other commands are left untouched.
///
/// NOTE(review): the operand is read at the position `parse_command` leaves
/// `ip` at — confirm that this is the operand and not the next command.
void rewrite_code_with_offsets(Bytefile *bytefile, const Offsets &offsets) {
  char *ip = bytefile->code_ptr;

  // Reads the int at the current ip (advancing ip) and writes it back to the
  // same location increased by `delta`.
  const auto shift_int_at_ip = [&ip](size_t delta) {
    char *const target = ip;
    ip_write_int_unsafe(target, ip_read_int_unsafe(&ip) + delta); // TODO: check
  };

  while (ip - bytefile->code_ptr < bytefile->code_size) {
    const auto [cmd, l] = parse_command(&ip, bytefile);
    switch (cmd) {
    case Cmd::STRING:
      shift_int_at_ip(offsets.strings);
      break;
    case Cmd::JMP:
    case Cmd::CJMPnz:
    case Cmd::CJMPz:
    case Cmd::CLOSURE:
    case Cmd::CALL:
      shift_int_at_ip(offsets.code);
      break;
    default:
      break;
    }
  }
}
/// Applies the substitution table of `bytefile` to its code segment.
///
/// The table is a packed sequence of records `[uint32 code offset][name\0]`.
/// For every record the public symbol `name` is looked up in `publics` and
/// its value is written as a uint32 at `code_ptr + offset`.
///
/// @param bytefile module whose code is patched in place.
/// @param publics  public symbol name -> resolved value (offset in merged code).
///
/// Malformed tables (truncated record, unterminated name, unknown symbol,
/// patch location outside the code segment) are reported through `failure`.
void subst_in_code(Bytefile *bytefile,
                   const std::unordered_map<std::string, size_t> &publics) {
  const size_t area_size = bytefile->substs_area_size;
  for (size_t i = 0; i < area_size;) {
    if (i + sizeof(uint32_t) >= area_size) {
      failure("substitution %zu offset is out of area", i);
    }
    // Records are packed, so the offset may be unaligned: copy it out.
    uint32_t offset = 0;
    memcpy(&offset, bytefile->substs_ptr + i, sizeof(offset));
    i += sizeof(uint32_t);

    // Look for the terminator only inside the substitutions area.
    const char *name = bytefile->substs_ptr + i;
    const void *name_end = memchr(name, '\0', area_size - i);
    if (name_end == NULL) {
      failure("substitution %zu name is out of area", i);
    }
    // Skip the name AND its terminating zero so that the next iteration
    // starts at the next record.
    i += static_cast<size_t>(static_cast<const char *>(name_end) - name) + 1;

    const auto it = publics.find(name);
    if (it == publics.end()) {
      failure("public name for substitution is not found: %s", name);
    }

    if (static_cast<size_t>(offset) + sizeof(uint32_t) >
        static_cast<size_t>(bytefile->code_size)) {
      failure("substitution offset %u is out of code area", offset);
    }
    const uint32_t value = static_cast<uint32_t>(it->second);
    memcpy(bytefile->code_ptr + offset, &value, sizeof(value));
    // TODO: check: +4 to match ?
  }
}
/// Sums the string table, globals area and code sizes over all `bytefiles`.
///
/// @return totals per segment; each field is the sum of the corresponding
///         size of every input module.
Offsets calc_merge_sizes(const std::vector<Bytefile *> &bytefiles) {
  Offsets sizes{.strings = 0, .globals = 0, .code = 0};
  for (const Bytefile *bytefile : bytefiles) {
    // Each segment is accumulated into its own counter.
    sizes.strings += bytefile->stringtab_size;
    sizes.globals += bytefile->global_area_size;
    sizes.code += bytefile->code_size;
  }
  return sizes;
}
/// Merges several bytefiles into one freshly allocated bytefile.
///
/// String tables and code segments are concatenated in input order. Before a
/// module is copied, its code is relocated (`rewrite_code_with_offsets`) and
/// its substitutions are resolved against the public symbols of all modules
/// (`subst_in_code`). Globals are not stored in the result buffer; only their
/// total size is recorded.
///
/// @param bytefiles modules to merge; every input module is freed.
/// @return the merged bytefile (allocated with malloc).
///
/// Fails through `failure` on allocation failure or when a public name other
/// than "main" is defined more than once.
Bytefile *merge_files(std::vector<Bytefile *> &&bytefiles) {
  Offsets sizes = calc_merge_sizes(bytefiles);
  Bytefile *result = static_cast<Bytefile *>(
      malloc(sizeof(Bytefile) + sizes.strings +
             sizes.code)); // globals - on stack
  if (result == NULL) {
    failure("unable to allocate memory to store merged file data\n");
  }

  // collect publics
  std::unordered_map<std::string, size_t> publics;
  // NOTE: "main" entries are collected here but are not yet stored in or
  // returned with the result (see the TODO on main_offset below).
  std::vector<size_t> main_offsets;
  {
    size_t code_offset = 0;
    for (size_t i = 0; i < bytefiles.size(); ++i) {
      for (size_t j = 0; j < bytefiles[i]->public_symbols_number; ++j) {
        const char *name = get_public_name_unsafe(bytefiles[i], j);
        // public offsets are module-relative; shift into merged code space
        size_t offset =
            get_public_name_offset_unsafe(bytefiles[i], j) + code_offset;
        if (strcmp(name, "main") == 0) {
          main_offsets.push_back(offset);
        } else if (!publics.insert({name, offset}).second) {
          failure("public name found more then once: %s", name);
        }
      }
      code_offset += bytefiles[i]->code_size;
    }
  }

  // init result
  result->code_size = sizes.code;
  result->stringtab_size = sizes.strings;
  result->global_area_size = sizes.globals;
  result->substs_area_size = 0;
  result->imports_number = 0;
  result->public_symbols_number = 0;
  result->main_offset = 0; // TODO: save al main offsets in some way (?)

  // layout of the result buffer: [strings][code]
  result->string_ptr = result->buffer;
  result->imports_ptr = NULL;
  result->public_ptr = NULL;
  result->code_ptr = result->string_ptr + result->stringtab_size;
  result->global_ptr = NULL;
  result->substs_ptr = NULL;

  // update & merge code segments
  Offsets offsets{.strings = 0, .globals = 0, .code = 0};
  for (size_t i = 0; i < bytefiles.size(); ++i) {
    rewrite_code_with_offsets(bytefiles[i], offsets);
    subst_in_code(bytefiles[i], publics);

    // copy data to merged file
    memcpy(result->string_ptr + offsets.strings, bytefiles[i]->string_ptr,
           bytefiles[i]->stringtab_size);
    memcpy(result->code_ptr + offsets.code, bytefiles[i]->code_ptr,
           bytefiles[i]->code_size);

    // update offsets
    offsets.strings += bytefiles[i]->stringtab_size;
    offsets.globals += bytefiles[i]->global_area_size;
    offsets.code += bytefiles[i]->code_size;

    free(bytefiles[i]);
  }

  return result;
}
const char *command_name(Cmd cmd, int8_t l) {
static const char *const ops[] = {
#define OP_TO_STR(id, op) "BINOP:" #op,
@ -349,6 +220,8 @@ const char *command_name(Cmd cmd, int8_t l) {
return "CALLC";
case Cmd::CALL:
return "CALL";
case Cmd::BUILTIN:
return "BUILTIN";
case Cmd::TAG:
return "TAG";
case Cmd::ARRAY:
@ -379,7 +252,7 @@ const char *command_name(Cmd cmd, int8_t l) {
return "_UNDEF_";
}
exit(1);
failure("command_name: unexpected command %u", static_cast<uint>(cmd));
}
// } // extern "C"
@ -569,14 +442,14 @@ std::pair<Cmd, uint8_t> parse_command_impl(char **ip, const Bytefile &bf,
break;
default:
failure("invalid opcode");
failure("parser: basic, invalid opcode\n");
}
break;
case CMD_LD: // LD %d
cmd = Cmd::LD;
if (l > sizeof(ldts) / sizeof(char *)) {
failure("wrong ld argument type");
failure("wrong ld argument type\n");
}
read_print_cmd_seq_opt<do_read_args, use_out, ArgT::INT>(cmd, l, ip, bf,
out);
@ -584,7 +457,7 @@ std::pair<Cmd, uint8_t> parse_command_impl(char **ip, const Bytefile &bf,
case CMD_LDA: // LDA %d
cmd = Cmd::LDA;
if (l > sizeof(ldts) / sizeof(char *)) {
failure("wrong lda argument type");
failure("wrong lda argument type\n");
}
read_print_cmd_seq_opt<do_read_args, use_out, ArgT::INT>(cmd, l, ip, bf,
out);
@ -592,7 +465,7 @@ std::pair<Cmd, uint8_t> parse_command_impl(char **ip, const Bytefile &bf,
case CMD_ST: // ST %d
cmd = Cmd::ST;
if (l > sizeof(ldts) / sizeof(char *)) {
failure("wrong st argument type");
failure("wrong st argument type\n");
}
read_print_cmd_seq_opt<do_read_args, use_out, ArgT::INT>(cmd, l, ip, bf,
out);
@ -633,7 +506,7 @@ std::pair<Cmd, uint8_t> parse_command_impl(char **ip, const Bytefile &bf,
for (size_t i = 0; i < args_count; i++) {
uint8_t arg_type = ip_read_byte_safe(ip, &bf);
if (arg_type > sizeof(ldts) / sizeof(char *)) {
failure("wrong closure argument type");
failure("wrong closure argument type\n");
}
print_space<use_out>(out);
print_val<use_out>(out, ldts[arg_type]);
@ -677,22 +550,21 @@ std::pair<Cmd, uint8_t> parse_command_impl(char **ip, const Bytefile &bf,
read_print_cmd_seq_opt<do_read_args, use_out, ArgT::INT>(cmd, l, ip, bf,
out);
break;
// NOTE: is replaced
// case CMD_CTRL_CALLF: // CALLF %s %d
// cmd = Cmd::CALLF;
// read_print_cmd_seq_opt<do_read_args, use_out, ArgT::STR, ArgT::INT>(
// cmd, l, ip, bf, out);
// break;
case CMD_CTRL_BUILTIN: // BUILTIN %d %d // call builtin
cmd = Cmd::BUILTIN;
read_print_cmd_seq_opt<do_read_args, use_out, ArgT::INT, ArgT::INT>(
cmd, l, ip, bf, out);
break;
default:
failure("invalid opcode");
failure("parser: ctrl, invalid opcode\n");
}
break;
case CMD_PATT: // PATT pats[l]
// {"=str", "#string", "#array", "#sexp", "#ref", "#val", "#fun"}
if (l >= sizeof(pats) / sizeof(char *)) {
failure("invalid opcode");
failure("parser: patt, invalid opcode\n");
}
cmd = Cmd::PATT;
read_print_cmd_seq_opt<do_read_args, use_out>(cmd, l, ip, bf, out);
@ -726,12 +598,12 @@ std::pair<Cmd, uint8_t> parse_command_impl(char **ip, const Bytefile &bf,
// break;
// default:
// failure("invalid opcode");
// failure("parser: bultin, invalid opcode\n");
// }
// } break;
default:
failure("invalid opcode");
failure("parser: invalid opcode\n");
}
#ifdef DEBUG_VERSION
std::cout << command_name(cmd, l) << '\n';
@ -757,6 +629,7 @@ bool is_command_name(char *ip, const Bytefile *bf, Cmd cmd) {
}
void print_file_info(const Bytefile &bf, std::ostream &out) {
out << "Code size : " << bf.code_size << '\n';
out << "String table size : " << bf.stringtab_size << '\n';
out << "Global area size : " << bf.global_area_size << '\n';
out << "Substitutions area size : " << bf.substs_area_size << '\n';
@ -774,6 +647,16 @@ void print_file_info(const Bytefile &bf, std::ostream &out) {
<< get_public_offset_safe(&bf, i) << ": " << std::dec
<< get_public_name_safe(&bf, i) << '\n';
}
out << "Substs :\n";
for (size_t i = 0; i < bf.substs_area_size; i++) {
uint32_t offset = *(uint32_t *)(bf.substs_ptr + i);
i += sizeof(uint32_t);
const char *name = bf.substs_ptr + i;
out << " " << std::setfill('0') << std::setw(8) << std::hex << offset
<< ": " << std::dec << name << '\n';
i += strlen(name);
}
}
void print_file_code(const Bytefile &bf, std::ostream &out) {
@ -786,6 +669,7 @@ void print_file_code(const Bytefile &bf, std::ostream &out) {
out << std::endl;
if (cmd == Cmd::EXIT) {
std::cout << "> EXIT" << std::endl;
break;
}
}
@ -796,7 +680,7 @@ void print_file(const Bytefile &bf, std::ostream &out) {
out << "Code:\n";
print_file_code(bf, out);
out << "code end\n";
out << "Code end\n";
}
extern "C" {

View file

@ -19,8 +19,6 @@ void init_state(struct State* s, void** stack) {
s->bf = NULL;
s->current_line = 0;
s->is_closure_call = false;
s->current_module_id = 0;
s->call_module_id = 0;
s->ip = NULL; //s->bf->code_ptr;
s->instr_ip = NULL; //s->bf->code_ptr;
s->call_ip = NULL;
@ -38,16 +36,13 @@ void init_state(struct State* s, void** stack) {
#endif
}
void init_mod_state(uint mod_id, struct State* s) {
// init module data
s->bf = mod_get(mod_id);
s->current_module_id = mod_id;
void prepare_state(Bytefile* bf, struct State* s) {
// init data
s->bf = bf;
// clearup from previous executions
s->is_closure_call = false;
s->current_module_id = 0;
s->call_module_id = 0;
s->call_ip = NULL;
s->current_line = 0;
@ -62,7 +57,7 @@ void init_mod_state(uint mod_id, struct State* s) {
#endif
}
void init_mod_state_globals(struct State *s) {
void push_globals(struct State *s) {
s_pushn_nil(s->bf->global_area_size);
s->bf->global_ptr = (void*)__gc_stack_top;