stdlib tester, fixes, switch to builtins as pseudo functions (use call), remove negative closure offset possibility

This commit is contained in:
ProgramSnail 2025-03-30 09:34:50 +03:00
parent 905632aab6
commit 43088ec9f9
10 changed files with 321 additions and 131 deletions

1
byterun/.gitignore vendored
View file

@ -11,5 +11,6 @@ compile_commands.json
.cache/ .cache/
*.a *.a
*.o *.o
*.bc
test** test**

View file

@ -11,47 +11,47 @@ Bytefile *run_with_imports(Bytefile *root, int argc, char **argv,
// --- // ---
enum BUILTIN : uint { enum BUILTIN : uint {
BUILTIN_Luppercase, BUILTIN_Luppercase, // 0
BUILTIN_Llowercase, BUILTIN_Llowercase, // 1
BUILTIN_Lassert, BUILTIN_Lassert, // 2
BUILTIN_Lstring, BUILTIN_Lstring, // 3
BUILTIN_Llength, BUILTIN_Llength, // 4
BUILTIN_LstringInt, BUILTIN_LstringInt, // 5
BUILTIN_Lread, BUILTIN_Lread, // 6
BUILTIN_Lwrite, BUILTIN_Lwrite, // 7
BUILTIN_LmakeArray, BUILTIN_LmakeArray, // 8
BUILTIN_LmakeString, BUILTIN_LmakeString, // 9
BUILTIN_Lstringcat, BUILTIN_Lstringcat, // 10
BUILTIN_LmatchSubString, BUILTIN_LmatchSubString, // 11
BUILTIN_Lsprintf, BUILTIN_Lsprintf, // 12
BUILTIN_Lsubstring, BUILTIN_Lsubstring, // 13
BUILTIN_Li__Infix_4343, // ++ BUILTIN_Li__Infix_4343, // 14 // ++
BUILTIN_Lclone, BUILTIN_Lclone, // 15
BUILTIN_Lhash, BUILTIN_Lhash, // 16
BUILTIN_LtagHash, BUILTIN_LtagHash, // 17
BUILTIN_Lcompare, BUILTIN_Lcompare, // 18
BUILTIN_LflatCompare, BUILTIN_LflatCompare, // 19
BUILTIN_Lfst, BUILTIN_Lfst, // 20
BUILTIN_Lsnd, BUILTIN_Lsnd, // 21
BUILTIN_Lhd, BUILTIN_Lhd, // 22
BUILTIN_Ltl, BUILTIN_Ltl, // 23
BUILTIN_Lprintf, BUILTIN_Lprintf, // 24
BUILTIN_LreadLine, BUILTIN_LreadLine, // 25
BUILTIN_Lfopen, BUILTIN_Lfopen, // 26
BUILTIN_Lfclose, BUILTIN_Lfclose, // 27
BUILTIN_Lfread, BUILTIN_Lfread, // 28
BUILTIN_Lfwrite, BUILTIN_Lfwrite, // 29
BUILTIN_Lfexists, BUILTIN_Lfexists, // 30
BUILTIN_Lfprintf, BUILTIN_Lfprintf, // 31
BUILTIN_Lregexp, BUILTIN_Lregexp, // 32
BUILTIN_LregexpMatch, BUILTIN_LregexpMatch, // 33
BUILTIN_Lfailure, BUILTIN_Lfailure, // 34
BUILTIN_Lsystem, BUILTIN_Lsystem, // 35
BUILTIN_LgetEnv, BUILTIN_LgetEnv, // 36
BUILTIN_Lrandom, BUILTIN_Lrandom, // 37
BUILTIN_Ltime, BUILTIN_Ltime, // 38
BUILTIN_Barray, // can't be run with run_stdlib_func BUILTIN_Barray, // 39 // can't be run with run_stdlib_func
BUILTIN_NONE, BUILTIN_NONE, // 40
}; };
enum BUILTIN id_by_builtin(const char *name); enum BUILTIN id_by_builtin(const char *name);

View file

@ -125,6 +125,14 @@ static inline void s_popn(size_t n) {
// ------ complex operations ------ // ------ complex operations ------
// Exchanges the two topmost stack values: both are popped and then pushed
// back in the order they were taken off, so the former top ends up directly
// below the former second element.
// NOTE: can be optimized
static inline void s_swap_tops() {
  void *upper = s_pop();
  void *lower = s_pop();
  s_push(upper);
  s_push(lower);
}
// for some reason does not work in sexp constructor, probably connected with gc // for some reason does not work in sexp constructor, probably connected with gc
// behaviour // behaviour
static inline void s_put_nth(size_t n, void *val) { static inline void s_put_nth(size_t n, void *val) {

View file

@ -16,7 +16,7 @@
// CLOJURE_T = CLOSURE_TAG, // CLOJURE_T = CLOSURE_TAG,
// }; // };
#define STACK_SIZE 128 * 1024 #define STACK_SIZE 512 * 1024
static const size_t MAX_ARRAY_SIZE = 0x11111110; static const size_t MAX_ARRAY_SIZE = 0x11111110;

View file

@ -17,6 +17,7 @@ for test in ../regression/*.lama; do
echo $test_file echo $test_file
# cat $test_file.input | ./byterun.exe -p test*.bc > test.bc.code # cat $test_file.input | ./byterun.exe -p test*.bc > test.bc.code
# cat $test_file.input | ./byterun.exe -p test*.bc # cat $test_file.input | ./byterun.exe -p test*.bc
# cat $test_file.input | ./byterun.exe -vi test*.bc
cat $test_file.input | ./byterun.exe -vi test*.bc > test.log cat $test_file.input | ./byterun.exe -vi test*.bc > test.log
sed '1d;s/^..//' $test_file.t > test_orig.log sed '1d;s/^..//' $test_file.t > test_orig.log
diff test.log test_orig.log diff test.log test_orig.log

View file

@ -13,9 +13,11 @@ void analyze(Bytefile *bf, std::vector<size_t> &&add_publics) {
static constexpr const int NOT_VISITED = -1; static constexpr const int NOT_VISITED = -1;
std::vector<int> visited(bf->code_size, NOT_VISITED); // store stack depth std::vector<int> visited(bf->code_size, NOT_VISITED); // store stack depth
std::vector<size_t> to_visit_func = std::move(add_publics); std::vector<size_t> to_visit_func;
std::vector<size_t> to_visit_jmp; std::vector<size_t> to_visit_jmp;
uint16_t mock_builtin_begin_counter = 0;
int current_stack_depth = 0; int current_stack_depth = 0;
const uint globals_count = bf->global_area_size; const uint globals_count = bf->global_area_size;
uint current_locals_count = 0; uint current_locals_count = 0;
@ -84,6 +86,10 @@ void analyze(Bytefile *bf, std::vector<size_t> &&add_publics) {
} }
}; };
for (const auto &add_public : add_publics) {
func_to_visit_push(add_public);
}
// add publics // add publics
to_visit_func.reserve(bf->public_symbols_number + to_visit_func.size()); to_visit_func.reserve(bf->public_symbols_number + to_visit_func.size());
for (size_t i = 0; i < bf->public_symbols_number; ++i) { for (size_t i = 0; i < bf->public_symbols_number; ++i) {
@ -136,8 +142,9 @@ void analyze(Bytefile *bf, std::vector<size_t> &&add_publics) {
#endif #endif
if (current_begin_counter == nullptr && cmd != Cmd::BEGIN && if (current_begin_counter == nullptr && cmd != Cmd::BEGIN &&
cmd != Cmd::CBEGIN) { cmd != Cmd::CBEGIN && cmd != Cmd::BUILTIN) {
ip_failure(saved_current_ip, bf, "function does not start with begin"); ip_failure(saved_current_ip, bf,
"function does not start with begin and is not builtin");
} }
if (visited[current_ip - bf->code_ptr] == NOT_VISITED) { if (visited[current_ip - bf->code_ptr] == NOT_VISITED) {
@ -297,12 +304,19 @@ void analyze(Bytefile *bf, std::vector<size_t> &&add_publics) {
ip_failure(saved_current_ip, bf, "jump/call out of file"); ip_failure(saved_current_ip, bf, "jump/call out of file");
} }
if (!is_command_name(bf->code_ptr + call_offset, bf, Cmd::BEGIN)) { if (is_command_name(bf->code_ptr + call_offset, bf, Cmd::BUILTIN)) {
ip_failure(saved_current_ip, bf, "call should point to begin"); if (args_count !=
*(uint *)(bf->code_ptr + call_offset + 1 + sizeof(uint32_t))) {
ip_failure(saved_current_ip, bf, "wrong builtin call argument count");
} }
} else if (is_command_name(bf->code_ptr + call_offset, bf, Cmd::BEGIN)) {
if (args_count != *(uint *)(bf->code_ptr + call_offset + 1)) { if (args_count != *(uint *)(bf->code_ptr + call_offset + 1)) {
ip_failure(saved_current_ip, bf, "wrong call argument count"); ip_failure(saved_current_ip, bf, "wrong call argument count");
} }
} else {
ip_failure(saved_current_ip, bf,
"call should point to begin or builtin");
}
} break; } break;
case Cmd::TAG: case Cmd::TAG:
if (current_stack_depth < 1) { if (current_stack_depth < 1) {
@ -329,12 +343,21 @@ void analyze(Bytefile *bf, std::vector<size_t> &&add_publics) {
ip_failure(saved_current_ip, bf, "undefined builtin id"); ip_failure(saved_current_ip, bf, "undefined builtin id");
} }
uint args_count = ip_read_int_unsafe(&current_ip); // set mock counter to behave similary to begin
current_stack_depth -= args_count; current_begin_counter = &mock_builtin_begin_counter;
if (current_stack_depth < 0) { *current_begin_counter = 0;
ip_failure(saved_current_ip, bf, "not enough elements in stack"); // add end to behave like end
} ++func_end_found;
++current_stack_depth;
/*uint args_count = */ ip_read_int_unsafe(&current_ip);
// NOTE: done in corresponding CALL/CALLC
// TODO: no stack edit required then (?)
// current_stack_depth -= args_count;
// if (current_stack_depth < 0) {
// ip_failure(saved_current_ip, bf, "not enough elements in stack");
// }
// ++current_stack_depth;
} break; } break;
case Cmd::PATT: case Cmd::PATT:
--current_stack_depth; --current_stack_depth;
@ -374,7 +397,8 @@ void analyze(Bytefile *bf, std::vector<size_t> &&add_publics) {
} }
if (current_begin_counter == nullptr) { if (current_begin_counter == nullptr) {
ip_failure(saved_current_ip, bf, "function does not start with begin"); ip_failure(saved_current_ip, bf,
"function does not start with begin and is not builtin");
} }
if (current_stack_depth < 0) { if (current_stack_depth < 0) {
@ -392,6 +416,7 @@ void analyze(Bytefile *bf, std::vector<size_t> &&add_publics) {
case Cmd::EXIT: case Cmd::EXIT:
case Cmd::END: case Cmd::END:
case Cmd::FAIL: case Cmd::FAIL:
case Cmd::BUILTIN: // pseudo function without begin and end
break; break;
case Cmd::CJMPz: case Cmd::CJMPz:
@ -402,15 +427,15 @@ void analyze(Bytefile *bf, std::vector<size_t> &&add_publics) {
case Cmd::JMP: { case Cmd::JMP: {
bool is_call = (cmd == Cmd::CLOSURE || cmd == Cmd::CALL); bool is_call = (cmd == Cmd::CLOSURE || cmd == Cmd::CALL);
uint jmp_p = ip_read_int_unsafe(&current_ip); aint jmp_offset = ip_read_int_unsafe(&current_ip);
if ((int)jmp_p >= bf->code_size) { if (jmp_offset < 0 || jmp_offset >= bf->code_size) {
// NOTE: maybe also should check that > begin (?) // NOTE: maybe also should check that > begin (?)
ip_failure(saved_current_ip, bf, "jump/call out of file"); ip_failure(saved_current_ip, bf, "jump/call out of file");
} }
if (is_call) { if (is_call) {
func_to_visit_push(jmp_p); func_to_visit_push(jmp_offset);
} else { } else {
jmp_to_visit_push(jmp_p); jmp_to_visit_push(jmp_offset);
} }
break; break;
} }

View file

@ -104,6 +104,23 @@ static inline void call_Barray(size_t elem_count) {
s_push(array); s_push(array);
} }
// Runs one builtin, identified by `builtin_id`, with `args_count` arguments.
//
// BUILTIN_Barray is forwarded to call_Barray (with args_count as the element
// count); every other id is forwarded to run_stdlib_func.
//
// When WITH_CHECK is not defined, ids that are >= BUILTIN_NONE are reported
// through s_failure before dispatching.
// NOTE(review): presumably WITH_CHECK builds rely on the static analyzer to
// reject such ids and s_failure does not return - confirm.
void call_builtin(uint builtin_id, uint args_count) {
#ifdef DEBUG_VERSION
  // builtin_id is an unsigned int-sized value, so it has to be printed with
  // %u; the former %zu expected a size_t argument.
  printf("builtin id: %u\n", (unsigned)builtin_id);
#endif
#ifndef WITH_CHECK
  if (builtin_id >= BUILTIN_NONE) {
    s_failure(&s, "invalid builtin");
  }
#endif
  if (builtin_id == BUILTIN_Barray) {
    call_Barray(args_count);
  } else {
    // Explicit conversion keeps the call well-formed for both C and C++
    // compilers.
    run_stdlib_func((enum BUILTIN)builtin_id, args_count);
  }
}
void run_main(Bytefile* bf, int argc, char **argv) { void run_main(Bytefile* bf, int argc, char **argv) {
#ifdef DEBUG_VERSION #ifdef DEBUG_VERSION
printf("--- init state ---\n"); printf("--- init state ---\n");
@ -247,12 +264,8 @@ void run_main(Bytefile* bf, int argc, char **argv) {
} }
case CMD_BASIC_SWAP: // SWAP case CMD_BASIC_SWAP: // SWAP
{ s_swap_tops();
void *x = s_pop(); break;
void *y = s_pop();
s_push(y);
s_push(x);
} break;
case CMD_BASIC_ELEM: // ELEM case CMD_BASIC_ELEM: // ELEM
{ {
@ -373,11 +386,17 @@ void run_main(Bytefile* bf, int argc, char **argv) {
s_push(*var_ptr); s_push(*var_ptr);
} }
#ifndef WITH_CHECK #ifndef WITH_CHECK
// check correct offset
if (call_offset >= s.bf->code_size) { if (call_offset >= s.bf->code_size) {
s_failure(&s, "jump out of file"); s_failure(&s, "jump out of file");
} }
// or correct builtin // TODO: add this check to analyzer
if (call_offset < 0 && call_offset + 1 <= -BUILTIN_NONE) {
s_failure(&s, "closure");
}
#endif #endif
s_push(s.bf->code_ptr + call_offset); // NOTE: call_offset < 0 => deal with closure of builtin function
s_push_i(BOX(call_offset));
void *closure = Bclosure((aint *)__gc_stack_top, BOX(args_count)); void *closure = Bclosure((aint *)__gc_stack_top, BOX(args_count));
// printf("args is %li, count is %li\n", args_count, get_len(TO_DATA(closure))); // printf("args is %li, count is %li\n", args_count, get_len(TO_DATA(closure)));
@ -390,11 +409,11 @@ void run_main(Bytefile* bf, int argc, char **argv) {
case CMD_CTRL_CALLC: { // CALLC %d // call clojure case CMD_CTRL_CALLC: { // CALLC %d // call clojure
aint args_count = ip_read_int(&s.ip); // args count aint args_count = ip_read_int(&s.ip); // args count
aint closure_offset = UNBOX((char*)Belem(*s_nth(args_count), BOX(0)));
call_happened = true; call_happened = true;
s.is_closure_call = true; s.is_closure_call = true;
s.call_ip = s.ip; s.call_ip = s.ip;
s.ip = s.bf->code_ptr + closure_offset;
s.ip = (char*)Belem(*s_nth(args_count), BOX(0)); // use offset instead ??
break; break;
} }
@ -435,6 +454,7 @@ void run_main(Bytefile* bf, int argc, char **argv) {
case CMD_CTRL_FAIL: { // FAIL %d %d case CMD_CTRL_FAIL: { // FAIL %d %d
int line = ip_read_int(&s.ip); int line = ip_read_int(&s.ip);
int col = ip_read_int(&s.ip); int col = ip_read_int(&s.ip);
print_stack(&s);
Bmatch_failure(s_pop(), argv[0], BOX(line), BOX(col)); Bmatch_failure(s_pop(), argv[0], BOX(line), BOX(col));
break; break;
} }
@ -447,24 +467,10 @@ void run_main(Bytefile* bf, int argc, char **argv) {
case CMD_CTRL_BUILTIN: { // BUILTIN %d %d // call builtin case CMD_CTRL_BUILTIN: { // BUILTIN %d %d // call builtin
size_t builtin_id = ip_read_int(&s.ip); size_t builtin_id = ip_read_int(&s.ip);
size_t args_count = ip_read_int(&s.ip); // args count size_t args_count = ip_read_int(&s.ip); // args count
call_builtin(builtin_id, args_count);
#ifdef DEBUG_VERSION s.ip = s.call_ip; // TODO: check
printf("builtin id: %zu\n", builtin_id);
#endif
#ifndef WITH_CHECK
if (builtin_id >= BUILTIN_NONE) {
s_failure(&s, "invalid builtin");
}
#endif
if (builtin_id == BUILTIN_Barray) {
call_Barray(args_count);
} else {
run_stdlib_func(builtin_id, args_count);
}
break; break;
} }
default: default:
s_failure(&s, "interpreter: ctrl, invalid opcode"); // %d-%d\n", h, l); s_failure(&s, "interpreter: ctrl, invalid opcode"); // %d-%d\n", h, l);
} }
@ -551,6 +557,7 @@ void run_main(Bytefile* bf, int argc, char **argv) {
s_failure(&s, "invalid opcode"); // %d-%d\n", h, l); s_failure(&s, "invalid opcode"); // %d-%d\n", h, l);
} }
// NOTE: do not clear for now, assume that call are correct
if (!call_happened) { if (!call_happened) {
s.is_closure_call = false; s.is_closure_call = false;
s.call_ip = NULL; s.call_ip = NULL;
@ -565,7 +572,7 @@ void run_main(Bytefile* bf, int argc, char **argv) {
} while (1); } while (1);
stop:; stop:;
#ifdef DEBUG_VERSION #ifdef DEBUG_VERSION
printf("--- module run end ---\n"); printf("--- run end ---\n");
#endif #endif
} }

View file

@ -27,7 +27,7 @@ template <size_t N, typename... Args>
requires(N != 0) requires(N != 0)
void call_func(void (*f)(), Args... args) { void call_func(void (*f)(), Args... args) {
void *arg = s_pop(); void *arg = s_pop();
call_func<N - 1, Args..., void *>(f, args..., arg); call_func<N - 1, Args..., void *>(f, arg, args...);
// TODO: check that arg is added on the right position // TODO: check that arg is added on the right position
} }
@ -84,8 +84,10 @@ void rewrite_code_with_offsets(Bytefile *bytefile, const Offsets &offsets) {
ip_read_int_unsafe(&read_ip) + offsets.code); ip_read_int_unsafe(&read_ip) + offsets.code);
break; break;
case Cmd::CLOSURE: { case Cmd::CLOSURE: {
aint offset = ip_read_int_unsafe(&read_ip);
// NOTE: do not modify offset for builtin's closures
ip_write_int_unsafe(write_ip, ip_write_int_unsafe(write_ip,
ip_read_int_unsafe(&read_ip) + offsets.code); offset < 0 ? offset : offset + offsets.code);
size_t args_count = ip_read_int_unsafe(&read_ip); size_t args_count = ip_read_int_unsafe(&read_ip);
for (size_t i = 0; i < args_count; ++i) { for (size_t i = 0; i < args_count; ++i) {
uint8_t arg_type = ip_read_byte_unsafe(&read_ip); uint8_t arg_type = ip_read_byte_unsafe(&read_ip);
@ -111,8 +113,33 @@ void rewrite_code_with_offsets(Bytefile *bytefile, const Offsets &offsets) {
} }
} }
void subst_in_code(Bytefile *bytefile, struct BuiltinSubst {
const std::unordered_map<std::string, size_t> &publics) { BUILTIN id;
uint32_t args_count;
auto operator<=>(const BuiltinSubst &) const = default;
bool operator==(const BuiltinSubst &) const = default;
};
// Serializes one BUILTIN command for `subst` at `*loc` and advances `*loc`
// past the written bytes.
//
// Layout written (1 + 2 * sizeof(uint32_t) bytes):
//   1 byte   - opcode ((CMD_CTRL << 4) | CMD_CTRL_BUILTIN)
//   4 bytes  - builtin id
//   4 bytes  - arguments count
// Both 32-bit fields are stored in host byte order, exactly as the previous
// direct stores did.
//
// The fields start at byte offset 1 of the command, so they are generally not
// 4-byte aligned; memcpy is used instead of storing through a casted
// uint32_t pointer to avoid misaligned / aliasing-violating writes.
void print_subst_to_bytes(BuiltinSubst subst, char **loc) {
  static constexpr const uint8_t builtin_cmd =
      ((CMD_CTRL << 4) | CMD_CTRL_BUILTIN);

  const uint8_t opcode = builtin_cmd;
  const uint32_t id = static_cast<uint32_t>(subst.id);
  const uint32_t args_count = subst.args_count;

  char *out = *loc;

  memcpy(out, &opcode, sizeof(opcode));
  out += sizeof(opcode);

  memcpy(out, &id, sizeof(id));
  out += sizeof(id);

  memcpy(out, &args_count, sizeof(args_count));
  out += sizeof(args_count);

  *loc = out;
}
// Ordered map from a distinct (builtin id, args count) pair to the code
// offset of the BUILTIN command generated for that pair. Entries are created
// with offset 0 by add_subst_builtin_offsets and receive their final offset
// in gen_builtins.
using BuiltinSubstMap = std::map<BuiltinSubst,
/*generated builtin offset*/ size_t>;
// Alternative mapped type, currently disabled:
// std::vector<size_t> /*subst offsets*/>;
// TODO: shared iteration over substs in functions
void add_subst_builtin_offsets(BuiltinSubstMap &subst_map, size_t code_offset,
const Bytefile *bytefile) {
for (size_t i = 0; i < bytefile->substs_area_size; ++i) { for (size_t i = 0; i < bytefile->substs_area_size; ++i) {
if (i + sizeof(uint32_t) >= bytefile->substs_area_size) { if (i + sizeof(uint32_t) >= bytefile->substs_area_size) {
failure("substitution %zu offset is out of area\n", i); failure("substitution %zu offset is out of area\n", i);
@ -124,7 +151,7 @@ void subst_in_code(Bytefile *bytefile,
i += strlen(name); i += strlen(name);
#ifdef DEBUG_VERSION #ifdef DEBUG_VERSION
printf("subst: offset %u, name %s\n", offset, name); printf("subst: offset 0x%.8x, name %s\n", offset, name);
#endif #endif
if (i > bytefile->substs_area_size) { if (i > bytefile->substs_area_size) {
@ -135,16 +162,61 @@ void subst_in_code(Bytefile *bytefile,
// NOTE: address is first argument of the call // NOTE: address is first argument of the call
if (builtin != BUILTIN_NONE) { if (builtin != BUILTIN_NONE) {
uint8_t cmd = ((CMD_CTRL << 4) | CMD_CTRL_BUILTIN); char *ip = bytefile->code_ptr + offset;
ip_read_int_unsafe(&ip); // read ptr placeholder
uint32_t args_count = ip_read_int_unsafe(&ip); // read args count
subst_map[{builtin, args_count}] = 0; // .push_back(offset + code_offset);
}
}
}
// NOTE: unmanaged memory allocated
std::pair<char *, size_t> gen_builtins(size_t code_offset,
BuiltinSubstMap &subst_map) {
size_t code_size =
subst_map.size() * /*size of builtin command*/ (1 + 2 * sizeof(uint32_t));
char *code = (char *)malloc(code_size);
char *code_it = code;
for (auto &subst : subst_map) {
subst.second = code_it - code + code_offset;
print_subst_to_bytes(subst.first, &code_it);
}
return {code, code_size};
}
void subst_in_code(Bytefile *bytefile,
const std::unordered_map<std::string, size_t> &publics,
const BuiltinSubstMap &builtins) {
for (size_t i = 0; i < bytefile->substs_area_size; ++i) {
if (i + sizeof(uint32_t) >= bytefile->substs_area_size) {
failure("substitution %zu offset is out of area\n", i);
}
uint32_t offset = *(uint32_t *)(bytefile->substs_ptr + i);
i += sizeof(uint32_t);
const char *name = bytefile->substs_ptr + i;
i += strlen(name);
#ifdef DEBUG_VERSION #ifdef DEBUG_VERSION
printf("set builtin %i, offset %i, cmd %u = (%u << 4) | %u, h = %u, l = " printf("subst: offset 0x%.8x, name %s\n", offset, name);
"%u\n",
builtin, offset, cmd, CMD_CTRL, CMD_CTRL_BUILTIN,
(cmd & 0xF0) >> 4, cmd & 0x0F);
#endif #endif
*(uint8_t *)(bytefile->code_ptr + offset - 1) =
cmd; // set BUILTIN command if (i > bytefile->substs_area_size) {
*(uint32_t *)(bytefile->code_ptr + offset) = builtin; failure("substitution %zu name is out of area\n", i);
}
BUILTIN builtin_id = id_by_builtin(name);
// NOTE: address is first argument of the call and closure, args count is
// second argument
if (builtin_id != BUILTIN_NONE) {
uint32_t *val_ptr = (uint32_t *)(bytefile->code_ptr + offset);
uint32_t args_count =
*(uint32_t *)(bytefile->code_ptr + offset + sizeof(uint32_t));
*val_ptr = builtins.at({.id = builtin_id, .args_count = args_count});
continue; continue;
} }
@ -164,7 +236,7 @@ Offsets calc_merge_sizes(const std::vector<Bytefile *> &bytefiles) {
sizes.strings += bytefiles[i]->stringtab_size; sizes.strings += bytefiles[i]->stringtab_size;
sizes.globals += bytefiles[i]->global_area_size; sizes.globals += bytefiles[i]->global_area_size;
sizes.code += bytefiles[i]->code_size; sizes.code += bytefiles[i]->code_size;
sizes.publics_num += bytefiles[i]->public_symbols_number; // sizes.publics_num += bytefiles[i]->public_symbols_number;
} }
return sizes; return sizes;
} }
@ -177,6 +249,20 @@ struct MergeResult {
MergeResult merge_files(std::vector<Bytefile *> &&bytefiles) { MergeResult merge_files(std::vector<Bytefile *> &&bytefiles) {
Offsets sizes = calc_merge_sizes(bytefiles); Offsets sizes = calc_merge_sizes(bytefiles);
size_t public_symbols_size = calc_publics_size(sizes.publics_num); size_t public_symbols_size = calc_publics_size(sizes.publics_num);
// find all builtin variations and extract them
BuiltinSubstMap builtins_map;
{
size_t code_offset = 0;
for (size_t i = 0; i < bytefiles.size(); ++i) {
add_subst_builtin_offsets(builtins_map, code_offset, bytefiles[i]);
code_offset += bytefiles[i]->code_size;
}
}
auto [builtins_code, builtins_code_size] =
gen_builtins(sizes.code, builtins_map);
sizes.code += builtins_code_size;
Bytefile *result = Bytefile *result =
(Bytefile *)malloc(sizeof(Bytefile) + sizes.strings + sizes.code + (Bytefile *)malloc(sizeof(Bytefile) + sizes.strings + sizes.code +
public_symbols_size); // globals are on the stack public_symbols_size); // globals are on the stack
@ -190,6 +276,7 @@ MergeResult merge_files(std::vector<Bytefile *> &&bytefiles) {
for (size_t i = 0; i < bytefiles.size(); ++i) { for (size_t i = 0; i < bytefiles.size(); ++i) {
#ifdef DEBUG_VERSION #ifdef DEBUG_VERSION
printf("bytefile <%zu>\n", i); printf("bytefile <%zu>\n", i);
#endif #endif
for (size_t j = 0; j < bytefiles[i]->public_symbols_number; ++j) { for (size_t j = 0; j < bytefiles[i]->public_symbols_number; ++j) {
#ifdef DEBUG_VERSION #ifdef DEBUG_VERSION
@ -218,7 +305,8 @@ MergeResult merge_files(std::vector<Bytefile *> &&bytefiles) {
result->global_area_size = sizes.globals; result->global_area_size = sizes.globals;
result->substs_area_size = 0; result->substs_area_size = 0;
result->imports_number = 0; result->imports_number = 0;
result->public_symbols_number = sizes.publics_num; result->public_symbols_number =
0; // sizes.publics_num; // TODO: correctly set and update publics
result->main_offset = 0; // TODO: save al main offsets in some way (?) result->main_offset = 0; // TODO: save al main offsets in some way (?)
result->public_ptr = (int *)result->buffer; result->public_ptr = (int *)result->buffer;
@ -235,7 +323,7 @@ MergeResult merge_files(std::vector<Bytefile *> &&bytefiles) {
// REMOVE printf("rewrite offsets %zu\n", i); // REMOVE printf("rewrite offsets %zu\n", i);
rewrite_code_with_offsets(bytefiles[i], offsets); rewrite_code_with_offsets(bytefiles[i], offsets);
// REMOVE printf("subst in code %zu\n", i); // REMOVE printf("subst in code %zu\n", i);
subst_in_code(bytefiles[i], publics); subst_in_code(bytefiles[i], publics, builtins_map);
size_t publics_offset = calc_publics_size(offsets.publics_num); size_t publics_offset = calc_publics_size(offsets.publics_num);
@ -244,22 +332,31 @@ MergeResult merge_files(std::vector<Bytefile *> &&bytefiles) {
bytefiles[i]->stringtab_size); bytefiles[i]->stringtab_size);
memcpy(result->code_ptr + offsets.code, bytefiles[i]->code_ptr, memcpy(result->code_ptr + offsets.code, bytefiles[i]->code_ptr,
bytefiles[i]->code_size); bytefiles[i]->code_size);
memcpy((char *)result->public_ptr + publics_offset, // memcpy((char *)result->public_ptr + publics_offset,
(char *)bytefiles[i]->public_ptr, // (char *)bytefiles[i]->public_ptr,
calc_publics_size( // calc_publics_size(
bytefiles[i]->public_symbols_number)); // TODO: recalc publics: // bytefiles[i]->public_symbols_number)); // TODO: recalc
// offsets, strings // publics:
// // offsets, strings
// update offsets // update offsets
offsets.strings += bytefiles[i]->stringtab_size; offsets.strings += bytefiles[i]->stringtab_size;
offsets.globals += bytefiles[i]->global_area_size; offsets.globals += bytefiles[i]->global_area_size;
offsets.code += bytefiles[i]->code_size; offsets.code += bytefiles[i]->code_size;
offsets.publics_num += bytefiles[i]->public_symbols_number; // offsets.publics_num += bytefiles[i]->public_symbols_number;
free(bytefiles[i]); free(bytefiles[i]);
} }
memcpy(result->code_ptr + offsets.code, builtins_code, builtins_code_size);
free(builtins_code);
#ifdef DEBUG_VERSION #ifdef DEBUG_VERSION
std::cout << "main offsets:\n";
for (const auto &offset : main_offsets) {
std::cout << offset << '\n';
}
std::cout << "- merged file:\n"; std::cout << "- merged file:\n";
print_file(*result, std::cout); print_file(*result, std::cout);
#endif #endif
@ -272,7 +369,9 @@ Bytefile *path_mod_load(const char *name, std::filesystem::path &&path) {
#ifdef DEBUG_VERSION #ifdef DEBUG_VERSION
std::cout << "- module path load '" << name << "'\n"; std::cout << "- module path load '" << name << "'\n";
#endif #endif
return read_file(path.c_str()); Bytefile *file = read_file(path.c_str());
return file;
// return read_file(path.c_str());
} }
static std::vector<std::filesystem::path> search_paths; static std::vector<std::filesystem::path> search_paths;
@ -354,7 +453,10 @@ MergeResult load_with_imports(Bytefile *root, bool do_verification) {
#ifdef DEBUG_VERSION #ifdef DEBUG_VERSION
printf("main offsets count: %zu\n", result.main_offsets.size()); printf("main offsets count: %zu\n", result.main_offsets.size());
#endif #endif
analyze(result.bf /*, std::move(result.main_offsets)*/); analyze(result.bf, std::move(result.main_offsets));
#ifdef DEBUG_VERSION
std::cout << "verification done" << std::endl;
#endif
} }
return result; return result;
} }
@ -390,6 +492,7 @@ struct StdFunc {
bool is_vararg = false; bool is_vararg = false;
}; };
// TODO: FIXME: add kind, binops
BUILTIN id_by_builtin(const char *name) { BUILTIN id_by_builtin(const char *name) {
static const std::unordered_map<std::string, BUILTIN> std_func = { static const std::unordered_map<std::string, BUILTIN> std_func = {
{"Luppercase", BUILTIN_Luppercase}, {"Luppercase", BUILTIN_Luppercase},
@ -440,6 +543,7 @@ BUILTIN id_by_builtin(const char *name) {
} }
void run_stdlib_func(BUILTIN id, size_t args_count) { void run_stdlib_func(BUILTIN id, size_t args_count) {
// std::cout << "RUN BUILTIN: " << id << '\n'; // TODO: TMP
void *ret = NULL; void *ret = NULL;
// TODO: deal with right pointers, etc. // TODO: deal with right pointers, etc.
switch (id) { switch (id) {
@ -498,8 +602,8 @@ void run_stdlib_func(BUILTIN id, size_t args_count) {
s_push(ret); s_push(ret);
break; break;
case BUILTIN_LmatchSubString: case BUILTIN_LmatchSubString:
ret = (void *)LmatchSubString((char *)*s_nth(0), (char *)*s_nth(1), ret = (void *)LmatchSubString((char *)*s_nth(2), (char *)*s_nth(1),
*s_nth_i(2)); *s_nth_i(0));
s_popn(3); s_popn(3);
s_push(ret); s_push(ret);
break; break;
@ -533,12 +637,12 @@ void run_stdlib_func(BUILTIN id, size_t args_count) {
s_push(ret); s_push(ret);
break; break;
case BUILTIN_Lcompare: case BUILTIN_Lcompare:
ret = (void *)Lcompare(*s_nth(0), *s_nth(1)); ret = (void *)Lcompare(*s_nth(1), *s_nth(0));
s_popn(2); s_popn(2);
s_push(ret); s_push(ret);
break; break;
case BUILTIN_LflatCompare: case BUILTIN_LflatCompare:
ret = (void *)LflatCompare(*s_nth(0), *s_nth(1)); ret = (void *)LflatCompare(*s_nth(1), *s_nth(0));
s_popn(2); s_popn(2);
s_push(ret); s_push(ret);
break; break;
@ -571,7 +675,7 @@ void run_stdlib_func(BUILTIN id, size_t args_count) {
call_anyarg_func<20>((void (*)()) & Lprintf, args_count); call_anyarg_func<20>((void (*)()) & Lprintf, args_count);
break; break;
case BUILTIN_Lfopen: case BUILTIN_Lfopen:
ret = (void *)Lfopen((char *)*s_nth(0), (char *)*s_nth(1)); ret = (void *)Lfopen((char *)*s_nth(1), (char *)*s_nth(0));
s_popn(2); s_popn(2);
s_push(ret); s_push(ret);
break; break;
@ -586,7 +690,7 @@ void run_stdlib_func(BUILTIN id, size_t args_count) {
s_push(ret); s_push(ret);
break; break;
case BUILTIN_Lfwrite: case BUILTIN_Lfwrite:
/*ret = (void *)*/ Lfwrite((char *)*s_nth(0), (char *)*s_nth(1)); /*ret = (void *)*/ Lfwrite((char *)*s_nth(1), (char *)*s_nth(0));
s_popn(2); s_popn(2);
// s_push(ret); // NOTE: ?? // s_push(ret); // NOTE: ??
break; break;
@ -605,8 +709,8 @@ void run_stdlib_func(BUILTIN id, size_t args_count) {
s_push(ret); s_push(ret);
break; break;
case BUILTIN_LregexpMatch: case BUILTIN_LregexpMatch:
ret = (void *)LregexpMatch((struct re_pattern_buffer *)*s_nth(0), ret = (void *)LregexpMatch((struct re_pattern_buffer *)*s_nth(2),
(char *)*s_nth(1), *s_nth_i(2)); (char *)*s_nth(1), *s_nth_i(0));
s_popn(2); s_popn(2);
s_push(ret); s_push(ret);
break; break;

View file

@ -272,9 +272,9 @@ template <bool use_out> static inline void print_space(std::ostream &out) {
template <bool use_out, ArgT arg> template <bool use_out, ArgT arg>
requires(arg == ArgT::INT) requires(arg == ArgT::INT)
static inline uint read_print_val(char **ip, const Bytefile &bf, static inline aint read_print_val(char **ip, const Bytefile &bf,
std::ostream &out) { std::ostream &out) {
uint val = ip_read_int_safe(ip, &bf); aint val = ip_read_int_safe(ip, &bf);
if constexpr (use_out) { if constexpr (use_out) {
out << val; out << val;
} }
@ -283,12 +283,17 @@ static inline uint read_print_val(char **ip, const Bytefile &bf,
template <bool use_out, ArgT arg> template <bool use_out, ArgT arg>
requires(arg == ArgT::OFFSET) requires(arg == ArgT::OFFSET)
static inline uint read_print_val(char **ip, const Bytefile &bf, static inline aint read_print_val(char **ip, const Bytefile &bf,
std::ostream &out) { std::ostream &out) {
uint val = ip_read_int_safe(ip, &bf); aint val = ip_read_int_safe(ip, &bf);
if constexpr (use_out) { if constexpr (use_out) {
// NOTE: < 0 for builtin closures
if (val >= 0) {
out << std::hex << val << std::dec;
} else {
out << val; out << val;
} }
}
return val; return val;
} }
@ -500,7 +505,8 @@ std::pair<Cmd, uint8_t> parse_command_impl(char **ip, const Bytefile &bf,
read_print_cmd_seq_opt<do_read_args, use_out>(cmd, l, ip, bf, out); read_print_cmd_seq_opt<do_read_args, use_out>(cmd, l, ip, bf, out);
print_space<use_out>(out); print_space<use_out>(out);
if constexpr (do_read_args) { if constexpr (do_read_args) {
size_t call_p = read_print_val<use_out, ArgT::OFFSET>(ip, bf, out); /*aint call_offset =*/read_print_val<use_out, ArgT::OFFSET>(ip, bf,
out);
print_space<use_out>(out); print_space<use_out>(out);
size_t args_count = read_print_val<use_out, ArgT::INT>(ip, bf, out); size_t args_count = read_print_val<use_out, ArgT::INT>(ip, bf, out);
for (size_t i = 0; i < args_count; i++) { for (size_t i = 0; i < args_count; i++) {
@ -662,16 +668,16 @@ void print_file_info(const Bytefile &bf, std::ostream &out) {
void print_file_code(const Bytefile &bf, std::ostream &out) { void print_file_code(const Bytefile &bf, std::ostream &out) {
char *ip = bf.code_ptr; char *ip = bf.code_ptr;
while (true) { while (ip - bf.code_ptr < bf.code_size) {
out << " " << std::setfill('0') << std::setw(8) << std::hex out << " " << std::setfill('0') << std::setw(8) << std::hex
<< ip - bf.code_ptr << ": " << std::dec; << ip - bf.code_ptr << ": " << std::dec;
const auto [cmd, l] = parse_command(&ip, &bf, out); const auto [cmd, l] = parse_command(&ip, &bf, out);
out << std::endl; out << std::endl;
if (cmd == Cmd::EXIT) { // if (cmd == Cmd::EXIT) {
std::cout << "> EXIT" << std::endl; // std::cout << "> EXIT" << std::endl;
break; // break;
} // }
} }
} }

View file

@ -0,0 +1,38 @@
#!/usr/bin/env bash
# Stdlib tester: builds the compiler, compiles every stdlib module with it,
# then compiles each stdlib regression test matching *01.lama and runs the
# resulting bytecode through ./byterun.exe, showing the output and saving it
# to test.log.
#
# All expansions are quoted so paths containing spaces or glob characters are
# passed through unchanged, and cleanup uses `rm -f` so a missing artifact
# does not produce an error.

dune build

stdlib_dir="../stdlib"
compiler="../_build/default/src/Driver.exe"

echo "Used compiler path:"
echo "$compiler"

echo "Build modules:"
for mod in "$stdlib_dir"/*.lama; do
  echo "$mod"
  "$compiler" -b "$mod" -I "$stdlib_dir/"
done

echo "Run tests:"
for test in "$stdlib_dir"/regression/*01.lama; do
  echo "$test"
  "$compiler" -b "$test" -I "$stdlib_dir/" > /dev/null
  test_file="${test%.*}"
  echo "$test_file"
  # cat $test_file.input | ./byterun.exe -p test*.bc > test.bc.code
  # cat $test_file.input | ./byterun.exe -p test*.bc
  # Run the test once: tee shows the output and stores the same bytes in
  # test.log (previously the test was executed twice for that).
  echo "" | ./byterun.exe -vi test*.bc | tee test.log
  # sed '1d;s/^..//' $test_file.t > test_orig.log
  # diff test.log test_orig.log
  rm -f -- test*.bc
  # rm test.log test_orig.log
  echo "done"
done

# Remove build artifacts left by the module compilation step.
rm -f -- *.bc
rm -f -- *.o