lama_byterun/byterun/src/analyzer.cpp

388 lines
11 KiB
C++
Raw Normal View History

2024-12-13 13:32:50 +03:00
#include "analyzer.hpp"
#include "parser.hpp"
#include <iostream>
2024-12-13 13:32:50 +03:00
extern "C" {
#include "types.h"
}
2024-12-13 13:32:50 +03:00
#include <vector>
// Statically checks the bytecode in `bf` before it is executed.
//
// Starting from every public symbol, the code is walked instruction by
// instruction. For each reached offset the operand-stack depth on entry is
// remembered in `visited`; reaching the same offset with a different depth,
// popping from an empty stack, referring to a variable slot that does not
// exist, jumping outside of the code section or entering a function that does
// not start with BEGIN/CBEGIN is reported through ip_failure()/failure().
//
// Side effect: the BEGIN/CBEGIN instruction of every analysed function is
// patched in place - two bytes of its second operand receive the maximal
// operand-stack depth observed inside that function.
//
// Parameters:
//   bf - loaded bytefile; its code section is read and patched in place.
void analyze(Bytefile *bf) {
  static constexpr const int NOT_VISITED = -1;

  // visited[offset] is the stack depth on entry to the instruction at `offset`
  // (NOT_VISITED if the instruction has not been scheduled yet).
  std::vector<int> visited(bf->code_size, NOT_VISITED);
  // Marks offsets that are targets of an explicit jump/call or public entries.
  std::vector<bool> control_flow_in(bf->code_size, false);
  const size_t code_size = visited.size();

  // Work lists: function entries still to be analysed, and instructions of the
  // function currently being analysed.
  std::vector<size_t> to_visit_func;
  std::vector<size_t> to_visit_jmp;

  int current_stack_depth = 0;
  const uint globals_count = bf->global_area_size;
  uint current_locals_count = 0;
  uint current_args_count = 0;
  // Points into the BEGIN instruction of the function being analysed;
  // nullptr means "between functions".
  uint16_t *current_begin_counter = nullptr;

  char *ip = bf->code_ptr;             // moved past the instruction by the parser
  char *current_ip = ip;               // cursor used to re-read operands
  char *saved_current_ip = current_ip; // start of the current instruction

  // Schedules `offset` for analysis inside the current function, remembering
  // the stack depth it is reached with.
  auto const jmp_to_visit_push = [&saved_current_ip, &bf, &visited,
                                  &current_stack_depth,
                                  &to_visit_jmp](size_t offset) {
    // Explicit jump targets are range-checked by the caller, so an offset past
    // the end here means execution would run off the end of the code section.
    // It must be rejected before it is used as a vector index.
    if (offset >= visited.size()) {
      ip_failure(saved_current_ip, bf,
                 "instruction pointer is out of range (>= size)");
    }
    if (visited[offset] == NOT_VISITED) {
      visited[offset] = current_stack_depth;
      to_visit_jmp.push_back(offset);
    } else if (visited[offset] != current_stack_depth) {
      // TODO: is this condition same for calls?
      ip_failure(saved_current_ip, bf,
                 "different stack depth on same point is not allowed");
    }
  };

  // Schedules a function entry; a function always starts with an empty stack.
  // Callers must pass an offset inside the code section.
  auto const func_to_visit_push = [&visited, &to_visit_func](size_t offset) {
    if (visited[offset] == NOT_VISITED) {
      to_visit_func.push_back(offset);
    }
    visited[offset] = 0;
  };

  auto const jmp_control_push = [&jmp_to_visit_push,
                                 &control_flow_in](size_t offset) {
    control_flow_in[offset] = true;
    jmp_to_visit_push(offset);
  };

  auto const func_control_push = [&func_to_visit_push,
                                  &control_flow_in](size_t offset) {
    control_flow_in[offset] = true;
    func_to_visit_push(offset);
  };

  // Validates a variable reference (category `l`, index `id`) against the
  // sizes known for the function being analysed.
  auto const check_correct_var = [&saved_current_ip, &bf, &globals_count,
                                  &current_locals_count,
                                  &current_args_count](uint8_t l, uint id) {
    if (l > 3) {
      ip_failure(saved_current_ip, bf, "unexpected variable category");
    }
    VarCategory category = to_var_category(l);
    switch (category) {
    case VAR_GLOBAL:
      if (id >= globals_count) {
        ip_failure(saved_current_ip, bf, "global var index is out of range");
      }
      break;
    case VAR_LOCAL:
      if (id >= current_locals_count) {
        ip_failure(saved_current_ip, bf, "local var index is out of range");
      }
      break;
    case VAR_ARGUMENT:
      if (id >= current_args_count) {
        ip_failure(saved_current_ip, bf, "argument var index is out of range");
      }
      break;
    case VAR_CLOSURE:
      // NOTE: impossible to properly check there (?)
      break;
    }
  };

  // add publics
  to_visit_func.reserve(bf->public_symbols_number);
  for (size_t i = 0; i < bf->public_symbols_number; ++i) {
    const size_t public_offset = get_public_offset(bf, i);
    // The offset comes straight from the file; check it before it is used as
    // an index into the per-offset tables.
    if (public_offset >= code_size) {
      failure("public symbol offset is out of range");
    }
    func_control_push(public_offset);
  }

  if (to_visit_func.size() == 0) {
    failure("no public symbols detected");
  }

  while (true) {
    ip = bf->code_ptr;

    // Pick the next instruction: a new function entry when no function is in
    // progress, otherwise a pending instruction of the current function.
    if (current_begin_counter == nullptr) {
      if (to_visit_func.empty()) {
        break;
      }
      ip += to_visit_func.back();
      to_visit_func.pop_back();
      current_stack_depth = 0;
    } else {
      if (to_visit_jmp.empty()) {
        // Current function is fully analysed.
        current_begin_counter = nullptr;
        continue;
      }
      ip += to_visit_jmp.back();
      to_visit_jmp.pop_back();
    }

    if (ip >= bf->code_ptr + bf->code_size) {
      ip_safe_failure(ip, bf, "instruction pointer is out of range (>= size)");
    }
    if (ip < bf->code_ptr) {
      ip_safe_failure(ip, bf, "instruction pointer is out of range (< 0)");
    }

    current_ip = ip;
    saved_current_ip = current_ip;

    // parse_command moves `ip` past the whole instruction; `current_ip` stays
    // at its start so the operands can be re-read below.
#ifdef DEBUG_VERSION
    const auto [cmd, l] = parse_command(&ip, *bf, std::cout);
#else
    const auto [cmd, l] = parse_command(&ip, *bf);
#endif

    if (current_begin_counter == nullptr && cmd != Cmd::BEGIN &&
        cmd != Cmd::CBEGIN) {
      ip_failure(saved_current_ip, bf, "function does not start with begin");
    }

    if (visited[current_ip - bf->code_ptr] == NOT_VISITED) {
      ip_failure(saved_current_ip, bf, "not visited command");
    }
    current_stack_depth = visited[current_ip - bf->code_ptr];

#ifdef DEBUG_VERSION
    std::cout << " -- [" << current_stack_depth << ']' << '\n';
#endif

    ++current_ip; // skip command byte

    // Stack slots needed only while the instruction executes (not afterwards).
    size_t extra_stack_during_opr = 0;

    // change stack depth
    switch (cmd) {
    case Cmd::BINOP:
      current_stack_depth -= 2;
      if (current_stack_depth < 0) {
        ip_failure(saved_current_ip, bf, "not enough elements in stack");
      }
      ++current_stack_depth;
      break;
    case Cmd::CONST:
      ++current_stack_depth;
      break;
    case Cmd::STRING:
      ++current_stack_depth;
      break;
    case Cmd::SEXP:
      ip_read_string(&current_ip, *bf);
      current_stack_depth -= ip_read_int(&current_ip, *bf);
      if (current_stack_depth < 0) {
        ip_failure(saved_current_ip, bf, "not enough elements in stack");
      }
      ++current_stack_depth;
      break;
    case Cmd::STI:
      current_stack_depth -= 2;
      if (current_stack_depth < 0) {
        ip_failure(saved_current_ip, bf, "not enough elements in stack");
      }
      ++current_stack_depth;
      break;
    case Cmd::STA:
      current_stack_depth -= 3;
      if (current_stack_depth < 0) {
        ip_failure(saved_current_ip, bf, "not enough elements in stack");
      }
      ++current_stack_depth;
      break;
    case Cmd::JMP:
      break;
    case Cmd::END:
      --current_stack_depth;
      break;
    case Cmd::RET:
      --current_stack_depth;
      break;
    case Cmd::DROP:
      --current_stack_depth;
      break;
    case Cmd::DUP:
      // Duplicating needs a value to duplicate.
      if (current_stack_depth < 1) {
        ip_failure(saved_current_ip, bf, "not enough elements in stack");
      }
      ++current_stack_depth;
      break;
    case Cmd::SWAP:
      if (current_stack_depth < 2) {
        ip_failure(saved_current_ip, bf, "not enough elements in stack");
      }
      break;
    case Cmd::ELEM:
      current_stack_depth -= 2;
      if (current_stack_depth < 0) {
        ip_failure(saved_current_ip, bf, "not enough elements in stack");
      }
      ++current_stack_depth;
      break;
    case Cmd::LD:
      check_correct_var(l, ip_read_int(&current_ip, *bf));
      ++current_stack_depth;
      break;
    case Cmd::LDA:
      check_correct_var(l, ip_read_int(&current_ip, *bf));
      ++current_stack_depth;
      break;
    case Cmd::ST:
      check_correct_var(l, ip_read_int(&current_ip, *bf));
      if (current_stack_depth < 1) {
        ip_failure(saved_current_ip, bf, "not enough elements in stack");
      }
      break;
    case Cmd::CJMPz:
    case Cmd::CJMPnz:
      --current_stack_depth;
      break;
    case Cmd::BEGIN:
    case Cmd::CBEGIN:
      if (current_begin_counter != nullptr) {
        ip_failure(saved_current_ip, bf, "unexpected function beginning");
      }
      current_args_count = ip_read_int(&current_ip, *bf);
      // Two bytes into the second operand: this is where the maximal stack
      // depth of the function is accumulated.
      current_begin_counter = (uint16_t *)(current_ip + sizeof(uint16_t));
      current_locals_count = ip_read_int(&current_ip, *bf);
      // TODO: check uint16_t max ??
      // NOTE(review): if ip_read_int consumes 4 bytes, this store targets the
      // same two bytes as current_begin_counter and is overwritten by the
      // next line - confirm the intended layout.
      (*(uint16_t *)(current_ip - sizeof(uint16_t))) = current_locals_count;
      *current_begin_counter = 0;
      break;
    case Cmd::CLOSURE: {
      // Operands must be consumed through current_ip: `ip` already points past
      // this instruction and is used below as the address of the successor.
      ip_read_int(&current_ip, *bf);                     // offset
      size_t args_count = ip_read_int(&current_ip, *bf); // args count
      // presumably the captured values occupy the stack while the closure is
      // being built - TODO confirm against the interpreter
      extra_stack_during_opr = args_count;
      for (size_t i = 0; i < args_count; i++) {
        aint arg_type = ip_read_byte(&current_ip, *bf);
        aint arg_id = ip_read_int(&current_ip, *bf);
        check_correct_var(arg_type, arg_id);
      }
      ++current_stack_depth;
    } break;
    case Cmd::CALLC: {
      uint args_count = ip_read_int(&current_ip, *bf);
      current_stack_depth -= args_count + 1; // + closure itself
      if (current_stack_depth < 0) {
        ip_failure(saved_current_ip, bf, "not enough elements in stack");
      }
      ++current_stack_depth;
      // NOTE: can't check args == cbegin args (?)
    } break;
    case Cmd::CALL: {
      uint call_offset = ip_read_int(&current_ip, *bf); // call offset
      uint args_count = ip_read_int(&current_ip, *bf);
      current_stack_depth -= args_count;
      if (current_stack_depth < 0) {
        ip_failure(saved_current_ip, bf, "not enough elements in stack");
      }
      ++current_stack_depth;

      // TODO: unify with next loop
      // The callee's command byte and its first operand are read below, so
      // both must lie completely inside the code section.
      if (call_offset >= code_size ||
          code_size - call_offset < 1 + sizeof(uint)) {
        ip_failure(saved_current_ip, bf, "jump/call out of file");
      }
      // First operand of the callee's BEGIN is its argument count.
      if (args_count != *(uint *)(bf->code_ptr + call_offset + 1)) {
        ip_failure(saved_current_ip, bf, "wrong call argument count");
      }
    } break;
    case Cmd::TAG:
      if (current_stack_depth < 1) {
        ip_failure(saved_current_ip, bf, "not enough elements in stack");
      }
      break;
    case Cmd::ARRAY:
      if (current_stack_depth < 1) {
        ip_failure(saved_current_ip, bf, "not enough elements in stack");
      }
      break;
    case Cmd::FAIL:
      --current_stack_depth;
      break;
    case Cmd::LINE:
      break;
    case Cmd::PATT:
      --current_stack_depth;
      if (l == 0) { // to arg for '=str' // FIXME: magic const
        --current_stack_depth;
      }
      if (current_stack_depth < 0) {
        ip_failure(saved_current_ip, bf, "not enough elements in stack");
      }
      ++current_stack_depth;
      break;
    case Cmd::Lread:
      ++current_stack_depth;
      break;
    case Cmd::Lwrite:
    case Cmd::Llength:
    case Cmd::Lstring:
      if (current_stack_depth < 1) {
        ip_failure(saved_current_ip, bf, "not enough elements in stack");
      }
      break;
    case Cmd::Barray:
      current_stack_depth -= ip_read_int(&current_ip, *bf); // elem count
      if (current_stack_depth < 0) {
        ip_failure(saved_current_ip, bf, "not enough elements in stack");
      }
      ++current_stack_depth;
      break;
    case Cmd::EXIT:
      ip_failure(saved_current_ip, bf,
                 "exit should be unreachable"); // NOTE: not sure
      break;
    case Cmd::_UNDEF_:
      ip_failure(saved_current_ip, bf, "undefined command");
      break;
    }

    if (current_begin_counter == nullptr) {
      ip_failure(saved_current_ip, bf, "function does not start with begin");
    }

    if (current_stack_depth < 0) {
      ip_failure(saved_current_ip, bf, "not enough elements in stack");
    }

    // Keep the running maximum of the stack depth in the function prologue.
    *current_begin_counter =
        std::max(*current_begin_counter,
                 (uint16_t)(current_stack_depth + extra_stack_during_opr));

    // Rewind to the first operand for the control-flow pass.
    current_ip = saved_current_ip;
    ++current_ip; // skip command byte

    // do jumps
    switch (cmd) {
    case Cmd::EXIT:
    case Cmd::END:
    case Cmd::FAIL:
      // No successor inside the current function.
      break;

    case Cmd::CJMPz:
    case Cmd::CJMPnz:
    case Cmd::CLOSURE:
    case Cmd::CALL:
      // Execution may also continue with the next instruction.
      jmp_to_visit_push(ip - bf->code_ptr);
      [[fallthrough]];
    case Cmd::JMP: {
      bool is_call = (cmd == Cmd::CLOSURE || cmd == Cmd::CALL);

      uint jmp_p = ip_read_int(&current_ip, *bf);
      if (jmp_p >= bf->code_size) {
        // NOTE: maybe also should check that > begin (?)
        ip_failure(saved_current_ip, bf, "jump/call out of file");
      }
      if (is_call) {
        func_control_push(jmp_p);
      } else {
        jmp_control_push(jmp_p);
      }
      break;
    }

    case Cmd::_UNDEF_:
      ip_failure(saved_current_ip, bf, "undefined command");
      break;

    default:
      jmp_to_visit_push(ip - bf->code_ptr);
      break;
    }
  }
}