mirror of
https://github.com/ProgramSnail/Lama.git
synced 2025-12-06 06:48:48 +00:00
file parser, file merge, callf command remove, SM fixes. todo: fix interpreter and analyzer with new algorithm
This commit is contained in:
parent
51381aea43
commit
343a21ee2d
8 changed files with 256 additions and 77 deletions
|
|
@ -1,6 +1,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <ostream>
|
#include <ostream>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#include "parser.h"
|
#include "parser.h"
|
||||||
|
|
@ -35,7 +36,7 @@ enum class Cmd : int8_t {
|
||||||
ARRAY,
|
ARRAY,
|
||||||
FAIL,
|
FAIL,
|
||||||
LINE,
|
LINE,
|
||||||
CALLF,
|
// CALLF,
|
||||||
PATT,
|
PATT,
|
||||||
// NOTE: no longer used
|
// NOTE: no longer used
|
||||||
// Lread,
|
// Lread,
|
||||||
|
|
@ -49,6 +50,8 @@ enum class Cmd : int8_t {
|
||||||
|
|
||||||
Bytefile *read_file(const char *fname);
|
Bytefile *read_file(const char *fname);
|
||||||
|
|
||||||
|
// Merges several loaded bytefiles into one. The inputs are passed as
// pointers handed over by the caller: the implementation rewrites their code
// in place and releases each of them with free().
Bytefile *merge_files(std::vector<Bytefile *> &&bytefiles);
|
||||||
|
|
||||||
std::pair<Cmd, uint8_t> parse_command(char **ip, const Bytefile *bf);
|
std::pair<Cmd, uint8_t> parse_command(char **ip, const Bytefile *bf);
|
||||||
std::pair<Cmd, uint8_t> parse_command(char **ip, const Bytefile *bf,
|
std::pair<Cmd, uint8_t> parse_command(char **ip, const Bytefile *bf,
|
||||||
std::ostream &out);
|
std::ostream &out);
|
||||||
|
|
|
||||||
|
|
@ -255,7 +255,7 @@ static inline void **var_by_category(enum VarCategory category, size_t id) {
|
||||||
// s.bf->global_area_size);
|
// s.bf->global_area_size);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
var = s.stack + STACK_SIZE - 1 - id;
|
var = s.bf->global_ptr + STACK_SIZE - 1 - id;
|
||||||
break;
|
break;
|
||||||
case VAR_LOCAL:
|
case VAR_LOCAL:
|
||||||
#ifndef WITH_CHECK
|
#ifndef WITH_CHECK
|
||||||
|
|
|
||||||
|
|
@ -164,7 +164,7 @@ enum CMD_CTRLS {
|
||||||
CMD_CTRL_ARRAY,
|
CMD_CTRL_ARRAY,
|
||||||
CMD_CTRL_FAIL,
|
CMD_CTRL_FAIL,
|
||||||
CMD_CTRL_LINE,
|
CMD_CTRL_LINE,
|
||||||
CMD_CTRL_CALLF,
|
// CMD_CTRL_CALLF,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum CMD_PATTS {
|
enum CMD_PATTS {
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,11 @@
|
||||||
#include "../../runtime/runtime.h"
|
#include "../../runtime/runtime.h"
|
||||||
#include "../../runtime/runtime_common.h"
|
#include "../../runtime/runtime_common.h"
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
uint offset;
|
||||||
|
char label[0];
|
||||||
|
} Subst;
|
||||||
|
|
||||||
/* The unpacked representation of bytecode file */
|
/* The unpacked representation of bytecode file */
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint main_offset; /* offset of the function 'main' */
|
uint main_offset; /* offset of the function 'main' */
|
||||||
|
|
@ -14,10 +19,12 @@ typedef struct {
|
||||||
int *imports_ptr; /* A pointer to the beginning of imports table */
|
int *imports_ptr; /* A pointer to the beginning of imports table */
|
||||||
int *public_ptr; /* A pointer to the beginning of publics table */
|
int *public_ptr; /* A pointer to the beginning of publics table */
|
||||||
char *code_ptr; /* A pointer to the bytecode itself */
|
char *code_ptr; /* A pointer to the bytecode itself */
|
||||||
int *global_ptr; /* A pointer to the global area */
|
void **global_ptr; /* A pointer to the global area */
|
||||||
|
char *substs_ptr; /* A pointer to the substs area */
|
||||||
int code_size; /* The size (in bytes) of code */
|
int code_size; /* The size (in bytes) of code */
|
||||||
uint stringtab_size; /* The size (in bytes) of the string table */
|
uint stringtab_size; /* The size (in bytes) of the string table */
|
||||||
uint global_area_size; /* The size (in words) of global area */
|
uint global_area_size; /* The size (in words) of global area */
|
||||||
|
uint substs_area_size; /* number of required address substitutions */
|
||||||
uint imports_number; /* The number of imports */
|
uint imports_number; /* The number of imports */
|
||||||
uint public_symbols_number; /* The number of public symbols */
|
uint public_symbols_number; /* The number of public symbols */
|
||||||
char buffer[0];
|
char buffer[0];
|
||||||
|
|
@ -64,6 +71,10 @@ static inline size_t get_public_offset_unsafe(const Bytefile *bf, size_t i) {
|
||||||
|
|
||||||
// read from ip
|
// read from ip
|
||||||
|
|
||||||
|
/* Stores the 32-bit value `x` at the address `ip`.
 * No bounds check of any kind is performed. */
static inline void ip_write_int_unsafe(char *ip, int32_t x) {
  int32_t *const slot = (int32_t *)ip;
  *slot = x;
}
|
||||||
|
|
||||||
static inline uint16_t ip_read_half_int_unsafe(char **ip) {
|
static inline uint16_t ip_read_half_int_unsafe(char **ip) {
|
||||||
*ip += sizeof(uint16_t);
|
*ip += sizeof(uint16_t);
|
||||||
return *(uint16_t *)((*ip) - sizeof(uint16_t));
|
return *(uint16_t *)((*ip) - sizeof(uint16_t));
|
||||||
|
|
@ -124,6 +135,13 @@ static inline size_t get_public_offset_safe(const Bytefile *f, size_t i) {
|
||||||
|
|
||||||
// read from ip
|
// read from ip
|
||||||
|
|
||||||
|
/* Bounds-checked counterpart of ip_write_int_unsafe: stores the 32-bit value
 * `x` at `ip` after verifying that the whole slot ends no later than the end
 * of the code section of `bf`. A slot that does not fit is reported through
 * failure(). */
static inline void ip_write_int_safe(char *ip, int32_t x, const Bytefile *bf) {
  if (ip + sizeof(int32_t) > bf->code_ptr + bf->code_size) {
    /* this helper writes, so the message must not talk about reading */
    failure("last command is invalid, int parameter can not be written\n");
  }
  ip_write_int_unsafe(ip, x);
}
|
||||||
|
|
||||||
static inline uint16_t ip_read_half_int_safe(char **ip, const Bytefile *bf) {
|
static inline uint16_t ip_read_half_int_safe(char **ip, const Bytefile *bf) {
|
||||||
if (*ip + sizeof(uint16_t) > bf->code_ptr + bf->code_size) {
|
if (*ip + sizeof(uint16_t) > bf->code_ptr + bf->code_size) {
|
||||||
failure("last command is invalid, int parameter can not be read\n");
|
failure("last command is invalid, int parameter can not be read\n");
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,6 @@ void analyze(uint32_t mod_id) {
|
||||||
};
|
};
|
||||||
|
|
||||||
auto const func_to_visit_push = [&saved_current_ip, mod_id, &visited,
|
auto const func_to_visit_push = [&saved_current_ip, mod_id, &visited,
|
||||||
¤t_stack_depth,
|
|
||||||
&to_visit_func](size_t offset) {
|
&to_visit_func](size_t offset) {
|
||||||
if (visited[offset] == NOT_VISITED) {
|
if (visited[offset] == NOT_VISITED) {
|
||||||
visited[offset] = 0;
|
visited[offset] = 0;
|
||||||
|
|
@ -258,10 +257,11 @@ void analyze(uint32_t mod_id) {
|
||||||
is_in_closure = (cmd == Cmd::CBEGIN);
|
is_in_closure = (cmd == Cmd::CBEGIN);
|
||||||
break;
|
break;
|
||||||
case Cmd::CLOSURE: {
|
case Cmd::CLOSURE: {
|
||||||
uint closure_offset = ip_read_int_unsafe(¤t_ip); // closure offset
|
/*uint closure_offset = */ ip_read_int_unsafe(
|
||||||
|
¤t_ip); // closure offset
|
||||||
size_t args_count = ip_read_int_unsafe(¤t_ip); // args count
|
size_t args_count = ip_read_int_unsafe(¤t_ip); // args count
|
||||||
extra_stack_during_opr = args_count;
|
extra_stack_during_opr = args_count;
|
||||||
for (aint i = 0; i < args_count; i++) {
|
for (size_t i = 0; i < args_count; i++) {
|
||||||
aint arg_type = ip_read_byte_unsafe(¤t_ip);
|
aint arg_type = ip_read_byte_unsafe(¤t_ip);
|
||||||
aint arg_id = ip_read_int_unsafe(¤t_ip);
|
aint arg_id = ip_read_int_unsafe(¤t_ip);
|
||||||
check_correct_var(arg_type, arg_id);
|
check_correct_var(arg_type, arg_id);
|
||||||
|
|
@ -296,7 +296,7 @@ void analyze(uint32_t mod_id) {
|
||||||
}
|
}
|
||||||
++current_stack_depth;
|
++current_stack_depth;
|
||||||
|
|
||||||
if (call_offset >= bf->code_size) {
|
if ((int)call_offset >= bf->code_size) {
|
||||||
ip_failure(saved_current_ip, mod_id, "jump/call out of file");
|
ip_failure(saved_current_ip, mod_id, "jump/call out of file");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -322,18 +322,18 @@ void analyze(uint32_t mod_id) {
|
||||||
break;
|
break;
|
||||||
case Cmd::LINE:
|
case Cmd::LINE:
|
||||||
break;
|
break;
|
||||||
case Cmd::CALLF: {
|
// case Cmd::CALLF: {
|
||||||
// TODO: find link to real function and replace call (need to save all
|
// // TODO: find link to real function and replace call (need to save all
|
||||||
// modules in one space) <- optimization
|
// // modules in one space) <- optimization
|
||||||
|
|
||||||
ip_read_int_unsafe(¤t_ip); // function name (str)
|
// ip_read_int_unsafe(¤t_ip); // function name (str)
|
||||||
uint args_count = ip_read_int_unsafe(¤t_ip);
|
// uint args_count = ip_read_int_unsafe(¤t_ip);
|
||||||
current_stack_depth -= args_count;
|
// current_stack_depth -= args_count;
|
||||||
if (current_stack_depth < 0) {
|
// if (current_stack_depth < 0) {
|
||||||
ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
|
// ip_failure(saved_current_ip, mod_id, "not enough elements in stack");
|
||||||
}
|
// }
|
||||||
++current_stack_depth;
|
// ++current_stack_depth;
|
||||||
} break;
|
// } break;
|
||||||
case Cmd::PATT:
|
case Cmd::PATT:
|
||||||
--current_stack_depth;
|
--current_stack_depth;
|
||||||
if (l == CMD_PATT_STR) {
|
if (l == CMD_PATT_STR) {
|
||||||
|
|
@ -402,7 +402,7 @@ void analyze(uint32_t mod_id) {
|
||||||
bool is_call = (cmd == Cmd::CLOSURE || cmd == Cmd::CALL);
|
bool is_call = (cmd == Cmd::CLOSURE || cmd == Cmd::CALL);
|
||||||
|
|
||||||
uint jmp_p = ip_read_int_unsafe(¤t_ip);
|
uint jmp_p = ip_read_int_unsafe(¤t_ip);
|
||||||
if (jmp_p >= bf->code_size) {
|
if ((int)jmp_p >= bf->code_size) {
|
||||||
// NOTE: maybe also should check that > begin (?)
|
// NOTE: maybe also should check that > begin (?)
|
||||||
ip_failure(saved_current_ip, mod_id, "jump/call out of file");
|
ip_failure(saved_current_ip, mod_id, "jump/call out of file");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -527,39 +527,39 @@ void run_mod(uint mod_id, int argc, char **argv) {
|
||||||
// maybe some metainfo should be collected
|
// maybe some metainfo should be collected
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CMD_CTRL_CALLF: { // CALLF %s %d // call external function
|
// case CMD_CTRL_CALLF: { // CALLF %s %d // call external function
|
||||||
const char *call_func_name = ip_read_string(&s.ip);
|
// const char *call_func_name = ip_read_string(&s.ip);
|
||||||
size_t args_count = ip_read_int(&s.ip); // args count
|
// size_t args_count = ip_read_int(&s.ip); // args count
|
||||||
|
|
||||||
if (run_stdlib_func(call_func_name, args_count)) {
|
// if (run_stdlib_func(call_func_name, args_count)) {
|
||||||
// case of stdlib function
|
// // case of stdlib function
|
||||||
break;
|
// break;
|
||||||
}
|
// }
|
||||||
|
|
||||||
if (strcmp(call_func_name, ".array") == 0) {
|
// if (strcmp(call_func_name, ".array") == 0) {
|
||||||
call_Barray(args_count, &s.ip, buffer);
|
// call_Barray(args_count, &s.ip, buffer);
|
||||||
break;
|
// break;
|
||||||
}
|
// }
|
||||||
|
|
||||||
struct ModSearchResult func = mod_search_pub_symbol(call_func_name);
|
// struct ModSearchResult func = mod_search_pub_symbol(call_func_name);
|
||||||
if (func.mod_file == NULL) {
|
// if (func.mod_file == NULL) {
|
||||||
failure("RUNTIME ERROR: external function <%s> with <%zu> args not found\n", call_func_name, args_count);
|
// failure("RUNTIME ERROR: external function <%s> with <%zu> args not found\n", call_func_name, args_count);
|
||||||
}
|
// }
|
||||||
|
|
||||||
call_happened = true;
|
// call_happened = true;
|
||||||
s.is_closure_call = false;
|
// s.is_closure_call = false;
|
||||||
s.call_ip = s.ip;
|
// s.call_ip = s.ip;
|
||||||
s.call_module_id = s.current_module_id;
|
// s.call_module_id = s.current_module_id;
|
||||||
|
|
||||||
s.current_module_id = func.mod_id;
|
// s.current_module_id = func.mod_id;
|
||||||
s.bf = func.mod_file;
|
// s.bf = func.mod_file;
|
||||||
|
|
||||||
if (func.symbol_offset >= s.bf->code_size) {
|
// if (func.symbol_offset >= s.bf->code_size) {
|
||||||
s_failure(&s, "jump out of file");
|
// s_failure(&s, "jump out of file");
|
||||||
}
|
// }
|
||||||
s.ip = s.bf->code_ptr + func.symbol_offset;
|
// s.ip = s.bf->code_ptr + func.symbol_offset;
|
||||||
break;
|
// break;
|
||||||
}
|
// }
|
||||||
|
|
||||||
default:
|
default:
|
||||||
s_failure(&s, "invalid opcode"); // %d-%d\n", h, l);
|
s_failure(&s, "invalid opcode"); // %d-%d\n", h, l);
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "parser.hpp"
|
#include "parser.hpp"
|
||||||
|
|
||||||
|
|
@ -38,6 +40,8 @@ enum class ArgT {
|
||||||
// void *__start_custom_data;
|
// void *__start_custom_data;
|
||||||
// void *__stop_custom_data;
|
// void *__stop_custom_data;
|
||||||
|
|
||||||
|
//
|
||||||
|
|
||||||
// Reads a binary bytecode file by name and unpacks it
|
// Reads a binary bytecode file by name and unpacks it
|
||||||
Bytefile *read_file(const char *fname) {
|
Bytefile *read_file(const char *fname) {
|
||||||
FILE *f = fopen(fname, "rb");
|
FILE *f = fopen(fname, "rb");
|
||||||
|
|
@ -51,14 +55,15 @@ Bytefile *read_file(const char *fname) {
|
||||||
failure("read file %s: %s\n", fname, strerror(errno));
|
failure("read file %s: %s\n", fname, strerror(errno));
|
||||||
}
|
}
|
||||||
|
|
||||||
long size = ftell(f);
|
size_t size = ftell(f);
|
||||||
|
|
||||||
// [uint] stringtab_size
|
// [uint] stringtab_size
|
||||||
// [uint] global_area_size
|
// [uint] global_area_size
|
||||||
|
// [uint] substs_area_size
|
||||||
// [uint] imports_number
|
// [uint] imports_number
|
||||||
// [uint] public_symbols_number
|
// [uint] public_symbols_number
|
||||||
// char[0] buffer
|
// char[0] buffer
|
||||||
long file_header_size = 4 * sizeof(uint) + sizeof(char[0]);
|
size_t file_header_size = 5 * sizeof(uint) + sizeof(char[0]);
|
||||||
|
|
||||||
long additional_size = sizeof(Bytefile) - file_header_size;
|
long additional_size = sizeof(Bytefile) - file_header_size;
|
||||||
file = (Bytefile *)malloc(size +
|
file = (Bytefile *)malloc(size +
|
||||||
|
|
@ -79,9 +84,10 @@ Bytefile *read_file(const char *fname) {
|
||||||
|
|
||||||
fclose(f);
|
fclose(f);
|
||||||
|
|
||||||
long imports_size = file->imports_number * sizeof(int);
|
size_t imports_size = file->imports_number * sizeof(int);
|
||||||
long public_symbols_size = file->public_symbols_number * 2 * sizeof(int);
|
size_t public_symbols_size = file->public_symbols_number * 2 * sizeof(int);
|
||||||
long strings_buffer_offset = public_symbols_size + imports_size;
|
|
||||||
|
size_t strings_buffer_offset = public_symbols_size + imports_size;
|
||||||
if (file->buffer + strings_buffer_offset >= file_end) {
|
if (file->buffer + strings_buffer_offset >= file_end) {
|
||||||
failure("public symbols are out of the file size\n");
|
failure("public symbols are out of the file size\n");
|
||||||
}
|
}
|
||||||
|
|
@ -89,11 +95,18 @@ Bytefile *read_file(const char *fname) {
|
||||||
if (file->string_ptr + file->stringtab_size > file_end) {
|
if (file->string_ptr + file->stringtab_size > file_end) {
|
||||||
failure("strings table is out of the file size\n");
|
failure("strings table is out of the file size\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t substs_buffer_offset = strings_buffer_offset + file->stringtab_size;
|
||||||
|
file->substs_ptr = file->buffer + substs_buffer_offset;
|
||||||
|
if ((char *)file->substs_ptr + file->substs_area_size > file_end) {
|
||||||
|
failure("substitutions table is out of the file size\n");
|
||||||
|
}
|
||||||
|
|
||||||
// if (file->stringtab_size > 0 &&
|
// if (file->stringtab_size > 0 &&
|
||||||
// file->string_ptr[file->stringtab_size - 1] != 0) {
|
// file->string_ptr[file->stringtab_size - 1] != 0) {
|
||||||
// failure("strings table is not zero-ended\n");
|
// failure("strings table is not zero-ended\n");
|
||||||
// }
|
// }
|
||||||
file->code_size = size - strings_buffer_offset - file->stringtab_size;
|
file->code_size = size - substs_buffer_offset - file->substs_area_size;
|
||||||
|
|
||||||
if (file->code_size < 0 || public_symbols_size < 0 ||
|
if (file->code_size < 0 || public_symbols_size < 0 ||
|
||||||
file->stringtab_size < 0) {
|
file->stringtab_size < 0) {
|
||||||
|
|
@ -109,6 +122,138 @@ Bytefile *read_file(const char *fname) {
|
||||||
return file;
|
return file;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// One counter per section of a bytefile. Used both for the summed section
// sizes of all merged files and for the running position of the file that is
// currently being merged.
struct Offsets {
  size_t strings; // string table
  size_t globals; // global area
  size_t code;    // code section
};
|
||||||
|
|
||||||
|
// Walks the code section of `bytefile` command by command and shifts, in
// place, the 32-bit operand of the commands that refer to another section:
//   * STRING                             : operand += offsets.strings
//   * JMP, CJMPnz, CJMPz, CLOSURE, CALL  : operand += offsets.code
// Every other command is left untouched; offsets.globals is not used here.
//
// NOTE(review): the operand is taken at the position `ip` has right after
// parse_command() returns, so this relies on parse_command() stopping exactly
// in front of that operand -- confirm against its implementation.
void rewrite_code_with_offsets(Bytefile *bytefile, const Offsets &offsets) {
  char *ip = bytefile->code_ptr;

  // Reads the operand under `ip` (advancing `ip` past it) and stores the
  // shifted value back into the very same slot.
  const auto shift_operand = [&ip](size_t delta) {
    char *const slot = ip;
    ip_write_int_unsafe(slot, ip_read_int_unsafe(&ip) + delta); // TODO: check
  };

  while (ip - bytefile->code_ptr < bytefile->code_size) {
    switch (parse_command(&ip, bytefile).first) {
    case Cmd::STRING:
      shift_operand(offsets.strings);
      break;
    case Cmd::JMP:
    case Cmd::CJMPnz:
    case Cmd::CJMPz:
    case Cmd::CLOSURE:
    case Cmd::CALL:
      shift_operand(offsets.code);
      break;
    default:
      break;
    }
  }
}
|
||||||
|
|
||||||
|
// Resolves the references recorded in the substitutions area of `bytefile`.
//
// The area is decoded as a packed sequence of entries, each one being a
// 32-bit offset into the code section followed by a NUL-terminated symbol
// name. For every entry the name is looked up in `publics` and the value
// found there is stored as a 32-bit integer at code_ptr + offset.
//
// Malformed entries, unknown names and offsets outside the code section are
// reported through failure().
// NOTE(review): like the original code, this assumes failure() does not
// return -- confirm.
// NOTE(review): every name has to be NUL-terminated inside the area; confirm
// that the bytecode writer emits the terminator.
void subst_in_code(Bytefile *bytefile,
                   const std::unordered_map<std::string, size_t> &publics) {
  const char *const area = bytefile->substs_ptr;
  const size_t area_size = bytefile->substs_area_size;
  const size_t code_size =
      bytefile->code_size < 0 ? 0 : static_cast<size_t>(bytefile->code_size);

  for (size_t i = 0; i < area_size;) {
    // An entry needs the 4-byte offset plus at least one byte of name.
    if (i + sizeof(uint32_t) >= area_size) {
      failure("substitution %zu offset is out of area", i);
    }

    // Entries are packed, so the offset is not necessarily aligned.
    uint32_t offset = 0;
    memcpy(&offset, area + i, sizeof(offset));
    i += sizeof(uint32_t);

    // Search for the terminator inside the area only, so that a malformed
    // table can not make us read past its end.
    const char *name = area + i;
    const void *terminator = memchr(name, '\0', area_size - i);
    if (terminator == nullptr) {
      failure("substitution %zu name is out of area", i);
    }

    // Skip the name together with its terminator; otherwise the next
    // iteration would start decoding at the terminator byte.
    i += static_cast<size_t>(static_cast<const char *>(terminator) - name) + 1;

    const auto it = publics.find(name);
    if (it == publics.end()) {
      failure("public name for substitution is not found: %s", name);
    }

    // The patched slot must lie entirely inside the code section.
    if (code_size < sizeof(uint32_t) ||
        offset > code_size - sizeof(uint32_t)) {
      failure("substitution offset %u for %s is out of code", offset, name);
    }

    const uint32_t target = static_cast<uint32_t>(it->second);
    memcpy(bytefile->code_ptr + offset, &target, sizeof(target));
    // TODO: check: +4 to match ?
  }
}
|
||||||
|
|
||||||
|
// Sums up, section by section, the sizes of all `bytefiles`: string tables
// (bytes), global areas (words) and code (bytes). Each total goes into the
// field of the same name.
Offsets calc_merge_sizes(const std::vector<Bytefile *> &bytefiles) {
  Offsets sizes{.strings = 0, .globals = 0, .code = 0};
  for (const Bytefile *bytefile : bytefiles) {
    sizes.strings += bytefile->stringtab_size;
    sizes.globals += bytefile->global_area_size;
    sizes.code += bytefile->code_size;
  }
  return sizes;
}
|
||||||
|
|
||||||
|
// Merges `bytefiles` into one freshly malloc'ed Bytefile and returns it.
//
// Layout of the result: the string tables of all inputs, in input order,
// directly followed by their code sections, in the same order. The result has
// no imports, no publics and no substitutions; its global_ptr is NULL (the
// globals are expected to live on the stack).
//
// Steps:
//   1. collect the public symbols of all inputs, keyed by name, with their
//      code offsets rebased to the merged code section ("main" entries are
//      gathered separately and may occur more than once);
//   2. for every input: relocate its code, resolve its substitutions against
//      the collected publics, copy its strings and code into the result.
//
// Every input is released with free() once it has been copied; the
// corresponding vector slots are reset to nullptr.
Bytefile *merge_files(std::vector<Bytefile *> &&bytefiles) {
  const Offsets sizes = calc_merge_sizes(bytefiles);
  Bytefile *result = (Bytefile *)malloc(sizeof(Bytefile) + sizes.strings +
                                        sizes.code); // globals - on stack
  if (result == NULL) {
    failure("merge files: not enough memory\n");
  }

  // collect publics
  std::unordered_map<std::string, size_t> publics;
  std::vector<size_t> main_offsets;
  {
    size_t code_offset = 0;
    for (const Bytefile *bytefile : bytefiles) {
      for (size_t j = 0; j < bytefile->public_symbols_number; ++j) {
        const char *name = get_public_name_unsafe(bytefile, j);
        // Position of the symbol in the code of its own module, rebased to
        // the merged code section. This has to be the symbol's code offset,
        // not the string-table offset of its name.
        const size_t offset =
            get_public_offset_unsafe(bytefile, j) + code_offset;

        if (strcmp(name, "main") == 0) {
          main_offsets.push_back(offset);
        } else if (!publics.insert({name, offset}).second) {
          failure("public name found more then once: %s", name);
        }
      }
      code_offset += bytefile->code_size;
    }
  }

  // init result
  result->code_size = sizes.code;
  result->stringtab_size = sizes.strings;
  result->global_area_size = sizes.globals;
  result->substs_area_size = 0;
  result->imports_number = 0;
  result->public_symbols_number = 0;

  result->main_offset = 0; // TODO: save all main offsets in some way (?)
  result->string_ptr = result->buffer;
  result->imports_ptr = NULL;
  result->public_ptr = NULL;
  result->code_ptr = result->string_ptr + result->stringtab_size;
  result->global_ptr = NULL;
  result->substs_ptr = NULL;

  // update & merge code segments
  Offsets offsets{.strings = 0, .globals = 0, .code = 0};
  for (size_t i = 0; i < bytefiles.size(); ++i) {
    Bytefile *const bytefile = bytefiles[i];

    rewrite_code_with_offsets(bytefile, offsets);
    subst_in_code(bytefile, publics);

    // copy data to merged file
    memcpy(result->string_ptr + offsets.strings, bytefile->string_ptr,
           bytefile->stringtab_size);
    memcpy(result->code_ptr + offsets.code, bytefile->code_ptr,
           bytefile->code_size);

    // update offsets
    offsets.strings += bytefile->stringtab_size;
    offsets.globals += bytefile->global_area_size;
    offsets.code += bytefile->code_size;

    free(bytefile);
    bytefiles[i] = nullptr; // do not leave a dangling pointer behind
  }

  return result;
}
|
||||||
|
|
||||||
const char *command_name(Cmd cmd, int8_t l) {
|
const char *command_name(Cmd cmd, int8_t l) {
|
||||||
static const char *const ops[] = {
|
static const char *const ops[] = {
|
||||||
#define OP_TO_STR(id, op) "BINOP:" #op,
|
#define OP_TO_STR(id, op) "BINOP:" #op,
|
||||||
|
|
@ -212,8 +357,8 @@ const char *command_name(Cmd cmd, int8_t l) {
|
||||||
return "FAIL";
|
return "FAIL";
|
||||||
case Cmd::LINE:
|
case Cmd::LINE:
|
||||||
return "LINE";
|
return "LINE";
|
||||||
case Cmd::CALLF:
|
// case Cmd::CALLF:
|
||||||
return "CALLF";
|
// return "CALLF";
|
||||||
case Cmd::PATT:
|
case Cmd::PATT:
|
||||||
if (l >= sizeof(pats) / sizeof(char *)) {
|
if (l >= sizeof(pats) / sizeof(char *)) {
|
||||||
return "_UNDEF_PATT_";
|
return "_UNDEF_PATT_";
|
||||||
|
|
@ -455,7 +600,7 @@ std::pair<Cmd, uint8_t> parse_command_impl(char **ip, const Bytefile &bf,
|
||||||
|
|
||||||
case CMD_CTRL:
|
case CMD_CTRL:
|
||||||
switch (l) {
|
switch (l) {
|
||||||
case CMD_CTRL_CJMPz: // CJMPnz 0x%.8x
|
case CMD_CTRL_CJMPz: // CJMPz 0x%.8x
|
||||||
cmd = Cmd::CJMPz;
|
cmd = Cmd::CJMPz;
|
||||||
read_print_cmd_seq_opt<do_read_args, use_out, ArgT::OFFSET>(cmd, l, ip,
|
read_print_cmd_seq_opt<do_read_args, use_out, ArgT::OFFSET>(cmd, l, ip,
|
||||||
bf, out);
|
bf, out);
|
||||||
|
|
@ -532,11 +677,12 @@ std::pair<Cmd, uint8_t> parse_command_impl(char **ip, const Bytefile &bf,
|
||||||
read_print_cmd_seq_opt<do_read_args, use_out, ArgT::INT>(cmd, l, ip, bf,
|
read_print_cmd_seq_opt<do_read_args, use_out, ArgT::INT>(cmd, l, ip, bf,
|
||||||
out);
|
out);
|
||||||
break;
|
break;
|
||||||
case CMD_CTRL_CALLF: // CALLF %s %d
|
// NOTE: is replaced
|
||||||
cmd = Cmd::CALLF;
|
// case CMD_CTRL_CALLF: // CALLF %s %d
|
||||||
read_print_cmd_seq_opt<do_read_args, use_out, ArgT::STR, ArgT::INT>(
|
// cmd = Cmd::CALLF;
|
||||||
cmd, l, ip, bf, out);
|
// read_print_cmd_seq_opt<do_read_args, use_out, ArgT::STR, ArgT::INT>(
|
||||||
break;
|
// cmd, l, ip, bf, out);
|
||||||
|
// break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
failure("invalid opcode");
|
failure("invalid opcode");
|
||||||
|
|
@ -574,8 +720,8 @@ std::pair<Cmd, uint8_t> parse_command_impl(char **ip, const Bytefile &bf,
|
||||||
|
|
||||||
// case CMD_BUILTIN_Barray: // CALL Barray %d
|
// case CMD_BUILTIN_Barray: // CALL Barray %d
|
||||||
// cmd = Cmd::Barray;
|
// cmd = Cmd::Barray;
|
||||||
// read_print_cmd_seq_opt<do_read_args, use_out, ArgT::INT>(cmd, l, ip,
|
// read_print_cmd_seq_opt<do_read_args, use_out, ArgT::INT>(cmd, l,
|
||||||
// bf,
|
// ip, bf,
|
||||||
// out);
|
// out);
|
||||||
// break;
|
// break;
|
||||||
|
|
||||||
|
|
@ -613,6 +759,7 @@ bool is_command_name(char *ip, const Bytefile *bf, Cmd cmd) {
|
||||||
void print_file_info(const Bytefile &bf, std::ostream &out) {
|
void print_file_info(const Bytefile &bf, std::ostream &out) {
|
||||||
out << "String table size : " << bf.stringtab_size << '\n';
|
out << "String table size : " << bf.stringtab_size << '\n';
|
||||||
out << "Global area size : " << bf.global_area_size << '\n';
|
out << "Global area size : " << bf.global_area_size << '\n';
|
||||||
|
out << "Substitutions area size : " << bf.substs_area_size << '\n';
|
||||||
out << "Number of imports : " << bf.imports_number << '\n';
|
out << "Number of imports : " << bf.imports_number << '\n';
|
||||||
out << "Number of public symbols: " << bf.public_symbols_number << '\n';
|
out << "Number of public symbols: " << bf.public_symbols_number << '\n';
|
||||||
|
|
||||||
|
|
|
||||||
25
src/SM.ml
25
src/SM.ml
|
|
@ -103,6 +103,7 @@ type prg = insn list [@@deriving gt ~options:{ show }]
|
||||||
|
|
||||||
module ByteCode = struct
|
module ByteCode = struct
|
||||||
module M = Map.Make (String)
|
module M = Map.Make (String)
|
||||||
|
module IM = Map.Make (Int)
|
||||||
module S = Set.Make (String)
|
module S = Set.Make (String)
|
||||||
|
|
||||||
module StringTab = struct
|
module StringTab = struct
|
||||||
|
|
@ -184,7 +185,7 @@ module ByteCode = struct
|
||||||
let add_public l = pubs := S.add l !pubs in
|
let add_public l = pubs := S.add l !pubs in
|
||||||
let add_import l = imports := S.add l !imports in
|
let add_import l = imports := S.add l !imports in
|
||||||
let add_fixup l = fixups := (Buffer.length code, l) :: !fixups in
|
let add_fixup l = fixups := (Buffer.length code, l) :: !fixups in
|
||||||
let add_func_fixup c l = func_fixups := (c, Buffer.length code, l) :: !func_fixups in
|
let add_func_fixup l = func_fixups := (Buffer.length code, l) :: !func_fixups in
|
||||||
let add_bytes = List.iter (fun x -> Buffer.add_char code @@ Char.chr x) in
|
let add_bytes = List.iter (fun x -> Buffer.add_char code @@ Char.chr x) in
|
||||||
let add_ints =
|
let add_ints =
|
||||||
List.iter (fun x -> Buffer.add_int32_ne code @@ Int32.of_int x)
|
List.iter (fun x -> Buffer.add_int32_ne code @@ Int32.of_int x)
|
||||||
|
|
@ -308,7 +309,7 @@ module ByteCode = struct
|
||||||
| CALL (fn, n, _) ->
|
| CALL (fn, n, _) ->
|
||||||
(add_bytes [ (5 * 16) + 6 ];
|
(add_bytes [ (5 * 16) + 6 ];
|
||||||
(* 1 = sizeof byte *)
|
(* 1 = sizeof byte *)
|
||||||
add_func_fixup (Buffer.length code - 1) fn;
|
add_func_fixup fn;
|
||||||
add_ints [ 0; n ])
|
add_ints [ 0; n ])
|
||||||
(* 0x57 s:32 n:32 *)
|
(* 0x57 s:32 n:32 *)
|
||||||
| TAG (s, n) ->
|
| TAG (s, n) ->
|
||||||
|
|
@ -337,17 +338,19 @@ module ByteCode = struct
|
||||||
failwith
|
failwith
|
||||||
(Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
|
(Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)
|
||||||
in
|
in
|
||||||
|
let substs = Stdlib.ref [] in
|
||||||
|
let add_subst c l = substs := (c, l) :: !substs in
|
||||||
List.iter insn_code insns;
|
List.iter insn_code insns;
|
||||||
add_bytes [ 255 ];
|
add_bytes [ 255 ];
|
||||||
let code = Buffer.to_bytes code in
|
let code = Buffer.to_bytes code in
|
||||||
List.iter
|
List.iter
|
||||||
(fun (cmd_ofs, addr_ofs, l) ->
|
(fun (addr_ofs, l) ->
|
||||||
Bytes.set_int32_ne code addr_ofs
|
Bytes.set_int32_ne code addr_ofs
|
||||||
(Int32.of_int
|
(Int32.of_int
|
||||||
@@
|
@@
|
||||||
try M.find l !lmap
|
try M.find l !lmap
|
||||||
with Not_found ->
|
with Not_found ->
|
||||||
Bytes.set_int8 code cmd_ofs ((5 * 16) + 11); StringTab.add st l))
|
add_subst addr_ofs l; 0))
|
||||||
!func_fixups;
|
!func_fixups;
|
||||||
List.iter
|
List.iter
|
||||||
(fun (ofs, l) ->
|
(fun (ofs, l) ->
|
||||||
|
|
@ -373,10 +376,17 @@ module ByteCode = struct
|
||||||
failwith (Printf.sprintf "ERROR: undefined label '%s'" l) ))
|
failwith (Printf.sprintf "ERROR: undefined label '%s'" l) ))
|
||||||
@@ S.elements !pubs
|
@@ S.elements !pubs
|
||||||
in
|
in
|
||||||
let st = Buffer.to_bytes st.StringTab.buffer in
|
let str_table = Buffer.to_bytes st.StringTab.buffer in
|
||||||
|
let subst_table = Buffer.create 1024 in
|
||||||
let file = Buffer.create 1024 in
|
let file = Buffer.create 1024 in
|
||||||
Buffer.add_int32_ne file (Int32.of_int @@ Bytes.length st);
|
List.iter
|
||||||
|
(fun (c, l) ->
|
||||||
|
Buffer.add_int32_ne subst_table @@ Int32.of_int c;
|
||||||
|
Buffer.add_string subst_table l)
|
||||||
|
!substs;
|
||||||
|
Buffer.add_int32_ne file (Int32.of_int @@ Bytes.length str_table);
|
||||||
Buffer.add_int32_ne file (Int32.of_int @@ !glob_count);
|
Buffer.add_int32_ne file (Int32.of_int @@ !glob_count);
|
||||||
|
Buffer.add_int32_ne file (Int32.of_int @@ Buffer.length subst_table);
|
||||||
Buffer.add_int32_ne file (Int32.of_int @@ List.length imports);
|
Buffer.add_int32_ne file (Int32.of_int @@ List.length imports);
|
||||||
Buffer.add_int32_ne file (Int32.of_int @@ List.length pubs);
|
Buffer.add_int32_ne file (Int32.of_int @@ List.length pubs);
|
||||||
List.iter
|
List.iter
|
||||||
|
|
@ -388,7 +398,8 @@ module ByteCode = struct
|
||||||
Buffer.add_int32_ne file n;
|
Buffer.add_int32_ne file n;
|
||||||
Buffer.add_int32_ne file o)
|
Buffer.add_int32_ne file o)
|
||||||
pubs;
|
pubs;
|
||||||
Buffer.add_bytes file st;
|
Buffer.add_bytes file str_table;
|
||||||
|
Buffer.add_bytes file @@ Buffer.to_bytes subst_table;
|
||||||
Buffer.add_bytes file code;
|
Buffer.add_bytes file code;
|
||||||
let f = open_out_bin (Printf.sprintf "%s.bc" cmd#basename) in
|
let f = open_out_bin (Printf.sprintf "%s.bc" cmd#basename) in
|
||||||
Buffer.output_buffer f file;
|
Buffer.output_buffer f file;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue