sm printer enhancements, compiler fixes (one test passed; no file prefix & imports asm generation yet)

This commit is contained in:
ProgramSnail 2025-06-09 16:18:01 +03:00
parent 13ea4c7968
commit 5c30116de3
5 changed files with 540 additions and 404 deletions

View file

@ -1,9 +1,9 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# xmake build xmake build
# cp "build/linux/x86_64/release/byterun" byterun.exe cp "build/linux/x86_64/release/byterun" byterun.exe
dune build > /dev/null # dune build > /dev/null
prefix="../regression/" prefix="../regression/"
suffix=".lama" suffix=".lama"

View file

@ -36,8 +36,11 @@ int main(int argc, char **argv) {
return 0; return 0;
} else if (strcmp(argv[1], "-s") == 0) { } else if (strcmp(argv[1], "-s") == 0) {
std::ifstream file(argv[2]); std::ifstream file(argv[2]);
auto instrs = compile_to_code(parse_sm(file)); // std::cout << "-- parse\n";
for (auto &instr : instrs) { auto instrs = parse_sm(file);
// std::cout << "-- compile\n";
auto asm_instrs = compile_to_code(instrs);
for (auto &instr : asm_instrs) {
std::cout << instr << "\n"; std::cout << instr << "\n";
} }
return 0; return 0;

View file

@ -159,7 +159,7 @@ T from_number(int n) {
std::string str_of_int = std::to_string(n); std::string str_of_int = std::to_string(n);
std::string name64 = std::format("%r{}", str_of_int); std::string name64 = std::format("%r{}", str_of_int);
std::string name8 = std::format("%r{}b", std::move(str_of_int)); std::string name8 = std::format("%r{}b", std::move(str_of_int));
return {.name = name8, .reg = {.name8 = name8, .name64 = name64}}; return {.name = name64, .reg = {.name8 = name8, .name64 = name64}};
} }
T of_8bit(const T &r) { return {.name = r.reg.name8, .reg = r.reg}; } T of_8bit(const T &r) { return {.name = r.reg.name8, .reg = r.reg}; }
T of_64bit(const T &r) { return {.name = r.reg.name64, .reg = r.reg}; } T of_64bit(const T &r) { return {.name = r.reg.name64, .reg = r.reg}; }
@ -565,7 +565,7 @@ public:
State next_state(const State &v) const { State next_state(const State &v) const {
return std::visit(utils::multifunc{ return std::visit(utils::multifunc{
[](const S &x) -> State { return {S(x.n)}; }, [](const S &x) -> State { return {S(x.n + 1)}; },
[this](const R &x) -> State { [this](const R &x) -> State {
if (x.n + 1 >= registers.size()) { if (x.n + 1 >= registers.size()) {
return {S(0)}; return {S(0)};
@ -644,8 +644,9 @@ public:
} }
Location pop() { Location pop() {
const auto loc = location();
state = previous_state(state); state = previous_state(state);
return location(); return loc;
} }
Location peek() const { return location(); } Location peek() const { return location(); }
@ -812,13 +813,11 @@ public:
void set_stack(std::string const &l) { stackmap.insert({l, stack}); } void set_stack(std::string const &l) { stackmap.insert({l, stack}); }
/* retrieves a stack for a label */ /* retrieves a stack for a label */
std::optional<SymbolicStack *> retrieve_stack(std::string const &l) { void retrieve_stack(std::string const &l) {
auto it = stackmap.find(l); auto it = stackmap.find(l);
if (it != stackmap.end()) { if (it != stackmap.end()) {
return &it->second; stack = it->second;
} }
return std::nullopt;
} }
/* checks if there is a stack for a label */ /* checks if there is a stack for a label */
@ -905,8 +904,8 @@ public:
static uint64_t hash(const std::string &tag) { static uint64_t hash(const std::string &tag) {
assert(!tag.empty()); assert(!tag.empty());
uint64_t h = 0; uint64_t h = 0;
for (size_t i = 0; i < std::min((tag.size() - 1), 9lu); ++i) { for (size_t i = 0; i < std::min(tag.size(), 9lu); ++i) {
h = (h << 6) | chars[tag[i]]; h = (h << 6) | chars.find(tag[i]);
} }
return h; return h;
} }
@ -956,11 +955,11 @@ public:
}; };
const auto y = escape(x); const auto y = escape(x);
const auto it = stringm.find(y); auto it = stringm.find(y);
if (it == stringm.end()) { if (it == stringm.end()) {
const auto name = std::format("string_{}", scount); const auto name = std::format("string_{}", scount);
stringm.insert({y, name}); it = stringm.insert({y, name}).first;
++scount; ++scount;
} }
return M{DataKind::D, Externality::I, Addressed::A, it->second}; return M{DataKind::D, Externality::I, Addressed::A, it->second};
@ -1025,9 +1024,6 @@ public:
/* generate a line number information for current function */ /* generate a line number information for current function */
std::vector<Instr> gen_line(size_t line) { std::vector<Instr> gen_line(size_t line) {
const std::string lab = std::format(".L{}", nlabels); const std::string lab = std::format(".L{}", nlabels);
++nlabels;
first_line = false;
std::vector<Instr> code; std::vector<Instr> code;
if (do_opt_stabs()) { if (do_opt_stabs()) {
if (fname == "main") { if (fname == "main") {
@ -1042,6 +1038,10 @@ public:
} }
} }
code.push_back(Label{lab}); code.push_back(Label{lab});
++nlabels;
first_line = false;
return code; return code;
} }
@ -1067,10 +1067,11 @@ std::string to_code(const Env &env, const Opnd &opnd) {
: std::format("-{}(%rbp)", offset); : std::format("-{}(%rbp)", offset);
}, },
[&env](const Opnd::M &x) { [&env](const Opnd::M &x) {
return std::format( // NOTE: anotner way to show
"M {} {} {} {}", x.kind == DataKind::F ? "Function" : "Data", // return std::format(
x.ext == Externality::I ? "Internal" : "External", // "M {} {} {} {}", x.kind == DataKind::F ? "Function" : "Data",
x.addr == Addressed::A ? "Address" : "Value", x.name); // x.ext == Externality::I ? "Internal" : "External",
// x.addr == Addressed::A ? "Address" : "Value", x.name);
if (x.ext == Externality::I || x.kind == DataKind::F) { if (x.ext == Externality::I || x.kind == DataKind::F) {
return std::format("{}(%rip)", x.name); return std::format("{}(%rip)", x.name);
} }
@ -1494,7 +1495,7 @@ std::pair<size_t, std::vector<Instr>> setup_arguments(Env &env, size_t nargs) {
std::optional<Instr> setup_closure(Env &env, std::optional<Instr> setup_closure(Env &env,
const std::optional<std::string> &fname) { const std::optional<std::string> &fname) {
if (!fname) { if (fname) {
return {}; return {};
} }
const auto closure = env.pop(); const auto closure = env.pop();
@ -1629,7 +1630,7 @@ std::vector<Instr> compile_call(Env &env,
size_t nargs, bool tail) { size_t nargs, bool tail) {
std::optional<std::string> fname; std::optional<std::string> fname;
if (fname_in) { if (fname_in) {
fname = (*fname_in)[0] == '.' ? utils::labeled_builtin(fname->substr(1)) fname = (*fname_in)[0] == '.' ? utils::labeled_builtin(fname_in->substr(1))
: std::string{*fname_in}; : std::string{*fname_in};
} }
@ -1695,9 +1696,18 @@ std::vector<Instr> compile(const Options &cmd, Env &env,
const SMInstr &instr) { const SMInstr &instr) {
using namespace utils::compile; using namespace utils::compile;
const std::string stack_state = env.mode.is_debug ? env.print_stack() : ""; auto instr_str = print_sm(instr);
std::vector<Instr> debug_info;
if (env.mode.is_debug) {
debug_info.push_back(Meta{"# " + instr_str});
debug_info.push_back(Meta{"# " + env.print_stack()});
} else {
debug_info.push_back(Meta{"# " + instr_str});
}
if (env.is_barrier()) { if (env.is_barrier()) {
return std::visit( // return utils::concat(
std::move(debug_info),
std::visit( //
utils::multifunc{ utils::multifunc{
// //
[&env](const SMInstr::LABEL &x) -> std::vector<Instr> { [&env](const SMInstr::LABEL &x) -> std::vector<Instr> {
@ -1718,9 +1728,11 @@ std::vector<Instr> compile(const Options &cmd, Env &env,
}, },
[](const auto &) -> std::vector<Instr> { return {}; }, [](const auto &) -> std::vector<Instr> { return {}; },
}, },
*instr); *instr));
} else { } else {
return std::visit( // return utils::concat(
std::move(debug_info),
std::visit( //
utils::multifunc{ utils::multifunc{
// //
[&env](const SMInstr::PUBLIC &x) [&env](const SMInstr::PUBLIC &x)
@ -1741,7 +1753,8 @@ std::vector<Instr> compile(const Options &cmd, Env &env,
// NOTE: probably will change for bytecode cmd // NOTE: probably will change for bytecode cmd
const Externality ext = const Externality ext =
env.is_external(x.name) ? Externality::E : Externality::I; env.is_external(x.name) ? Externality::E : Externality::I;
const auto address = M{DataKind::F, ext, Addressed::A, x.name}; const auto address =
M{DataKind::F, ext, Addressed::A, x.name};
const auto l = env.allocate(); const auto l = env.allocate();
std::vector<Instr> result; std::vector<Instr> result;
@ -1753,9 +1766,10 @@ std::vector<Instr> compile(const Options &cmd, Env &env,
utils::concat(std::move(result), std::move(mov_result)); utils::concat(std::move(result), std::move(mov_result));
} }
std::reverse(result.begin(), result.end()); std::reverse(result.begin(), result.end());
return utils::concat( return utils::concat(std::move(result), mov(address, l),
std::move(result), mov(address, l), compile_call(env, ".closure",
compile_call(env, ".closure", 1 + x.closure.size(), false)); 1 + x.closure.size(),
false));
}, },
[&env](const SMInstr::CONST &x) -> std::vector<Instr> { [&env](const SMInstr::CONST &x) -> std::vector<Instr> {
const auto s = env.allocate(); const auto s = env.allocate();
@ -1779,7 +1793,7 @@ std::vector<Instr> compile(const Options &cmd, Env &env,
return s.is<S>() || s.is<M>() return s.is<S>() || s.is<M>()
? std::vector<Instr>{Mov{s, rax}, ? std::vector<Instr>{Mov{s, rax},
Mov{rax, env.loc(x.v)}} Mov{rax, env.loc(x.v)}}
: std::vector<Instr>{Mov{s, env.loc(x.v)}}; : std::vector<Instr>{Mov{env.loc(x.v), s}};
}, },
[&env](const SMInstr::ST &x) -> std::vector<Instr> { [&env](const SMInstr::ST &x) -> std::vector<Instr> {
env.register_variable(x.v); env.register_variable(x.v);
@ -1826,7 +1840,8 @@ std::vector<Instr> compile(const Options &cmd, Env &env,
[&env, &imports, [&env, &imports,
&cmd](const SMInstr::BEGIN &x) -> std::vector<Instr> { &cmd](const SMInstr::BEGIN &x) -> std::vector<Instr> {
{ {
const bool is_safepoint = safepoint_functions.count(x.f) != 0; const bool is_safepoint =
safepoint_functions.count(x.f) != 0;
const bool is_vararg = vararg_functions.count(x.f) != 0; const bool is_vararg = vararg_functions.count(x.f) != 0;
if (is_safepoint && is_vararg) { if (is_safepoint && is_vararg) {
failure("Function name %s is reserved for built-in", failure("Function name %s is reserved for built-in",
@ -1834,7 +1849,8 @@ std::vector<Instr> compile(const Options &cmd, Env &env,
} }
} }
const std::string name = (x.f[0] == 'L' ? x.f.substr(1) : x.f); const std::string name =
(x.f[0] == 'L' ? x.f.substr(1) : x.f);
const auto stabs = [&env, &x, &name]() -> std::vector<Instr> { const auto stabs = [&env, &x, &name]() -> std::vector<Instr> {
if (!env.do_opt_stabs()) { if (!env.do_opt_stabs()) {
@ -1846,8 +1862,8 @@ std::vector<Instr> compile(const Options &cmd, Env &env,
std::vector<Instr> func = { std::vector<Instr> func = {
Meta{std::format("\t.type {}, @function", name)}, Meta{std::format("\t.type {}, @function", name)},
Meta{ Meta{std::format("\t.stabs \"{}:F1\",36,0,0,{}", name,
std::format("\t.stabs \"{}:F1\",36,0,0,{}", name, x.f)}, x.f)},
}; };
std::vector<Instr> arguments = std::vector<Instr> arguments =
@ -1870,7 +1886,7 @@ std::vector<Instr> compile(const Options &cmd, Env &env,
return {}; return {};
} }
auto argc_correct_label = x.f + "_argc_correct"; std::string argc_correct_label = x.f + "_argc_correct";
auto pat_addr = // TODO: check is that is the same string to auto pat_addr = // TODO: check is that is the same string to
// ocaml version one // ocaml version one
env.register_string( env.register_string(
@ -1901,6 +1917,7 @@ std::vector<Instr> compile(const Options &cmd, Env &env,
env.assert_empty_stack(); env.assert_empty_stack();
const bool has_closure = !x.closure.empty(); const bool has_closure = !x.closure.empty();
env.enter(x.f, x.nargs, x.nlocals, has_closure); env.enter(x.f, x.nargs, x.nlocals, has_closure);
return utils::concat( return utils::concat(
std::move(stabs_code), std::move(stabs_code),
Instr{Meta{"\t.cfi_startproc"}}, Instr{Meta{"\t.cfi_startproc"}},
@ -1957,7 +1974,7 @@ std::vector<Instr> compile(const Options &cmd, Env &env,
std::vector<std::string>{imports}, std::vector<std::string>{imports},
[](const auto &i) { return i != "Std"; }), [](const auto &i) { return i != "Std"; }),
[](const auto &i) -> Instr { [](const auto &i) -> Instr {
return Call{std::format("init {}", i)}; return Call{std::format("init{}", i)};
}) : std::vector<Instr>{}), }) : std::vector<Instr>{}),
std::move(check_argc_code) std::move(check_argc_code)
); );
@ -1983,19 +2000,21 @@ std::vector<Instr> compile(const Options &cmd, Env &env,
? Binop{Opr::XOR, rax, rax} ? Binop{Opr::XOR, rax, rax}
: std::optional<Instr>{}}, : std::optional<Instr>{}},
std::vector<Instr>{ std::vector<Instr>{
Meta{"\t.cfi_restore\t5"}, Meta{"\t.cfi_restore\trbp"},
Meta{"\t.cfi_def_cfa\t4, 4"}, Meta{"\t.cfi_def_cfa\t4, 4"},
Ret{}, Ret{},
Meta{"\t.cfi_endproc"}, Meta{"\t.cfi_endproc"},
Meta{ Meta{/* Allocate space for the symbolic stack
/* Allocate space for the symbolic stack Add extra word if needed to preserve alignment
Add extra word if needed to preserve alignment */ */
std::format( std::format(
"\t.set\t{},\t{}", env.prefixed(env.lsize()), "\t.set\t{},\t{}", env.prefixed(env.lsize()),
(env.get_allocated() % 2 == 0 (env.get_allocated() % 2 == 0
? (env.get_allocated() * word_size) ? (env.get_allocated() * word_size)
: ((env.get_allocated() + 1) * word_size)))}, : ((env.get_allocated() + 1) *
Meta{std::format("\t.set\t{},\t{}", word_size)))},
Meta{std::format(
"\t.set\t{},\t{}",
env.prefixed(env.get_allocated_size()), env.prefixed(env.get_allocated_size()),
env.get_allocated())}, env.get_allocated())},
}, },
@ -2104,7 +2123,7 @@ std::vector<Instr> compile(const Options &cmd, Env &env,
utils::unreachable(); utils::unreachable();
}, },
}, },
*instr); *instr));
} }
} }
@ -2120,8 +2139,8 @@ std::vector<Instr> compile(const Options &cmd, Env &env,
} }
std::vector<std::string> compile_to_code(const std::vector<SMInstr> &code) { std::vector<std::string> compile_to_code(const std::vector<SMInstr> &code) {
Options cmd{.topname = "byterun", .filename = "byterun"}; // TODO TMP Options cmd{.topname = "main", .filename = "byterun"}; // TODO TMP
Env env(Mode{.is_debug = true, .target_os = OS::LINUX}); Env env(Mode{.is_debug = false, .target_os = OS::LINUX});
auto asm_code = compile(cmd, env, {/*imports (TODO TMP)*/}, code); auto asm_code = compile(cmd, env, {/*imports (TODO TMP)*/}, code);
std::vector<std::string> res; std::vector<std::string> res;

View file

@ -3,9 +3,11 @@
#include <algorithm> #include <algorithm>
#include <any> #include <any>
#include <charconv> #include <charconv>
#include <format>
#include <functional> #include <functional>
#include <iostream> #include <iostream>
#include <map> #include <map>
#include <sstream>
#include <unordered_map> #include <unordered_map>
using Result = utils::Result<SMInstr, std::string>; using Result = utils::Result<SMInstr, std::string>;
@ -21,7 +23,9 @@ std::vector<SMInstr> parse_sm(std::istream &in) {
std::string instr_str; std::string instr_str;
std::getline(in, instr_str); std::getline(in, instr_str);
#ifdef DEBUG
std::cout << "line: <" << instr_str << ">\n"; std::cout << "line: <" << instr_str << ">\n";
#endif
if (instr_str.empty()) { if (instr_str.empty()) {
continue; continue;
} }
@ -62,7 +66,9 @@ struct ParsingResult {
// //
std::string_view trim(std::string_view str) { std::string_view trim(std::string_view str) {
#ifdef DEBUG
std::cout << __func__ << '\n'; std::cout << __func__ << '\n';
#endif
auto begin = str.begin(); auto begin = str.begin();
for (; begin != str.end() && *begin == ' '; ++begin) { for (; begin != str.end() && *begin == ' '; ++begin) {
} }
@ -75,19 +81,25 @@ std::string_view trim(std::string_view str) {
} }
std::string_view substr_to(std::string_view line, size_t &pos, char to) { std::string_view substr_to(std::string_view line, size_t &pos, char to) {
#ifdef DEBUG
std::cout << __func__ << " with " << line.substr(pos) << '\n'; std::cout << __func__ << " with " << line.substr(pos) << '\n';
#endif
auto offset = line.find(to, pos); auto offset = line.find(to, pos);
if (offset == std::string::npos) { if (offset == std::string::npos) {
#ifdef DEBUG
std::cout << "value \"\"\n"; std::cout << "value \"\"\n";
#endif
return ""; return "";
}; };
std::string_view result = line.substr(pos, offset); std::string_view result = line.substr(pos, offset);
pos += offset + 1; pos += offset + 1;
#ifdef DEBUG
std::cout << "value " << result << "\n"; std::cout << "value " << result << "\n";
#endif
return result; return result;
} }
@ -99,23 +111,31 @@ template <typename T> using Matches = std::vector<std::pair<std::string, T>>;
// not required here) // not required here)
template <typename T> template <typename T>
ParsingResult prefix_matcher(std::string_view s, const Matches<T> &values) { ParsingResult prefix_matcher(std::string_view s, const Matches<T> &values) {
#ifdef DEBUG
std::cout << __func__ << '\n'; std::cout << __func__ << '\n';
#endif
for (auto &value : values) { for (auto &value : values) {
if (s.substr(0, value.first.size()) == value.first) { if (s.substr(0, value.first.size()) == value.first) {
return {value.second, s.substr(value.first.size())}; return {value.second, s.substr(value.first.size())};
} }
} }
#ifdef DEBUG
std::cout << "can't parse prefix from " << s << '\n'; std::cout << "can't parse prefix from " << s << '\n';
#endif
return {{}, s}; return {{}, s};
} }
ParsingResult parse_any_val(std::string_view s); ParsingResult parse_any_val(std::string_view s);
ParsingResult parse_str(std::string_view s) { ParsingResult parse_str(std::string_view s) {
#ifdef DEBUG
std::cout << __func__ << '\n'; std::cout << __func__ << '\n';
#endif
if (s.size() < 2 || s.front() != '"') { if (s.size() < 2 || s.front() != '"') {
#ifdef DEBUG
std::cout << "can't parse string from " << s << '\n'; std::cout << "can't parse string from " << s << '\n';
#endif
return {{}, s}; return {{}, s};
} }
@ -135,13 +155,17 @@ ParsingResult parse_str(std::string_view s) {
} }
ParsingResult parse_int(std::string_view s) { ParsingResult parse_int(std::string_view s) {
#ifdef DEBUG
std::cout << __func__ << '\n'; std::cout << __func__ << '\n';
#endif
int value = 0; int value = 0;
auto res = std::from_chars(s.data(), s.data() + s.size(), value); auto res = std::from_chars(s.data(), s.data() + s.size(), value);
if (res.ec != std::errc{}) { if (res.ec != std::errc{}) {
#ifdef DEBUG
std::cout << "can't parse int from " << s << '\n'; std::cout << "can't parse int from " << s << '\n';
#endif
return {{}, s}; return {{}, s};
} }
@ -149,13 +173,17 @@ ParsingResult parse_int(std::string_view s) {
} }
ParsingResult parse_bool(std::string_view s) { ParsingResult parse_bool(std::string_view s) {
#ifdef DEBUG
std::cout << __func__ << '\n'; std::cout << __func__ << '\n';
#endif
static const Matches<bool> bools = {{"true", true}, {"false", false}}; static const Matches<bool> bools = {{"true", true}, {"false", false}};
return prefix_matcher(s, bools); return prefix_matcher(s, bools);
} }
ParsingResult parse_opr(std::string_view s) { ParsingResult parse_opr(std::string_view s) {
#ifdef DEBUG
std::cout << __func__ << '\n'; std::cout << __func__ << '\n';
#endif
static const Matches<Opr> oprs = { static const Matches<Opr> oprs = {
{"+", Opr::ADD}, // + {"+", Opr::ADD}, // +
{"-", Opr::SUB}, // - {"-", Opr::SUB}, // -
@ -183,7 +211,9 @@ Opr any_opr_cast(std::any value) {
} }
ParsingResult parse_patt(std::string_view s) { ParsingResult parse_patt(std::string_view s) {
#ifdef DEBUG
std::cout << __func__ << '\n'; std::cout << __func__ << '\n';
#endif
static const Matches<Patt> patts = { static const Matches<Patt> patts = {
{"Boxed", Patt::BOXED}, {"UnBoxed", Patt::UNBOXED}, {"Boxed", Patt::BOXED}, {"UnBoxed", Patt::UNBOXED},
{"Array", Patt::ARRAY}, {"String", Patt::STRING}, {"Array", Patt::ARRAY}, {"String", Patt::STRING},
@ -204,7 +234,9 @@ Patt any_patt_cast(std::any value) {
// --- // ---
ParsingResult parse_var(std::string_view s) { ParsingResult parse_var(std::string_view s) {
#ifdef DEBUG
std::cout << __func__ << '\n'; std::cout << __func__ << '\n';
#endif
static const std::map<std::string, std::function<ValT(std::any &&)>, static const std::map<std::string, std::function<ValT(std::any &&)>,
std::less<>> std::less<>>
vars = { vars = {
@ -234,7 +266,9 @@ ParsingResult parse_var(std::string_view s) {
auto arg_str = std::string{substr_to(s, pos, ' ')}; auto arg_str = std::string{substr_to(s, pos, ' ')};
auto arg_it = vars.find(arg_str); auto arg_it = vars.find(arg_str);
if (arg_it == vars.end()) { if (arg_it == vars.end()) {
#ifdef DEBUG
std::cout << "can't parse var from " << s << '\n'; std::cout << "can't parse var from " << s << '\n';
#endif
return {{}, s}; return {{}, s};
} }
++pos; // '(' ++pos; // '('
@ -242,21 +276,27 @@ ParsingResult parse_var(std::string_view s) {
// NOTE: s_rest starts with ')' // NOTE: s_rest starts with ')'
auto [id, s_rest] = parse_any_val(s.substr(pos)); auto [id, s_rest] = parse_any_val(s.substr(pos));
if (not id.has_value()) { if (not id.has_value()) {
#ifdef DEBUG
std::cout << "any val: can't parse int from " << s << '\n'; std::cout << "any val: can't parse int from " << s << '\n';
#endif
return {{}, s}; return {{}, s};
} }
try { try {
return {arg_it->second(std::move(id)), s_rest.substr(1)}; // skip ')' return {arg_it->second(std::move(id)), s_rest.substr(1)}; // skip ')'
} catch (const std::bad_any_cast &) { } catch (const std::bad_any_cast &) {
#ifdef DEBUG
std::cout << "bad any cast: can't parse var from " << s << '\n'; std::cout << "bad any cast: can't parse var from " << s << '\n';
#endif
return {{}, s}; return {{}, s};
} }
} }
// (_, _) // (_, _)
ParsingResult parse_pair(std::string_view s) { ParsingResult parse_pair(std::string_view s) {
#ifdef DEBUG
std::cout << __func__ << '\n'; std::cout << __func__ << '\n';
#endif
if (s.size() < 2 || s.front() != '(') { if (s.size() < 2 || s.front() != '(') {
return {}; return {};
} }
@ -272,9 +312,13 @@ ParsingResult parse_pair(std::string_view s) {
// [_, ..., _] // [_, ..., _]
ParsingResult parse_array(std::string_view s, char first_symbol = '[', ParsingResult parse_array(std::string_view s, char first_symbol = '[',
char last_symbol = ']') { char last_symbol = ']') {
#ifdef DEBUG
std::cout << __func__ << '\n'; std::cout << __func__ << '\n';
#endif
if (s.size() < 2 || s.front() != first_symbol) { if (s.size() < 2 || s.front() != first_symbol) {
#ifdef DEBUG
std::cout << "can't parse array from " << s << '\n'; std::cout << "can't parse array from " << s << '\n';
#endif
return {}; return {};
} }
@ -291,7 +335,9 @@ ParsingResult parse_array(std::string_view s, char first_symbol = '[',
res = parse_any_val(res.rest); res = parse_any_val(res.rest);
if (not res.value.has_value()) { if (not res.value.has_value()) {
#ifdef DEBUG
std::cout << "can't parse array elem from " << s << '\n'; std::cout << "can't parse array elem from " << s << '\n';
#endif
return {{}, s}; return {{}, s};
} }
@ -304,9 +350,13 @@ ParsingResult parse_array(std::string_view s, char first_symbol = '[',
// { blab="_"; elab="_" names=[...]; subs=[...]} // { blab="_"; elab="_" names=[...]; subs=[...]}
ParsingResult parse_scope(std::string_view s) { ParsingResult parse_scope(std::string_view s) {
#ifdef DEBUG
std::cout << __func__ << '\n'; std::cout << __func__ << '\n';
#endif
if (s.size() < 2 || s.front() != '{') { if (s.size() < 2 || s.front() != '{') {
#ifdef DEBUG
std::cout << "can't parse scope from " << s << '\n'; std::cout << "can't parse scope from " << s << '\n';
#endif
return {}; return {};
} }
@ -354,13 +404,17 @@ ParsingResult parse_scope(std::string_view s) {
return {scope, res.rest.substr(3)}; // skip '; }' return {scope, res.rest.substr(3)}; // skip '; }'
} catch (const std::bad_any_cast &) { } catch (const std::bad_any_cast &) {
#ifdef DEBUG
std::cout << "bad any cast: can't parse int from " << s << '\n'; std::cout << "bad any cast: can't parse int from " << s << '\n';
#endif
return {{}, s}; return {{}, s};
} }
} }
ParsingResult parse_any_val(std::string_view s) { ParsingResult parse_any_val(std::string_view s) {
#ifdef DEBUG
std::cout << __func__ << " with " << s << '\n'; std::cout << __func__ << " with " << s << '\n';
#endif
ParsingResult res; ParsingResult res;
if (res = parse_str(s); res.value.has_value()) { if (res = parse_str(s); res.value.has_value()) {
@ -541,7 +595,9 @@ private:
}; };
utils::Result<SMInstr> parse_sm(const std::string &line) { utils::Result<SMInstr> parse_sm(const std::string &line) {
#ifdef DEBUG
std::cout << __func__ << '\n'; std::cout << __func__ << '\n';
#endif
std::unordered_map<std::string, SMInstr> to_instr = { std::unordered_map<std::string, SMInstr> to_instr = {
{"BINOP", SMInstr{SMInstr::BINOP{}}}, {"BINOP", SMInstr{SMInstr::BINOP{}}},
{"CONST", SMInstr{SMInstr::CONST{}}}, {"CONST", SMInstr{SMInstr::CONST{}}},
@ -624,107 +680,165 @@ utils::Result<SMInstr> parse_sm(const std::string &line) {
return instr.build(); return instr.build();
} }
// TODO: TMP: not efficient, for test purposes only // ---
// Returns the textual spelling of a binary operator.
// The table is indexed by the numeric value of `opr`; an out-of-range value
// makes `at` throw instead of reading past the table.
// NOTE(review): entries are assumed to follow the declaration order of Opr
// (ADD, SUB, MULT, DIV, MOD, LEQ, LT, GT, GEQ, EQ, NEQ, AND, OR) - confirm.
const char *print_opr(Opr opr) {
  static const std::vector<const char *> spellings = {
      "+",  // Opr::ADD
      "-",  // Opr::SUB
      "*",  // Opr::MULT
      "/",  // Opr::DIV
      "%",  // Opr::MOD
      "<=", // Opr::LEQ
      "<",  // Opr::LT
      ">",  // Opr::GT
      ">=", // Opr::GEQ
      "==", // Opr::EQ
      "!=", // Opr::NEQ
      "&&", // Opr::AND
      "!!", // Opr::OR
  }; // TODO: check format: cpp vs lama
  const auto index = static_cast<size_t>(opr);
  return spellings.at(index);
}
// Returns the textual name of a pattern kind.
// The table is indexed by the numeric value of `patt`; an out-of-range value
// makes `at` throw instead of reading past the table.
// NOTE(review): entries are assumed to follow the declaration order of Patt
// (BOXED, UNBOXED, ARRAY, STRING, SEXP, CLOSURE, STRCMP) - confirm.
const char *print_patt(Patt patt) {
  static const std::vector<const char *> names = {
      "Boxed",   // Patt::BOXED
      "UnBoxed", // Patt::UNBOXED
      "Array",   // Patt::ARRAY
      "String",  // Patt::STRING
      "SExp",    // Patt::SEXP
      "Closure", // Patt::CLOSURE
      "StrCmp",  // Patt::STRCMP
  }; // TODO: check
  const auto index = static_cast<size_t>(patt);
  return names.at(index);
}
// Renders a variable designation as `<Kind> (<payload>)`, for example
// `Local (0)` or `Global ("x")`.
// String payloads (Global, Fun) are wrapped in double quotes without any
// escaping; numeric payloads (Local, Arg, Access) are printed as-is.
// Each kind is spelled exactly like its ValT alternative.
// NOTE(review): presumably these spellings are the keys parse_var looks up -
// confirm against its `vars` table.
std::string print_var(const ValT &var) {
  return std::visit(utils::multifunc{
                        [](const ValT::Global &x) -> std::string {
                          return std::format("Global (\"{}\")", x.s);
                        },
                        [](const ValT::Fun &x) -> std::string {
                          return std::format("Fun (\"{}\")", x.s);
                        },
                        [](const ValT::Local &x) -> std::string {
                          return std::format("Local ({})", x.n);
                        },
                        [](const ValT::Arg &x) -> std::string {
                          return std::format("Arg ({})", x.n);
                        },
                        [](const ValT::Access &x) -> std::string {
                          return std::format("Access ({})", x.n);
                        },
                    },
                    *var);
}
std::string print_var_array(const std::vector<ValT> &vars) {
std::stringstream result;
result << "[";
for (size_t i = 0; i < vars.size(); ++i) {
result << print_var(vars[i]);
if (i + 1 != vars.size()) {
result << ", ";
}
}
result << "]";
return result.str();
}
// TODO: number of printed information reduced for now // TODO: number of printed information reduced for now
std::string print_sm(const SMInstr &instr) { std::string print_sm(const SMInstr &instr) {
return {std::visit<std::string>( // return {std::visit<std::string>( //
utils::multifunc{ utils::multifunc{
// //
[](const SMInstr::PUBLIC &x) -> std::string { [](const SMInstr::PUBLIC &x) -> std::string {
return "PUBLIC [" + x.name + "]"; return std::format("PUBLIC (\"{}\")", x.name);
}, },
[](const SMInstr::EXTERN &x) -> std::string { [](const SMInstr::EXTERN &x) -> std::string {
return "EXTERN [" + x.name + "]"; return std::format("EXTERN (\"{}\")", x.name);
}, },
[](const SMInstr::IMPORT &x) -> std::string { [](const SMInstr::IMPORT &x) -> std::string {
return "IMPORT [" + x.name + "]"; return std::format("IMPORT (\"{}\")", x.name);
}, },
[](const SMInstr::CLOSURE &x) -> std::string { [](const SMInstr::CLOSURE &x) -> std::string {
return "CLOSURE [" + x.name + return std::format("CLOSURE (\"{}\", {})", x.name,
". args_count=" + std::to_string(x.closure.size()) + "]"; print_var_array(x.closure));
}, },
[](const SMInstr::CONST &x) -> std::string { [](const SMInstr::CONST &x) -> std::string {
return "CONST [" + std::to_string(x.n) + "]"; return std::format("CONST ({})", x.n);
}, },
[](const SMInstr::STRING &x) -> std::string { [](const SMInstr::STRING &x) -> std::string {
return "STRING [" + x.str + "]"; return std::format("STRING (\"{}\")", x.str);
}, },
[](const SMInstr::LDA &) -> std::string { [](const SMInstr::LDA &x) -> std::string {
// x.v return std::format("LDA ({})", print_var(x.v));
return "LDA";
}, },
[](const SMInstr::LD &) -> std::string { [](const SMInstr::LD &x) -> std::string {
// x.v return std::format("LD ({})", print_var(x.v));
return "LD";
}, },
[](const SMInstr::ST &) -> std::string { [](const SMInstr::ST &x) -> std::string {
// x.v return std::format("ST ({})", print_var(x.v));
return "ST";
}, },
[](const SMInstr::STA &) -> std::string { return "STA"; }, [](const SMInstr::STA &) -> std::string { return "STA"; },
[](const SMInstr::STI &) -> std::string { return "STI"; }, [](const SMInstr::STI &) -> std::string { return "STI"; },
[](const SMInstr::BINOP &) -> std::string { [](const SMInstr::BINOP &x) -> std::string {
// x.opr return std::format("BINOP (\"{}\")", print_opr(x.opr));
return "BINOP";
}, },
[](const SMInstr::LABEL &x) -> std::string { [](const SMInstr::LABEL &x) -> std::string {
return "LABEL [" + x.s + "]"; return std::format("LABEL (\"{}\")", x.s);
}, },
[](const SMInstr::FLABEL &x) -> std::string { [](const SMInstr::FLABEL &x) -> std::string {
return "FLABEL [" + x.s + "]"; return std::format("FLABEL (\"{}\")", x.s);
}, },
[](const SMInstr::SLABEL &x) -> std::string { [](const SMInstr::SLABEL &x) -> std::string {
return "SLABEL [" + x.s + "]"; return std::format("SLABEL (\"{}\")", x.s);
}, },
[](const SMInstr::JMP &x) -> std::string { [](const SMInstr::JMP &x) -> std::string {
return "JMP [" + x.l + "]"; return std::format("JMP (\"{}\")", x.l);
}, },
[](const SMInstr::CJMP &x) -> std::string { [](const SMInstr::CJMP &x) -> std::string {
return "CJMP [" + x.s + ". " + x.l + "]"; return std::format("CJMP (\"{}\", \"{}\")", x.s, x.l);
}, },
[](const SMInstr::BEGIN &) -> std::string { [](const SMInstr::BEGIN &x) -> std::string {
// x.f
// x.nargs
// x.nlocals
// x.closure // x.closure
// x.args // x.args
// x.scopes // x.scopes
return "BEGIN"; return std::format("BEGIN (\"{}\", {}, {})", x.f, x.nargs,
x.nlocals);
}, },
[](const SMInstr::END &) -> std::string { return "END"; }, [](const SMInstr::END &) -> std::string { return "END"; },
[](const SMInstr::RET &) -> std::string { return "RET"; }, [](const SMInstr::RET &) -> std::string { return "RET"; },
[](const SMInstr::ELEM &) -> std::string { return "ELEM"; }, [](const SMInstr::ELEM &) -> std::string { return "ELEM"; },
[](const SMInstr::CALL &x) -> std::string { [](const SMInstr::CALL &x) -> std::string {
// x.tail return std::format("CALL (\"{}\", {}, {})", x.fname, x.n,
return "CALL [" + x.fname + ". " + std::to_string(x.n) + "]"; x.tail ? "true" : "false");
}, },
[](const SMInstr::CALLC &x) -> std::string { [](const SMInstr::CALLC &x) -> std::string {
// x.tail return std::format("CALLC ({}, {})", x.n,
return "CALLC [" + std::to_string(x.n) + "]"; x.tail ? "true" : "false");
}, },
[](const SMInstr::SEXP &x) -> std::string { [](const SMInstr::SEXP &x) -> std::string {
return "SEXP [" + x.tag + ". " + std::to_string(x.n) + "]"; return std::format("SEXP (\"{}\", {})", x.tag, x.n);
}, },
[](const SMInstr::DROP &) -> std::string { return "DROP"; }, [](const SMInstr::DROP &) -> std::string { return "DROP"; },
[](const SMInstr::DUP &) -> std::string { return "DUP"; }, [](const SMInstr::DUP &) -> std::string { return "DUP"; },
[](const SMInstr::SWAP &) -> std::string { return "SWAP"; }, [](const SMInstr::SWAP &) -> std::string { return "SWAP"; },
[](const SMInstr::TAG &x) -> std::string { [](const SMInstr::TAG &x) -> std::string {
return "TAG [" + x.tag + ". " + std::to_string(x.n) + "]"; return std::format("TAG (\"{}\", {})", x.tag, x.n);
}, },
[](const SMInstr::ARRAY &x) -> std::string { [](const SMInstr::ARRAY &x) -> std::string {
return "ARRAY [" + std::to_string(x.n) + "]"; return std::format("ARRAY ({})", x.n);
}, },
[](const SMInstr::PATT &) -> std::string { [](const SMInstr::PATT &x) -> std::string {
// x.patt return std::format("PATT (\"{}\")", print_patt(x.patt));
return "PATT";
}, },
[](const SMInstr::LINE &x) -> std::string { [](const SMInstr::LINE &x) -> std::string {
return "LINE [" + std::to_string(x.n) + "]"; return std::format("LINE ({})", x.n);
}, },
[](const SMInstr::FAIL &x) -> std::string { [](const SMInstr::FAIL &x) -> std::string {
return "FAIL [" + std::to_string(x.line) + ". " + return std::format("FAIL ({}, {}, {})", x.line, x.col, x.val);
std::to_string(x.col) + ". " + std::to_string(x.val) + ". " +
"]";
}, },
// [](auto) -> std::string { // [](auto) -> std::string {
// throw std::bad_any_cast{}; // create another error ? // throw std::bad_any_cast{}; // create another error ?

View file

@ -1,7 +1,7 @@
-- add_rules("mode.debug", "mode.release") -- add_rules("mode.debug", "mode.release")
-- add_rules("c++.unity_build") -- add_rules("c++.unity_build")
set_languages("c++23", "c23") set_languages("c++20", "c11")
target("byterun") target("byterun")
set_kind("binary") set_kind("binary")