From 5c30116de366091c4fef4951b2f76bc6dcf1ec6f Mon Sep 17 00:00:00 2001 From: ProgramSnail Date: Mon, 9 Jun 2025 16:18:01 +0300 Subject: [PATCH] sm printer enhancements, compiler fixes (one test passed, no file prefix & imports asm generation yet) --- byterun/regression_check.sh | 6 +- byterun/src/cli.cpp | 7 +- byterun/src/compiler.cpp | 727 ++++++++++++++++++------------------ byterun/src/sm_parser.cpp | 202 +++++++--- byterun/xmake.lua | 2 +- 5 files changed, 540 insertions(+), 404 deletions(-) diff --git a/byterun/regression_check.sh b/byterun/regression_check.sh index 68c0a6699..7ef986b38 100755 --- a/byterun/regression_check.sh +++ b/byterun/regression_check.sh @@ -1,9 +1,9 @@ #!/usr/bin/env bash -# xmake build -# cp "build/linux/x86_64/release/byterun" byterun.exe +xmake build +cp "build/linux/x86_64/release/byterun" byterun.exe -dune build > /dev/null +# dune build > /dev/null prefix="../regression/" suffix=".lama" diff --git a/byterun/src/cli.cpp b/byterun/src/cli.cpp index ab2875363..24f5ce7d1 100644 --- a/byterun/src/cli.cpp +++ b/byterun/src/cli.cpp @@ -36,8 +36,11 @@ int main(int argc, char **argv) { return 0; } else if (strcmp(argv[1], "-s") == 0) { std::ifstream file(argv[2]); - auto instrs = compile_to_code(parse_sm(file)); - for (auto &instr : instrs) { + // std::cout << "-- parse\n"; + auto instrs = parse_sm(file); + // std::cout << "-- compile\n"; + auto asm_instrs = compile_to_code(instrs); + for (auto &instr : asm_instrs) { std::cout << instr << "\n"; } return 0; diff --git a/byterun/src/compiler.cpp b/byterun/src/compiler.cpp index 7ce0fc93b..bf29b1e97 100644 --- a/byterun/src/compiler.cpp +++ b/byterun/src/compiler.cpp @@ -159,7 +159,7 @@ T from_number(int n) { std::string str_of_int = std::to_string(n); std::string name64 = std::format("%r{}", str_of_int); std::string name8 = std::format("%r{}b", std::move(str_of_int)); - return {.name = name8, .reg = {.name8 = name8, .name64 = name64}}; + return {.name = name64, .reg = {.name8 = name8, 
.name64 = name64}}; } T of_8bit(const T &r) { return {.name = r.reg.name8, .reg = r.reg}; } T of_64bit(const T &r) { return {.name = r.reg.name64, .reg = r.reg}; } @@ -565,7 +565,7 @@ public: State next_state(const State &v) const { return std::visit(utils::multifunc{ - [](const S &x) -> State { return {S(x.n)}; }, + [](const S &x) -> State { return {S(x.n + 1)}; }, [this](const R &x) -> State { if (x.n + 1 >= registers.size()) { return {S(0)}; @@ -644,8 +644,9 @@ public: } Location pop() { + const auto loc = location(); state = previous_state(state); - return location(); + return loc; } Location peek() const { return location(); } @@ -812,13 +813,11 @@ public: void set_stack(std::string const &l) { stackmap.insert({l, stack}); } /* retrieves a stack for a label */ - std::optional retrieve_stack(std::string const &l) { + void retrieve_stack(std::string const &l) { auto it = stackmap.find(l); if (it != stackmap.end()) { - return &it->second; + stack = it->second; } - - return std::nullopt; } /* checks if there is a stack for a label */ @@ -905,8 +904,8 @@ public: static uint64_t hash(const std::string &tag) { assert(!tag.empty()); uint64_t h = 0; - for (size_t i = 0; i < std::min((tag.size() - 1), 9lu); ++i) { - h = (h << 6) | chars[tag[i]]; + for (size_t i = 0; i < std::min(tag.size(), 9lu); ++i) { + h = (h << 6) | chars.find(tag[i]); } return h; } @@ -956,11 +955,11 @@ public: }; const auto y = escape(x); - const auto it = stringm.find(y); + auto it = stringm.find(y); if (it == stringm.end()) { const auto name = std::format("string_{}", scount); - stringm.insert({y, name}); + it = stringm.insert({y, name}).first; ++scount; } return M{DataKind::D, Externality::I, Addressed::A, it->second}; @@ -1025,9 +1024,6 @@ public: /* generate a line number information for current function */ std::vector gen_line(size_t line) { const std::string lab = std::format(".L{}", nlabels); - ++nlabels; - first_line = false; - std::vector code; if (do_opt_stabs()) { if (fname == "main") 
{ @@ -1042,6 +1038,10 @@ public: } } code.push_back(Label{lab}); + + ++nlabels; + first_line = false; + return code; } @@ -1067,10 +1067,11 @@ std::string to_code(const Env &env, const Opnd &opnd) { : std::format("-{}(%rbp)", offset); }, [&env](const Opnd::M &x) { - return std::format( - "M {} {} {} {}", x.kind == DataKind::F ? "Function" : "Data", - x.ext == Externality::I ? "Internal" : "External", - x.addr == Addressed::A ? "Address" : "Value", x.name); + // NOTE: anotner way to show + // return std::format( + // "M {} {} {} {}", x.kind == DataKind::F ? "Function" : "Data", + // x.ext == Externality::I ? "Internal" : "External", + // x.addr == Addressed::A ? "Address" : "Value", x.name); if (x.ext == Externality::I || x.kind == DataKind::F) { return std::format("{}(%rip)", x.name); } @@ -1494,7 +1495,7 @@ std::pair> setup_arguments(Env &env, size_t nargs) { std::optional setup_closure(Env &env, const std::optional &fname) { - if (!fname) { + if (fname) { return {}; } const auto closure = env.pop(); @@ -1629,7 +1630,7 @@ std::vector compile_call(Env &env, size_t nargs, bool tail) { std::optional fname; if (fname_in) { - fname = (*fname_in)[0] == '.' ? utils::labeled_builtin(fname->substr(1)) + fname = (*fname_in)[0] == '.' ? utils::labeled_builtin(fname_in->substr(1)) : std::string{*fname_in}; } @@ -1695,213 +1696,229 @@ std::vector compile(const Options &cmd, Env &env, const SMInstr &instr) { using namespace utils::compile; - const std::string stack_state = env.mode.is_debug ? 
env.print_stack() : ""; - if (env.is_barrier()) { - return std::visit( // - utils::multifunc{ - // - [&env](const SMInstr::LABEL &x) -> std::vector { - if (env.has_stack(x.s)) { - env.drop_barrier(); - env.retrieve_stack(x.s); - return {Label{x.s}}; - } - env.drop_stack(); - return {}; - }, - [&env](const SMInstr::FLABEL &x) -> std::vector { - env.drop_barrier(); - return {Label{x.s}}; - }, - [](const SMInstr::SLABEL &x) -> std::vector { - return {Label{x.s}}; - }, - [](const auto &) -> std::vector { return {}; }, - }, - *instr); + auto instr_str = print_sm(instr); + std::vector debug_info; + if (env.mode.is_debug) { + debug_info.push_back(Meta{"# " + instr_str}); + debug_info.push_back(Meta{"# " + env.print_stack()}); } else { - return std::visit( // - utils::multifunc{ - // - [&env](const SMInstr::PUBLIC &x) - -> std::vector { // NOTE: not required in bytecode - env.register_public(x.name); - return {}; + debug_info.push_back(Meta{"# " + instr_str}); + } + if (env.is_barrier()) { + return utils::concat( + std::move(debug_info), + std::visit( // + utils::multifunc{ + // + [&env](const SMInstr::LABEL &x) -> std::vector { + if (env.has_stack(x.s)) { + env.drop_barrier(); + env.retrieve_stack(x.s); + return {Label{x.s}}; + } + env.drop_stack(); + return {}; + }, + [&env](const SMInstr::FLABEL &x) -> std::vector { + env.drop_barrier(); + return {Label{x.s}}; + }, + [](const SMInstr::SLABEL &x) -> std::vector { + return {Label{x.s}}; + }, + [](const auto &) -> std::vector { return {}; }, }, - [&env](const SMInstr::EXTERN &x) - -> std::vector { // NOTE: not required in bytecode - env.register_extern(x.name); - return {}; - }, - [](const SMInstr::IMPORT &) - -> std::vector { // NOTE: not required in bytecode - return {}; - }, - [&env](const SMInstr::CLOSURE &x) -> std::vector { - // NOTE: probably will change for bytecode cmd - const Externality ext = - env.is_external(x.name) ? 
Externality::E : Externality::I; - const auto address = M{DataKind::F, ext, Addressed::A, x.name}; - const auto l = env.allocate(); + *instr)); + } else { + return utils::concat( + std::move(debug_info), + std::visit( // + utils::multifunc{ + // + [&env](const SMInstr::PUBLIC &x) + -> std::vector { // NOTE: not required in bytecode + env.register_public(x.name); + return {}; + }, + [&env](const SMInstr::EXTERN &x) + -> std::vector { // NOTE: not required in bytecode + env.register_extern(x.name); + return {}; + }, + [](const SMInstr::IMPORT &) + -> std::vector { // NOTE: not required in bytecode + return {}; + }, + [&env](const SMInstr::CLOSURE &x) -> std::vector { + // NOTE: probably will change for bytecode cmd + const Externality ext = + env.is_external(x.name) ? Externality::E : Externality::I; + const auto address = + M{DataKind::F, ext, Addressed::A, x.name}; + const auto l = env.allocate(); - std::vector result; - result.reserve(x.closure.size()); - for (const auto &c : x.closure) { - const auto cr = env.allocate(); - std::vector mov_result = mov(env.loc(c), cr); - result = - utils::concat(std::move(result), std::move(mov_result)); - } - std::reverse(result.begin(), result.end()); - return utils::concat( - std::move(result), mov(address, l), - compile_call(env, ".closure", 1 + x.closure.size(), false)); - }, - [&env](const SMInstr::CONST &x) -> std::vector { - const auto s = env.allocate(); - return {Mov{L{box(x.n)}, s}}; - }, - [&env](const SMInstr::STRING &x) -> std::vector { - const auto addr = env.register_string(x.str); - const auto l = env.allocate(); - return utils::concat(mov(addr, l), - compile_call(env, ".string", 1, false)); - }, - [&env](const SMInstr::LDA &x) -> std::vector { - env.register_variable(x.v); - const auto s = env.allocate(); - const auto s_ = env.allocate(); - return std::vector{Lea{env.loc(x.v), rax}, Mov{rax, s}, - Mov{rax, s_}}; - }, - [&env](const SMInstr::LD &x) -> std::vector { - const auto s = env.allocate(); - return s.is() 
|| s.is() - ? std::vector{Mov{s, rax}, - Mov{rax, env.loc(x.v)}} - : std::vector{Mov{s, env.loc(x.v)}}; - }, - [&env](const SMInstr::ST &x) -> std::vector { - env.register_variable(x.v); - const auto s = env.peek(); - return s.is() || s.is() - ? std::vector{Mov{s, rax}, - Mov{rax, env.loc(x.v)}} - : std::vector{Mov{s, env.loc(x.v)}}; - }, - [&env](const SMInstr::STA &) -> std::vector { - return compile_call(env, ".sta", 3, false); - }, - [&env](const SMInstr::STI &) -> std::vector { - const auto v = env.pop(); - const auto x = env.peek(); - return x.is() || x.is() + std::vector result; + result.reserve(x.closure.size()); + for (const auto &c : x.closure) { + const auto cr = env.allocate(); + std::vector mov_result = mov(env.loc(c), cr); + result = + utils::concat(std::move(result), std::move(mov_result)); + } + std::reverse(result.begin(), result.end()); + return utils::concat(std::move(result), mov(address, l), + compile_call(env, ".closure", + 1 + x.closure.size(), + false)); + }, + [&env](const SMInstr::CONST &x) -> std::vector { + const auto s = env.allocate(); + return {Mov{L{box(x.n)}, s}}; + }, + [&env](const SMInstr::STRING &x) -> std::vector { + const auto addr = env.register_string(x.str); + const auto l = env.allocate(); + return utils::concat(mov(addr, l), + compile_call(env, ".string", 1, false)); + }, + [&env](const SMInstr::LDA &x) -> std::vector { + env.register_variable(x.v); + const auto s = env.allocate(); + const auto s_ = env.allocate(); + return std::vector{Lea{env.loc(x.v), rax}, Mov{rax, s}, + Mov{rax, s_}}; + }, + [&env](const SMInstr::LD &x) -> std::vector { + const auto s = env.allocate(); + return s.is() || s.is() + ? std::vector{Mov{s, rax}, + Mov{rax, env.loc(x.v)}} + : std::vector{Mov{env.loc(x.v), s}}; + }, + [&env](const SMInstr::ST &x) -> std::vector { + env.register_variable(x.v); + const auto s = env.peek(); + return s.is() || s.is() + ? 
std::vector{Mov{s, rax}, + Mov{rax, env.loc(x.v)}} + : std::vector{Mov{s, env.loc(x.v)}}; + }, + [&env](const SMInstr::STA &) -> std::vector { + return compile_call(env, ".sta", 3, false); + }, + [&env](const SMInstr::STI &) -> std::vector { + const auto v = env.pop(); + const auto x = env.peek(); + return x.is() || x.is() ? std::vector{Mov{v, rdx},Mov{x, rax},Mov{rdx, I{0, rax}},Mov{rdx, x}, } : std::vector{Mov{v, rax}, Mov{rax, I{0, x}}, Mov{rax, x}}; - }, - [&env](const SMInstr::BINOP &x) -> std::vector { - return compile_binop(env, x.opr); - }, - [](const SMInstr::LABEL &x) -> std::vector { - return {Label{x.s}}; - }, - [](const SMInstr::FLABEL &x) -> std::vector { - return {Label{x.s}}; - }, - [](const SMInstr::SLABEL &x) -> std::vector { - return {Label{x.s}}; - }, - [&env](const SMInstr::JMP &x) -> std::vector { - env.set_stack(x.l); - env.set_barrier(); - return {Jmp{x.l}}; - }, - [&env](const SMInstr::CJMP &y) -> std::vector { - const auto x = env.pop(); - env.set_stack(y.l); - return {Sar1{x}, /*!!!*/ Binop{Opr::CMP, L{0}, x}, - CJmp{y.s, y.l}}; - }, - [&env, &imports, - &cmd](const SMInstr::BEGIN &x) -> std::vector { - { - const bool is_safepoint = safepoint_functions.count(x.f) != 0; - const bool is_vararg = vararg_functions.count(x.f) != 0; - if (is_safepoint && is_vararg) { - failure("Function name %s is reserved for built-in", - x.f.c_str()); - } - } + }, + [&env](const SMInstr::BINOP &x) -> std::vector { + return compile_binop(env, x.opr); + }, + [](const SMInstr::LABEL &x) -> std::vector { + return {Label{x.s}}; + }, + [](const SMInstr::FLABEL &x) -> std::vector { + return {Label{x.s}}; + }, + [](const SMInstr::SLABEL &x) -> std::vector { + return {Label{x.s}}; + }, + [&env](const SMInstr::JMP &x) -> std::vector { + env.set_stack(x.l); + env.set_barrier(); + return {Jmp{x.l}}; + }, + [&env](const SMInstr::CJMP &y) -> std::vector { + const auto x = env.pop(); + env.set_stack(y.l); + return {Sar1{x}, /*!!!*/ Binop{Opr::CMP, L{0}, x}, + CJmp{y.s, 
y.l}}; + }, + [&env, &imports, + &cmd](const SMInstr::BEGIN &x) -> std::vector { + { + const bool is_safepoint = + safepoint_functions.count(x.f) != 0; + const bool is_vararg = vararg_functions.count(x.f) != 0; + if (is_safepoint && is_vararg) { + failure("Function name %s is reserved for built-in", + x.f.c_str()); + } + } - const std::string name = (x.f[0] == 'L' ? x.f.substr(1) : x.f); + const std::string name = + (x.f[0] == 'L' ? x.f.substr(1) : x.f); - const auto stabs = [&env, &x, &name]() -> std::vector { - if (!env.do_opt_stabs()) { - return {}; - } - if (x.f == "main") { - return {Meta{"\t.type main, @function"}}; - } + const auto stabs = [&env, &x, &name]() -> std::vector { + if (!env.do_opt_stabs()) { + return {}; + } + if (x.f == "main") { + return {Meta{"\t.type main, @function"}}; + } - std::vector func = { - Meta{std::format("\t.type {}, @function", name)}, - Meta{ - std::format("\t.stabs \"{}:F1\",36,0,0,{}", name, x.f)}, - }; + std::vector func = { + Meta{std::format("\t.type {}, @function", name)}, + Meta{std::format("\t.stabs \"{}:F1\",36,0,0,{}", name, + x.f)}, + }; - std::vector arguments = - {} /* OCAML_VER: TODO: stabs for function arguments */; + std::vector arguments = + {} /* OCAML_VER: TODO: stabs for function arguments */; - std::vector variables; - for (const auto &scope : x.scopes) { - utils::insert(variables, stabs_scope(env, x, scope)); - } + std::vector variables; + for (const auto &scope : x.scopes) { + utils::insert(variables, stabs_scope(env, x, scope)); + } - return utils::concat(std::move(func), std::move(arguments), - std::move(variables)); - }; + return utils::concat(std::move(func), std::move(arguments), + std::move(variables)); + }; - auto stabs_code = stabs(); + auto stabs_code = stabs(); - const auto check_argc = [&env, &cmd, &x, - &name]() -> std::vector { - if (x.f == cmd.topname) { - return {}; - } + const auto check_argc = [&env, &cmd, &x, + &name]() -> std::vector { + if (x.f == cmd.topname) { + return {}; + } - auto 
argc_correct_label = x.f + "_argc_correct"; - auto pat_addr = // TODO: check is that is the same string to - // ocaml version one - env.register_string( - "Function %s called with incorrect arguments count. \ + std::string argc_correct_label = x.f + "_argc_correct"; + auto pat_addr = // TODO: check is that is the same string to + // ocaml version one + env.register_string( + "Function %s called with incorrect arguments count. \ Expected: %d. Actual: %d\\n"); - auto name_addr = env.register_string(name); - const auto pat_loc = env.allocate(); - const auto name_loc = env.allocate(); - const auto expected_loc = env.allocate(); - const auto actual_loc = env.allocate(); - std::vector fail_call = - compile_call(env, "failure", 4, false); + auto name_addr = env.register_string(name); + const auto pat_loc = env.allocate(); + const auto name_loc = env.allocate(); + const auto expected_loc = env.allocate(); + const auto actual_loc = env.allocate(); + std::vector fail_call = + compile_call(env, "failure", 4, false); - env.pop(); - return utils::concat( - std::vector{ - Meta{"# Check arguments count"}, - Binop{Opr::CMP, L{x.nargs}, r11}, - CJmp{"e", argc_correct_label}, - Mov{r11, actual_loc}, - Mov{L{x.nargs}, expected_loc}, - Mov{name_addr, name_loc}, - Mov{pat_addr, pat_loc}, - }, - std::move(fail_call), Instr{Label{argc_correct_label}}); - }; - auto check_argc_code = check_argc(); - env.assert_empty_stack(); - const bool has_closure = !x.closure.empty(); - env.enter(x.f, x.nargs, x.nlocals, has_closure); - return utils::concat( + env.pop(); + return utils::concat( + std::vector{ + Meta{"# Check arguments count"}, + Binop{Opr::CMP, L{x.nargs}, r11}, + CJmp{"e", argc_correct_label}, + Mov{r11, actual_loc}, + Mov{L{x.nargs}, expected_loc}, + Mov{name_addr, name_loc}, + Mov{pat_addr, pat_loc}, + }, + std::move(fail_call), Instr{Label{argc_correct_label}}); + }; + auto check_argc_code = check_argc(); + env.assert_empty_stack(); + const bool has_closure = !x.closure.empty(); + 
env.enter(x.f, x.nargs, x.nlocals, has_closure); + + return utils::concat( std::move(stabs_code), Instr{Meta{"\t.cfi_startproc"}}, (x.f == cmd.topname ? std::vector{ @@ -1957,154 +1974,156 @@ std::vector compile(const Options &cmd, Env &env, std::vector{imports}, [](const auto &i) { return i != "Std"; }), [](const auto &i) -> Instr { - return Call{std::format("init {}", i)}; + return Call{std::format("init{}", i)}; }) : std::vector{}), std::move(check_argc_code) ); - }, - [&env](const SMInstr::END &) -> std::vector { - const auto x = env.pop(); - env.assert_empty_stack(); - const auto &name = env.fname; - std::optional stabs = - env.do_opt_stabs() - ? Meta{std::format("\t.size {}, .-{}", name, name)} - : std::optional{}; + }, + [&env](const SMInstr::END &) -> std::vector { + const auto x = env.pop(); + env.assert_empty_stack(); + const auto &name = env.fname; + std::optional stabs = + env.do_opt_stabs() + ? Meta{std::format("\t.size {}, .-{}", name, name)} + : std::optional{}; - std::vector result = utils::concat( - std::vector{ - Mov{x, rax}, - /*!!*/ - Label{env.epilogue()}, - Mov{rbp, rsp}, - Pop{rbp}, - }, - std::optional{name == "main" - ? Binop{Opr::XOR, rax, rax} - : std::optional{}}, - std::vector{ - Meta{"\t.cfi_restore\t5"}, - Meta{"\t.cfi_def_cfa\t4, 4"}, - Ret{}, - Meta{"\t.cfi_endproc"}, - Meta{ - /* Allocate space for the symbolic stack - Add extra word if needed to preserve alignment */ - std::format( - "\t.set\t{},\t{}", env.prefixed(env.lsize()), - (env.get_allocated() % 2 == 0 - ? (env.get_allocated() * word_size) - : ((env.get_allocated() + 1) * word_size)))}, - Meta{std::format("\t.set\t{},\t{}", - env.prefixed(env.get_allocated_size()), - env.get_allocated())}, - }, - std::move(stabs)); + std::vector result = utils::concat( + std::vector{ + Mov{x, rax}, + /*!!*/ + Label{env.epilogue()}, + Mov{rbp, rsp}, + Pop{rbp}, + }, + std::optional{name == "main" + ? 
Binop{Opr::XOR, rax, rax} + : std::optional{}}, + std::vector{ + Meta{"\t.cfi_restore\trbp"}, + Meta{"\t.cfi_def_cfa\t4, 4"}, + Ret{}, + Meta{"\t.cfi_endproc"}, + Meta{/* Allocate space for the symbolic stack + Add extra word if needed to preserve alignment + */ + std::format( + "\t.set\t{},\t{}", env.prefixed(env.lsize()), + (env.get_allocated() % 2 == 0 + ? (env.get_allocated() * word_size) + : ((env.get_allocated() + 1) * + word_size)))}, + Meta{std::format( + "\t.set\t{},\t{}", + env.prefixed(env.get_allocated_size()), + env.get_allocated())}, + }, + std::move(stabs)); - env.leave(); - return result; + env.leave(); + return result; + }, + [&env](const SMInstr::RET &) -> std::vector { + const auto x = env.peek(); + return {Mov{x, rax}, Jmp{env.epilogue()}}; + }, + [&env](const SMInstr::ELEM &) -> std::vector { + return compile_call(env, ".elem", 2, false); + }, + [&env](const SMInstr::CALL &x) -> std::vector { + return compile_call(env, x.fname, x.n, x.tail); // call + }, + [&env](const SMInstr::CALLC &x) -> std::vector { + return compile_call(env, {}, x.n, x.tail); // closure call + }, + [&env](const SMInstr::SEXP &x) -> std::vector { + const auto s = env.allocate(); + auto code = compile_call(env, ".sexp", x.n + 1, false); + return utils::concat(mov(L{box(env.hash(x.tag))}, s), + std::move(code)); + }, + [&env](const SMInstr::DROP &) -> std::vector { + env.pop(); + return {}; + }, + [&env](const SMInstr::DUP &) -> std::vector { + const auto x = env.peek(); + const auto s = env.allocate(); + return mov(x, s); + }, + [&env](const SMInstr::SWAP &) -> std::vector { + const auto [x, y] = env.peek2(); + return {Push{x}, Push{y}, Pop{x}, Pop{y}}; + }, + [&env](const SMInstr::TAG &x) -> std::vector { + const auto s1 = env.allocate(); + const auto s2 = env.allocate(); + auto code = compile_call(env, ".tag", 3, false); + return utils::concat(mov(L{box(env.hash(x.tag))}, s1), + mov(L{box(x.n)}, s2), std::move(code)); + }, + [&env](const SMInstr::ARRAY &x) -> std::vector 
{ + const auto s = env.allocate(); + auto code = compile_call(env, ".array_patt", 2, false); + return utils::concat(std::vector{Mov{L{box(x.n)}, s}}, + std::move(code)); + }, + [&env](const SMInstr::PATT &x) -> std::vector { + std::string fname; + switch (x.patt) { + case Patt::STRCMP: + return compile_call(env, ".string_patt", 2, false); + case Patt::BOXED: + fname = ".boxed_patt"; + break; + case Patt::UNBOXED: + fname = ".unboxed_patt"; + break; + case Patt::ARRAY: + fname = ".array_tag_patt"; + break; + case Patt::STRING: + fname = ".string_tag_patt"; + break; + case Patt::SEXP: + fname = ".sexp_tag_patt"; + break; + case Patt::CLOSURE: + fname = ".closure_tag_patt"; + break; + default: + failure("Unexpected pattern %s: %d", __FILE__, __LINE__); + break; + } + return compile_call(env, fname, 1, false); + }, + [&env](const SMInstr::LINE &x) -> std::vector { + return env.gen_line(x.n); + }, + [&env, &cmd](const SMInstr::FAIL &x) -> std::vector { + const auto v = x.val ? env.peek() : env.pop(); + const auto msg_addr = env.register_string(cmd.filename); + const auto vr = env.allocate(); + const auto sr = env.allocate(); + const auto liner = env.allocate(); + const auto colr = env.allocate(); + auto code = compile_call(env, ".match_failure", 4, false); + env.pop(); + return utils::concat( + std::vector{ + Mov{L{static_cast(x.col)}, colr}, + Mov{L{static_cast(x.line)}, liner}, + Mov{msg_addr, sr}, + Mov{v, vr}, + }, + std::move(code)); + }, + [](const auto &) -> std::vector { + failure("invalid SM insn\n"); // TODO: better error + utils::unreachable(); + }, }, - [&env](const SMInstr::RET &) -> std::vector { - const auto x = env.peek(); - return {Mov{x, rax}, Jmp{env.epilogue()}}; - }, - [&env](const SMInstr::ELEM &) -> std::vector { - return compile_call(env, ".elem", 2, false); - }, - [&env](const SMInstr::CALL &x) -> std::vector { - return compile_call(env, x.fname, x.n, x.tail); // call - }, - [&env](const SMInstr::CALLC &x) -> std::vector { - return 
compile_call(env, {}, x.n, x.tail); // closure call - }, - [&env](const SMInstr::SEXP &x) -> std::vector { - const auto s = env.allocate(); - auto code = compile_call(env, ".sexp", x.n + 1, false); - return utils::concat(mov(L{box(env.hash(x.tag))}, s), - std::move(code)); - }, - [&env](const SMInstr::DROP &) -> std::vector { - env.pop(); - return {}; - }, - [&env](const SMInstr::DUP &) -> std::vector { - const auto x = env.peek(); - const auto s = env.allocate(); - return mov(x, s); - }, - [&env](const SMInstr::SWAP &) -> std::vector { - const auto [x, y] = env.peek2(); - return {Push{x}, Push{y}, Pop{x}, Pop{y}}; - }, - [&env](const SMInstr::TAG &x) -> std::vector { - const auto s1 = env.allocate(); - const auto s2 = env.allocate(); - auto code = compile_call(env, ".tag", 3, false); - return utils::concat(mov(L{box(env.hash(x.tag))}, s1), - mov(L{box(x.n)}, s2), std::move(code)); - }, - [&env](const SMInstr::ARRAY &x) -> std::vector { - const auto s = env.allocate(); - auto code = compile_call(env, ".array_patt", 2, false); - return utils::concat(std::vector{Mov{L{box(x.n)}, s}}, - std::move(code)); - }, - [&env](const SMInstr::PATT &x) -> std::vector { - std::string fname; - switch (x.patt) { - case Patt::STRCMP: - return compile_call(env, ".string_patt", 2, false); - case Patt::BOXED: - fname = ".boxed_patt"; - break; - case Patt::UNBOXED: - fname = ".unboxed_patt"; - break; - case Patt::ARRAY: - fname = ".array_tag_patt"; - break; - case Patt::STRING: - fname = ".string_tag_patt"; - break; - case Patt::SEXP: - fname = ".sexp_tag_patt"; - break; - case Patt::CLOSURE: - fname = ".closure_tag_patt"; - break; - default: - failure("Unexpected pattern %s: %d", __FILE__, __LINE__); - break; - } - return compile_call(env, fname, 1, false); - }, - [&env](const SMInstr::LINE &x) -> std::vector { - return env.gen_line(x.n); - }, - [&env, &cmd](const SMInstr::FAIL &x) -> std::vector { - const auto v = x.val ? 
env.peek() : env.pop(); - const auto msg_addr = env.register_string(cmd.filename); - const auto vr = env.allocate(); - const auto sr = env.allocate(); - const auto liner = env.allocate(); - const auto colr = env.allocate(); - auto code = compile_call(env, ".match_failure", 4, false); - env.pop(); - return utils::concat( - std::vector{ - Mov{L{static_cast(x.col)}, colr}, - Mov{L{static_cast(x.line)}, liner}, - Mov{msg_addr, sr}, - Mov{v, vr}, - }, - std::move(code)); - }, - [](const auto &) -> std::vector { - failure("invalid SM insn\n"); // TODO: better error - utils::unreachable(); - }, - }, - *instr); + *instr)); } } @@ -2120,8 +2139,8 @@ std::vector compile(const Options &cmd, Env &env, } std::vector compile_to_code(const std::vector &code) { - Options cmd{.topname = "byterun", .filename = "byterun"}; // TODO TMP - Env env(Mode{.is_debug = true, .target_os = OS::LINUX}); + Options cmd{.topname = "main", .filename = "byterun"}; // TODO TMP + Env env(Mode{.is_debug = false, .target_os = OS::LINUX}); auto asm_code = compile(cmd, env, {/*imports (TODO TMP)*/}, code); std::vector res; diff --git a/byterun/src/sm_parser.cpp b/byterun/src/sm_parser.cpp index 91b5219a8..94de9dfd4 100644 --- a/byterun/src/sm_parser.cpp +++ b/byterun/src/sm_parser.cpp @@ -3,9 +3,11 @@ #include #include #include +#include #include #include #include +#include #include using Result = utils::Result; @@ -21,7 +23,9 @@ std::vector parse_sm(std::istream &in) { std::string instr_str; std::getline(in, instr_str); +#ifdef DEBUG std::cout << "line: <" << instr_str << ">\n"; +#endif if (instr_str.empty()) { continue; } @@ -62,7 +66,9 @@ struct ParsingResult { // std::string_view trim(std::string_view str) { +#ifdef DEBUG std::cout << __func__ << '\n'; +#endif auto begin = str.begin(); for (; begin != str.end() && *begin == ' '; ++begin) { } @@ -75,19 +81,25 @@ std::string_view trim(std::string_view str) { } std::string_view substr_to(std::string_view line, size_t &pos, char to) { +#ifdef DEBUG 
std::cout << __func__ << " with " << line.substr(pos) << '\n'; +#endif auto offset = line.find(to, pos); if (offset == std::string::npos) { +#ifdef DEBUG std::cout << "value \"\"\n"; +#endif return ""; }; std::string_view result = line.substr(pos, offset); pos += offset + 1; +#ifdef DEBUG std::cout << "value " << result << "\n"; +#endif return result; } @@ -99,23 +111,31 @@ template using Matches = std::vector>; // not required here) template ParsingResult prefix_matcher(std::string_view s, const Matches &values) { +#ifdef DEBUG std::cout << __func__ << '\n'; +#endif for (auto &value : values) { if (s.substr(0, value.first.size()) == value.first) { return {value.second, s.substr(value.first.size())}; } } +#ifdef DEBUG std::cout << "can't parse prefix from " << s << '\n'; +#endif return {{}, s}; } ParsingResult parse_any_val(std::string_view s); ParsingResult parse_str(std::string_view s) { +#ifdef DEBUG std::cout << __func__ << '\n'; +#endif if (s.size() < 2 || s.front() != '"') { +#ifdef DEBUG std::cout << "can't parse string from " << s << '\n'; +#endif return {{}, s}; } @@ -135,13 +155,17 @@ ParsingResult parse_str(std::string_view s) { } ParsingResult parse_int(std::string_view s) { +#ifdef DEBUG std::cout << __func__ << '\n'; +#endif int value = 0; auto res = std::from_chars(s.data(), s.data() + s.size(), value); if (res.ec != std::errc{}) { +#ifdef DEBUG std::cout << "can't parse int from " << s << '\n'; +#endif return {{}, s}; } @@ -149,13 +173,17 @@ ParsingResult parse_int(std::string_view s) { } ParsingResult parse_bool(std::string_view s) { +#ifdef DEBUG std::cout << __func__ << '\n'; +#endif static const Matches bools = {{"true", true}, {"false", false}}; return prefix_matcher(s, bools); } ParsingResult parse_opr(std::string_view s) { +#ifdef DEBUG std::cout << __func__ << '\n'; +#endif static const Matches oprs = { {"+", Opr::ADD}, // + {"-", Opr::SUB}, // - @@ -183,7 +211,9 @@ Opr any_opr_cast(std::any value) { } ParsingResult 
parse_patt(std::string_view s) { +#ifdef DEBUG std::cout << __func__ << '\n'; +#endif static const Matches patts = { {"Boxed", Patt::BOXED}, {"UnBoxed", Patt::UNBOXED}, {"Array", Patt::ARRAY}, {"String", Patt::STRING}, @@ -204,7 +234,9 @@ Patt any_patt_cast(std::any value) { // --- ParsingResult parse_var(std::string_view s) { +#ifdef DEBUG std::cout << __func__ << '\n'; +#endif static const std::map, std::less<>> vars = { @@ -234,7 +266,9 @@ ParsingResult parse_var(std::string_view s) { auto arg_str = std::string{substr_to(s, pos, ' ')}; auto arg_it = vars.find(arg_str); if (arg_it == vars.end()) { +#ifdef DEBUG std::cout << "can't parse var from " << s << '\n'; +#endif return {{}, s}; } ++pos; // '(' @@ -242,21 +276,27 @@ ParsingResult parse_var(std::string_view s) { // NOTE: s_rest starts with ')' auto [id, s_rest] = parse_any_val(s.substr(pos)); if (not id.has_value()) { +#ifdef DEBUG std::cout << "any val: can't parse int from " << s << '\n'; +#endif return {{}, s}; } try { return {arg_it->second(std::move(id)), s_rest.substr(1)}; // skip ')' } catch (const std::bad_any_cast &) { +#ifdef DEBUG std::cout << "bad any cast: can't parse var from " << s << '\n'; +#endif return {{}, s}; } } // (_, _) ParsingResult parse_pair(std::string_view s) { +#ifdef DEBUG std::cout << __func__ << '\n'; +#endif if (s.size() < 2 || s.front() != '(') { return {}; } @@ -272,9 +312,13 @@ ParsingResult parse_pair(std::string_view s) { // [_, ..., _] ParsingResult parse_array(std::string_view s, char first_symbol = '[', char last_symbol = ']') { +#ifdef DEBUG std::cout << __func__ << '\n'; +#endif if (s.size() < 2 || s.front() != first_symbol) { +#ifdef DEBUG std::cout << "can't parse array from " << s << '\n'; +#endif return {}; } @@ -291,7 +335,9 @@ ParsingResult parse_array(std::string_view s, char first_symbol = '[', res = parse_any_val(res.rest); if (not res.value.has_value()) { +#ifdef DEBUG std::cout << "can't parse array elem from " << s << '\n'; +#endif return {{}, s}; } @@ 
-304,9 +350,13 @@ ParsingResult parse_array(std::string_view s, char first_symbol = '[', // { blab="_"; elab="_" names=[...]; subs=[...]} ParsingResult parse_scope(std::string_view s) { +#ifdef DEBUG std::cout << __func__ << '\n'; +#endif if (s.size() < 2 || s.front() != '{') { +#ifdef DEBUG std::cout << "can't parse scope from " << s << '\n'; +#endif return {}; } @@ -354,13 +404,17 @@ ParsingResult parse_scope(std::string_view s) { return {scope, res.rest.substr(3)}; // skip '; }' } catch (const std::bad_any_cast &) { +#ifdef DEBUG std::cout << "bad any cast: can't parse int from " << s << '\n'; +#endif return {{}, s}; } } ParsingResult parse_any_val(std::string_view s) { +#ifdef DEBUG std::cout << __func__ << " with " << s << '\n'; +#endif ParsingResult res; if (res = parse_str(s); res.value.has_value()) { @@ -541,7 +595,9 @@ private: }; utils::Result parse_sm(const std::string &line) { +#ifdef DEBUG std::cout << __func__ << '\n'; +#endif std::unordered_map to_instr = { {"BINOP", SMInstr{SMInstr::BINOP{}}}, {"CONST", SMInstr{SMInstr::CONST{}}}, @@ -624,107 +680,165 @@ utils::Result parse_sm(const std::string &line) { return instr.build(); } -// TODO: TMP: not efficient, for test purposes only +// --- + +const char *print_opr(Opr opr) { + static const std::vector oprs = { + "+", // Opr::ADD, // + + "-", // Opr::SUB // - + "*", // Opr::MULT // * + "/", // Opr::DIV // / + "%", // Opr::MOD // % + "<=", // Opr::LEQ // <= + "<", // Opr::LT // < + ">", // Opr::GT // > + ">=", // Opr::GEQ // >= + "==", // Opr::EQ // == + "!=", // Opr::NEQ // != + "&&", // Opr::AND // && + "!!", // Opr::OR // !! 
+ }; // TODO: check format: cpp vs lama + return oprs.at(size_t(opr)); +} + +const char *print_patt(Patt patt) { + static const std::vector patts = { + "Boxed", // Patt::BOXED + "UnBoxed", // Patt::UNBOXED + "Array", // Patt::ARRAY + "String", // Patt::STRING + "SExp", // Patt::SEXP + "Closure", // Patt::CLOSURE + "StrCmp", // Patt::STRCMP + }; // TODO: check + return patts.at(size_t(patt)); +} + +std::string print_var(const ValT &var) { + return std::visit(utils::multifunc{ + [](const ValT::Global &x) -> std::string { + return std::format("Global (\"{}\")", x.s); + }, + [](const ValT::Fun &x) -> std::string { + return std::format("Fun (\"{}\")", x.s); + }, + [](const ValT::Local &x) -> std::string { + return std::format("Local ({})", x.n); + }, + [](const ValT::Arg &x) -> std::string { + return std::format("Arg ({})", x.n); + }, + [](const ValT::Access &x) -> std::string { + return std::format("Access ({})", x.n); + }, + }, + *var); +} + +std::string print_var_array(const std::vector &vars) { + std::stringstream result; + result << "["; + for (size_t i = 0; i < vars.size(); ++i) { + result << print_var(vars[i]); + if (i + 1 != vars.size()) { + result << ", "; + } + } + result << "]"; + return result.str(); +} + // TODO: number of printed information reduced for now std::string print_sm(const SMInstr &instr) { return {std::visit( // utils::multifunc{ // [](const SMInstr::PUBLIC &x) -> std::string { - return "PUBLIC [" + x.name + "]"; + return std::format("PUBLIC (\"{}\")", x.name); }, [](const SMInstr::EXTERN &x) -> std::string { - return "EXTERN [" + x.name + "]"; + return std::format("EXTERN (\"{}\")", x.name); }, [](const SMInstr::IMPORT &x) -> std::string { - return "IMPORT [" + x.name + "]"; + return std::format("IMPORT (\"{}\")", x.name); }, [](const SMInstr::CLOSURE &x) -> std::string { - return "CLOSURE [" + x.name + - ".
args_count=" + std::to_string(x.closure.size()) + "]"; + return std::format("CLOSURE (\"{}\", {})", x.name, + print_var_array(x.closure)); }, [](const SMInstr::CONST &x) -> std::string { - return "CONST [" + std::to_string(x.n) + "]"; + return std::format("CONST ({})", x.n); }, [](const SMInstr::STRING &x) -> std::string { - return "STRING [" + x.str + "]"; + return std::format("STRING (\"{}\")", x.str); }, - [](const SMInstr::LDA &) -> std::string { - // x.v - return "LDA"; + [](const SMInstr::LDA &x) -> std::string { + return std::format("LDA ({})", print_var(x.v)); }, - [](const SMInstr::LD &) -> std::string { - // x.v - return "LD"; + [](const SMInstr::LD &x) -> std::string { + return std::format("LD ({})", print_var(x.v)); }, - [](const SMInstr::ST &) -> std::string { - // x.v - return "ST"; + [](const SMInstr::ST &x) -> std::string { + return std::format("ST ({})", print_var(x.v)); }, [](const SMInstr::STA &) -> std::string { return "STA"; }, [](const SMInstr::STI &) -> std::string { return "STI"; }, - [](const SMInstr::BINOP &) -> std::string { - // x.opr - return "BINOP"; + [](const SMInstr::BINOP &x) -> std::string { + return std::format("BINOP (\"{}\")", print_opr(x.opr)); }, [](const SMInstr::LABEL &x) -> std::string { - return "LABEL [" + x.s + "]"; + return std::format("LABEL (\"{}\")", x.s); }, [](const SMInstr::FLABEL &x) -> std::string { - return "FLABEL [" + x.s + "]"; + return std::format("FLABEL (\"{}\")", x.s); }, [](const SMInstr::SLABEL &x) -> std::string { - return "SLABEL [" + x.s + "]"; + return std::format("SLABEL (\"{}\")", x.s); }, [](const SMInstr::JMP &x) -> std::string { - return "JMP [" + x.l + "]"; + return std::format("JMP (\"{}\")", x.l); }, [](const SMInstr::CJMP &x) -> std::string { - return "CJMP [" + x.s + ". 
" + x.l + "]"; + return std::format("CJMP (\"{}\", \"{}\")", x.s, x.l); }, - [](const SMInstr::BEGIN &) -> std::string { - // x.f - // x.nargs - // x.nlocals + [](const SMInstr::BEGIN &x) -> std::string { // x.closure // x.args // x.scopes - return "BEGIN"; + return std::format("BEGIN (\"{}\", {}, {})", x.f, x.nargs, + x.nlocals); }, [](const SMInstr::END &) -> std::string { return "END"; }, [](const SMInstr::RET &) -> std::string { return "RET"; }, [](const SMInstr::ELEM &) -> std::string { return "ELEM"; }, [](const SMInstr::CALL &x) -> std::string { - // x.tail - return "CALL [" + x.fname + ". " + std::to_string(x.n) + "]"; + return std::format("CALL (\"{}\", {}, {})", x.fname, x.n, + x.tail ? "true" : "false"); }, [](const SMInstr::CALLC &x) -> std::string { - // x.tail - return "CALLC [" + std::to_string(x.n) + "]"; + return std::format("CALLC ({}, {})", x.n, + x.tail ? "true" : "false"); }, [](const SMInstr::SEXP &x) -> std::string { - return "SEXP [" + x.tag + ". " + std::to_string(x.n) + "]"; + return std::format("SEXP (\"{}\", {})", x.tag, x.n); }, [](const SMInstr::DROP &) -> std::string { return "DROP"; }, [](const SMInstr::DUP &) -> std::string { return "DUP"; }, [](const SMInstr::SWAP &) -> std::string { return "SWAP"; }, [](const SMInstr::TAG &x) -> std::string { - return "TAG [" + x.tag + ". " + std::to_string(x.n) + "]"; + return std::format("TAG (\"{}\", {})", x.tag, x.n); }, [](const SMInstr::ARRAY &x) -> std::string { - return "ARRAY [" + std::to_string(x.n) + "]"; + return std::format("ARRAY ({})", x.n); }, - [](const SMInstr::PATT &) -> std::string { - // x.patt - return "PATT"; + [](const SMInstr::PATT &x) -> std::string { + return std::format("PATT (\"{}\")", print_patt(x.patt)); }, [](const SMInstr::LINE &x) -> std::string { - return "LINE [" + std::to_string(x.n) + "]"; + return std::format("LINE ({})", x.n); }, [](const SMInstr::FAIL &x) -> std::string { - return "FAIL [" + std::to_string(x.line) + ". " + - std::to_string(x.col) + ". 
" + std::to_string(x.val) + ". " + - "]"; + return std::format("FAIL ({}, {}, {})", x.line, x.col, x.val); }, // [](auto) -> std::string { // throw std::bad_any_cast{}; // create another error ? diff --git a/byterun/xmake.lua b/byterun/xmake.lua index 3df9ff418..0949913f1 100644 --- a/byterun/xmake.lua +++ b/byterun/xmake.lua @@ -1,7 +1,7 @@ -- add_rules("mode.debug", "mode.release") -- add_rules("c++.unity_build") -set_languages("c++23", "c23") +set_languages("c++20", "c11") target("byterun") set_kind("binary")