From a7f23771244602df7e7df980dc88d93c9d49480c Mon Sep 17 00:00:00 2001 From: ProgramSnail Date: Sun, 18 May 2025 14:32:42 +0300 Subject: [PATCH] sm parser: instruction builder --- byterun/include/sm_parser.hpp | 8 ++ byterun/src/compiler.cpp | 6 - byterun/src/sm_parser.cpp | 244 +++++++++++++++++++++++++++------- 3 files changed, 201 insertions(+), 57 deletions(-) diff --git a/byterun/include/sm_parser.hpp b/byterun/include/sm_parser.hpp index cac4cf9de..56fd9581e 100644 --- a/byterun/include/sm_parser.hpp +++ b/byterun/include/sm_parser.hpp @@ -5,6 +5,14 @@ #include #include +namespace utils { +// https://en.cppreference.com/w/cpp/utility/variant/visit2 +template struct multifunc : Ts... { + using Ts::operator()...; +}; +template multifunc(Ts...) -> multifunc; +} // namespace utils + enum class Patt { BOXED, UNBOXED, diff --git a/byterun/src/compiler.cpp b/byterun/src/compiler.cpp index 0387da2f7..f6cd4ae50 100644 --- a/byterun/src/compiler.cpp +++ b/byterun/src/compiler.cpp @@ -16,12 +16,6 @@ namespace utils { -// https://en.cppreference.com/w/cpp/utility/variant/visit2 -template struct multifunc : Ts... { - using Ts::operator()...; -}; -template multifunc(Ts...) -> multifunc; - // https://en.cppreference.com/w/cpp/utility/unreachable [[noreturn]] inline void unreachable() { // Uses compiler specific extensions if possible. 
diff --git a/byterun/src/sm_parser.cpp b/byterun/src/sm_parser.cpp index 0e7d86157..95ba39179 100644 --- a/byterun/src/sm_parser.cpp +++ b/byterun/src/sm_parser.cpp @@ -1,8 +1,8 @@ #include "sm_parser.hpp" +#include #include #include -#include #include std::vector parse_sm(std::istream &in) { @@ -28,13 +28,6 @@ std::vector parse_sm(std::istream &in) { return result; } -std::optional> -parse(std::string::iterator begin, std::string::iterator end) { - size_t result = 0; - std::from_chars(&*begin, &*end, result); - // TODO -} - std::string substr_to(const std::string &line, size_t &pos, char to) { auto offset = line.find(pos, to); @@ -48,29 +41,169 @@ std::string substr_to(const std::string &line, size_t &pos, char to) { return result; } +// TODO: parsers + combinators + +// parse_str +// parse_int +// parse_bool +// parse_opr +// parse_patt +// parse_var + +// parse_array +// parse_scope + +// parse_or + struct SMInstrBuilder { public: - SMInstrBuilder(SMInstr instr) : instr(instr), args_pushed(0) {} + SMInstrBuilder(SMInstr instr) : instr(instr) {} - SMInstr build() { - // TODO: check for all arps - return instr; + std::optional build() { + // TODO: check too many args ?? 
+ try { + // TODO: check for all args present + return {std::visit( // + utils::multifunc{ + // + [&args = args](SMInstr::PUBLIC x) -> SMInstr { + x.name = std::any_cast(args.at(0)); + return x; + }, + [&args = args](SMInstr::EXTERN x) -> SMInstr { + x.name = std::any_cast(args.at(0)); + return x; + }, + [&args = args](SMInstr::IMPORT x) -> SMInstr { + x.name = std::any_cast(args.at(0)); + return x; + }, + [&args = args](SMInstr::CLOSURE x) -> SMInstr { + x.name = std::any_cast(args.at(0)); + x.closure = std::any_cast>(args.at(1)); + return x; + }, + [&args = args](SMInstr::CONST x) -> SMInstr { + x.n = std::any_cast(args.at(0)); + return x; + }, + [&args = args](SMInstr::STRING x) -> SMInstr { + x.str = std::any_cast(args.at(0)); + return x; + }, + [&args = args](SMInstr::LDA x) -> SMInstr { + x.v = std::any_cast(args.at(0)); + return x; + }, + [&args = args](SMInstr::LD x) -> SMInstr { + x.v = std::any_cast(args.at(0)); + return x; + }, + [&args = args](SMInstr::ST x) -> SMInstr { + x.v = std::any_cast(args.at(0)); + return x; + }, + [](SMInstr::STA x) -> SMInstr { return x; }, + [](SMInstr::STI x) -> SMInstr { return x; }, + [&args = args](SMInstr::BINOP x) -> SMInstr { + x.opr = std::any_cast(args.at(0)); + return x; + }, + [&args = args](SMInstr::LABEL x) -> SMInstr { + x.s = std::any_cast(args.at(0)); + return x; + }, + [&args = args](SMInstr::FLABEL x) -> SMInstr { + x.s = std::any_cast(args.at(0)); + return x; + }, + [&args = args](SMInstr::SLABEL x) -> SMInstr { + x.s = std::any_cast(args.at(0)); + return x; + }, + [&args = args](SMInstr::JMP x) -> SMInstr { + x.l = std::any_cast(args.at(0)); + return x; + }, + [&args = args](SMInstr::CJMP x) -> SMInstr { + x.s = std::any_cast(args.at(0)); + x.l = std::any_cast(args.at(1)); + return x; + }, + [&args = args](SMInstr::BEGIN x) -> SMInstr { + x.f = std::any_cast(args.at(0)); + x.nargs = std::any_cast(args.at(1)); + x.nlocals = std::any_cast(args.at(2)); + x.closure = std::any_cast>(args.at(3)); + x.args = 
std::any_cast>(args.at(4)); + x.scopes = std::any_cast>(args.at(5)); + return x; + }, + [](SMInstr::END x) -> SMInstr { return x; }, + [](SMInstr::RET x) -> SMInstr { return x; }, + [](SMInstr::ELEM x) -> SMInstr { return x; }, + [&args = args](SMInstr::CALL x) -> SMInstr { + x.fname = std::any_cast(args.at(0)); + x.n = std::any_cast(args.at(1)); + x.tail = std::any_cast(args.at(2)); + return x; + }, + [&args = args](SMInstr::CALLC x) -> SMInstr { + x.n = std::any_cast(args.at(1)); + x.tail = std::any_cast(args.at(2)); + return x; + }, + [&args = args](SMInstr::SEXP x) -> SMInstr { + x.tag = std::any_cast(args.at(0)); + x.n = std::any_cast(args.at(1)); + return x; + }, + [](SMInstr::DROP x) -> SMInstr { return x; }, + [](SMInstr::DUP x) -> SMInstr { return x; }, + [](SMInstr::SWAP x) -> SMInstr { return x; }, + [&args = args](SMInstr::TAG x) -> SMInstr { + x.tag = std::any_cast(args.at(0)); + x.n = std::any_cast(args.at(1)); + return x; + }, + [&args = args](SMInstr::ARRAY x) -> SMInstr { + x.n = std::any_cast(args.at(0)); + return x; + }, + [&args = args](SMInstr::PATT x) -> SMInstr { + x.patt = std::any_cast(args.at(0)); + return x; + }, + [&args = args](SMInstr::LINE x) -> SMInstr { + x.n = std::any_cast(args.at(0)); + return x; + }, + [&args = args](SMInstr::FAIL x) -> SMInstr { + x.line = std::any_cast(args.at(0)); + x.col = std::any_cast(args.at(1)); + x.val = std::any_cast(args.at(2)); + return x; + }, + // [](auto) -> SMInstr { + // throw std::bad_any_cast{}; // create another error ? 
+ // }, + }, + *instr)}; + } catch (const std::bad_any_cast &) { + return {}; + } catch (const std::out_of_range &) { + return {}; + } } - // TODO - void push_arg(std::string &&value) {} - void push_arg(bool value) {} - void push_arg(int value) {} - void push_arg(ValT value) {} - void push_arg(Patt value) {} + template void push_arg(T &&value) { args.emplace_back(value); } private: SMInstr instr; - size_t args_pushed = 0; -} + std::vector args; +}; -std::optional -parse_sm(const std::string &line) { +std::optional parse_sm(const std::string &line) { std::unordered_map to_instr = { {"BINOP", SMInstr{SMInstr::BINOP{}}}, {"CONST", SMInstr{SMInstr::CONST{}}}, @@ -106,8 +239,6 @@ parse_sm(const std::string &line) { {"LINE", SMInstr{SMInstr::LINE{}}}, }; - char ch; // NOTE: for helpers - size_t pos = 0; std::string cmd = substr_to(line, pos, ' '); @@ -126,35 +257,46 @@ parse_sm(const std::string &line) { return std::nullopt; } - bool was_last_arg = false; - while (!was_last_arg) { - std::string arg = substr_to(line, pos, '('); - ++pos; + // TODO: Automatically parse any structures with parser combinators + // if (cmd == "BEGIN") { + // // TODO: BEGIN + // } else { - if (arg.empty()) { - arg = line.substr(pos); - arg.pop_back(); // ')' - was_last_arg = true; - } + // bool was_last_arg = false; + // while (!was_last_arg) { + // std::string arg = substr_to(line, pos, '('); + // ++pos; - if (arg.front() == '"') { - instr.push_arg(arg.substr(1, arg.size() - 2)); - } else if (arg == "true") { - instr.push_arg(true); - } else if (arg == "false") { - instr.push_arg(false); - } else if () { // TODO: Local, Global, Arg - } else if () { // TODO: BEGIN vectors, etc - } else if () { // TODO: PATT patterns - } else if () { // TODO: CLUSURE vector - } else if (int n = 0; std::from_chars(line.data() + pos, - line.data() + pos + line.size(), n) - .ec == std::errc{}) { - instr.push_arg(n); - } else { - return std::nullopt; - } - } + // if (arg.empty()) { + // arg = line.substr(pos); + // 
arg.pop_back(); // ')' + // was_last_arg = true; + // } + + // if (arg.front() == '"') { + // instr.push_arg(arg.substr(1, arg.size() - 2)); + // } else if (arg.front() == '[') { + // // TODO: parse array + // instr.push_arg(arg.substr(1, arg.size() - 2)); + // } else if (arg == "true") { + // instr.push_arg(true); + // } else if (arg == "false") { + // instr.push_arg(false); + // } else if (auto maybe_var = parse_var(arg); maybe_var) { + // instr.push_arg(*maybe_var); + // } else if () { // TODO: CLOSURE vector + // } else if (auto maybe_patt = parse_patt(arg); maybe_patt) { + // instr.push_arg(*maybe_patt); + // } else if (int n = 0; std::from_chars(line.data() + pos, + // line.data() + pos + line.size(), + // n) + // .ec == std::errc{}) { + // instr.push_arg(n); + // } else { + // return std::nullopt; + // } + // } + // } return instr.build(); }