From 7c3ff0457325dd180c4cb290578bb52d6a552806 Mon Sep 17 00:00:00 2001 From: ProgramSnail Date: Sun, 1 Jun 2025 16:51:56 +0300 Subject: [PATCH] sm_parser: fixes, tested on Functions.sm --- byterun/include/sm_parser.hpp | 23 +++++- byterun/src/cli.cpp | 4 +- byterun/src/sm_parser.cpp | 150 ++++++++++++++++++++++++++-------- 3 files changed, 140 insertions(+), 37 deletions(-) diff --git a/byterun/include/sm_parser.hpp b/byterun/include/sm_parser.hpp index 61187121c..2627a6e1a 100644 --- a/byterun/include/sm_parser.hpp +++ b/byterun/include/sm_parser.hpp @@ -11,6 +11,23 @@ template struct multifunc : Ts... { using Ts::operator()...; }; template multifunc(Ts...) -> multifunc; + +template struct Result { + struct Error { + E v; + }; + + bool failed() { return value.index() != 0; } + + static Result failure(E &&err) { return Result{.value = Error{err}}; } + static Result success(T &&value) { return Result{.value = value}; } + + const T &get_value() { return std::get<0>(value); } + const E &get_error() { return std::get<1>(value).v; } + + std::variant value = Error{E{}}; +}; + } // namespace utils enum class Patt { @@ -239,6 +256,10 @@ struct SMInstr { std::vector parse_sm(std::istream &in); -std::optional parse_sm(const std::string &line); +utils::Result parse_sm(const std::string &line); std::string print_sm(const SMInstr &instr); + +// --- + +template struct utils::Result; diff --git a/byterun/src/cli.cpp b/byterun/src/cli.cpp index af52ba733..52e0a576b 100644 --- a/byterun/src/cli.cpp +++ b/byterun/src/cli.cpp @@ -26,11 +26,13 @@ int main(int argc, char **argv) { do_interpretation = true; } else if (strcmp(argv[1], "-sm") == 0) { // TODO: TMP, FOR CHECKS std::ifstream file(argv[2]); + std::cout << "-- parse\n"; auto instrs = parse_sm(file); - std::cout << "instructions form file\n"; + std::cout << "-- print\n"; for (auto &instr : instrs) { std::cout << print_sm(instr) << "\n"; } + return 0; } #ifdef WITH_CHECK else if (strcmp(argv[1], "-vi") == 0) { diff --git a/byterun/src/sm_parser.cpp b/byterun/src/sm_parser.cpp index 22e88ad65..91b5219a8 100644 --- a/byterun/src/sm_parser.cpp +++ b/byterun/src/sm_parser.cpp @@ -8,24 +8,32 @@ #include #include +using Result = utils::Result; + std::vector parse_sm(std::istream &in) { std::vector result; for (size_t i = 1; !in.eof(); ++i) { if (in.fail()) { - std::cerr << "line " << i << ": input failure"; + std::cerr << "line " << i << ": input failure\n"; break; } std::string instr_str; std::getline(in, instr_str); + + std::cout << "line: <" << instr_str << ">\n"; + if (instr_str.empty()) { + continue; + } auto instr = parse_sm(instr_str); - if (!instr) { - std::cerr << "line " << i << ": instr parsing failure"; + if (instr.failed()) { + std::cerr << "line " << i << ": instr parsing failure: <" + << instr.get_error() << ">\n"; break; } - result.push_back(std::move(instr.value())); + result.push_back(std::move(instr.get_value())); } return result; @@ -53,16 +61,33 @@ struct ParsingResult { // -std::string_view substr_to(const std::string_view line, size_t &pos, char to) { - auto offset = line.find(pos, to); +std::string_view trim(std::string_view str) { + std::cout << __func__ << '\n'; + auto begin = str.begin(); + for (; begin != str.end() && *begin == ' '; ++begin) { + } + + auto end = str.end(); + for (; end > begin && *std::prev(end) == ' '; --end) { + } + + return {begin, end}; +} + +std::string_view substr_to(std::string_view line, size_t &pos, char to) { + std::cout << __func__ << " with " << line.substr(pos) << '\n'; + + auto offset = line.find(to, pos); if (offset == std::string::npos) { + std::cout << "value \"\"\n"; return ""; }; std::string_view result = line.substr(pos, offset); pos += offset + 1; + std::cout << "value " << result << "\n"; return result; } @@ -74,19 +99,23 @@ template using Matches = std::vector>; // not required here) template ParsingResult prefix_matcher(std::string_view s, const Matches &values) { + std::cout << __func__ << '\n'; for (auto &value : values) { if (s.substr(0, value.first.size()) == value.first) { return {value.second, s.substr(value.first.size())}; } } + std::cout << "can't parse prefix from " << s << '\n'; return {{}, s}; } ParsingResult parse_any_val(std::string_view s); ParsingResult parse_str(std::string_view s) { + std::cout << __func__ << '\n'; if (s.size() < 2 || s.front() != '"') { + std::cout << "can't parse string from " << s << '\n'; return {{}, s}; } @@ -106,11 +135,13 @@ ParsingResult parse_str(std::string_view s) { } ParsingResult parse_int(std::string_view s) { + std::cout << __func__ << '\n'; int value = 0; auto res = std::from_chars(s.data(), s.data() + s.size(), value); - if (res.ec == std::errc{}) { + if (res.ec != std::errc{}) { + std::cout << "can't parse int from " << s << '\n'; return {{}, s}; } @@ -118,11 +149,13 @@ ParsingResult parse_int(std::string_view s) { } ParsingResult parse_bool(std::string_view s) { + std::cout << __func__ << '\n'; static const Matches bools = {{"true", true}, {"false", false}}; return prefix_matcher(s, bools); } ParsingResult parse_opr(std::string_view s) { + std::cout << __func__ << '\n'; static const Matches oprs = { {"+", Opr::ADD}, // + {"-", Opr::SUB}, // - @@ -141,7 +174,16 @@ ParsingResult parse_opr(std::string_view s) { return prefix_matcher(s, oprs); } +Opr any_opr_cast(std::any value) { + auto res = parse_opr(std::any_cast(std::move(value))); + if (!res.value.has_value()) { + throw std::bad_any_cast{}; + } + return std::any_cast(std::move(res.value)); +} + ParsingResult parse_patt(std::string_view s) { + std::cout << __func__ << '\n'; static const Matches patts = { {"Boxed", Patt::BOXED}, {"UnBoxed", Patt::UNBOXED}, {"Array", Patt::ARRAY}, {"String", Patt::STRING}, @@ -151,9 +193,18 @@ ParsingResult parse_patt(std::string_view s) { return prefix_matcher(s, patts); } +Patt any_patt_cast(std::any value) { + auto res = parse_patt(std::any_cast(std::move(value))); + if (!res.value.has_value()) { + throw std::bad_any_cast{}; + } + return std::any_cast(std::move(res.value)); +} + // --- ParsingResult parse_var(std::string_view s) { + std::cout << __func__ << '\n'; static const std::map, std::less<>> vars = { @@ -183,6 +234,7 @@ ParsingResult parse_var(std::string_view s) { auto arg_str = std::string{substr_to(s, pos, ' ')}; auto arg_it = vars.find(arg_str); if (arg_it == vars.end()) { + std::cout << "can't parse var from " << s << '\n'; return {{}, s}; } ++pos; // '(' @@ -190,18 +242,21 @@ ParsingResult parse_var(std::string_view s) { // NOTE: s_rest starts with ')' auto [id, s_rest] = parse_any_val(s.substr(pos)); if (not id.has_value()) { + std::cout << "any val: can't parse int from " << s << '\n'; return {{}, s}; } try { return {arg_it->second(std::move(id)), s_rest.substr(1)}; // skip ')' } catch (const std::bad_any_cast &) { + std::cout << "bad any cast: can't parse var from " << s << '\n'; return {{}, s}; } } // (_, _) ParsingResult parse_pair(std::string_view s) { + std::cout << __func__ << '\n'; if (s.size() < 2 || s.front() != '(') { return {}; } @@ -215,32 +270,43 @@ ParsingResult parse_pair(std::string_view s) { } // [_, ..., _] -ParsingResult parse_array(std::string_view s, char first_symbol = '[') { +ParsingResult parse_array(std::string_view s, char first_symbol = '[', + char last_symbol = ']') { + std::cout << __func__ << '\n'; if (s.size() < 2 || s.front() != first_symbol) { + std::cout << "can't parse array from " << s << '\n'; return {}; } std::vector values; - ParsingResult res{{}, s.substr(1)}; // skip '[' (first_symbol) + ParsingResult res{{}, s.substr(1)}; // skip first_symbol + while (true) { + if (res.rest.front() == ',' /*in regular arrays*/ || + res.rest.front() == ';' /*in scopes*/) { + res.rest = res.rest.substr(2); // skip ', ' + } else if (res.rest.front() == last_symbol) { + break; + } - while (not s.empty()) { res = parse_any_val(res.rest); if (not res.value.has_value()) { + std::cout << "can't parse array elem from " << s << '\n'; return {{}, s}; } values.push_back(std::move(res.value)); - res.value = {}; // do not use moved value - res.rest = res.rest.substr(1); // skip ',' (or ']' at the end) + res.value = {}; // do not use moved value } - return {values, res.rest}; + return {values, res.rest.substr(1)}; } // { blab="_"; elab="_" names=[...]; subs=[...]} ParsingResult parse_scope(std::string_view s) { + std::cout << __func__ << '\n'; if (s.size() < 2 || s.front() != '{') { + std::cout << "can't parse scope from " << s << '\n'; return {}; } @@ -286,13 +352,15 @@ ParsingResult parse_scope(std::string_view s) { res.value = {}; // do not use moved vlue } - return {scope, res.rest.substr(1)}; // skip '}' + return {scope, res.rest.substr(3)}; // skip '; }' } catch (const std::bad_any_cast &) { + std::cout << "bad any cast: can't parse int from " << s << '\n'; return {{}, s}; } } ParsingResult parse_any_val(std::string_view s) { + std::cout << __func__ << " with " << s << '\n'; ParsingResult res; if (res = parse_str(s); res.value.has_value()) { @@ -304,12 +372,13 @@ ParsingResult parse_any_val(std::string_view s) { if (res = parse_bool(s); res.value.has_value()) { return res; } - if (res = parse_opr(s); res.value.has_value()) { - return res; - } - if (res = parse_patt(s); res.value.has_value()) { - return res; - } + // NOTE: parsed from string later + // if (res = parse_opr(s); res.value.has_value()) { + // return res; + // } + // if (res = parse_patt(s); res.value.has_value()) { + // return res; + // } if (res = parse_var(s); res.value.has_value()) { return res; } @@ -327,11 +396,11 @@ struct SMInstrBuilder { public: SMInstrBuilder(SMInstr instr) : instr(instr) {} - std::optional build() { + Result build() { // TODO: check too many args ?? try { // TODO: check for all args present - return {std::visit( // + return Result::success(std::visit( // utils::multifunc{ // [&args = args](SMInstr::PUBLIC x) -> SMInstr { @@ -347,7 +416,7 @@ public: return x; }, [&args = args](SMInstr::CLOSURE x) -> SMInstr { - x.name = std::any_cast(args.at(0)); + x.name = std::any_cast(args.at(0)); x.closure = any_array_cast(args.at(1)); return x; }, @@ -374,7 +443,7 @@ public: [](SMInstr::STA x) -> SMInstr { return x; }, [](SMInstr::STI x) -> SMInstr { return x; }, [&args = args](SMInstr::BINOP x) -> SMInstr { - x.opr = std::any_cast(args.at(0)); + x.opr = any_opr_cast(args.at(0)); return x; }, [&args = args](SMInstr::LABEL x) -> SMInstr { @@ -417,8 +486,8 @@ public: return x; }, [&args = args](SMInstr::CALLC x) -> SMInstr { - x.n = std::any_cast(args.at(1)); - x.tail = std::any_cast(args.at(2)); + x.n = std::any_cast(args.at(0)); + x.tail = std::any_cast(args.at(1)); return x; }, [&args = args](SMInstr::SEXP x) -> SMInstr { @@ -439,7 +508,7 @@ public: return x; }, [&args = args](SMInstr::PATT x) -> SMInstr { - x.patt = std::any_cast(args.at(0)); + x.patt = any_patt_cast(args.at(0)); return x; }, [&args = args](SMInstr::LINE x) -> SMInstr { @@ -456,11 +525,11 @@ public: // throw std::bad_any_cast{}; // create another error ? // }, }, - *instr)}; + *instr)); } catch (const std::bad_any_cast &) { - return {}; + return Result::failure("build: bad any cast"); } catch (const std::out_of_range &) { - return {}; + return Result::failure("build: out of range"); } } @@ -471,7 +540,8 @@ private: std::vector args; }; -std::optional parse_sm(const std::string &line) { +utils::Result parse_sm(const std::string &line) { + std::cout << __func__ << '\n'; std::unordered_map to_instr = { {"BINOP", SMInstr{SMInstr::BINOP{}}}, {"CONST", SMInstr{SMInstr::CONST{}}}, @@ -510,9 +580,14 @@ std::optional parse_sm(const std::string &line) { size_t pos = 0; auto cmd = std::string{substr_to(line, pos, ' ')}; + if (cmd.empty()) { + cmd = line; + pos = line.size(); + } + auto instr_it = to_instr.find(cmd); if (instr_it == to_instr.end()) { - return std::nullopt; + return Result::failure("instr name not found: `" + cmd + "`"); } SMInstrBuilder instr{instr_it->second}; @@ -522,14 +597,19 @@ std::optional parse_sm(const std::string &line) { } // (_, ..., _) - args - ParsingResult args_res = parse_array({line.data(), line.data() + pos}); + ParsingResult args_res = + parse_array({line.data() + pos, line.data() + line.size()}, '(', ')'); + + if (!args_res.value.has_value()) { + return Result::failure("arguments list parsing error"); + } try { auto args = std::any_cast>(std::move(args_res.value)); args_res.value = {}; if (not args_res.rest.empty()) { - return std::nullopt; + return Result::failure("extra symbols after instr parsed args"); } // TODO: put all array at once @@ -538,7 +618,7 @@ std::optional parse_sm(const std::string &line) { } args = {}; } catch (const std::bad_any_cast &) { - return std::nullopt; + return Result::failure("bad any cast: parsed argument list is not array"); } return instr.build();