sm_parser: fixes, tested on Functions.sm

This commit is contained in:
ProgramSnail 2025-06-01 16:51:56 +03:00
parent 66ccf639c8
commit 7c3ff04573
3 changed files with 140 additions and 37 deletions

View file

@ -11,6 +11,23 @@ template <class... Ts> struct multifunc : Ts... {
using Ts::operator()...; using Ts::operator()...;
}; };
template <class... Ts> multifunc(Ts...) -> multifunc<Ts...>; template <class... Ts> multifunc(Ts...) -> multifunc<Ts...>;
/// Minimal "value or error" holder.
///
/// A `Result` stores either a `T` (variant alternative 0) or an error payload
/// of type `E` (alternative 1). A default-constructed `Result` is a failure
/// carrying a value-initialized `E`.
///
/// @tparam T type of the successful value
/// @tparam E type of the error payload (defaults to std::string)
template <typename T, typename E = std::string> struct Result {
  /// Wraps the error payload so that the two variant alternatives stay
  /// distinct types even when `T` and `E` are the same type.
  struct Error {
    E v;
  };

  /// @return true when an error is stored (any alternative other than 0).
  bool failed() const { return value.index() != 0; }

  /// Builds a failed result; the payload is moved out of `err`, not copied.
  static Result failure(E &&err) {
    return Result{
        std::variant<T, Error>{std::in_place_index<1>, Error{std::move(err)}}};
  }

  /// Builds a successful result; the value is moved out of `value`, so
  /// move-only `T` is supported.
  static Result success(T &&value) {
    return Result{
        std::variant<T, Error>{std::in_place_index<0>, std::move(value)}};
  }

  /// @return the stored value.
  /// @throws std::bad_variant_access when the result holds an error.
  const T &get_value() const { return std::get<0>(value); }

  /// @return the stored error payload.
  /// @throws std::bad_variant_access when the result holds a value.
  const E &get_error() const { return std::get<1>(value).v; }

  // Public storage: alternative 0 is the value, alternative 1 is the error.
  std::variant<T, Error> value = Error{E{}};
};
} // namespace utils } // namespace utils
enum class Patt { enum class Patt {
@ -239,6 +256,10 @@ struct SMInstr {
std::vector<SMInstr> parse_sm(std::istream &in); std::vector<SMInstr> parse_sm(std::istream &in);
std::optional<SMInstr> parse_sm(const std::string &line); utils::Result<SMInstr> parse_sm(const std::string &line);
std::string print_sm(const SMInstr &instr); std::string print_sm(const SMInstr &instr);
// ---
template struct utils::Result<SMInstr, std::string>;

View file

@ -26,11 +26,13 @@ int main(int argc, char **argv) {
do_interpretation = true; do_interpretation = true;
} else if (strcmp(argv[1], "-sm") == 0) { // TODO: TMP, FOR CHECKS } else if (strcmp(argv[1], "-sm") == 0) { // TODO: TMP, FOR CHECKS
std::ifstream file(argv[2]); std::ifstream file(argv[2]);
std::cout << "-- parse\n";
auto instrs = parse_sm(file); auto instrs = parse_sm(file);
std::cout << "instructions form file\n"; std::cout << "-- print\n";
for (auto &instr : instrs) { for (auto &instr : instrs) {
std::cout << print_sm(instr) << "\n"; std::cout << print_sm(instr) << "\n";
} }
return 0;
} }
#ifdef WITH_CHECK #ifdef WITH_CHECK
else if (strcmp(argv[1], "-vi") == 0) { else if (strcmp(argv[1], "-vi") == 0) {

View file

@ -8,24 +8,32 @@
#include <map> #include <map>
#include <unordered_map> #include <unordered_map>
using Result = utils::Result<SMInstr, std::string>;
std::vector<SMInstr> parse_sm(std::istream &in) { std::vector<SMInstr> parse_sm(std::istream &in) {
std::vector<SMInstr> result; std::vector<SMInstr> result;
for (size_t i = 1; !in.eof(); ++i) { for (size_t i = 1; !in.eof(); ++i) {
if (in.fail()) { if (in.fail()) {
std::cerr << "line " << i << ": input failure"; std::cerr << "line " << i << ": input failure\n";
break; break;
} }
std::string instr_str; std::string instr_str;
std::getline(in, instr_str); std::getline(in, instr_str);
std::cout << "line: <" << instr_str << ">\n";
if (instr_str.empty()) {
continue;
}
auto instr = parse_sm(instr_str); auto instr = parse_sm(instr_str);
if (!instr) { if (instr.failed()) {
std::cerr << "line " << i << ": instr parsing failure"; std::cerr << "line " << i << ": instr parsing failure: <"
<< instr.get_error() << ">\n";
break; break;
} }
result.push_back(std::move(instr.value())); result.push_back(std::move(instr.get_value()));
} }
return result; return result;
@ -53,16 +61,33 @@ struct ParsingResult {
// //
/// Strips leading and trailing space characters from `str`.
///
/// Only ' ' is removed; tabs, newlines and other whitespace are kept.
/// The returned view refers to the same underlying buffer as `str`.
///
/// @param str text to trim
/// @return the sub-view of `str` without surrounding spaces (empty when `str`
///         is empty or consists only of spaces)
std::string_view trim(std::string_view str) {
  std::cout << __func__ << '\n'; // debug trace

  const auto first = str.find_first_not_of(' ');
  if (first == std::string_view::npos) {
    return str.substr(str.size()); // empty input or spaces only
  }

  // A non-space character exists, so find_last_not_of cannot return npos.
  const auto last = str.find_last_not_of(' ');
  return str.substr(first, last - first + 1);
}
/// Extracts the part of `line` that starts at `pos` and ends right before the
/// next occurrence of `to`.
///
/// @param line text to scan
/// @param pos  in: index to start from; out: on success, index of the first
///             character after the delimiter. Left unchanged when no
///             delimiter is found.
/// @param to   delimiter character
/// @return the characters in [pos, delimiter); an empty view when `to` does
///         not occur at or after `pos` (including when `pos` is past the end)
std::string_view substr_to(std::string_view line, size_t &pos, char to) {
  // Clamp for the trace only: substr() throws std::out_of_range if pos > size().
  std::cout << __func__ << " with "
            << line.substr(pos < line.size() ? pos : line.size()) << '\n';

  const auto offset = line.find(to, pos);
  if (offset == std::string_view::npos) {
    std::cout << "value \"\"\n";
    return "";
  }

  // find() yields an absolute index, so the token length is offset - pos and
  // the new position is just past the delimiter.
  std::string_view result = line.substr(pos, offset - pos);
  pos = offset + 1;

  std::cout << "value " << result << "\n";
  return result;
}
@ -74,19 +99,23 @@ template <typename T> using Matches = std::vector<std::pair<std::string, T>>;
// not required here) // not required here)
template <typename T> template <typename T>
ParsingResult prefix_matcher(std::string_view s, const Matches<T> &values) { ParsingResult prefix_matcher(std::string_view s, const Matches<T> &values) {
std::cout << __func__ << '\n';
for (auto &value : values) { for (auto &value : values) {
if (s.substr(0, value.first.size()) == value.first) { if (s.substr(0, value.first.size()) == value.first) {
return {value.second, s.substr(value.first.size())}; return {value.second, s.substr(value.first.size())};
} }
} }
std::cout << "can't parse prefix from " << s << '\n';
return {{}, s}; return {{}, s};
} }
ParsingResult parse_any_val(std::string_view s); ParsingResult parse_any_val(std::string_view s);
ParsingResult parse_str(std::string_view s) { ParsingResult parse_str(std::string_view s) {
std::cout << __func__ << '\n';
if (s.size() < 2 || s.front() != '"') { if (s.size() < 2 || s.front() != '"') {
std::cout << "can't parse string from " << s << '\n';
return {{}, s}; return {{}, s};
} }
@ -106,11 +135,13 @@ ParsingResult parse_str(std::string_view s) {
} }
ParsingResult parse_int(std::string_view s) { ParsingResult parse_int(std::string_view s) {
std::cout << __func__ << '\n';
int value = 0; int value = 0;
auto res = std::from_chars(s.data(), s.data() + s.size(), value); auto res = std::from_chars(s.data(), s.data() + s.size(), value);
if (res.ec == std::errc{}) { if (res.ec != std::errc{}) {
std::cout << "can't parse int from " << s << '\n';
return {{}, s}; return {{}, s};
} }
@ -118,11 +149,13 @@ ParsingResult parse_int(std::string_view s) {
} }
ParsingResult parse_bool(std::string_view s) { ParsingResult parse_bool(std::string_view s) {
std::cout << __func__ << '\n';
static const Matches<bool> bools = {{"true", true}, {"false", false}}; static const Matches<bool> bools = {{"true", true}, {"false", false}};
return prefix_matcher(s, bools); return prefix_matcher(s, bools);
} }
ParsingResult parse_opr(std::string_view s) { ParsingResult parse_opr(std::string_view s) {
std::cout << __func__ << '\n';
static const Matches<Opr> oprs = { static const Matches<Opr> oprs = {
{"+", Opr::ADD}, // + {"+", Opr::ADD}, // +
{"-", Opr::SUB}, // - {"-", Opr::SUB}, // -
@ -141,7 +174,16 @@ ParsingResult parse_opr(std::string_view s) {
return prefix_matcher(s, oprs); return prefix_matcher(s, oprs);
} }
// Turns an std::any that holds the textual form of an operator into an Opr.
// Throws std::bad_any_cast when `value` does not hold a std::string or when
// parse_opr yields no value for that text.
Opr any_opr_cast(std::any value) {
  // Named local so the text stays alive while the parsing result is in use.
  const std::string text = std::any_cast<std::string>(std::move(value));
  auto parsed = parse_opr(text);
  if (parsed.value.has_value()) {
    return std::any_cast<Opr>(std::move(parsed.value));
  }
  throw std::bad_any_cast{};
}
ParsingResult parse_patt(std::string_view s) { ParsingResult parse_patt(std::string_view s) {
std::cout << __func__ << '\n';
static const Matches<Patt> patts = { static const Matches<Patt> patts = {
{"Boxed", Patt::BOXED}, {"UnBoxed", Patt::UNBOXED}, {"Boxed", Patt::BOXED}, {"UnBoxed", Patt::UNBOXED},
{"Array", Patt::ARRAY}, {"String", Patt::STRING}, {"Array", Patt::ARRAY}, {"String", Patt::STRING},
@ -151,9 +193,18 @@ ParsingResult parse_patt(std::string_view s) {
return prefix_matcher(s, patts); return prefix_matcher(s, patts);
} }
// Turns an std::any that holds the textual form of a pattern kind into a Patt.
// Throws std::bad_any_cast when `value` does not hold a std::string or when
// parse_patt yields no value for that text.
Patt any_patt_cast(std::any value) {
  // Named local so the text stays alive while the parsing result is in use.
  const std::string text = std::any_cast<std::string>(std::move(value));
  auto parsed = parse_patt(text);
  if (parsed.value.has_value()) {
    return std::any_cast<Patt>(std::move(parsed.value));
  }
  throw std::bad_any_cast{};
}
// --- // ---
ParsingResult parse_var(std::string_view s) { ParsingResult parse_var(std::string_view s) {
std::cout << __func__ << '\n';
static const std::map<std::string, std::function<ValT(std::any &&)>, static const std::map<std::string, std::function<ValT(std::any &&)>,
std::less<>> std::less<>>
vars = { vars = {
@ -183,6 +234,7 @@ ParsingResult parse_var(std::string_view s) {
auto arg_str = std::string{substr_to(s, pos, ' ')}; auto arg_str = std::string{substr_to(s, pos, ' ')};
auto arg_it = vars.find(arg_str); auto arg_it = vars.find(arg_str);
if (arg_it == vars.end()) { if (arg_it == vars.end()) {
std::cout << "can't parse var from " << s << '\n';
return {{}, s}; return {{}, s};
} }
++pos; // '(' ++pos; // '('
@ -190,18 +242,21 @@ ParsingResult parse_var(std::string_view s) {
// NOTE: s_rest starts with ')' // NOTE: s_rest starts with ')'
auto [id, s_rest] = parse_any_val(s.substr(pos)); auto [id, s_rest] = parse_any_val(s.substr(pos));
if (not id.has_value()) { if (not id.has_value()) {
std::cout << "any val: can't parse int from " << s << '\n';
return {{}, s}; return {{}, s};
} }
try { try {
return {arg_it->second(std::move(id)), s_rest.substr(1)}; // skip ')' return {arg_it->second(std::move(id)), s_rest.substr(1)}; // skip ')'
} catch (const std::bad_any_cast &) { } catch (const std::bad_any_cast &) {
std::cout << "bad any cast: can't parse var from " << s << '\n';
return {{}, s}; return {{}, s};
} }
} }
// (_, _) // (_, _)
ParsingResult parse_pair(std::string_view s) { ParsingResult parse_pair(std::string_view s) {
std::cout << __func__ << '\n';
if (s.size() < 2 || s.front() != '(') { if (s.size() < 2 || s.front() != '(') {
return {}; return {};
} }
@ -215,32 +270,43 @@ ParsingResult parse_pair(std::string_view s) {
} }
// [_, ..., _] // [_, ..., _]
// [_, ..., _]
//
// Parses a bracketed sequence of values. Elements are parsed with
// parse_any_val and separated by a two-character separator starting with ','
// (regular arrays) or ';' (scopes), i.e. ", " or "; ".
//
// s            - input; must start with first_symbol
// first_symbol - opening bracket
// last_symbol  - closing bracket
//
// On success the result value holds a std::vector<std::any> with the parsed
// elements and `rest` is the input after last_symbol. On failure the result
// value is empty.
ParsingResult parse_array(std::string_view s, char first_symbol = '[',
                          char last_symbol = ']') {
  std::cout << __func__ << '\n';
  if (s.size() < 2 || s.front() != first_symbol) {
    std::cout << "can't parse array from " << s << '\n';
    return {};
  }

  std::vector<std::any> values;
  ParsingResult res{{}, s.substr(1)}; // skip first_symbol

  while (true) {
    // Input ended before last_symbol: calling front() on an empty view would
    // be undefined behaviour, so report a parsing failure instead.
    if (res.rest.empty()) {
      std::cout << "can't parse array from " << s << '\n';
      return {{}, s};
    }

    const char next = res.rest.front();
    if (next == ',' /*in regular arrays*/ || next == ';' /*in scopes*/) {
      // substr(2) on a one-character view would throw std::out_of_range.
      if (res.rest.size() < 2) {
        std::cout << "can't parse array from " << s << '\n';
        return {{}, s};
      }
      res.rest = res.rest.substr(2); // skip ', '
    } else if (next == last_symbol) {
      break;
    }

    res = parse_any_val(res.rest);
    if (not res.value.has_value()) {
      std::cout << "can't parse array elem from " << s << '\n';
      return {{}, s};
    }
    values.push_back(std::move(res.value));
    res.value = {}; // do not use moved value
  }

  // The loop exits only when rest starts with last_symbol, so substr(1) is
  // safe. The vector is moved into the result instead of copied.
  return {std::move(values), res.rest.substr(1)};
}
// { blab="_"; elab="_" names=[...]; subs=[...]} // { blab="_"; elab="_" names=[...]; subs=[...]}
ParsingResult parse_scope(std::string_view s) { ParsingResult parse_scope(std::string_view s) {
std::cout << __func__ << '\n';
if (s.size() < 2 || s.front() != '{') { if (s.size() < 2 || s.front() != '{') {
std::cout << "can't parse scope from " << s << '\n';
return {}; return {};
} }
@ -286,13 +352,15 @@ ParsingResult parse_scope(std::string_view s) {
res.value = {}; // do not use moved vlue res.value = {}; // do not use moved vlue
} }
return {scope, res.rest.substr(1)}; // skip '}' return {scope, res.rest.substr(3)}; // skip '; }'
} catch (const std::bad_any_cast &) { } catch (const std::bad_any_cast &) {
std::cout << "bad any cast: can't parse int from " << s << '\n';
return {{}, s}; return {{}, s};
} }
} }
ParsingResult parse_any_val(std::string_view s) { ParsingResult parse_any_val(std::string_view s) {
std::cout << __func__ << " with " << s << '\n';
ParsingResult res; ParsingResult res;
if (res = parse_str(s); res.value.has_value()) { if (res = parse_str(s); res.value.has_value()) {
@ -304,12 +372,13 @@ ParsingResult parse_any_val(std::string_view s) {
if (res = parse_bool(s); res.value.has_value()) { if (res = parse_bool(s); res.value.has_value()) {
return res; return res;
} }
if (res = parse_opr(s); res.value.has_value()) { // NOTE: parsed from string later
return res; // if (res = parse_opr(s); res.value.has_value()) {
} // return res;
if (res = parse_patt(s); res.value.has_value()) { // }
return res; // if (res = parse_patt(s); res.value.has_value()) {
} // return res;
// }
if (res = parse_var(s); res.value.has_value()) { if (res = parse_var(s); res.value.has_value()) {
return res; return res;
} }
@ -327,11 +396,11 @@ struct SMInstrBuilder {
public: public:
SMInstrBuilder(SMInstr instr) : instr(instr) {} SMInstrBuilder(SMInstr instr) : instr(instr) {}
std::optional<SMInstr> build() { Result build() {
// TODO: check too many args ?? // TODO: check too many args ??
try { try {
// TODO: check for all args present // TODO: check for all args present
return {std::visit<SMInstr>( // return Result::success(std::visit<SMInstr>( //
utils::multifunc{ utils::multifunc{
// //
[&args = args](SMInstr::PUBLIC x) -> SMInstr { [&args = args](SMInstr::PUBLIC x) -> SMInstr {
@ -347,7 +416,7 @@ public:
return x; return x;
}, },
[&args = args](SMInstr::CLOSURE x) -> SMInstr { [&args = args](SMInstr::CLOSURE x) -> SMInstr {
x.name = std::any_cast<int>(args.at(0)); x.name = std::any_cast<std::string>(args.at(0));
x.closure = any_array_cast<ValT>(args.at(1)); x.closure = any_array_cast<ValT>(args.at(1));
return x; return x;
}, },
@ -374,7 +443,7 @@ public:
[](SMInstr::STA x) -> SMInstr { return x; }, [](SMInstr::STA x) -> SMInstr { return x; },
[](SMInstr::STI x) -> SMInstr { return x; }, [](SMInstr::STI x) -> SMInstr { return x; },
[&args = args](SMInstr::BINOP x) -> SMInstr { [&args = args](SMInstr::BINOP x) -> SMInstr {
x.opr = std::any_cast<Opr>(args.at(0)); x.opr = any_opr_cast(args.at(0));
return x; return x;
}, },
[&args = args](SMInstr::LABEL x) -> SMInstr { [&args = args](SMInstr::LABEL x) -> SMInstr {
@ -417,8 +486,8 @@ public:
return x; return x;
}, },
[&args = args](SMInstr::CALLC x) -> SMInstr { [&args = args](SMInstr::CALLC x) -> SMInstr {
x.n = std::any_cast<int>(args.at(1)); x.n = std::any_cast<int>(args.at(0));
x.tail = std::any_cast<bool>(args.at(2)); x.tail = std::any_cast<bool>(args.at(1));
return x; return x;
}, },
[&args = args](SMInstr::SEXP x) -> SMInstr { [&args = args](SMInstr::SEXP x) -> SMInstr {
@ -439,7 +508,7 @@ public:
return x; return x;
}, },
[&args = args](SMInstr::PATT x) -> SMInstr { [&args = args](SMInstr::PATT x) -> SMInstr {
x.patt = std::any_cast<Patt>(args.at(0)); x.patt = any_patt_cast(args.at(0));
return x; return x;
}, },
[&args = args](SMInstr::LINE x) -> SMInstr { [&args = args](SMInstr::LINE x) -> SMInstr {
@ -456,11 +525,11 @@ public:
// throw std::bad_any_cast{}; // create another error ? // throw std::bad_any_cast{}; // create another error ?
// }, // },
}, },
*instr)}; *instr));
} catch (const std::bad_any_cast &) { } catch (const std::bad_any_cast &) {
return {}; return Result::failure("build: bad any cast");
} catch (const std::out_of_range &) { } catch (const std::out_of_range &) {
return {}; return Result::failure("build: out of range");
} }
} }
@ -471,7 +540,8 @@ private:
std::vector<std::any> args; std::vector<std::any> args;
}; };
std::optional<SMInstr> parse_sm(const std::string &line) { utils::Result<SMInstr> parse_sm(const std::string &line) {
std::cout << __func__ << '\n';
std::unordered_map<std::string, SMInstr> to_instr = { std::unordered_map<std::string, SMInstr> to_instr = {
{"BINOP", SMInstr{SMInstr::BINOP{}}}, {"BINOP", SMInstr{SMInstr::BINOP{}}},
{"CONST", SMInstr{SMInstr::CONST{}}}, {"CONST", SMInstr{SMInstr::CONST{}}},
@ -510,9 +580,14 @@ std::optional<SMInstr> parse_sm(const std::string &line) {
size_t pos = 0; size_t pos = 0;
auto cmd = std::string{substr_to(line, pos, ' ')}; auto cmd = std::string{substr_to(line, pos, ' ')};
if (cmd.empty()) {
cmd = line;
pos = line.size();
}
auto instr_it = to_instr.find(cmd); auto instr_it = to_instr.find(cmd);
if (instr_it == to_instr.end()) { if (instr_it == to_instr.end()) {
return std::nullopt; return Result::failure("instr name not found: `" + cmd + "`");
} }
SMInstrBuilder instr{instr_it->second}; SMInstrBuilder instr{instr_it->second};
@ -522,14 +597,19 @@ std::optional<SMInstr> parse_sm(const std::string &line) {
} }
// (_, ..., _) - args // (_, ..., _) - args
ParsingResult args_res = parse_array({line.data(), line.data() + pos}); ParsingResult args_res =
parse_array({line.data() + pos, line.data() + line.size()}, '(', ')');
if (!args_res.value.has_value()) {
return Result::failure("arguments list parsing error");
}
try { try {
auto args = std::any_cast<std::vector<std::any>>(std::move(args_res.value)); auto args = std::any_cast<std::vector<std::any>>(std::move(args_res.value));
args_res.value = {}; args_res.value = {};
if (not args_res.rest.empty()) { if (not args_res.rest.empty()) {
return std::nullopt; return Result::failure("extra symbols after instr parsed args");
} }
// TODO: put all array at once // TODO: put all array at once
@ -538,7 +618,7 @@ std::optional<SMInstr> parse_sm(const std::string &line) {
} }
args = {}; args = {};
} catch (const std::bad_any_cast &) { } catch (const std::bad_any_cast &) {
return std::nullopt; return Result::failure("bad any cast: parsed argument list is not array");
} }
return instr.build(); return instr.build();