From 696a9c3a1afef4f395263f26a872fbb1cb041d11 Mon Sep 17 00:00:00 2001
From: ProgramSnail
Date: Thu, 20 Jul 2023 14:38:44 +0300
Subject: [PATCH] basic node builders

---
Notes (patch commentary; everything between "---" and the first "diff --git"
is ignored when the patch is applied):
 * Adds builders:: free functions (basic_builders.hpp/.cpp) that turn a
   parser::ParseTree::Node into nodes::Node, nodes::Literal or
   nodes::Identifier values.
 * Renames the ParseTree / ParseTree::Node accessors from PascalCase to
   snake_case.
 * Renames the namespace in docs.hpp from `file` to `nodes`.
 * Makes the (storage, id) constructors of ExpressionProxy and TypeProxy
   private and befriends the matching storage class.
 * Adds names::NameProxy / names::NameStorage and gives Import, Constraint,
   FunctionDefinition, TypeDefinition and TypeclassDefinition constructors
   and members.
 * name_tree.hpp and several statement node classes are still marked
   "IN PROGRESS" in the code.

 include/basic_builders.hpp      |  45 ++++++++
 include/basic_nodes.hpp         |   2 +
 include/docs.hpp                |   4 +-
 include/expression_builders.hpp |   3 +
 include/expression_nodes.hpp    |  17 +--
 include/name_tree.hpp           |  69 ++++++++++++
 include/statement_nodes.hpp     |  97 ++++++++++++++++-
 include/tree_sitter_wrapper.hpp |  44 ++++----
 include/type_nodes.hpp          |   9 +-
 src/basic_builders.cpp          | 186 ++++++++++++++++++++++++++++++++
 src/expression_builders.cpp     |   3 +
 11 files changed, 442 insertions(+), 37 deletions(-)
 create mode 100644 include/basic_builders.hpp
 create mode 100644 include/expression_builders.hpp
 create mode 100644 src/basic_builders.cpp
 create mode 100644 src/expression_builders.cpp

diff --git a/include/basic_builders.hpp b/include/basic_builders.hpp
new file mode 100644
index 0000000..8830d0e
--- /dev/null
+++ b/include/basic_builders.hpp
@@ -0,0 +1,45 @@
+#pragma once
+
+#include "basic_nodes.hpp"
+#include "tree_sitter_wrapper.hpp"
+
+namespace builders {
+
+nodes::Node buildNode(parser::ParseTree::Node parser_node);
+
+// --- literals
+
+nodes::Literal buildFloatNumberLiteral(parser::ParseTree::Node parser_node);
+
+nodes::Literal buildNumberLiteral(parser::ParseTree::Node parser_node);
+
+nodes::Literal buildStringLiteral(parser::ParseTree::Node parser_node);
+
+nodes::Literal buildCharLiteral(parser::ParseTree::Node parser_node);
+
+nodes::Literal buildBoolLiteral(parser::ParseTree::Node parser_node);
+
+nodes::Literal buildUnitLiteral(parser::ParseTree::Node parser_node);
+
+nodes::Literal buildNullLiteral(parser::ParseTree::Node parser_node);
+
+// --- identifiers
+
+nodes::Identifier buildSimpleName(parser::ParseTree::Node parser_node);
+
+nodes::Identifier buildSimpleType(parser::ParseTree::Node parser_node);
+
+nodes::Identifier buildTypeclass(parser::ParseTree::Node parser_node);
+
+nodes::Identifier buildArgumentName(parser::ParseTree::Node parser_node);
+
+nodes::Identifier buildArgumentType(parser::ParseTree::Node parser_node);
+
+// Annotations are used as strings (returned as std::string, not as a node)
+std::string buildAnnotation(parser::ParseTree::Node parser_node);
+
+nodes::Identifier buildOperator(parser::ParseTree::Node parser_node);
+
+nodes::Identifier buildPlaceholder(parser::ParseTree::Node parser_node);
+
+} // namespace builders
diff --git a/include/basic_nodes.hpp b/include/basic_nodes.hpp
index 0d698bd..bcc7da5 100644
--- a/include/basic_nodes.hpp
+++ b/include/basic_nodes.hpp
@@ -7,6 +7,8 @@
 
 namespace nodes {
 
+enum class ReferenceType { REF, IN, OUT, NONE };
+
 class Node {
 public:
   Node(std::pair start_position,
diff --git a/include/docs.hpp b/include/docs.hpp
index 9975da8..4b82cc7 100644
--- a/include/docs.hpp
+++ b/include/docs.hpp
@@ -4,7 +4,7 @@
 #include
 #include
 
-namespace file {
+namespace nodes {
 
 class SymbolDocs {
 public:
@@ -49,4 +49,4 @@ private:
   std::unordered_map annotations_info_;
 };
 
-} // namespace file
+} // namespace nodes
diff --git a/include/expression_builders.hpp b/include/expression_builders.hpp
new file mode 100644
index 0000000..dc2fc60
--- /dev/null
+++ b/include/expression_builders.hpp
@@ -0,0 +1,3 @@
+#pragma once
+
+namespace builders {} // namespace builders
diff --git a/include/expression_nodes.hpp b/include/expression_nodes.hpp
index 3261591..534f374 100644
--- a/include/expression_nodes.hpp
+++ b/include/expression_nodes.hpp
@@ -13,14 +13,19 @@ class Expression;
 class ExpressionStorage;
 
 class ExpressionProxy {
+  friend ExpressionStorage;
+
 public:
-  ExpressionProxy(ExpressionStorage &expression_storage, size_t id)
-      : expression_storage_(expression_storage), id_(id) {}
+  ExpressionProxy() = delete; // a proxy always refers to a storage
 
   Expression *get();
 
   const Expression *get() const;
 
+private:
+  ExpressionProxy(ExpressionStorage &expression_storage, size_t id)
+      : expression_storage_(expression_storage), id_(id) {}
+
 private:
   ExpressionStorage &expression_storage_;
   size_t id_;
@@
-551,13 +556,13 @@ class ExpressionStorage {
   friend ExpressionProxy;
 
 public:
-  ExpressionProxy add_expression(const Expression &type) {
-    storage_.push_back(type);
+  ExpressionProxy add_expression(const Expression &expression) {
+    storage_.push_back(expression);
     return ExpressionProxy(*this, storage_.size() - 1);
   }
 
-  ExpressionProxy add_expression(Expression &&type) {
-    storage_.push_back(std::move(type));
+  ExpressionProxy add_expression(Expression &&expression) {
+    storage_.push_back(std::move(expression));
     return ExpressionProxy(*this, storage_.size() - 1);
   }
 
diff --git a/include/name_tree.hpp b/include/name_tree.hpp
index 0e785b8..66b3422 100644
--- a/include/name_tree.hpp
+++ b/include/name_tree.hpp
@@ -2,10 +2,103 @@
 
 #include "tree_sitter_wrapper.hpp"
 
+#include <functional>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace names {
+
 // IN PROGRESS
 class NameTree {
 public:
+  struct Node {};
+
   NameTree() {}
 
+  // Not implemented yet: nothing is stored and every call reports failure.
+  // NOTE(review): element type of `path` reconstructed as std::string.
+  bool insert_path(const std::vector<std::string> &path, Node node) {
+    (void)path;
+    (void)node;
+    return false; // TODO: insert `node` under `path`
+  }
+
 private:
 };
+
+class NameStorage;
+
+// Handle to one string owned by a NameStorage: a pointer to the storage plus
+// the index of the entry inside it.
+class NameProxy {
+  friend NameStorage;
+
+public:
+  NameProxy(NameStorage *name_storage, size_t id)
+      : name_storage_(name_storage), id_(id) {}
+
+  std::string *get();
+
+  const std::string *get() const;
+
+  // Equal when both proxies name the same entry of the same storage.
+  bool operator==(const NameProxy &other) const {
+    return name_storage_ == other.name_storage_ && id_ == other.id_;
+  }
+
+  // Orders by storage first, then by index. std::less is used because `<` on
+  // pointers to unrelated objects has an unspecified result.
+  bool operator<(const NameProxy &other) const {
+    if (name_storage_ != other.name_storage_) {
+      return std::less<const NameStorage *>{}(name_storage_,
+                                              other.name_storage_);
+    }
+    return id_ < other.id_;
+  }
+
+private:
+  NameStorage *name_storage_;
+  size_t id_;
+};
+
+// Owns the strings that NameProxy handles refer to.
+class NameStorage {
+  friend NameProxy;
+
+public:
+  // Appends a copy of `name` and returns a proxy for the new entry.
+  NameProxy add_expression(const std::string &name) {
+    storage_.push_back(name);
+    return NameProxy(this, storage_.size() - 1);
+  }
+
+  // Moves `name` into the storage and returns a proxy for the new entry.
+  NameProxy add_expression(std::string &&name) {
+    storage_.push_back(std::move(name));
+    return NameProxy(this, storage_.size() - 1);
+  }
+
+private:
+  // Bounds-checked lookup (std::vector::at throws std::out_of_range). The
+  // pointer refers into storage_, so a later add_expression may invalidate it.
+  std::string *get_expression(size_t id) { return &storage_.at(id); }
+
+  const std::string *get_expression(size_t id) const {
+    return &storage_.at(id);
+  }
+
+private:
+  std::vector<std::string> storage_;
+};
+
+// Not implemented yet: the returned path is always empty.
+// Declared inline because the definition lives in a header.
+// NOTE(review): element type reconstructed as std::string -- confirm.
+inline std::vector<std::string> string_to_path(const std::string &str) {
+  (void)str; // TODO: split `str` into path components
+  std::vector<std::string> path;
+  return path;
+} // IN PROGRESS
+
+} // namespace names
diff --git a/include/statement_nodes.hpp b/include/statement_nodes.hpp
index 7f2c3eb..1bdaa58 100644
--- a/include/statement_nodes.hpp
+++ b/include/statement_nodes.hpp
@@ -1,6 +1,9 @@
 #pragma once
 
 #include "basic_nodes.hpp"
+#include "docs.hpp"
+#include "expression_nodes.hpp"
+#include "type_nodes.hpp"
 
 #include
 
@@ -8,20 +11,106 @@
 
 namespace nodes {
 
+// IN PROGRESS: add another constructors ??
 class Import : public Node {
 public:
+  Import(Node node, const Identifier &import_name,
+         const Identifier &module_name, std::vector &&symbols = {})
+      : Node(node), import_name_(import_name), module_name_(module_name),
+        symbols_(std::move(symbols)) {}
+
+  size_t symbols_size() const { return symbols_.size(); }
+
+  std::string *get_symbol(size_t id) { return symbols_.at(id).get(); }
+
+  const std::string *get_symbol(size_t id) const {
+    return symbols_.at(id).get();
+  }
+
 private:
   Identifier import_name_;
   Identifier module_name_;
   std::vector symbols_;
+};
+
+class Constraint : public Node {
+public:
+  Constraint(Node node, ExpressionProxy expression)
+      : Node(node), expression_(expression) {}
+
+  Expression *get_expression() { return expression_.get(); }
+
+  const Expression *get_expression() const { return expression_.get(); }
+
+private:
+  ExpressionProxy expression_;
+};
+
+class FunctionDefinition : public Node {
+public:
+  enum ModifierType {
+    STATIC,
+    LET,
+    VAR,
+  };
+
+  FunctionDefinition(Node node, SymbolDocs &&docs,
+                     std::vector &&constraints,
+                     ModifierType modifier, const Identifier &name,
+                     std::vector &&annotations,
+                     std::vector &&arguments,
+                     std::vector &&reference_types,
+                     std::vector &&types)
+      : Node(node), docs_(std::move(docs)),
+        constraints_(std::move(constraints)), modifier_(modifier), name_(name),
+        annotations_(std::move(annotations)),
arguments_(std::move(arguments)),
+        reference_types_(std::move(reference_types)), types_(std::move(types)) {
+  }
+
+private:
+  SymbolDocs docs_;
+  std::vector constraints_;
+  ModifierType modifier_;
+  Identifier name_;
+  std::vector annotations_;
+  std::vector arguments_;
+  std::vector reference_types_;
+  std::vector types_;
+  // std::vector optional_arguments_; // ??
 }; // IN PROGRESS
 
-class Constraint : public Node {}; // IN PROGRESS
+class TypeDefinition : public Node {
+public:
+  TypeDefinition(Node node, SymbolDocs &&docs, bool is_on_heap,
+                 const Identifier &name, std::vector &&arguments,
+                 VariantType &&type, std::vector &&methods)
+      : Node(node), docs_(std::move(docs)), is_on_heap_(is_on_heap),
+        name_(name), arguments_(std::move(arguments)), type_(std::move(type)),
+        methods_(std::move(methods)) {}
 
-class TypeDefinition : public Node {}; // IN PROGRESS
+private:
+  SymbolDocs docs_;
+  bool is_on_heap_;
+  Identifier name_;
+  std::vector arguments_;
+  VariantType type_; // TupleType is VariantType with one variant
+  std::vector methods_;
+}; // IN PROGRESS
 
-class FunctionDefinition : public Node {}; // IN PROGRESS
+class TypeclassDefinition : public Node {
+public:
+  TypeclassDefinition(Node node, SymbolDocs &&docs, const Identifier &name,
+                      std::vector &&base_typeclasses,
+                      std::vector &&methods)
+      : Node(node), docs_(std::move(docs)), name_(name),
+        base_typeclasses_(std::move(base_typeclasses)),
+        methods_(std::move(methods)) {}
 
-class TypeclassDefinition : public Node {}; // IN PROGRESS
+private:
+  SymbolDocs docs_;
+  Identifier name_;
+  std::vector base_typeclasses_;
+  std::vector methods_;
+}; // IN PROGRESS
 
 } // namespace nodes
diff --git a/include/tree_sitter_wrapper.hpp b/include/tree_sitter_wrapper.hpp
index 2c59942..3480491 100644
--- a/include/tree_sitter_wrapper.hpp
+++ b/include/tree_sitter_wrapper.hpp
@@ -16,63 +16,63 @@ public:
     Node(const TSNode &node, const std::string *source)
         : node_(node), source_(source) {}
 
-    std::string GetType() { return ts_node_type(node_); }
+    std::string get_type() { return ts_node_type(node_); }
 
-    std::pair GetStartPoint() {
+    std::pair get_start_point() { // {row, column}
       TSPoint point = ts_node_start_point(node_);
       return {point.row, point.column};
     }
 
-    std::pair GetEndPoint() {
+    std::pair get_end_point() { // {row, column}
       TSPoint point = ts_node_end_point(node_);
       return {point.row, point.column};
     }
 
-    std::string GetAsSExpression() { return ts_node_string(node_); }
+    std::string get_as_sexpression() { return ts_node_string(node_); }
 
-    std::string GetValue() { // from source
+    std::string get_value() { // from source
       size_t start = ts_node_start_byte(node_);
       size_t end = ts_node_end_byte(node_);
       return source_->substr(start, end - start);
     }
 
-    bool IsNull() { return ts_node_is_null(node_); }
+    bool is_null() { return ts_node_is_null(node_); }
 
-    bool IsNamed() { return ts_node_is_named(node_); }
+    bool is_named() { return ts_node_is_named(node_); }
 
-    bool IsMissing() { return ts_node_is_missing(node_); }
+    bool is_missing() { return ts_node_is_missing(node_); }
 
-    bool IsExtra() { // comments, etc.
+    bool is_extra() { // comments, etc.
       return ts_node_is_extra(node_);
     }
 
-    bool HasError() { return ts_node_has_error(node_); }
+    bool has_error() { return ts_node_has_error(node_); }
 
-    Node NthChild(size_t n) { return Node(ts_node_child(node_, n), source_); }
+    Node nth_child(size_t n) { return Node(ts_node_child(node_, n), source_); }
 
-    size_t ChildCount() { return ts_node_child_count(node_); }
+    size_t child_count() { return ts_node_child_count(node_); }
 
-    Node NthNamedChild(size_t n) {
+    Node nth_named_child(size_t n) {
       return Node(ts_node_named_child(node_, n), source_);
     }
 
-    size_t NamedChildCount() { return ts_node_named_child_count(node_); }
+    size_t named_child_count() { return ts_node_named_child_count(node_); }
 
-    Node ChildByFieldName(const std::string &name) {
+    Node child_by_field_name(const std::string &name) {
       return Node(ts_node_child_by_field_name(node_, name.c_str(), name.size()),
                   source_);
     }
 
-    Node PreviousSibling() {
+    Node previous_sibling() {
       return Node(ts_node_prev_sibling(node_), source_);
     }
 
-    Node PreviousNamedSibling() {
+    Node previous_named_sibling() {
       return Node(ts_node_prev_named_sibling(node_), source_);
     }
 
-    Node NextSibling() { return Node(ts_node_next_sibling(node_), source_); }
+    Node next_sibling() { return Node(ts_node_next_sibling(node_), source_); }
 
-    Node NextNamedSibling() {
+    Node next_named_sibling() {
       return Node(ts_node_next_named_sibling(node_), source_);
     }
@@ -94,12 +94,12 @@ public:
   ParseTree(const ParseTree &parse_tree)
       : tree_(ts_tree_copy(parse_tree.tree_)), source_(parse_tree.source_) {}
 
-  Node GetRoot() const { return Node(ts_tree_root_node(tree_), &source_); }
+  Node get_root() const { return Node(ts_tree_root_node(tree_), &source_); }
 
   ~ParseTree() { ts_tree_delete(tree_); }
 
-  bool IsProperlyParsed() { // TODO: find place
-    return !GetRoot().HasError();
+  bool is_properly_parsed() { // TODO: find place
+    return !get_root().has_error();
   }
 
 private:
diff --git a/include/type_nodes.hpp b/include/type_nodes.hpp
index 97325e5..a77d103 100644
---
a/include/type_nodes.hpp
+++ b/include/type_nodes.hpp
@@ -15,14 +15,17 @@ class Type;
 class TypeStorage;
 
 class TypeProxy {
-public:
-  TypeProxy(TypeStorage &type_storage, size_t id)
-      : type_storage_(type_storage), id_(id) {}
+  friend TypeStorage;
 
+public:
   Type *get();
 
   const Type *get() const;
 
+private:
+  TypeProxy(TypeStorage &type_storage, size_t id)
+      : type_storage_(type_storage), id_(id) {}
+
 private:
   TypeStorage &type_storage_;
   size_t id_;
diff --git a/src/basic_builders.cpp b/src/basic_builders.cpp
new file mode 100644
index 0000000..61151b8
--- /dev/null
+++ b/src/basic_builders.cpp
@@ -0,0 +1,207 @@
+#include "../include/basic_builders.hpp"
+
+#include <optional>
+#include <string>
+
+#include "basic_nodes.hpp"
+
+namespace builders {
+
+namespace utils {
+// Maps the character written after a backslash to the character it stands for
+// (e.g. 'n' -> newline, 's' -> space). Returns std::nullopt when the character
+// has no special meaning, so the caller can keep it unchanged.
+std::optional<char> to_escape_symbol(char symbol) {
+  switch (symbol) {
+  case 'a':
+    return '\a';
+  case 'b':
+    return '\b';
+  case 'e':
+    return '\x1b'; // ESC; '\e' is a compiler extension, not standard C++
+  case 'f':
+    return '\f';
+  case 'n':
+    return '\n';
+  case 'r':
+    return '\r';
+  case 't':
+    return '\t';
+  case 'v':
+    return '\v';
+  case 's':
+    return ' ';
+  default:
+    return std::nullopt;
+  }
+}
+
+} // namespace utils
+
+// Builds a position-only node from the parser node's start and end points.
+nodes::Node buildNode(parser::ParseTree::Node parser_node) {
+  return nodes::Node(parser_node.get_start_point(),
+                     parser_node.get_end_point());
+}
+
+// --- literals
+
+// Converts the node text with std::stod (throws if the text is not a number).
+nodes::Literal buildFloatNumberLiteral(parser::ParseTree::Node parser_node) {
+  return nodes::Literal(buildNode(parser_node),
+                        std::stod(parser_node.get_value()));
+}
+
+// Converts the node text with std::stoll (throws if the text is not a number).
+nodes::Literal buildNumberLiteral(parser::ParseTree::Node parser_node) {
+  return nodes::Literal(buildNode(parser_node),
+                        std::stoll(parser_node.get_value()));
+}
+
+// Strips the surrounding double quotes and decodes backslash escapes.
+// Relies on the grammar to guarantee that both quotes are present.
+nodes::Literal buildStringLiteral(parser::ParseTree::Node parser_node) {
+  std::string literal = parser_node.get_value();
+
+  // remove " from both sides ("string")
+  literal.pop_back();
+  literal = literal.substr(1, literal.size() - 1);
+
+  // replace escape sequences with escape characters, in place:
+  // i is the read position, j the write position (j <= i)
+  bool is_escape_symbol = false;
+  size_t j = 0;
+  for (size_t i = 0; i < literal.size(); ++i, ++j) {
+    if (literal[i] == '\\' && !is_escape_symbol) {
+      is_escape_symbol = true;
+    } else {
+      if (is_escape_symbol) {
+        --j; // overwrite the slot that was counted for the backslash
+        auto maybe_escape_symbol = utils::to_escape_symbol(literal[i]);
+        if (maybe_escape_symbol.has_value()) {
+          literal[j] = maybe_escape_symbol.value();
+        } else {
+          literal[j] = literal[i];
+        }
+        // an escape covers exactly one character; without this reset every
+        // later character would be decoded as if it were escaped too
+        is_escape_symbol = false;
+      } else {
+        if (j != i) {
+          literal[j] = literal[i];
+        }
+      }
+    }
+  }
+  literal.resize(j);
+
+  return nodes::Literal(buildNode(parser_node), literal);
+}
+
+// Strips the doubled single quotes and decodes an optional backslash escape.
+// NOTE(review): assumes the ''x'' spelling described below -- confirm against
+// the grammar.
+nodes::Literal buildCharLiteral(parser::ParseTree::Node parser_node) {
+  std::string literal = parser_node.get_value();
+
+  // remove '' from both sides (''x''): two characters from the back, then two
+  // from the front
+  literal.pop_back();
+  literal.pop_back();
+  literal = literal.substr(2);
+
+  char ch = '\0';
+
+  // replace escape sequence with escape character
+  if (literal[0] == '\\') {
+    auto maybe_escape_symbol = utils::to_escape_symbol(literal.back());
+    if (maybe_escape_symbol.has_value()) {
+      ch = maybe_escape_symbol.value();
+    } else {
+      ch = literal.back();
+    }
+  } else {
+    ch = literal.back();
+  }
+
+  return nodes::Literal(buildNode(parser_node), ch);
+}
+
+// True only for the exact text "true"; any other text yields false.
+nodes::Literal buildBoolLiteral(parser::ParseTree::Node parser_node) {
+  std::string literal = parser_node.get_value();
+
+  return nodes::Literal(buildNode(parser_node), literal == "true");
+}
+
+nodes::Literal buildUnitLiteral(parser::ParseTree::Node parser_node) {
+  return nodes::Literal(buildNode(parser_node), nodes::unit{});
+}
+
+nodes::Literal buildNullLiteral(parser::ParseTree::Node parser_node) {
+  return nodes::Literal(buildNode(parser_node), nodes::null{});
+}
+
+// --- identifiers
+
+nodes::Identifier buildSimpleName(parser::ParseTree::Node parser_node) {
+  return nodes::Identifier(buildNode(parser_node),
+                           nodes::Identifier::SIMPLE_NAME,
+                           parser_node.get_value());
+}
+
+nodes::Identifier buildSimpleType(parser::ParseTree::Node parser_node) {
+  return nodes::Identifier(buildNode(parser_node),
+                           nodes::Identifier::SIMPLE_TYPE,
+                           parser_node.get_value());
+}
+
+nodes::Identifier buildTypeclass(parser::ParseTree::Node parser_node) {
+  return nodes::Identifier(buildNode(parser_node), nodes::Identifier::TYPECLASS,
+                           parser_node.get_value());
+}
+
+nodes::Identifier buildArgumentName(parser::ParseTree::Node parser_node) {
+  return nodes::Identifier(buildNode(parser_node),
+                           nodes::Identifier::ARGUMENT_NAME,
+                           parser_node.get_value());
+}
+
+nodes::Identifier buildArgumentType(parser::ParseTree::Node parser_node) {
+  return nodes::Identifier(buildNode(parser_node),
+                           nodes::Identifier::ARGUMENT_TYPE,
+                           parser_node.get_value());
+}
+
+// Annotations are used as strings; the first character of the node text is
+// dropped.
+std::string buildAnnotation(parser::ParseTree::Node parser_node) {
+  std::string identifier = parser_node.get_value();
+
+  // identifier.size() > 0 by parsing convention
+  identifier = identifier.substr(1, identifier.size() - 1);
+
+  return identifier;
+}
+
+// Strips trailing '.' characters unless the operator itself starts with '.'.
+nodes::Identifier buildOperator(parser::ParseTree::Node parser_node) {
+  std::string identifier = parser_node.get_value();
+
+  if (identifier.size() > 0 && identifier.front() != '.') {
+    // for not point only identifiers
+    while (identifier.size() > 0 && identifier.back() == '.') {
+      identifier.pop_back();
+    }
+  }
+
+  return nodes::Identifier(buildNode(parser_node), nodes::Identifier::OPERATOR,
+                           identifier);
+}
+
+nodes::Identifier buildPlaceholder(parser::ParseTree::Node parser_node) {
+  return nodes::Identifier(buildNode(parser_node),
+                           nodes::Identifier::PLACEHOLDER, "_"); // text is always "_"; the node's own text is not read
+}
+
+} // namespace builders
diff --git a/src/expression_builders.cpp b/src/expression_builders.cpp
new file mode 100644
index 0000000..5936b2c
--- /dev/null
+++ b/src/expression_builders.cpp
@@ -0,0 +1,3 @@
+#include "../include/expression_builders.hpp"
+
+namespace builders {} // namespace builders (no definitions yet)