# Dynamic Source Code Parser - Technical Specification

## Core Interfaces

### Parser Interface

```cpp
// parser_interface.h
#pragma once

#include <memory>
#include <string>
#include <vector>

// Forward declarations
class ASTNode;
struct SourceLocation;

/**
 * @brief Interface for language-specific parsers
 *
 * Each language parser implements this interface to provide
 * language-agnostic access to parsed source code structures.
 */
class ILanguageParser {
public:
    virtual ~ILanguageParser() = default;

    /**
     * @brief Parse a single source file and return AST nodes
     * @param filePath Path to the source file
     * @return Vector of AST nodes representing the file contents
     */
    virtual std::vector<std::unique_ptr<ASTNode>> parseFile(const std::string& filePath) = 0;

    /**
     * @brief Get the programming language name
     * @return String identifier for the language
     */
    virtual std::string getLanguage() const = 0;

    /**
     * @brief Get file extensions supported by this parser
     * @return Vector of file extensions (e.g., {".cpp", ".h", ".hpp"})
     */
    virtual std::vector<std::string> getFileExtensions() const = 0;

    /**
     * @brief Check if a file can be parsed by this parser
     * @param filePath Path to check
     * @return true if the file extension is supported
     */
    virtual bool canParse(const std::string& filePath) const = 0;
};
```

### AST Node Hierarchy

```cpp
// ast_nodes.h
#pragma once

#include <memory>
#include <string>
#include <utility>
#include <vector>

/**
 * @brief Source code location information
 */
struct SourceLocation {
    std::string filePath;
    int line = 0;
    int column = 0;
    int endLine = 0;
    int endColumn = 0;
};

/**
 * @brief Documentation comment with tags
 */
struct DocumentationComment {
    enum class Type {
        DOXYLINE,   ///< Line comment introduced by "///"
        DOXYBLOCK,  ///< Block comment opened with "/**"
        JAVADOC,    ///< JavaDoc-style block comment
        UNKNOWN
    };

    Type type = Type::UNKNOWN;
    std::string rawContent;
    std::string brief;
    std::string detailed;
    std::vector<std::string> tags;  // @param, @return, @brief, etc.
    SourceLocation location;
};

/**
 * @brief Base class for all AST nodes
 *
 * A node owns its children through std::unique_ptr, so a tree is destroyed
 * together with its root.
 */
class ASTNode {
public:
    enum class NodeType {
        NAMESPACE,
        CLASS,
        STRUCT,
        FUNCTION,
        METHOD,
        VARIABLE,
        ENUM,
        ENUM_VALUE,
        TEMPLATE,
        MODULE,
        UNKNOWN
    };

protected:
    NodeType nodeType;
    std::string name;
    DocumentationComment documentation;
    SourceLocation location;
    std::vector<std::unique_ptr<ASTNode>> children;

public:
    explicit ASTNode(NodeType type) : nodeType(type) {}
    virtual ~ASTNode() = default;

    // Getters
    NodeType getType() const { return nodeType; }
    const std::string& getName() const { return name; }
    const DocumentationComment& getDocumentation() const { return documentation; }
    const SourceLocation& getLocation() const { return location; }
    const std::vector<std::unique_ptr<ASTNode>>& getChildren() const { return children; }

    // Setters
    void setName(const std::string& n) { name = n; }
    void setDocumentation(const DocumentationComment& doc) { documentation = doc; }
    void setLocation(const SourceLocation& loc) { location = loc; }

    // Child management: takes ownership of the child node.
    void addChild(std::unique_ptr<ASTNode> child) {
        children.push_back(std::move(child));
    }

    // Visitor pattern support
    virtual void accept(class ASTVisitor& visitor) = 0;
};

/**
 * @brief Namespace or module node
 */
class NamespaceNode : public ASTNode {
public:
    NamespaceNode() : ASTNode(NodeType::NAMESPACE) {}

    void accept(ASTVisitor& visitor) override;
};

/**
 * @brief Class or struct definition
 */
class ClassNode : public ASTNode {
private:
    bool isStruct = false;
    std::vector<std::string> baseClasses;
    std::vector<std::string> templateParameters;
    std::string accessSpecifier;  // public, protected, private

public:
    ClassNode() : ASTNode(NodeType::CLASS) {}

    bool getIsStruct() const { return isStruct; }
    void setIsStruct(bool s) { isStruct = s; }

    const std::vector<std::string>& getBaseClasses() const { return baseClasses; }
    void addBaseClass(const std::string& base) { baseClasses.push_back(base); }

    const std::vector<std::string>& getTemplateParameters() const { return templateParameters; }
    void addTemplateParameter(const std::string& param) {
        templateParameters.push_back(param);
    }

    const std::string& getAccessSpecifier() const { return accessSpecifier; }
    void setAccessSpecifier(const std::string& access) { accessSpecifier = access; }

    void accept(ASTVisitor& visitor) override;
};

/**
 * @brief Function or method parameter
 */
struct Parameter {
    std::string type;
    std::string name;
    std::string defaultValue;
    DocumentationComment documentation;
};

/**
 * @brief Function or method definition
 */
class FunctionNode : public ASTNode {
private:
    std::string returnType;
    std::vector<Parameter> parameters;
    bool isStatic = false;
    bool isVirtual = false;
    bool isConst = false;
    bool isConstructor = false;
    bool isDestructor = false;
    std::string accessSpecifier;

public:
    FunctionNode() : ASTNode(NodeType::FUNCTION) {}

    const std::string& getReturnType() const { return returnType; }
    void setReturnType(const std::string& type) { returnType = type; }

    const std::vector<Parameter>& getParameters() const { return parameters; }
    void addParameter(const Parameter& param) { parameters.push_back(param); }

    bool getIsStatic() const { return isStatic; }
    void setIsStatic(bool s) { isStatic = s; }

    bool getIsVirtual() const { return isVirtual; }
    void setIsVirtual(bool v) { isVirtual = v; }

    bool getIsConst() const { return isConst; }
    void setIsConst(bool c) { isConst = c; }

    bool getIsConstructor() const { return isConstructor; }
    void setIsConstructor(bool c) { isConstructor = c; }

    bool getIsDestructor() const { return isDestructor; }
    void setIsDestructor(bool d) { isDestructor = d; }

    const std::string& getAccessSpecifier() const { return accessSpecifier; }
    void setAccessSpecifier(const std::string& access) { accessSpecifier = access; }

    void accept(ASTVisitor& visitor) override;
};

/**
 * @brief Variable or field declaration
 */
class VariableNode : public ASTNode {
private:
    std::string type;
    std::string defaultValue;
    bool isStatic = false;
    bool isConst = false;
    std::string accessSpecifier;

public:
    VariableNode() : ASTNode(NodeType::VARIABLE) {}

    const std::string& getType() const { return type; }
    void setType(const std::string& t) { type = t; }

    const std::string& getDefaultValue() const { return defaultValue; }
    void setDefaultValue(const std::string& value) { defaultValue = value; }

    bool getIsStatic() const { return isStatic; }
    void setIsStatic(bool s) { isStatic = s; }

    bool getIsConst() const { return isConst; }
    void setIsConst(bool c) { isConst = c; }

    const std::string& getAccessSpecifier() const { return accessSpecifier; }
    void setAccessSpecifier(const std::string& access) { accessSpecifier = access; }

    void accept(ASTVisitor& visitor) override;
};

/**
 * @brief Enum definition
 */
class EnumNode : public ASTNode {
private:
    bool isEnumClass = false;
    std::string underlyingType;
    std::vector<std::pair<std::string, std::string>> values;  // name, value

public:
    EnumNode() : ASTNode(NodeType::ENUM) {}

    bool getIsEnumClass() const { return isEnumClass; }
    void setIsEnumClass(bool e) { isEnumClass = e; }

    const std::string& getUnderlyingType() const { return underlyingType; }
    void setUnderlyingType(const std::string& type) { underlyingType = type; }

    const std::vector<std::pair<std::string, std::string>>& getValues() const { return values; }
    void addValue(const std::string& name, const std::string& value = "") {
        values.emplace_back(name, value);
    }

    void accept(ASTVisitor& visitor) override;
};
```

### Visitor Pattern Implementation

```cpp
// visitor.h
#pragma once

#include "ast_nodes.h"

/**
 * @brief Visitor interface for AST traversal
 */
class ASTVisitor {
public:
    virtual ~ASTVisitor() = default;

    // Node visitors
    virtual void visit(NamespaceNode& node) = 0;
    virtual void visit(ClassNode& node) = 0;
    virtual void visit(FunctionNode& node) = 0;
    virtual void visit(VariableNode& node) = 0;
    virtual void visit(EnumNode& node) = 0;

    // Default traversal behavior: dispatch accept() on every direct child.
    virtual void traverse(ASTNode& node) {
        for (const auto& child : node.getChildren()) {
            child->accept(*this);
        }
    }
};

// Visitor method implementations: each node is visited first, then its
// children are traversed.
inline void NamespaceNode::accept(ASTVisitor& visitor) {
    visitor.visit(*this);
    visitor.traverse(*this);
}

inline void ClassNode::accept(ASTVisitor& visitor) {
    visitor.visit(*this);
    visitor.traverse(*this);
}

inline void FunctionNode::accept(ASTVisitor& visitor) {
    visitor.visit(*this);
    visitor.traverse(*this);
}

inline void VariableNode::accept(ASTVisitor& visitor) {
    visitor.visit(*this);
    visitor.traverse(*this);
}

inline void EnumNode::accept(ASTVisitor& visitor) {
    visitor.visit(*this);
    visitor.traverse(*this);
}
```

### Documentation Generator

```cpp
// documentation_generator.h
#pragma once

#include "ast_nodes.h"
#include "visitor.h"

#include <filesystem>
#include <fstream>
#include <string>
#include <vector>

/**
 * @brief Configuration for documentation generation
 */
struct DocumentationConfig {
    std::string outputDirectory = "docs";
    std::string indexTitle = "Documentation";
    std::string generatorName = "docs-parser";
    bool generateIndex = true;
    bool generateModuleIndexes = true;
    bool includePrivate = false;
    bool includeSourceLinks = false;
    std::string sourceRootPath;
    std::string theme = "material";  // material, github, etc.
};

/**
 * @brief Markdown documentation generator
 */
class DocumentationGenerator : public ASTVisitor {
private:
    DocumentationConfig config;
    std::filesystem::path outputPath;
    std::vector<std::string> moduleStack;
    std::ofstream currentFile;
    std::string currentContent;

    struct ModuleInfo {
        std::string name;
        std::string path;
        std::vector<std::string> classes;
        std::vector<std::string> functions;
        std::vector<std::string> submodules;
    };

    std::vector<ModuleInfo> modules;

public:
    explicit DocumentationGenerator(const DocumentationConfig& cfg);

    /**
     * @brief Generate documentation from parsed AST
     * @param nodes Root AST nodes from parser
     */
    void generate(const std::vector<std::unique_ptr<ASTNode>>& nodes);

    // Visitor methods
    void visit(NamespaceNode& node) override;
    void visit(ClassNode& node) override;
    void visit(FunctionNode& node) override;
    void visit(VariableNode& node) override;
    void visit(EnumNode& node) override;

private:
    // File operations
    void createDirectoryStructure();
    void openFile(const std::filesystem::path& filePath);
    void closeFile();
    void writeContent(const std::string& content);

    // Generation methods
    void generateIndex(const std::vector<std::unique_ptr<ASTNode>>& nodes);
    void generateModuleIndexes();
    void generateClassFile(ClassNode& classNode);
    void generateNamespaceFile(NamespaceNode& namespaceNode);

    // Content formatting
    // NOTE(review): the element type of `functions` was lost in extraction;
    // FunctionNode* (non-owning) is a reconstruction - confirm against the implementation.
    std::string generateFunctionTable(const std::vector<FunctionNode*>& functions);
    std::string generateFunctionDetails(const std::vector<FunctionNode*>& functions);
    std::string generateClassHeader(ClassNode& classNode);
    std::string generateInheritanceDiagram(ClassNode& classNode);
    std::string formatDocumentation(const DocumentationComment& doc);
    std::string formatParameters(const std::vector<Parameter>& params);
    std::string escapeMarkdown(const std::string& text);

    // Utility methods
    std::string getModulePath(const std::string& moduleName);
    std::string getAnchorLink(const std::string& name);
    std::string getMaterialIcon(const std::string& type);
    std::string formatFunctionSignature(const FunctionNode& func);

    // Module management
    void enterModule(const std::string& moduleName);
    void exitModule();
    ModuleInfo& getCurrentModule();
    void addToCurrentModule(const std::string& type, const std::string& name);
};
```

### C++ Parser Implementation Sketch

```cpp
// cpp_parser.h
#pragma once

#include "ast_nodes.h"  // SourceLocation, DocumentationComment, Parameter and node classes are used by value
#include "parser_interface.h"

#include <cstddef>
#include <map>
#include <memory>
#include <regex>
#include <string>
#include <utility>
#include <vector>

/**
 * @brief C++ language parser
 */
class CppParser : public ILanguageParser {
private:
    // Token types
    enum class TokenType {
        KEYWORD,
        IDENTIFIER,
        SYMBOL,
        COMMENT,
        PREPROCESSOR,
        STRING_LITERAL,
        NUMBER,
        WHITESPACE,
        UNKNOWN
    };

    struct Token {
        TokenType type;
        std::string value;
        SourceLocation location;
    };

    // Comment tracking
    struct CommentInfo {
        DocumentationComment comment;
        bool attached = false;
    };

    // NOTE(review): the key's template arguments were lost in extraction;
    // (line, column) is a reconstruction - confirm against the implementation.
    std::map<std::pair<int, int>, CommentInfo> pendingComments;

public:
    std::vector<std::unique_ptr<ASTNode>> parseFile(const std::string& filePath) override;

    std::string getLanguage() const override { return "cpp"; }

    std::vector<std::string> getFileExtensions() const override {
        return {".cpp", ".h", ".hpp", ".cxx", ".cc", ".c"};
    }

    bool canParse(const std::string& filePath) const override;

private:
    // Tokenization
    std::vector<Token> tokenize(const std::string& source, const std::string& filePath);

    // Parsing methods
    std::vector<std::unique_ptr<ASTNode>> parseTokens(const std::vector<Token>& tokens);
    std::unique_ptr<ClassNode> parseClass(const std::vector<Token>& tokens, size_t& pos);
    std::unique_ptr<FunctionNode> parseFunction(const std::vector<Token>& tokens, size_t& pos);
    std::unique_ptr<NamespaceNode> parseNamespace(const std::vector<Token>& tokens, size_t& pos);
    std::unique_ptr<VariableNode> parseVariable(const std::vector<Token>& tokens, size_t& pos);
    std::unique_ptr<EnumNode> parseEnum(const std::vector<Token>& tokens, size_t& pos);

    // Comment processing
    void extractComments(const std::string& source);
    DocumentationComment parseComment(const std::string& content, SourceLocation location);
    void attachComments(ASTNode* node, int line);

    // Utility methods
    bool isKeyword(const std::string& token) const;
    bool isTypeQualifier(const std::string& token) const;
    bool isAccessSpecifier(const std::string& token) const;
    std::string parseQualifiedName(const std::vector<Token>& tokens, size_t& pos);
    Parameter parseParameter(const std::vector<Token>& tokens, size_t& pos);

    // Regular expressions
    // std::regex has no "dotall" flag and '.' never matches a line terminator
    // in its ECMAScript grammar, so block comments use [\s\S]*? to span lines
    // and line comments use [^\n]* to stop at the end of the line.
    std::regex commentRegex = std::regex(R"((/\*\*[\s\S]*?\*/|///[^\n]*|//![^\n]*))");
    std::regex doxygenTagRegex = std::regex(R"(@(\w+)(?:\s+(.+?))?(?=\s+@|\s*$))");
};
```

## Usage Example

```cpp
// main.cpp
#include "cpp_parser.h"
#include "documentation_generator.h"

#include <algorithm>
#include <filesystem>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

namespace {

/**
 * @brief Recursively collect source files below a directory
 * @param root Directory to scan
 * @param extensions Accepted extensions without the leading dot (e.g., "cpp")
 * @return Paths of all regular files whose extension is listed
 * @throws std::filesystem::filesystem_error if root cannot be iterated
 */
std::vector<std::string> findSourceFiles(const std::string& root,
                                         const std::vector<std::string>& extensions) {
    std::vector<std::string> files;
    for (const auto& entry : std::filesystem::recursive_directory_iterator(root)) {
        if (!entry.is_regular_file()) {
            continue;
        }
        // path::extension() includes the leading dot, the requested list does not.
        const std::string ext = entry.path().extension().string();
        const bool wanted = std::any_of(
            extensions.begin(), extensions.end(),
            [&ext](const std::string& candidate) { return ext == "." + candidate; });
        if (wanted) {
            files.push_back(entry.path().string());
        }
    }
    return files;
}

}  // namespace

int main(int argc, char* argv[]) {
    if (argc < 3) {
        std::cerr << "Usage: docs-parser <source_dir> <output_dir>" << '\n';
        return 1;
    }

    // Configure documentation generation
    DocumentationConfig config;
    config.outputDirectory = argv[2];
    config.indexTitle = "My Project Documentation";
    config.generatorName = "docs-parser";

    // Create C++ parser
    CppParser parser;
    DocumentationGenerator generator(config);

    // Parse source files
    std::vector<std::unique_ptr<ASTNode>> allNodes;
    std::vector<std::string> sourceFiles = findSourceFiles(argv[1], {"cpp", "h", "hpp"});

    for (const auto& file : sourceFiles) {
        auto nodes = parser.parseFile(file);
        for (auto& node : nodes) {
            allNodes.push_back(std::move(node));
        }
    }

    // Generate documentation
    generator.generate(allNodes);

    std::cout << "Documentation generated in " << config.outputDirectory << '\n';
    return 0;
}
```

This technical specification provides the foundation for implementing a robust, extensible documentation parser with clean separation of concerns and a pluggable architecture for supporting multiple programming languages.