technical-specification.md 16 KB

# Dynamic Source Code Parser - Technical Specification

## Core Interfaces

### Parser Interface

// parser_interface.h
#pragma once

#include <memory>
#include <vector>
#include <string>

// Forward declarations
class ASTNode;
struct SourceLocation;

/**
 * @brief Abstract contract fulfilled by every language-specific parser
 *
 * A concrete parser overrides the four pure-virtual members below so
 * that callers can consume parsed source structures without depending
 * on the language that produced them.
 */
class ILanguageParser {
public:
    /** @brief Virtual so an implementation can be destroyed through this interface */
    virtual ~ILanguageParser() = default;

    /**
     * @brief Parse one source file into AST nodes
     * @param filePath Path to the source file
     * @return AST nodes representing the file contents (owned by the returned vector)
     */
    virtual auto parseFile(const std::string& filePath)
        -> std::vector<std::unique_ptr<ASTNode>> = 0;

    /**
     * @brief Name of the programming language handled by this parser
     * @return String identifier for the language
     */
    virtual auto getLanguage() const -> std::string = 0;

    /**
     * @brief File extensions this parser supports
     * @return Vector of file extensions (e.g., {".cpp", ".h", ".hpp"})
     */
    virtual auto getFileExtensions() const -> std::vector<std::string> = 0;

    /**
     * @brief Tell whether this parser can handle a given file
     * @param filePath Path to check
     * @return true if the file extension is supported
     */
    virtual auto canParse(const std::string& filePath) const -> bool = 0;
};

### AST Node Hierarchy

// ast_nodes.h
#pragma once

#include <string>
#include <vector>
#include <memory>
#include <optional>

/**
 * @brief Position of a construct inside a source file
 *
 * Plain aggregate: the path is empty and every numeric field is 0 until
 * a producer fills it in.
 */
struct SourceLocation {
    std::string filePath{};  // file the location refers to
    int line{0};             // starting line
    int column{0};           // starting column
    int endLine{0};          // ending line
    int endColumn{0};        // ending column
};

/**
 * @brief Documentation comment with tags
 *
 * Aggregate describing one documentation comment: which comment syntax
 * it used, its text fields, its tags, and where it was found.
 */
struct DocumentationComment {
    /** @brief Comment syntax the documentation was written in */
    enum class Type {
        DOXYLINE,    ///< "///" line comment
        DOXYBLOCK,   ///< "/** ... */" block comment
        JAVADOC,     ///< JavaDoc style
        UNKNOWN      ///< default until a style has been assigned
    };

    Type type = Type::UNKNOWN;
    std::string rawContent;
    std::string brief;
    std::string detailed;
    std::vector<std::string> tags;  // @param, @return, @brief, etc.
    SourceLocation location;        // where the comment sits in the source
};

class ASTVisitor;  // only used by reference here; the class itself is declared in visitor.h

/**
 * @brief Common base of every node in the parsed syntax tree
 *
 * Stores the data shared by all node kinds (kind tag, name,
 * documentation comment, source location) and owns its child nodes
 * through std::unique_ptr.
 */
class ASTNode {
public:
    /** @brief Tag naming the kind of construct a node represents */
    enum class NodeType {
        NAMESPACE,
        CLASS,
        STRUCT,
        FUNCTION,
        METHOD,
        VARIABLE,
        ENUM,
        ENUM_VALUE,
        TEMPLATE,
        MODULE,
        UNKNOWN
    };

    /** @brief Create a node of kind @p type; every other field is default-constructed */
    explicit ASTNode(NodeType type) : nodeType(type) {}

    virtual ~ASTNode() = default;

    // --- Read access -------------------------------------------------------
    NodeType getType() const {
        return nodeType;
    }
    const std::string& getName() const {
        return name;
    }
    const DocumentationComment& getDocumentation() const {
        return documentation;
    }
    const SourceLocation& getLocation() const {
        return location;
    }
    const std::vector<std::unique_ptr<ASTNode>>& getChildren() const {
        return children;
    }

    // --- Write access (each setter stores a copy of its argument) ----------
    void setName(const std::string& n) {
        name = n;
    }
    void setDocumentation(const DocumentationComment& doc) {
        documentation = doc;
    }
    void setLocation(const SourceLocation& loc) {
        location = loc;
    }

    /** @brief Append @p child to this node's children, taking ownership of it */
    void addChild(std::unique_ptr<ASTNode> child) {
        children.emplace_back(std::move(child));
    }

    /** @brief Visitor entry point; each concrete node type supplies its own override */
    virtual void accept(ASTVisitor& visitor) = 0;

protected:
    NodeType nodeType;
    std::string name;
    DocumentationComment documentation;
    SourceLocation location;
    std::vector<std::unique_ptr<ASTNode>> children;
};

/**
 * @brief Namespace or module node
 *
 * Adds no data of its own; the constructor tags the node as
 * NodeType::NAMESPACE.
 */
class NamespaceNode : public ASTNode {
public:
    NamespaceNode()
        : ASTNode(NodeType::NAMESPACE) {}

    /** @brief Visitor entry point; defined out of line */
    void accept(ASTVisitor& visitor) override;
};

/**
 * @brief Class or struct definition
 *
 * NOTE(review): the constructor always tags the node NodeType::CLASS and
 * setIsStruct() only flips the flag, so NodeType::STRUCT is never assigned
 * here — confirm whether consumers are expected to check getIsStruct().
 */
class ClassNode : public ASTNode {
public:
    ClassNode()
        : ASTNode(NodeType::CLASS) {}

    // struct-vs-class flag
    bool getIsStruct() const {
        return isStruct;
    }
    void setIsStruct(bool s) {
        isStruct = s;
    }

    // base classes, kept in insertion order
    const std::vector<std::string>& getBaseClasses() const {
        return baseClasses;
    }
    void addBaseClass(const std::string& base) {
        baseClasses.emplace_back(base);
    }

    // template parameters, kept in insertion order
    const std::vector<std::string>& getTemplateParameters() const {
        return templateParameters;
    }
    void addTemplateParameter(const std::string& param) {
        templateParameters.emplace_back(param);
    }

    // access specifier, stored as free-form text
    const std::string& getAccessSpecifier() const {
        return accessSpecifier;
    }
    void setAccessSpecifier(const std::string& access) {
        accessSpecifier = access;
    }

    /** @brief Visitor entry point; defined out of line */
    void accept(ASTVisitor& visitor) override;

private:
    bool isStruct{false};
    std::vector<std::string> baseClasses;
    std::vector<std::string> templateParameters;
    std::string accessSpecifier;  // public, protected, private
};

/**
 * @brief One parameter of a function or method
 *
 * Plain aggregate; every field starts empty.
 */
struct Parameter {
    std::string type{};                    // parameter type as text
    std::string name{};                    // parameter name
    std::string defaultValue{};            // default argument as text
    DocumentationComment documentation{};  // documentation attached to the parameter
};

/**
 * @brief Function or method definition
 */
class FunctionNode : public ASTNode {
private:
    std::string returnType;
    std::vector<Parameter> parameters;
    bool isStatic = false;
    bool isVirtual = false;
    bool isConst = false;
    bool isConstructor = false;
    bool isDestructor = false;
    std::string accessSpecifier;
    
public:
    FunctionNode() : ASTNode(NodeType::FUNCTION) {}
    
    const std::string& getReturnType() const { return returnType; }
    void setReturnType(const std::string& type) { returnType = type; }
    
    const std::vector<Parameter>& getParameters() const { return parameters; }
    void addParameter(const Parameter& param) { parameters.push_back(param); }
    
    bool getIsStatic() const { return isStatic; }
    void setIsStatic(bool s) { isStatic = s; }
    
    bool getIsVirtual() const { return isVirtual; }
    void setIsVirtual(bool v) { isVirtual = v; }
    
    bool getIsConst() const { return isConst; }
    void setIsConst(bool c) { isConst = c; }
    
    bool getIsConstructor() const { return isConstructor; }
    void setIsConstructor(bool c) { isConstructor = c; }
    
    bool getIsDestructor() const { return isDestructor; }
    void setIsDestructor(bool d) { isDestructor = d; }
    
    const std::string& getAccessSpecifier() const { return accessSpecifier; }
    void setAccessSpecifier(const std::string& access) { accessSpecifier = access; }
    
    void accept(ASTVisitor& visitor) override;
};

/**
 * @brief Variable or field declaration
 */
class VariableNode : public ASTNode {
private:
    std::string type;
    std::string defaultValue;
    bool isStatic = false;
    bool isConst = false;
    std::string accessSpecifier;
    
public:
    VariableNode() : ASTNode(NodeType::VARIABLE) {}
    
    const std::string& getType() const { return type; }
    void setType(const std::string& t) { type = t; }
    
    const std::string& getDefaultValue() const { return defaultValue; }
    void setDefaultValue(const std::string& value) { defaultValue = value; }
    
    bool getIsStatic() const { return isStatic; }
    void setIsStatic(bool s) { isStatic = s; }
    
    bool getIsConst() const { return isConst; }
    void setIsConst(bool c) { isConst = c; }
    
    const std::string& getAccessSpecifier() const { return accessSpecifier; }
    void setAccessSpecifier(const std::string& access) { accessSpecifier = access; }
    
    void accept(ASTVisitor& visitor) override;
};

/**
 * @brief Enum definition
 *
 * Records whether the enum is an `enum class`, its underlying type as
 * text, and its enumerators as (name, value) string pairs in insertion
 * order. The constructor tags the node as NodeType::ENUM.
 */
class EnumNode : public ASTNode {
public:
    EnumNode()
        : ASTNode(NodeType::ENUM) {}

    // scoped-enum flag
    bool getIsEnumClass() const {
        return isEnumClass;
    }
    void setIsEnumClass(bool e) {
        isEnumClass = e;
    }

    // underlying type, stored as text
    const std::string& getUnderlyingType() const {
        return underlyingType;
    }
    void setUnderlyingType(const std::string& type) {
        underlyingType = type;
    }

    // enumerators
    const std::vector<std::pair<std::string, std::string>>& getValues() const {
        return values;
    }

    /**
     * @brief Append one enumerator
     * @param name  Enumerator name
     * @param value Enumerator value as text; defaults to an empty string
     */
    void addValue(const std::string& name, const std::string& value = "") {
        values.push_back(std::pair<std::string, std::string>(name, value));
    }

    /** @brief Visitor entry point; defined out of line */
    void accept(ASTVisitor& visitor) override;

private:
    bool isEnumClass{false};
    std::string underlyingType;
    std::vector<std::pair<std::string, std::string>> values; // name, value
};

### Visitor Pattern Implementation

// visitor.h
#pragma once

#include "ast_nodes.h"

/**
 * @brief Visitor interface for AST traversal
 *
 * Implementations provide one visit() overload per concrete node class.
 * traverse() supplies the default way of descending into a node's
 * children and may be overridden.
 */
class ASTVisitor {
public:
    virtual ~ASTVisitor() = default;
    
    // Node visitors: one overload per concrete node class
    virtual void visit(NamespaceNode& node) = 0;
    virtual void visit(ClassNode& node) = 0;
    virtual void visit(FunctionNode& node) = 0;
    virtual void visit(VariableNode& node) = 0;
    virtual void visit(EnumNode& node) = 0;
    
    /**
     * @brief Default traversal: call accept(*this) on each child of @p node
     * @param node Node whose children are visited, in stored order
     *
     * ASTNode::addChild() stores whatever std::unique_ptr it is given,
     * including an empty one, so empty slots are skipped here instead of
     * being dereferenced.
     */
    virtual void traverse(ASTNode& node) {
        for (const auto& child : node.getChildren()) {
            if (child) {
                child->accept(*this);
            }
        }
    }
};

// Visitor method implementations
/** @brief Hand this node to the visitor's NamespaceNode overload, then let the visitor traverse it */
inline void NamespaceNode::accept(ASTVisitor& visitor) {
    NamespaceNode& self = *this;
    visitor.visit(self);     // overload picked by the static type of self
    visitor.traverse(self);
}

/** @brief Hand this node to the visitor's ClassNode overload, then let the visitor traverse it */
inline void ClassNode::accept(ASTVisitor& visitor) {
    ClassNode& self = *this;
    visitor.visit(self);     // overload picked by the static type of self
    visitor.traverse(self);
}

/** @brief Hand this node to the visitor's FunctionNode overload, then let the visitor traverse it */
inline void FunctionNode::accept(ASTVisitor& visitor) {
    FunctionNode& self = *this;
    visitor.visit(self);     // overload picked by the static type of self
    visitor.traverse(self);
}

/** @brief Hand this node to the visitor's VariableNode overload, then let the visitor traverse it */
inline void VariableNode::accept(ASTVisitor& visitor) {
    VariableNode& self = *this;
    visitor.visit(self);     // overload picked by the static type of self
    visitor.traverse(self);
}

/** @brief Hand this node to the visitor's EnumNode overload, then let the visitor traverse it */
inline void EnumNode::accept(ASTVisitor& visitor) {
    EnumNode& self = *this;
    visitor.visit(self);     // overload picked by the static type of self
    visitor.traverse(self);
}

### Documentation Generator

// documentation_generator.h
#pragma once

#include "ast_nodes.h"
#include "visitor.h"
#include <string>
#include <vector>
#include <fstream>
#include <filesystem>

/**
 * @brief Settings consumed by the documentation generator
 *
 * Plain aggregate; a default-constructed instance carries the values
 * shown in the member initializers below.
 */
struct DocumentationConfig {
    std::string outputDirectory{"docs"};
    std::string indexTitle{"Documentation"};
    std::string generatorName{"docs-parser"};
    bool generateIndex{true};
    bool generateModuleIndexes{true};
    bool includePrivate{false};
    bool includeSourceLinks{false};
    std::string sourceRootPath{};
    std::string theme{"material"};  // material, github, etc.
};

/**
 * @brief Markdown documentation generator
 *
 * An ASTVisitor that is constructed from a DocumentationConfig and driven
 * through generate(). Only declarations appear here; every member
 * function is defined elsewhere.
 */
class DocumentationGenerator : public ASTVisitor {
public:
    /** @brief Build a generator from the given configuration */
    explicit DocumentationGenerator(const DocumentationConfig& cfg);

    /**
     * @brief Generate documentation from parsed AST
     * @param nodes Root AST nodes from parser
     */
    void generate(const std::vector<std::unique_ptr<ASTNode>>& nodes);

    // ASTVisitor overrides, one per concrete node class
    void visit(NamespaceNode& node) override;
    void visit(ClassNode& node) override;
    void visit(FunctionNode& node) override;
    void visit(VariableNode& node) override;
    void visit(EnumNode& node) override;

private:
    /** @brief Per-module bookkeeping: name, path and the names recorded for it */
    struct ModuleInfo {
        std::string name;
        std::string path;
        std::vector<std::string> classes;
        std::vector<std::string> functions;
        std::vector<std::string> submodules;
    };

    // --- File operations ---------------------------------------------------
    void createDirectoryStructure();
    void openFile(const std::filesystem::path& filePath);
    void closeFile();
    void writeContent(const std::string& content);

    // --- Generation --------------------------------------------------------
    void generateIndex(const std::vector<std::unique_ptr<ASTNode>>& nodes);
    void generateModuleIndexes();
    void generateClassFile(ClassNode& classNode);
    void generateNamespaceFile(NamespaceNode& namespaceNode);

    // --- Content formatting (each returns the produced text) ---------------
    std::string generateFunctionTable(const std::vector<FunctionNode*>& functions);
    std::string generateFunctionDetails(const std::vector<FunctionNode*>& functions);
    std::string generateClassHeader(ClassNode& classNode);
    std::string generateInheritanceDiagram(ClassNode& classNode);
    std::string formatDocumentation(const DocumentationComment& doc);
    std::string formatParameters(const std::vector<Parameter>& params);
    std::string escapeMarkdown(const std::string& text);

    // --- Utilities ---------------------------------------------------------
    std::string getModulePath(const std::string& moduleName);
    std::string getAnchorLink(const std::string& name);
    std::string getMaterialIcon(const std::string& type);
    std::string formatFunctionSignature(const FunctionNode& func);

    // --- Module management -------------------------------------------------
    void enterModule(const std::string& moduleName);
    void exitModule();
    ModuleInfo& getCurrentModule();
    void addToCurrentModule(const std::string& type, const std::string& name);

    // --- State (declaration order is kept because it fixes the order of
    //     member initialisation) ---------------------------------------------
    DocumentationConfig config;
    std::filesystem::path outputPath;
    std::vector<std::string> moduleStack;
    std::ofstream currentFile;
    std::string currentContent;
    std::vector<ModuleInfo> modules;
};

## C++ Parser Implementation Sketch

// cpp_parser.h
#pragma once

#include "parser_interface.h"
#include <regex>
#include <map>

/**
 * @brief C++ language parser
 *
 * ILanguageParser implementation for C++ sources. Only parseFile(),
 * getLanguage(), getFileExtensions() and canParse() are public; the
 * tokenizer, the per-construct parse helpers and the comment handling
 * are private and defined elsewhere.
 *
 * NOTE(review): Token and CommentInfo hold SourceLocation and
 * DocumentationComment by value and the parse* helpers name the concrete
 * node classes, so the full definitions from ast_nodes.h must be visible
 * before this class; parser_interface.h only forward-declares ASTNode
 * and SourceLocation.
 */
class CppParser : public ILanguageParser {
private:
    /** @brief Lexical category assigned to a token */
    enum class TokenType {
        KEYWORD,
        IDENTIFIER,
        SYMBOL,
        COMMENT,
        PREPROCESSOR,
        STRING_LITERAL,
        NUMBER,
        WHITESPACE,
        UNKNOWN
    };
    
    /** @brief One token: category, text and source location */
    struct Token {
        TokenType type;
        std::string value;
        SourceLocation location;
    };
    
    /** @brief A documentation comment plus a flag recording whether it has been attached */
    struct CommentInfo {
        DocumentationComment comment;
        bool attached = false;
    };
    
    // Comments waiting to be attached, keyed by a pair of ints.
    // NOTE(review): the meaning of the key pair is not defined in this
    // header — presumably a line range or line/column; confirm.
    std::map<std::pair<int, int>, CommentInfo> pendingComments;
    
public:
    std::vector<std::unique_ptr<ASTNode>> parseFile(const std::string& filePath) override;

    /** @brief Language identifier: always "cpp" */
    std::string getLanguage() const override { return "cpp"; }

    /** @brief Extensions handled by this parser, each with its leading dot */
    std::vector<std::string> getFileExtensions() const override {
        return {".cpp", ".h", ".hpp", ".cxx", ".cc", ".c"};
    }

    bool canParse(const std::string& filePath) const override;
    
private:
    // Tokenization
    std::vector<Token> tokenize(const std::string& source, const std::string& filePath);
    
    // Parsing methods; the per-construct helpers take the token stream and
    // a position passed by non-const reference
    std::vector<std::unique_ptr<ASTNode>> parseTokens(const std::vector<Token>& tokens);
    std::unique_ptr<ClassNode> parseClass(const std::vector<Token>& tokens, size_t& pos);
    std::unique_ptr<FunctionNode> parseFunction(const std::vector<Token>& tokens, size_t& pos);
    std::unique_ptr<NamespaceNode> parseNamespace(const std::vector<Token>& tokens, size_t& pos);
    std::unique_ptr<VariableNode> parseVariable(const std::vector<Token>& tokens, size_t& pos);
    std::unique_ptr<EnumNode> parseEnum(const std::vector<Token>& tokens, size_t& pos);
    
    // Comment processing
    void extractComments(const std::string& source);
    DocumentationComment parseComment(const std::string& content, SourceLocation location);
    void attachComments(ASTNode* node, int line);
    
    // Utility methods
    bool isKeyword(const std::string& token) const;
    bool isTypeQualifier(const std::string& token) const;
    bool isAccessSpecifier(const std::string& token) const;
    std::string parseQualifiedName(const std::vector<Token>& tokens, size_t& pos);
    Parameter parseParameter(const std::vector<Token>& tokens, size_t& pos);
    
    // Regular expressions
    //
    // commentRegex matches a "/** ... */" block or a "///" / "//!" line
    // comment. std::regex has no `dotall` flag, and in the default
    // ECMAScript grammar '.' never matches a line terminator while '$'
    // only matches at the end of input. The block form therefore uses
    // [\s\S] to span lines, and the line forms use [^\r\n]* to stop at the
    // end of their own line.
    std::regex commentRegex = std::regex(R"((/\*\*[\s\S]*?\*/|///[^\r\n]*|//![^\r\n]*))");
    // doxygenTagRegex matches "@tag" with an optional argument; the
    // lookahead ends the argument before the next "@tag" or at end of input.
    std::regex doxygenTagRegex = std::regex(R"(@(\w+)(?:\s+(.+?))?(?=\s+@|\s*$))");
};

## Usage Example

// main.cpp
#include "cpp_parser.h"
#include "documentation_generator.h"

#include <algorithm>
#include <filesystem>
#include <iostream>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
#include <vector>

int main(int argc, char* argv[]) {
    if (argc < 3) {
        std::cerr << "Usage: docs-parser <input> <output>" << std::endl;
        return 1;
    }
    
    // Configure documentation generation
    DocumentationConfig config;
    config.outputDirectory = argv[2];
    config.indexTitle = "My Project Documentation";
    config.generatorName = "docs-parser";
    
    // Create C++ parser
    CppParser parser;
    DocumentationGenerator generator(config);
    
    // Parse source files
    std::vector<std::unique_ptr<ASTNode>> allNodes;
    std::vector<std::string> sourceFiles = findSourceFiles(argv[1], {"cpp", "h", "hpp"});
    
    for (const auto& file : sourceFiles) {
        auto nodes = parser.parseFile(file);
        for (auto& node : nodes) {
            allNodes.push_back(std::move(node));
        }
    }
    
    // Generate documentation
    generator.generate(allNodes);
    
    std::cout << "Documentation generated in " << config.outputDirectory << std::endl;
    return 0;
}

This technical specification provides the foundation for implementing a robust, extensible documentation parser with clean separation of concerns and a pluggable architecture for supporting multiple programming languages.