// parser_interface.h
#pragma once
#include <memory>
#include <vector>
#include <string>
// Forward declarations
class ASTNode;
struct SourceLocation;
/**
 * @brief Abstract contract that every language-specific parser fulfils.
 *
 * Concrete parsers derive from this class so callers can reach parsed
 * source code structures in a language-agnostic way.
 */
class ILanguageParser {
public:
    /// Virtual so an implementation can be destroyed through this interface.
    virtual ~ILanguageParser() = default;

    /**
     * @brief Parse one source file into AST nodes.
     * @param filePath Path to the source file
     * @return Owning vector of AST nodes representing the file contents
     */
    virtual auto parseFile(const std::string& filePath)
        -> std::vector<std::unique_ptr<ASTNode>> = 0;

    /**
     * @brief Name of the programming language this parser handles.
     * @return String identifier for the language
     */
    virtual auto getLanguage() const -> std::string = 0;

    /**
     * @brief File extensions supported by this parser.
     * @return Vector of file extensions (e.g., {".cpp", ".h", ".hpp"})
     */
    virtual auto getFileExtensions() const -> std::vector<std::string> = 0;

    /**
     * @brief Report whether this parser can handle the given file.
     * @param filePath Path to check
     * @return true if the file extension is supported
     */
    virtual auto canParse(const std::string& filePath) const -> bool = 0;
};
// ast_nodes.h
#pragma once
#include <string>
#include <vector>
#include <memory>
#include <optional>
/**
 * @brief Where a construct sits inside a source file.
 *
 * A default-constructed location has an empty path and every numeric
 * field set to 0.
 */
struct SourceLocation {
    std::string filePath{};  ///< Path of the file the construct belongs to
    int line{0};             ///< Line on which the construct starts
    int column{0};           ///< Column at which the construct starts
    int endLine{0};          ///< Line on which the construct ends
    int endColumn{0};        ///< Column at which the construct ends
};
/**
 * @brief Documentation comment with tags
 *
 * Holds the text of one documentation comment together with the pieces
 * extracted from it and the place it was found.
 */
struct DocumentationComment {
    /// Comment syntax the documentation was written in.
    enum class Type {
        DOXYLINE,   ///< `///` line comment
        DOXYBLOCK,  ///< `/** ... */` block comment
        JAVADOC,    ///< JavaDoc style block comment
        UNKNOWN     ///< Style not determined; the default
    };

    Type type = Type::UNKNOWN;      ///< Detected comment style
    std::string rawContent;         ///< Comment text as captured (presumably unprocessed)
    std::string brief;              ///< Brief description
    std::string detailed;           ///< Detailed description
    std::vector<std::string> tags;  ///< @param, @return, @brief, etc.
    SourceLocation location;        ///< Where the comment appears in the source
};
/**
 * @brief Base class for all AST nodes
 *
 * Stores the data shared by every node kind (type tag, name, attached
 * documentation, source location) and owns the node's children.
 * Concrete node classes implement accept() for visitor dispatch.
 */
class ASTNode {
public:
    /// Category tag assigned to a node at construction.
    enum class NodeType {
        NAMESPACE,
        CLASS,
        STRUCT,
        FUNCTION,
        METHOD,
        VARIABLE,
        ENUM,
        ENUM_VALUE,
        TEMPLATE,
        MODULE,
        UNKNOWN
    };

protected:
    NodeType nodeType;                               ///< Fixed by the constructor
    std::string name;                                ///< Identifier of the construct
    DocumentationComment documentation;              ///< Comment attached to the construct
    SourceLocation location;                         ///< Position in the source file
    std::vector<std::unique_ptr<ASTNode>> children;  ///< Owned child nodes, in insertion order

public:
    /// @param type Category tag reported by getType().
    explicit ASTNode(NodeType type) : nodeType(type) {}
    virtual ~ASTNode() = default;

    // Getters
    NodeType getType() const { return nodeType; }
    const std::string& getName() const { return name; }
    const DocumentationComment& getDocumentation() const { return documentation; }
    const SourceLocation& getLocation() const { return location; }
    const std::vector<std::unique_ptr<ASTNode>>& getChildren() const { return children; }

    // Setters
    void setName(const std::string& n) { name = n; }
    void setDocumentation(const DocumentationComment& doc) { documentation = doc; }
    void setLocation(const SourceLocation& loc) { location = loc; }

    // Child management
    /**
     * @brief Take ownership of a child node and append it to this node.
     * @param child Node to append; a null pointer is ignored so that every
     *              stored child can be dereferenced safely during traversal.
     */
    void addChild(std::unique_ptr<ASTNode> child) {
        if (child) {
            children.push_back(std::move(child));
        }
    }

    // Visitor pattern support
    /// Dispatch this node to the visitor; implemented by each concrete node class.
    virtual void accept(class ASTVisitor& visitor) = 0;
};
/**
 * @brief AST node for a namespace or module.
 */
class NamespaceNode : public ASTNode {
public:
    /// Creates a node tagged NodeType::NAMESPACE.
    NamespaceNode() : ASTNode{NodeType::NAMESPACE} {}

    /// Visitor entry point; only declared here.
    void accept(ASTVisitor& visitor) override;
};
/**
 * @brief AST node for a class or struct definition.
 *
 * The node is always tagged NodeType::CLASS; whether it represents a
 * struct is recorded separately in the isStruct flag.
 */
class ClassNode : public ASTNode {
public:
    /// Creates a node tagged NodeType::CLASS with the struct flag cleared.
    ClassNode() : ASTNode{NodeType::CLASS} {}

    /// @return true when the node has been marked as a struct.
    bool getIsStruct() const { return isStruct; }
    void setIsStruct(bool s) { isStruct = s; }

    /// @return Base class names in the order they were added.
    const std::vector<std::string>& getBaseClasses() const { return baseClasses; }
    void addBaseClass(const std::string& base) { baseClasses.emplace_back(base); }

    /// @return Template parameters in the order they were added.
    const std::vector<std::string>& getTemplateParameters() const { return templateParameters; }
    void addTemplateParameter(const std::string& param) { templateParameters.emplace_back(param); }

    /// @return Access specifier text; empty until one is set.
    const std::string& getAccessSpecifier() const { return accessSpecifier; }
    void setAccessSpecifier(const std::string& access) { accessSpecifier = access; }

    /// Visitor entry point; only declared here.
    void accept(ASTVisitor& visitor) override;

private:
    bool isStruct{false};
    std::vector<std::string> baseClasses{};
    std::vector<std::string> templateParameters{};
    std::string accessSpecifier{};  // public, protected, private
};
/**
 * @brief One parameter of a function or method.
 */
struct Parameter {
    std::string type{};                    ///< Parameter type as text
    std::string name{};                    ///< Parameter name
    std::string defaultValue{};            ///< Default argument text; empty unless set
    DocumentationComment documentation{};  ///< Documentation attached to the parameter
};
/**
* @brief Function or method definition
*/
class FunctionNode : public ASTNode {
private:
std::string returnType;
std::vector<Parameter> parameters;
bool isStatic = false;
bool isVirtual = false;
bool isConst = false;
bool isConstructor = false;
bool isDestructor = false;
std::string accessSpecifier;
public:
FunctionNode() : ASTNode(NodeType::FUNCTION) {}
const std::string& getReturnType() const { return returnType; }
void setReturnType(const std::string& type) { returnType = type; }
const std::vector<Parameter>& getParameters() const { return parameters; }
void addParameter(const Parameter& param) { parameters.push_back(param); }
bool getIsStatic() const { return isStatic; }
void setIsStatic(bool s) { isStatic = s; }
bool getIsVirtual() const { return isVirtual; }
void setIsVirtual(bool v) { isVirtual = v; }
bool getIsConst() const { return isConst; }
void setIsConst(bool c) { isConst = c; }
bool getIsConstructor() const { return isConstructor; }
void setIsConstructor(bool c) { isConstructor = c; }
bool getIsDestructor() const { return isDestructor; }
void setIsDestructor(bool d) { isDestructor = d; }
const std::string& getAccessSpecifier() const { return accessSpecifier; }
void setAccessSpecifier(const std::string& access) { accessSpecifier = access; }
void accept(ASTVisitor& visitor) override;
};
/**
* @brief Variable or field declaration
*/
class VariableNode : public ASTNode {
private:
std::string type;
std::string defaultValue;
bool isStatic = false;
bool isConst = false;
std::string accessSpecifier;
public:
VariableNode() : ASTNode(NodeType::VARIABLE) {}
const std::string& getType() const { return type; }
void setType(const std::string& t) { type = t; }
const std::string& getDefaultValue() const { return defaultValue; }
void setDefaultValue(const std::string& value) { defaultValue = value; }
bool getIsStatic() const { return isStatic; }
void setIsStatic(bool s) { isStatic = s; }
bool getIsConst() const { return isConst; }
void setIsConst(bool c) { isConst = c; }
const std::string& getAccessSpecifier() const { return accessSpecifier; }
void setAccessSpecifier(const std::string& access) { accessSpecifier = access; }
void accept(ASTVisitor& visitor) override;
};
/**
 * @brief AST node for an enum definition.
 *
 * Nodes are always tagged NodeType::ENUM. Enumerators are kept as
 * (name, value) string pairs in the order they were added.
 */
class EnumNode : public ASTNode {
public:
    /// Creates a node tagged NodeType::ENUM.
    EnumNode() : ASTNode{NodeType::ENUM} {}

    /// @return true when the node has been marked as an `enum class`.
    bool getIsEnumClass() const { return isEnumClass; }
    void setIsEnumClass(bool e) { isEnumClass = e; }

    /// @return Underlying type text; empty until one is set.
    const std::string& getUnderlyingType() const { return underlyingType; }
    void setUnderlyingType(const std::string& type) { underlyingType = type; }

    /// @return All recorded enumerators as (name, value) pairs.
    const std::vector<std::pair<std::string, std::string>>& getValues() const { return values; }

    /**
     * @brief Append one enumerator.
     * @param name  Enumerator name
     * @param value Enumerator value as text; defaults to the empty string
     */
    void addValue(const std::string& name, const std::string& value = "") {
        values.push_back(std::pair<std::string, std::string>(name, value));
    }

    /// Visitor entry point; only declared here.
    void accept(ASTVisitor& visitor) override;

private:
    bool isEnumClass{false};
    std::string underlyingType{};
    std::vector<std::pair<std::string, std::string>> values{};  // name, value
};
// visitor.h
#pragma once
#include "ast_nodes.h"
/**
 * @brief Visitor interface for AST traversal
 *
 * Implementations provide one visit() overload per concrete node class.
 * traverse() supplies the default way of descending into a node's
 * children and may be overridden.
 */
class ASTVisitor {
public:
    virtual ~ASTVisitor() = default;

    // Node visitors
    virtual void visit(NamespaceNode& node) = 0;
    virtual void visit(ClassNode& node) = 0;
    virtual void visit(FunctionNode& node) = 0;
    virtual void visit(VariableNode& node) = 0;
    virtual void visit(EnumNode& node) = 0;

    /**
     * @brief Default traversal behavior: dispatch every child to this visitor.
     * @param node Node whose children are visited, in stored order
     *
     * Null child pointers are skipped instead of dereferenced, so a tree
     * containing an empty slot does not crash the traversal.
     */
    virtual void traverse(ASTNode& node) {
        for (const auto& child : node.getChildren()) {
            if (child) {
                child->accept(*this);
            }
        }
    }
};
// Visitor method implementations
/// Hands this namespace node to the visitor, then lets the visitor walk its children.
inline void NamespaceNode::accept(ASTVisitor& visitor) {
    NamespaceNode& self = *this;
    visitor.visit(self);     // node-specific callback first
    visitor.traverse(self);  // then descend into the children
}
/// Hands this class node to the visitor, then lets the visitor walk its children.
inline void ClassNode::accept(ASTVisitor& visitor) {
    ClassNode& self = *this;
    visitor.visit(self);     // node-specific callback first
    visitor.traverse(self);  // then descend into the children
}
/// Hands this function node to the visitor, then lets the visitor walk its children.
inline void FunctionNode::accept(ASTVisitor& visitor) {
    FunctionNode& self = *this;
    visitor.visit(self);     // node-specific callback first
    visitor.traverse(self);  // then descend into the children
}
/// Hands this variable node to the visitor, then lets the visitor walk its children.
inline void VariableNode::accept(ASTVisitor& visitor) {
    VariableNode& self = *this;
    visitor.visit(self);     // node-specific callback first
    visitor.traverse(self);  // then descend into the children
}
/// Hands this enum node to the visitor, then lets the visitor walk its children.
inline void EnumNode::accept(ASTVisitor& visitor) {
    EnumNode& self = *this;
    visitor.visit(self);     // node-specific callback first
    visitor.traverse(self);  // then descend into the children
}
// documentation_generator.h
#pragma once
#include "ast_nodes.h"
#include "visitor.h"
#include <string>
#include <vector>
#include <fstream>
#include <filesystem>
/**
 * @brief Settings that control documentation generation.
 *
 * Every field has a default, so a default-constructed object is usable
 * as-is and callers override only what they need.
 */
struct DocumentationConfig {
    std::string outputDirectory{"docs"};      ///< Directory for generated output
    std::string indexTitle{"Documentation"};  ///< Title for the index
    std::string generatorName{"docs-parser"}; ///< Name of the generating tool
    bool generateIndex{true};                 ///< Index generation toggle
    bool generateModuleIndexes{true};         ///< Per-module index generation toggle
    bool includePrivate{false};               ///< Private-member inclusion toggle
    bool includeSourceLinks{false};           ///< Source-link inclusion toggle
    std::string sourceRootPath{};             ///< Source root; empty by default
    std::string theme{"material"};            // material, github, etc.
};
/**
 * @brief Markdown documentation generator, implemented as an ASTVisitor.
 *
 * NOTE(review): only declarations are visible here. The member
 * descriptions below are read off names and signatures and should be
 * confirmed against the implementation file.
 */
class DocumentationGenerator : public ASTVisitor {
private:
    DocumentationConfig config;            // generation settings
    std::filesystem::path outputPath;      // presumably derived from config.outputDirectory
    std::vector<std::string> moduleStack;  // presumably the modules currently entered
    std::ofstream currentFile;             // output stream for the file being written
    std::string currentContent;            // presumably buffered text for the current file

    /// Bookkeeping record for one module.
    struct ModuleInfo {
        std::string name;
        std::string path;
        std::vector<std::string> classes;
        std::vector<std::string> functions;
        std::vector<std::string> submodules;
    };
    std::vector<ModuleInfo> modules;

public:
    /// @param cfg Settings controlling generation.
    explicit DocumentationGenerator(const DocumentationConfig& cfg);

    /**
     * @brief Produce documentation from a parsed AST.
     * @param nodes Root AST nodes from parser
     */
    void generate(const std::vector<std::unique_ptr<ASTNode>>& nodes);

    // ASTVisitor overrides, one per concrete node class
    void visit(NamespaceNode& node) override;
    void visit(ClassNode& node) override;
    void visit(FunctionNode& node) override;
    void visit(VariableNode& node) override;
    void visit(EnumNode& node) override;

private:
    // File operations
    void createDirectoryStructure();
    void openFile(const std::filesystem::path& filePath);
    void closeFile();
    void writeContent(const std::string& content);

    // Generation methods
    void generateIndex(const std::vector<std::unique_ptr<ASTNode>>& nodes);
    void generateModuleIndexes();
    void generateClassFile(ClassNode& classNode);
    void generateNamespaceFile(NamespaceNode& namespaceNode);

    // Content formatting (each returns the produced text)
    std::string generateFunctionTable(const std::vector<FunctionNode*>& functions);
    std::string generateFunctionDetails(const std::vector<FunctionNode*>& functions);
    std::string generateClassHeader(ClassNode& classNode);
    std::string generateInheritanceDiagram(ClassNode& classNode);
    std::string formatDocumentation(const DocumentationComment& doc);
    std::string formatParameters(const std::vector<Parameter>& params);
    std::string escapeMarkdown(const std::string& text);

    // Utility methods
    std::string getModulePath(const std::string& moduleName);
    std::string getAnchorLink(const std::string& name);
    std::string getMaterialIcon(const std::string& type);
    std::string formatFunctionSignature(const FunctionNode& func);

    // Module management
    void enterModule(const std::string& moduleName);
    void exitModule();
    ModuleInfo& getCurrentModule();
    void addToCurrentModule(const std::string& type, const std::string& name);
};
// cpp_parser.h
#pragma once
#include "parser_interface.h"
#include <regex>
#include <map>
/**
 * @brief C++ language parser
 *
 * ILanguageParser implementation that reports the language "cpp".
 * Only getLanguage() and getFileExtensions() are defined inline; the
 * other members are declared here and implemented elsewhere.
 */
class CppParser : public ILanguageParser {
private:
    // Token types
    enum class TokenType {
        KEYWORD,
        IDENTIFIER,
        SYMBOL,
        COMMENT,
        PREPROCESSOR,
        STRING_LITERAL,
        NUMBER,
        WHITESPACE,
        UNKNOWN
    };

    /// One lexical token together with its source position.
    struct Token {
        TokenType type = TokenType::UNKNOWN;  // never left indeterminate
        std::string value;
        SourceLocation location;
    };

    // Comment tracking
    struct CommentInfo {
        DocumentationComment comment;
        bool attached = false;  // starts false; presumably set once the comment is attached to a node
    };
    // NOTE(review): the key is presumably a (line, column) position - confirm in the implementation.
    std::map<std::pair<int, int>, CommentInfo> pendingComments;

public:
    std::vector<std::unique_ptr<ASTNode>> parseFile(const std::string& filePath) override;

    /// @return The fixed language identifier "cpp".
    std::string getLanguage() const override { return "cpp"; }

    /// @return Supported extensions, each including the leading dot.
    std::vector<std::string> getFileExtensions() const override {
        return {".cpp", ".h", ".hpp", ".cxx", ".cc", ".c"};
    }

    bool canParse(const std::string& filePath) const override;

private:
    // Tokenization
    std::vector<Token> tokenize(const std::string& source, const std::string& filePath);

    // Parsing methods; each takes the token stream and a position updated in place
    std::vector<std::unique_ptr<ASTNode>> parseTokens(const std::vector<Token>& tokens);
    std::unique_ptr<ClassNode> parseClass(const std::vector<Token>& tokens, size_t& pos);
    std::unique_ptr<FunctionNode> parseFunction(const std::vector<Token>& tokens, size_t& pos);
    std::unique_ptr<NamespaceNode> parseNamespace(const std::vector<Token>& tokens, size_t& pos);
    std::unique_ptr<VariableNode> parseVariable(const std::vector<Token>& tokens, size_t& pos);
    std::unique_ptr<EnumNode> parseEnum(const std::vector<Token>& tokens, size_t& pos);

    // Comment processing
    void extractComments(const std::string& source);
    DocumentationComment parseComment(const std::string& content, SourceLocation location);
    void attachComments(ASTNode* node, int line);

    // Utility methods
    bool isKeyword(const std::string& token) const;
    bool isTypeQualifier(const std::string& token) const;
    bool isAccessSpecifier(const std::string& token) const;
    std::string parseQualifiedName(const std::vector<Token>& tokens, size_t& pos);
    Parameter parseParameter(const std::vector<Token>& tokens, size_t& pos);

    // Regular expressions
    // Matches a `/** ... */` block, or a `///` or `//!` comment up to the end
    // of its line. <regex> has no "dotall" option, so the block body uses
    // [\s\S] to span newlines and the line forms use [^\n]* instead of
    // relying on `$` matching at line ends.
    std::regex commentRegex = std::regex(R"((/\*\*[\s\S]*?\*/|///[^\n]*|//![^\n]*))");
    // Matches one `@tag` with optional text, stopping before the next
    // whitespace-separated `@` or at the end of the input.
    std::regex doxygenTagRegex = std::regex(R"(@(\w+)(?:\s+(.+?))?(?=\s+@|\s*$))");
};
// main.cpp
#include <exception>
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "cpp_parser.h"
#include "documentation_generator.h"
/**
 * @brief Command-line entry point: parse source files and generate documentation.
 *
 * Usage: docs-parser <input> <output>
 *
 * @param argc Argument count; at least 3 is required.
 * @param argv argv[1] is handed to findSourceFiles() as the input,
 *             argv[2] becomes the output directory.
 * @return 0 on success; 1 when arguments are missing or when parsing or
 *         generation throws an exception.
 */
int main(int argc, char* argv[]) {
    if (argc < 3) {
        std::cerr << "Usage: docs-parser <input> <output>" << '\n';
        return 1;
    }

    // Configure documentation generation
    DocumentationConfig config;
    config.outputDirectory = argv[2];
    config.indexTitle = "My Project Documentation";
    config.generatorName = "docs-parser";

    try {
        // Create C++ parser
        CppParser parser;
        DocumentationGenerator generator(config);

        // Parse source files
        std::vector<std::unique_ptr<ASTNode>> allNodes;
        // NOTE(review): these extensions carry no leading dot, whereas
        // CppParser::getFileExtensions() returns ".cpp", ".h", ... - confirm
        // which form findSourceFiles() expects.
        std::vector<std::string> sourceFiles = findSourceFiles(argv[1], {"cpp", "h", "hpp"});
        for (const auto& file : sourceFiles) {
            auto nodes = parser.parseFile(file);
            for (auto& node : nodes) {
                allNodes.push_back(std::move(node));
            }
        }

        // Generate documentation
        generator.generate(allNodes);
    } catch (const std::exception& ex) {
        // Report the failure and exit with an error code instead of
        // letting the exception terminate the process.
        std::cerr << "docs-parser: " << ex.what() << '\n';
        return 1;
    }

    std::cout << "Documentation generated in " << config.outputDirectory << '\n';
    return 0;
}
This technical specification provides the foundation for implementing a robust, extensible documentation parser with clean separation of concerns and a pluggable architecture for supporting multiple programming languages.