quick-start-implementation.md 18 KB

Quick Start Implementation Guide

Project Setup Commands

# Create project structure
mkdir docs-parser && cd docs-parser
mkdir -p src/{core,parsers,ast,utils} include tests examples config docs

# Initialize CMake project
cat > CMakeLists.txt << 'EOF'
cmake_minimum_required(VERSION 3.16)
project(DocsParser VERSION 1.0.0 LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Include directories
include_directories(include)

# Find required packages
find_package(GTest REQUIRED)

# Enable testing BEFORE adding subdirectories: testing is enabled for
# "this directory and below", so the add_test() call in tests/ is only
# picked up by CTest when this has already run.
enable_testing()

# Add subdirectories
add_subdirectory(src)
add_subdirectory(tests)
EOF

# Create src CMakeLists.txt
# NOTE: every source file listed below must exist before running cmake;
# create empty placeholder files for the ones you have not written yet.
cat > src/CMakeLists.txt << 'EOF'
# Core library sources
set(CORE_SOURCES
    core/parser_interface.cpp
    core/documentation_generator.cpp
    core/comment_parser.cpp
    core/config.cpp
)

# AST sources
set(AST_SOURCES
    ast/ast_nodes.cpp
    ast/visitor.cpp
)

# Parser sources
set(PARSER_SOURCES
    parsers/cpp_parser.cpp
)

# Utility sources
set(UTIL_SOURCES
    utils/file_utils.cpp
    utils/string_utils.cpp
    utils/markdown_utils.cpp
)

# Create static library
add_library(docs_parser_lib
    ${CORE_SOURCES}
    ${AST_SOURCES}
    ${PARSER_SOURCES}
    ${UTIL_SOURCES}
)

# Public headers live in <project root>/include. A relative path here would
# be resolved against src/, i.e. the non-existent src/include.
target_include_directories(docs_parser_lib PUBLIC ${PROJECT_SOURCE_DIR}/include)

# The library itself has no third-party dependencies; GoogleTest is linked
# only into the test executable (see tests/CMakeLists.txt).

# Create main executable
add_executable(docs_parser main.cpp)
target_link_libraries(docs_parser PRIVATE docs_parser_lib)
EOF

# Create tests CMakeLists.txt
cat > tests/CMakeLists.txt << 'EOF'
# Test sources. CONFIGURE_DEPENDS re-runs the glob at build time so newly
# added test files are picked up without manually re-running cmake.
file(GLOB_RECURSE TEST_SOURCES CONFIGURE_DEPENDS "*.cpp")

# Only define the test target once at least one test source exists;
# add_executable() with an empty source list is a configure error.
if(TEST_SOURCES)
    # Create test executable
    add_executable(run_tests ${TEST_SOURCES})
    target_link_libraries(run_tests PRIVATE docs_parser_lib GTest::gtest_main GTest::gtest)

    # Register tests
    add_test(NAME AllTests COMMAND run_tests)
else()
    message(STATUS "No test sources found in tests/; skipping run_tests target")
endif()
EOF

Core Implementation Files

1. Core Interface (include/parser_interface.h)

#pragma once

#include <memory>
#include <vector>
#include <string>

/// Position of a code element: the file it was found in plus a line and
/// column number. Both numbers default to 0 when not supplied.
struct SourceLocation {
    std::string filePath{};
    int line{0};
    int column{0};
};

/// Abstract base for every code element produced by a language parser.
/// Concrete nodes report their kind, name, attached documentation text and
/// source location through this interface.
class ASTNode {
public:
    /// Kind of code element a node represents.
    enum class NodeType { CLASS, FUNCTION, NAMESPACE, UNKNOWN };

    /// Virtual so that nodes can be destroyed through an ASTNode pointer.
    virtual ~ASTNode() = default;
    /// @return The kind of element this node represents.
    virtual NodeType getType() const = 0;
    /// @return The element's identifier.
    virtual std::string getName() const = 0;
    /// @return The documentation text associated with the element
    ///         (empty in the visible implementations when none was found).
    virtual std::string getDocumentation() const = 0;
    /// @return Where the element was found.
    virtual SourceLocation getLocation() const = 0;
};

/// Interface implemented by each language-specific source parser.
class ILanguageParser {
public:
    /// Virtual so that parsers can be destroyed through an interface pointer.
    virtual ~ILanguageParser() = default;
    /// Parses one source file.
    /// @param filePath Path of the file to parse.
    /// @return The nodes found in the file; ownership passes to the caller.
    virtual std::vector<std::unique_ptr<ASTNode>> parseFile(const std::string& filePath) = 0;
    /// @return An identifier for the language this parser handles.
    virtual std::string getLanguage() const = 0;
    /// @return The file extensions this parser accepts.
    virtual std::vector<std::string> getFileExtensions() const = 0;
    /// @param filePath Path of a candidate file.
    /// @return true if this parser is willing to parse the given file.
    virtual bool canParse(const std::string& filePath) const = 0;
};

2. Simple C++ Parser (src/parsers/cpp_parser.cpp)

#include "parser_interface.h"

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <fstream>
#include <iterator>
#include <map>
#include <memory>
#include <regex>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

class SimpleClassNode : public ASTNode {
private:
    std::string name;
    std::string documentation;
    SourceLocation location;
    std::vector<std::unique_ptr<ASTNode>> children;
    
public:
    SimpleClassNode(const std::string& n, const std::string& doc, SourceLocation loc)
        : name(n), documentation(doc), location(loc) {}
    
    NodeType getType() const override { return NodeType::CLASS; }
    std::string getName() const override { return name; }
    std::string getDocumentation() const override { return documentation; }
    SourceLocation getLocation() const override { return location; }
    
    void addChild(std::unique_ptr<ASTNode> child) {
        children.push_back(std::move(child));
    }
};

class SimpleFunctionNode : public ASTNode {
private:
    std::string name;
    std::string documentation;
    std::string signature;
    SourceLocation location;
    
public:
    SimpleFunctionNode(const std::string& n, const std::string& doc, 
                      const std::string& sig, SourceLocation loc)
        : name(n), documentation(doc), signature(sig), location(loc) {}
    
    NodeType getType() const override { return NodeType::FUNCTION; }
    std::string getName() const override { return name; }
    std::string getDocumentation() const override { return documentation; }
    SourceLocation getLocation() const override { return location; }
    
    const std::string& getSignature() const { return signature; }
};

/**
 * @brief Heuristic, regex-based parser for C/C++ sources.
 *
 * The file is first scanned once by a small state machine that
 *   - records documentation comments (`/** ... *‍/`, `///`, `//!`) keyed by
 *     the line on which they end, and
 *   - produces a "code only" copy of the text in which comments, literal
 *     contents and preprocessor lines are replaced by spaces (newlines are
 *     kept, so offsets and line numbers stay identical to the original).
 *
 * Classes and functions are then located with regular expressions on the
 * code-only copy, so words inside comments or strings can no longer be
 * mistaken for declarations. A declaration is documented by the comment that
 * ends on the line directly above it.
 *
 * This is not a real C++ front end: calls that look like declarations
 * (e.g. `foo(x);` inside a body) are still reported as functions.
 */
class CppParser : public ILanguageParser {
public:
    /**
     * @brief Parses one file and returns the classes and functions found.
     * @param filePath Path of the file to read.
     * @return Class nodes followed by function nodes; empty if the file
     *         cannot be opened.
     */
    std::vector<std::unique_ptr<ASTNode>> parseFile(const std::string& filePath) override {
        std::vector<std::unique_ptr<ASTNode>> nodes;

        std::ifstream file(filePath);
        if (!file.is_open()) return nodes;

        const std::string content((std::istreambuf_iterator<char>(file)),
                                  std::istreambuf_iterator<char>());

        // Collect doc comments and obtain the comment-free view of the file.
        std::map<int, std::string> comments;
        const std::string code = extractComments(content, comments);

        parseClasses(code, comments, nodes, filePath);
        parseFunctions(code, content, comments, nodes, filePath);

        return nodes;
    }

    std::string getLanguage() const override { return "cpp"; }

    std::vector<std::string> getFileExtensions() const override {
        return {".cpp", ".h", ".hpp", ".cxx", ".cc", ".c"};
    }

    /**
     * @brief Tells whether the file name carries one of the known extensions.
     *
     * Paths without an extension (e.g. "Makefile") and paths whose only dot
     * belongs to a directory component (e.g. "./build/Makefile") yield false
     * instead of throwing.
     */
    bool canParse(const std::string& filePath) const override {
        const std::size_t dot = filePath.find_last_of('.');
        if (dot == std::string::npos) return false;

        const std::size_t separator = filePath.find_last_of("/\\");
        if (separator != std::string::npos && dot < separator) return false;

        const std::string ext = filePath.substr(dot);
        const auto extensions = getFileExtensions();
        return std::find(extensions.begin(), extensions.end(), ext) != extensions.end();
    }

private:
    /// True for characters that can be part of an identifier.
    static bool isWordChar(char c) {
        return std::isalnum(static_cast<unsigned char>(c)) != 0 || c == '_';
    }

    /// Number of '\n' characters in text[from, to).
    static int countNewlines(const std::string& text, std::size_t from, std::size_t to) {
        const auto first = text.begin() + static_cast<std::ptrdiff_t>(from);
        const auto last = text.begin() + static_cast<std::ptrdiff_t>(to);
        return static_cast<int>(std::count(first, last, '\n'));
    }

    /// Words that can precede '(' (or start a statement) without declaring a function.
    static bool isNonFunctionKeyword(const std::string& word) {
        static const std::vector<std::string> keywords = {
            "if", "for", "while", "switch", "catch", "return", "sizeof", "alignof",
            "decltype", "static_assert", "throw", "new", "delete", "else", "class"};
        return std::find(keywords.begin(), keywords.end(), word) != keywords.end();
    }

    /// Documentation ending on the line directly above @p line, or "" if none.
    static std::string findDocumentation(const std::map<int, std::string>& comments, int line) {
        const auto it = comments.find(line - 1);
        return it != comments.end() ? it->second : std::string();
    }

    /**
     * @brief Strips comment markers and decoration from a doc comment.
     *
     * Removes the opening marker (and closing marker of block comments), the
     * leading " * " decoration of block-comment lines, surrounding whitespace
     * on every line and leading/trailing blank lines. Asterisks inside the
     * text itself are preserved.
     */
    static std::string cleanDocComment(const std::string& raw) {
        const bool isBlock = raw.compare(0, 3, "/**") == 0;
        std::string body = raw;
        body.erase(0, 3);  // "/**", "///" or "//!"
        if (isBlock && body.size() >= 2 && body.compare(body.size() - 2, 2, "*/") == 0) {
            body.erase(body.size() - 2);
        }

        std::string cleaned;
        std::istringstream lines(body);
        std::string line;
        while (std::getline(lines, line)) {
            line.erase(0, line.find_first_not_of(" \t\r"));  // npos erases everything
            if (isBlock && !line.empty() && line.front() == '*') {
                line.erase(0, 1);
                if (!line.empty() && line.front() == ' ') line.erase(0, 1);
            }
            const std::size_t last = line.find_last_not_of(" \t\r");
            line.erase(last == std::string::npos ? 0 : last + 1);
            cleaned += line;
            cleaned += '\n';
        }

        const std::size_t begin = cleaned.find_first_not_of('\n');
        if (begin == std::string::npos) return std::string();
        const std::size_t end = cleaned.find_last_not_of('\n');
        return cleaned.substr(begin, end - begin + 1);
    }

    /**
     * @brief Finds the end of a quoted literal that opens at @p open.
     * @param closed Set to true when the closing quote was found on the same
     *               logical line.
     * @return Index one past the literal (or the position of the newline /
     *         end of text where an unterminated literal stops).
     */
    static std::size_t skipQuoted(const std::string& text, std::size_t open, char quote,
                                  bool& closed) {
        std::size_t end = open + 1;
        while (end < text.size() && text[end] != quote && text[end] != '\n') {
            // Skip the character after a backslash so \" and \' do not terminate.
            end += (text[end] == '\\' && end + 1 < text.size()) ? 2 : 1;
        }
        closed = end < text.size() && text[end] == quote;
        return closed ? end + 1 : end;
    }

    /**
     * @brief Single pass over the file: records doc comments and blanks out
     *        everything that is not code.
     *
     * @param content  Full text of the source file.
     * @param comments Receives cleaned doc-comment text keyed by the 1-based
     *                 line on which the comment ends. Consecutive `///`/`//!`
     *                 lines are merged, so the last line's entry holds the
     *                 whole run.
     * @return Copy of @p content, same length, with comments, string/char
     *         literal contents and preprocessor lines replaced by spaces.
     */
    static std::string extractComments(const std::string& content,
                                       std::map<int, std::string>& comments) {
        std::string code = content;
        const std::size_t size = content.size();

        const auto blank = [&code](std::size_t from, std::size_t to) {
            for (std::size_t k = from; k < to; ++k) {
                if (code[k] != '\n') code[k] = ' ';
            }
        };

        int line = 1;
        int lastLineDoc = -1;      // line of the most recent ///-style doc comment
        bool lineHasCode = false;  // a '#' only starts a directive on an otherwise empty line
        std::size_t i = 0;

        while (i < size) {
            const char c = content[i];
            const char next = (i + 1 < size) ? content[i + 1] : '\0';

            if (c == '\n') {
                ++line;
                lineHasCode = false;
                ++i;
            } else if (c == '/' && next == '/') {
                std::size_t end = content.find('\n', i);
                if (end == std::string::npos) end = size;
                const std::string text = content.substr(i, end - i);
                if (text.compare(0, 3, "///") == 0 || text.compare(0, 3, "//!") == 0) {
                    std::string clean = cleanDocComment(text);
                    if (lastLineDoc == line - 1) {
                        clean = comments[lastLineDoc] + "\n" + clean;
                    }
                    comments[line] = clean;
                    lastLineDoc = line;
                }
                blank(i, end);
                i = end;
            } else if (c == '/' && next == '*') {
                const std::size_t close = content.find("*/", i + 2);
                const std::size_t end = (close == std::string::npos) ? size : close + 2;
                const int endLine = line + countNewlines(content, i, end);
                // "/**/" is an empty ordinary comment, so require at least "/***/".
                if (end - i >= 5 && content.compare(i, 3, "/**") == 0) {
                    comments[endLine] = cleanDocComment(content.substr(i, end - i));
                }
                blank(i, end);
                if (endLine != line) lineHasCode = false;
                line = endLine;
                i = end;
            } else if (c == '#' && !lineHasCode) {
                // Preprocessor directive: blank it, following backslash continuations.
                std::size_t end = i;
                while (end < size) {
                    if (content[end] == '\n') {
                        if (content[end - 1] != '\\') break;
                        ++line;
                    }
                    ++end;
                }
                blank(i, end);
                i = end;
            } else if (c == '"') {
                lineHasCode = true;
                bool closed = false;
                std::size_t end = size;
                if (i > 0 && content[i - 1] == 'R') {
                    // Raw string literal: R"delim( ... )delim"
                    const std::size_t open = content.find('(', i + 1);
                    if (open != std::string::npos) {
                        const std::string terminator =
                            ")" + content.substr(i + 1, open - i - 1) + "\"";
                        const std::size_t close = content.find(terminator, open + 1);
                        if (close != std::string::npos) {
                            end = close + terminator.size();
                            closed = true;
                        }
                    }
                } else {
                    end = skipQuoted(content, i, '"', closed);
                }
                blank(i + 1, closed ? end - 1 : end);  // keep the quote characters
                line += countNewlines(content, i, end);
                i = end;
            } else if (c == '\'' && !(i > 0 && isWordChar(content[i - 1]))) {
                // A quote after a word character is a digit separator (1'000), not a literal.
                lineHasCode = true;
                bool closed = false;
                const std::size_t end = skipQuoted(content, i, '\'', closed);
                blank(i + 1, closed ? end - 1 : end);
                line += countNewlines(content, i, end);
                i = end;
            } else {
                if (std::isspace(static_cast<unsigned char>(c)) == 0) lineHasCode = true;
                ++i;
            }
        }

        return code;
    }

    /**
     * @brief Appends a SimpleClassNode for every class definition in @p code.
     *
     * Matches `class [attribute-like words] Name [final] [: bases] {`.
     * Forward declarations (`class Foo;`) and template parameters
     * (`template <class T>`) do not match.
     */
    static void parseClasses(const std::string& code, const std::map<int, std::string>& comments,
                             std::vector<std::unique_ptr<ASTNode>>& nodes,
                             const std::string& filePath) {
        static const std::regex classRegex(
            R"(\bclass\s+(?:\w+\s+)*?(\w+)\s*(?:final\s*)?(?::[^{;]*)?\{)");

        // Matches arrive in increasing offset order, so the line number is
        // advanced incrementally instead of recounting from the file start.
        std::size_t cursor = 0;
        int line = 1;
        for (std::sregex_iterator it(code.begin(), code.end(), classRegex), end; it != end; ++it) {
            const std::smatch& match = *it;
            const auto pos = static_cast<std::size_t>(match.position());
            line += countNewlines(code, cursor, pos);
            cursor = pos;

            nodes.push_back(std::make_unique<SimpleClassNode>(
                match[1].str(), findDocumentation(comments, line), SourceLocation{filePath, line}));
        }
    }

    /**
     * @brief Appends a SimpleFunctionNode for every function-like match.
     *
     * @param code    Comment-free view used for matching.
     * @param content Original text; the signature is taken from here so that
     *                default arguments such as string literals stay intact.
     */
    static void parseFunctions(const std::string& code, const std::string& content,
                               const std::map<int, std::string>& comments,
                               std::vector<std::unique_ptr<ASTNode>>& nodes,
                               const std::string& filePath) {
        static const std::regex functionRegex(R"((\w+\s+)*(\w+)\s*\([^)]*\)\s*(?:\{|;))");

        std::size_t cursor = 0;
        int line = 1;
        for (std::sregex_iterator it(code.begin(), code.end(), functionRegex), end; it != end;
             ++it) {
            const std::smatch& match = *it;
            const std::string functionName = match[2].str();

            // Skip control statements ("if (...) {") and statements that merely
            // contain a call ("return foo();", "class X x();").
            const std::string matched = match.str();
            std::size_t wordEnd = 0;
            while (wordEnd < matched.size() && isWordChar(matched[wordEnd])) ++wordEnd;
            if (isNonFunctionKeyword(functionName) ||
                isNonFunctionKeyword(matched.substr(0, wordEnd))) {
                continue;
            }

            const auto pos = static_cast<std::size_t>(match.position());
            line += countNewlines(code, cursor, pos);
            cursor = pos;

            const std::string signature =
                content.substr(pos, static_cast<std::size_t>(match.length()));
            nodes.push_back(std::make_unique<SimpleFunctionNode>(
                functionName, findDocumentation(comments, line), signature,
                SourceLocation{filePath, line}));
        }
    }
};

3. Documentation Generator (src/core/documentation_generator.cpp)

#include "parser_interface.h"
#include <fstream>
#include <filesystem>
#include <iostream>

/// Settings for the documentation generator: where output is written, the
/// title used on the index page, and the generator name placed in each
/// page's front matter.
struct DocumentationConfig {
    std::string outputDirectory{"docs"};
    std::string indexTitle{"Documentation"};
    std::string generatorName{"docs-parser"};
};

class DocumentationGenerator {
private:
    DocumentationConfig config;
    std::string currentContent;
    
public:
    explicit DocumentationGenerator(const DocumentationConfig& cfg) : config(cfg) {}
    
    void generate(const std::vector<std::unique_ptr<ASTNode>>& nodes) {
        // Create output directory
        std::filesystem::create_directories(config.outputDirectory);
        
        // Generate main index
        generateIndex(nodes);
        
        // Generate individual files for classes
        generateClassFiles(nodes);
        
        // Generate module organization
        generateModuleStructure(nodes);
    }
    
private:
    void generateIndex(const std::vector<std::unique_ptr<ASTNode>>& nodes) {
        std::string content = R"(---
generator: )" + config.generatorName + R"(---

# )" + config.indexTitle + R"(

)";
        
        content += generateModuleList(nodes);
        
        std::ofstream indexFile(config.outputDirectory + "/index.md");
        indexFile << content;
        indexFile.close();
    }
    
    std::string generateModuleList(const std::vector<std::unique_ptr<ASTNode>>& nodes) {
        std::stringstream ss;
        
        // Group classes and functions
        std::vector<ASTNode*> classes;
        std::vector<ASTNode*> functions;
        
        for (const auto& node : nodes) {
            if (node->getType() == ASTNode::NodeType::CLASS) {
                classes.push_back(node.get());
            } else if (node->getType() == ASTNode::NodeType::FUNCTION) {
                functions.push_back(node.get());
            }
        }
        
        if (!classes.empty()) {
            ss << "## Classes\n\n";
            for (const auto* cls : classes) {
                ss << "- [" << cls->getName() << "](" << cls->getName() << ".md)";
                if (!cls->getDocumentation().empty()) {
                    ss << " - " << extractBrief(cls->getDocumentation());
                }
                ss << "\n";
            }
            ss << "\n";
        }
        
        if (!functions.empty()) {
            ss << "## Functions\n\n";
            for (const auto* func : functions) {
                ss << "- [" << func->getName() << "](" << func->getName() << ".md)";
                if (!func->getDocumentation().empty()) {
                    ss << " - " << extractBrief(func->getDocumentation());
                }
                ss << "\n";
            }
            ss << "\n";
        }
        
        return ss.str();
    }
    
    void generateClassFiles(const std::vector<std::unique_ptr<ASTNode>>& nodes) {
        for (const auto& node : nodes) {
            if (node->getType() == ASTNode::NodeType::CLASS) {
                generateClassFile(static_cast<SimpleClassNode*>(node.get()));
            } else if (node->getType() == ASTNode::NodeType::FUNCTION) {
                generateFunctionFile(static_cast<SimpleFunctionNode*>(node.get()));
            }
        }
    }
    
    void generateClassFile(const SimpleClassNode* classNode) {
        std::string filename = config.outputDirectory + "/" + classNode->getName() + ".md";
        std::ofstream file(filename);
        
        file << R"(---
generator: )" << config.generatorName << R"(---

# )" << classNode->getName() << R"(

**class )" << classNode->getName() << R"(**

)";
        
        if (!classNode->getDocumentation().empty()) {
            file << formatDocumentation(classNode->getDocumentation()) << "\n\n";
        }
        
        file << "## Source Location\n\n";
        file << "File: `" << classNode->getLocation().filePath;
        file << "`:" << classNode->getLocation().line << "\n\n";
        
        file.close();
    }
    
    void generateFunctionFile(const SimpleFunctionNode* funcNode) {
        std::string filename = config.outputDirectory + "/" + funcNode->getName() + ".md";
        std::ofstream file(filename);
        
        file << R"(---
generator: )" << config.generatorName << R"(---

# )" << funcNode->getName() << R"(

**function )" << funcNode->getName() << R"(**

)";
        
        if (!funcNode->getDocumentation().empty()) {
            file << formatDocumentation(funcNode->getDocumentation()) << "\n\n";
        }
        
        file << "## Signature\n\n";
        file << "```cpp\n" << funcNode->getSignature() << "\n```\n\n";
        
        file << "## Source Location\n\n";
        file << "File: `" << funcNode->getLocation().filePath;
        file << "`:" << funcNode->getLocation().line << "\n\n";
        
        file.close();
    }
    
    void generateModuleStructure(const std::vector<std::unique_ptr<ASTNode>>& nodes) {
        // For now, just put everything in the root directory
        // Later we'll organize this into subdirectories based on namespaces
    }
    
    std::string extractBrief(const std::string& doc) {
        // Extract first sentence as brief description
        size_t pos = doc.find('.');
        if (pos != std::string::npos && pos < 100) {
            return doc.substr(0, pos + 1);
        }
        return doc.length() > 80 ? doc.substr(0, 77) + "..." : doc;
    }
    
    std::string formatDocumentation(const std::string& doc) {
        std::string formatted = doc;
        
        // Replace @brief and similar tags
        formatted = std::regex_replace(formatted, std::regex(R"(@brief\s+)"), "");
        
        // Format paragraphs
        formatted = std::regex_replace(formatted, std::regex(R"(\n\s*\n)"), "\n\n");
        
        return formatted;
    }
};

4. Main Application (src/main.cpp)

#include "parser_interface.h"
#include <iostream>
#include <filesystem>
#include <vector>

// Forward declarations
std::unique_ptr<ILanguageParser> createCppParser();

/**
 * @brief Entry point: parses every supported source file under the input
 *        path and writes Markdown documentation to the output path.
 *
 * Usage: docs-parser <input-path> [output-path]
 *   - input-path may be a directory (searched recursively) or a single file.
 *   - output-path defaults to "docs".
 *
 * @return 0 on success, 1 on a usage error, an unusable input path or a
 *         filesystem error.
 *
 * NOTE(review): this listing includes only parser_interface.h, while
 * DocumentationConfig and DocumentationGenerator are defined in the
 * documentation-generator listing; their definitions must be visible here
 * (e.g. moved into a header) for this file to compile.
 */
int main(int argc, char* argv[]) {
    if (argc < 2) {
        std::cerr << "Usage: docs-parser <input-path> [output-path]" << std::endl;
        return 1;
    }

    const std::string inputPath = argv[1];
    const std::string outputPath = argc > 2 ? argv[2] : "docs";

    // Configuration
    DocumentationConfig config;
    config.outputDirectory = outputPath;
    config.indexTitle = "Source Code Documentation";
    config.generatorName = "docs-parser";

    // Create parser
    auto parser = createCppParser();

    try {
        // Find source files
        std::vector<std::string> sourceFiles;
        if (std::filesystem::is_directory(inputPath)) {
            const auto options = std::filesystem::directory_options::skip_permission_denied;
            for (const auto& entry :
                 std::filesystem::recursive_directory_iterator(inputPath, options)) {
                if (entry.is_regular_file() && parser->canParse(entry.path().string())) {
                    sourceFiles.push_back(entry.path().string());
                }
            }
        } else if (std::filesystem::is_regular_file(inputPath)) {
            if (parser->canParse(inputPath)) {
                sourceFiles.push_back(inputPath);
            }
        } else {
            std::cerr << "Input path does not exist or is not a file or directory: "
                      << inputPath << std::endl;
            return 1;
        }

        std::cout << "Found " << sourceFiles.size() << " source files" << '\n';

        // Parse all files
        std::vector<std::unique_ptr<ASTNode>> allNodes;
        for (const auto& file : sourceFiles) {
            std::cout << "Parsing: " << file << '\n';
            auto nodes = parser->parseFile(file);
            for (auto& node : nodes) {
                allNodes.push_back(std::move(node));
            }
        }

        std::cout << "Found " << allNodes.size() << " code elements" << '\n';

        // Generate documentation
        DocumentationGenerator generator(config);
        generator.generate(allNodes);
    } catch (const std::filesystem::filesystem_error& error) {
        // Raised while walking the input tree or creating the output directory.
        std::cerr << "Filesystem error: " << error.what() << std::endl;
        return 1;
    }

    std::cout << "Documentation generated in: " << outputPath << std::endl;
    return 0;
}

// Factory function
std::unique_ptr<ILanguageParser> createCppParser() {
    return std::make_unique<CppParser>();
}

Build and Run Instructions

# Build the project
mkdir build && cd build
cmake ..
make

# Run on example C++ code
# (the docs_parser target is defined in src/CMakeLists.txt, so the
#  executable is produced in build/src/, not in build/ itself)
./src/docs_parser /path/to/cpp/source ./output/docs

# Test on the TUI project
./src/docs_parser /data/tui/src ./tui-docs

Test Example File

Create test_example.cpp:

#include <iostream>

/**
 * @brief A simple calculator class
 * This class provides basic arithmetic operations.
 */
class Calculator {
public:
    /// @brief Add two numbers
    int add(int a, int b);
    
    /**
     * @brief Multiply two numbers
     * @param a First number
     * @param b Second number
     * @return Product of a and b
     */
    int multiply(int a, int b);
};

/**
 * @brief Global utility function
 * This function demonstrates global documentation.
 */
void utilityFunction() {
    std::cout << "Utility function called" << std::endl;
}

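Run the parser from the directory that contains test_example.cpp (replace ./docs_parser with the path to the built executable if it is not in the current directory):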

./docs_parser . ./test-docs

Expected output structure:

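test-docs/
├── index.md
├── Calculator.md
├── add.md
├── multiply.md
└── utilityFunction.md

(The function pattern also matches declarations ending in `;`, so the member functions `add` and `multiply` get their own pages.)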

This minimal implementation provides a working foundation that can be extended with more sophisticated parsing, better documentation extraction, and enhanced formatting to match the exact style of the /data/tui/docs structure.