# Create project structure.
# -p keeps the step re-runnable: without it an existing docs-parser/ makes
# mkdir fail, the cd is skipped, and the directories below would be created
# in whatever directory the shell happens to be in.
mkdir -p docs-parser && cd docs-parser
# Directories are listed explicitly so the command does not depend on
# brace expansion, which plain POSIX sh does not provide.
mkdir -p src/core src/parsers src/ast src/utils include tests examples config docs
# Initialize CMake project
cat > CMakeLists.txt << 'EOF'
cmake_minimum_required(VERSION 3.16)
project(DocsParser VERSION 1.0.0 LANGUAGES CXX)

# Project-wide language level: require C++20 and request the standard dialect
# (-std=c++20) instead of the compiler-specific GNU dialect.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Include directories: makes <project>/include visible to every target
# defined in this directory and in the subdirectories added below.
include_directories(include)

# Find required packages. GoogleTest provides the imported GTest::gtest and
# GTest::gtest_main targets that the subdirectories link against.
find_package(GTest REQUIRED)

# Testing must be enabled before tests/ is processed: enable_testing() only
# affects the current directory and the subdirectories added after it, so
# calling it last would leave the add_test() in tests/ unregistered.
enable_testing()

# Add subdirectories
add_subdirectory(src)
add_subdirectory(tests)
EOF
# Create src CMakeLists.txt
cat > src/CMakeLists.txt << 'EOF'
# Static library that holds the parser and generator code.
# NOTE(review): every file listed here must exist when CMake generates the
# build system - confirm each one has actually been created under src/.
add_library(docs_parser_lib STATIC
  # Core library sources
  core/parser_interface.cpp
  core/documentation_generator.cpp
  core/comment_parser.cpp
  core/config.cpp
  # AST sources
  ast/ast_nodes.cpp
  ast/visitor.cpp
  # Parser sources
  parsers/cpp_parser.cpp
  # Utility sources
  utils/file_utils.cpp
  utils/string_utils.cpp
  utils/markdown_utils.cpp
)

# Namespaced alias: a typo in "DocsParser::lib" is a configure-time error
# instead of being passed to the linker as a plain library name.
add_library(DocsParser::lib ALIAS docs_parser_lib)

# Public headers live in <project>/include. The path is anchored at the
# project root because a bare "include" is resolved relative to this
# directory (src/), where no include/ folder is created.
target_include_directories(docs_parser_lib
  PUBLIC
    "${PROJECT_SOURCE_DIR}/include"
)

# GoogleTest is intentionally not linked here: the test executable links it
# itself, and the shipped library should not depend on a test framework.

# Create main executable
add_executable(docs_parser main.cpp)
target_link_libraries(docs_parser PRIVATE docs_parser_lib)
EOF
# Create tests CMakeLists.txt
cat > tests/CMakeLists.txt << 'EOF'
# Test sources: every .cpp file below this directory.
# CONFIGURE_DEPENDS makes the build re-evaluate the glob, so test files added
# later are picked up without re-running cmake by hand.
file(GLOB_RECURSE TEST_SOURCES CONFIGURE_DEPENDS
  "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp"
)

# add_executable() with an empty source list is a configuration error, so the
# test target is only created once at least one test source exists.
if(TEST_SOURCES)
  # Create test executable
  add_executable(run_tests ${TEST_SOURCES})
  target_link_libraries(run_tests
    PRIVATE
      docs_parser_lib
      GTest::gtest_main
      GTest::gtest
  )

  # Register tests: the whole binary runs as a single CTest entry.
  add_test(NAME AllTests COMMAND run_tests)
else()
  message(STATUS "No test sources found in ${CMAKE_CURRENT_SOURCE_DIR}; run_tests is not built")
endif()
EOF
include/parser_interface.h:
#pragma once
#include <memory>
#include <vector>
#include <string>
/// Position of a code element: the file it was found in plus line and column.
struct SourceLocation {
    std::string filePath;  ///< Path of the source file; empty by default.
    int line{0};           ///< Line number; 0 by default.
    int column{0};         ///< Column number; 0 by default.
};
/**
 * Abstract base class for the code elements produced by a language parser.
 *
 * A concrete node reports what kind of element it is, its name, the
 * documentation text attached to it and where it was found.
 */
class ASTNode {
public:
    /// Kind of code element a node represents.
    enum class NodeType {
        CLASS,
        FUNCTION,
        NAMESPACE,
        UNKNOWN
    };

    /// Virtual so that nodes can be destroyed through a pointer to ASTNode.
    virtual ~ASTNode() = default;

    /// @return The kind of element this node represents.
    virtual NodeType getType() const = 0;

    /// @return The element's name.
    virtual std::string getName() const = 0;

    /// @return The documentation text attached to the element.
    virtual std::string getDocumentation() const = 0;

    /// @return Where the element was found.
    virtual SourceLocation getLocation() const = 0;
};
/**
 * Interface implemented by every language front end.
 *
 * A parser turns one source file into a list of ASTNode objects and can tell
 * callers which language and which file extensions it handles.
 */
class ILanguageParser {
public:
    /// Virtual so that parsers can be destroyed through this interface.
    virtual ~ILanguageParser() = default;

    /**
     * Parses a single file.
     * @param filePath Path of the file to parse.
     * @return The nodes found in the file; ownership passes to the caller.
     */
    virtual std::vector<std::unique_ptr<ASTNode>>
    parseFile(const std::string& filePath) = 0;

    /// @return Name of the language this parser handles.
    virtual std::string getLanguage() const = 0;

    /// @return The file extensions this parser handles.
    virtual std::vector<std::string> getFileExtensions() const = 0;

    /// @return true when this parser is able to handle @p filePath.
    virtual bool canParse(const std::string& filePath) const = 0;
};
src/parsers/cpp_parser.cpp:
#include "parser_interface.h"
#include <regex>
#include <fstream>
#include <sstream>
class SimpleClassNode : public ASTNode {
private:
std::string name;
std::string documentation;
SourceLocation location;
std::vector<std::unique_ptr<ASTNode>> children;
public:
SimpleClassNode(const std::string& n, const std::string& doc, SourceLocation loc)
: name(n), documentation(doc), location(loc) {}
NodeType getType() const override { return NodeType::CLASS; }
std::string getName() const override { return name; }
std::string getDocumentation() const override { return documentation; }
SourceLocation getLocation() const override { return location; }
void addChild(std::unique_ptr<ASTNode> child) {
children.push_back(std::move(child));
}
};
class SimpleFunctionNode : public ASTNode {
private:
std::string name;
std::string documentation;
std::string signature;
SourceLocation location;
public:
SimpleFunctionNode(const std::string& n, const std::string& doc,
const std::string& sig, SourceLocation loc)
: name(n), documentation(doc), signature(sig), location(loc) {}
NodeType getType() const override { return NodeType::FUNCTION; }
std::string getName() const override { return name; }
std::string getDocumentation() const override { return documentation; }
SourceLocation getLocation() const override { return location; }
const std::string& getSignature() const { return signature; }
};
class CppParser : public ILanguageParser {
public:
std::vector<std::unique_ptr<ASTNode>> parseFile(const std::string& filePath) override {
std::vector<std::unique_ptr<ASTNode>> nodes;
std::ifstream file(filePath);
if (!file.is_open()) return nodes;
std::string content((std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
// Extract comments first
std::map<int, std::string> comments;
extractComments(content, comments);
// Simple regex parsing for classes and functions
parseClasses(content, comments, nodes, filePath);
parseFunctions(content, comments, nodes, filePath);
return nodes;
}
std::string getLanguage() const override { return "cpp"; }
std::vector<std::string> getFileExtensions() const override {
return {".cpp", ".h", ".hpp", ".cxx", ".cc", ".c"};
}
bool canParse(const std::string& filePath) const override {
std::string ext = filePath.substr(filePath.find_last_of('.'));
auto extensions = getFileExtensions();
return std::find(extensions.begin(), extensions.end(), ext) != extensions.end();
}
private:
void extractComments(const std::string& content, std::map<int, std::string>& comments) {
std::regex commentRegex(R"((/\*\*.*?\*/|///.*?$|//!.*?$))", std::regex::dotall);
std::sregex_iterator iter(content.begin(), content.end(), commentRegex);
std::sregex_iterator end;
std::istringstream stream(content);
std::string line;
int lineNum = 0;
while (std::getline(stream, line)) {
lineNum++;
for (auto it = iter; it != end; ++it) {
std::smatch match = *it;
if (match.position() < line.length() + stream.tellg()) {
std::string comment = match.str();
if (comment.find("/**") == 0 || comment.find("///") == 0 || comment.find("//!") == 0) {
// Clean up comment
std::string clean = std::regex_replace(comment, std::regex(R"(/\*\*|\*/|///|//!|\*)"), "");
clean = std::regex_replace(clean, std::regex(R"(^\s+)"), "");
comments[lineNum] = clean;
}
}
}
}
}
void parseClasses(const std::string& content, const std::map<int, std::string>& comments,
std::vector<std::unique_ptr<ASTNode>>& nodes, const std::string& filePath) {
std::regex classRegex(R"(class\s+(\w+)[^{]*\{)");
std::sregex_iterator iter(content.begin(), content.end(), classRegex);
std::sregex_iterator end;
for (auto it = iter; it != end; ++it) {
std::smatch match = *it;
std::string className = match[1].str();
// Find line number
std::string before = content.substr(0, match.position());
int line = std::count(before.begin(), before.end(), '\n') + 1;
// Find preceding comment
std::string doc;
auto commentIt = comments.find(line - 1);
if (commentIt != comments.end()) {
doc = commentIt->second;
}
SourceLocation loc{filePath, line};
nodes.push_back(std::make_unique<SimpleClassNode>(className, doc, loc));
}
}
void parseFunctions(const std::string& content, const std::map<int, std::string>& comments,
std::vector<std::unique_ptr<ASTNode>>& nodes, const std::string& filePath) {
std::regex functionRegex(R"((\w+\s+)*(\w+)\s*\([^)]*\)\s*(?:\{|;))");
std::sregex_iterator iter(content.begin(), content.end(), functionRegex);
std::sregex_iterator end;
for (auto it = iter; it != end; ++it) {
std::smatch match = *it;
std::string functionName = match[2].str();
std::string signature = match.str();
// Skip class definitions
if (signature.find("class ") == 0) continue;
// Find line number
std::string before = content.substr(0, match.position());
int line = std::count(before.begin(), before.end(), '\n') + 1;
// Find preceding comment
std::string doc;
auto commentIt = comments.find(line - 1);
if (commentIt != comments.end()) {
doc = commentIt->second;
}
SourceLocation loc{filePath, line};
nodes.push_back(std::make_unique<SimpleFunctionNode>(functionName, doc, signature, loc));
}
}
};
src/core/documentation_generator.cpp:
#include "parser_interface.h"
#include <filesystem>
#include <fstream>
#include <iostream>
#include <memory>
#include <regex>
#include <sstream>
#include <string>
#include <vector>
/// Settings read by DocumentationGenerator.
struct DocumentationConfig {
    /// Directory the generated Markdown files are written to.
    std::string outputDirectory{"docs"};
    /// Heading placed at the top of the generated index page.
    std::string indexTitle{"Documentation"};
    /// Value written to the "generator" entry at the top of each page.
    std::string generatorName{"docs-parser"};
};
class DocumentationGenerator {
private:
DocumentationConfig config;
std::string currentContent;
public:
explicit DocumentationGenerator(const DocumentationConfig& cfg) : config(cfg) {}
void generate(const std::vector<std::unique_ptr<ASTNode>>& nodes) {
// Create output directory
std::filesystem::create_directories(config.outputDirectory);
// Generate main index
generateIndex(nodes);
// Generate individual files for classes
generateClassFiles(nodes);
// Generate module organization
generateModuleStructure(nodes);
}
private:
void generateIndex(const std::vector<std::unique_ptr<ASTNode>>& nodes) {
std::string content = R"(---
generator: )" + config.generatorName + R"(---
# )" + config.indexTitle + R"(
)";
content += generateModuleList(nodes);
std::ofstream indexFile(config.outputDirectory + "/index.md");
indexFile << content;
indexFile.close();
}
std::string generateModuleList(const std::vector<std::unique_ptr<ASTNode>>& nodes) {
std::stringstream ss;
// Group classes and functions
std::vector<ASTNode*> classes;
std::vector<ASTNode*> functions;
for (const auto& node : nodes) {
if (node->getType() == ASTNode::NodeType::CLASS) {
classes.push_back(node.get());
} else if (node->getType() == ASTNode::NodeType::FUNCTION) {
functions.push_back(node.get());
}
}
if (!classes.empty()) {
ss << "## Classes\n\n";
for (const auto* cls : classes) {
ss << "- [" << cls->getName() << "](" << cls->getName() << ".md)";
if (!cls->getDocumentation().empty()) {
ss << " - " << extractBrief(cls->getDocumentation());
}
ss << "\n";
}
ss << "\n";
}
if (!functions.empty()) {
ss << "## Functions\n\n";
for (const auto* func : functions) {
ss << "- [" << func->getName() << "](" << func->getName() << ".md)";
if (!func->getDocumentation().empty()) {
ss << " - " << extractBrief(func->getDocumentation());
}
ss << "\n";
}
ss << "\n";
}
return ss.str();
}
void generateClassFiles(const std::vector<std::unique_ptr<ASTNode>>& nodes) {
for (const auto& node : nodes) {
if (node->getType() == ASTNode::NodeType::CLASS) {
generateClassFile(static_cast<SimpleClassNode*>(node.get()));
} else if (node->getType() == ASTNode::NodeType::FUNCTION) {
generateFunctionFile(static_cast<SimpleFunctionNode*>(node.get()));
}
}
}
void generateClassFile(const SimpleClassNode* classNode) {
std::string filename = config.outputDirectory + "/" + classNode->getName() + ".md";
std::ofstream file(filename);
file << R"(---
generator: )" << config.generatorName << R"(---
# )" << classNode->getName() << R"(
**class )" << classNode->getName() << R"(**
)";
if (!classNode->getDocumentation().empty()) {
file << formatDocumentation(classNode->getDocumentation()) << "\n\n";
}
file << "## Source Location\n\n";
file << "File: `" << classNode->getLocation().filePath;
file << "`:" << classNode->getLocation().line << "\n\n";
file.close();
}
void generateFunctionFile(const SimpleFunctionNode* funcNode) {
std::string filename = config.outputDirectory + "/" + funcNode->getName() + ".md";
std::ofstream file(filename);
file << R"(---
generator: )" << config.generatorName << R"(---
# )" << funcNode->getName() << R"(
**function )" << funcNode->getName() << R"(**
)";
if (!funcNode->getDocumentation().empty()) {
file << formatDocumentation(funcNode->getDocumentation()) << "\n\n";
}
file << "## Signature\n\n";
file << "```cpp\n" << funcNode->getSignature() << "\n```\n\n";
file << "## Source Location\n\n";
file << "File: `" << funcNode->getLocation().filePath;
file << "`:" << funcNode->getLocation().line << "\n\n";
file.close();
}
void generateModuleStructure(const std::vector<std::unique_ptr<ASTNode>>& nodes) {
// For now, just put everything in the root directory
// Later we'll organize this into subdirectories based on namespaces
}
std::string extractBrief(const std::string& doc) {
// Extract first sentence as brief description
size_t pos = doc.find('.');
if (pos != std::string::npos && pos < 100) {
return doc.substr(0, pos + 1);
}
return doc.length() > 80 ? doc.substr(0, 77) + "..." : doc;
}
std::string formatDocumentation(const std::string& doc) {
std::string formatted = doc;
// Replace @brief and similar tags
formatted = std::regex_replace(formatted, std::regex(R"(@brief\s+)"), "");
// Format paragraphs
formatted = std::regex_replace(formatted, std::regex(R"(\n\s*\n)"), "\n\n");
return formatted;
}
};
main.cpp:
#include "parser_interface.h"
#include <iostream>
#include <filesystem>
#include <vector>
// Forward declarations
std::unique_ptr<ILanguageParser> createCppParser();
/**
 * Command-line entry point: docs-parser <input-path> [output-path].
 *
 * <input-path> may be a directory (searched recursively) or a single source
 * file; [output-path] defaults to "docs".
 * @return 0 on success, 1 on a usage error or when the input cannot be read
 *         or the output cannot be written.
 *
 * NOTE(review): DocumentationConfig and DocumentationGenerator are not
 * declared by any header included in this file - presumably a shared header
 * is intended; confirm before building.
 */
int main(int argc, char* argv[]) {
    if (argc < 2) {
        std::cerr << "Usage: docs-parser <input-path> [output-path]" << std::endl;
        return 1;
    }
    const std::string inputPath = argv[1];
    const std::string outputPath = argc > 2 ? argv[2] : "docs";

    // Configuration
    DocumentationConfig config;
    config.outputDirectory = outputPath;
    config.indexTitle = "Source Code Documentation";
    config.generatorName = "docs-parser";

    // Create parser
    auto parser = createCppParser();

    // Find source files. Filesystem errors (missing path, unreadable
    // directory) are reported instead of terminating with an uncaught
    // exception.
    std::vector<std::string> sourceFiles;
    try {
        if (!std::filesystem::exists(inputPath)) {
            std::cerr << "Input path does not exist: " << inputPath << std::endl;
            return 1;
        }
        if (std::filesystem::is_directory(inputPath)) {
            for (const auto& entry : std::filesystem::recursive_directory_iterator(inputPath)) {
                if (entry.is_regular_file() && parser->canParse(entry.path().string())) {
                    sourceFiles.push_back(entry.path().string());
                }
            }
        } else if (parser->canParse(inputPath)) {
            // A single file was given instead of a directory.
            sourceFiles.push_back(inputPath);
        }
    } catch (const std::filesystem::filesystem_error& error) {
        std::cerr << "Cannot read " << inputPath << ": " << error.what() << std::endl;
        return 1;
    }
    std::cout << "Found " << sourceFiles.size() << " source files" << std::endl;

    // Parse all files
    std::vector<std::unique_ptr<ASTNode>> allNodes;
    for (const auto& file : sourceFiles) {
        std::cout << "Parsing: " << file << std::endl;
        auto nodes = parser->parseFile(file);
        for (auto& node : nodes) {
            allNodes.push_back(std::move(node));
        }
    }
    std::cout << "Found " << allNodes.size() << " code elements" << std::endl;

    // Generate documentation
    try {
        DocumentationGenerator generator(config);
        generator.generate(allNodes);
    } catch (const std::filesystem::filesystem_error& error) {
        std::cerr << "Cannot write documentation to " << outputPath << ": "
                  << error.what() << std::endl;
        return 1;
    }
    std::cout << "Documentation generated in: " << outputPath << std::endl;
    return 0;
}
// Factory function
std::unique_ptr<ILanguageParser> createCppParser() {
return std::make_unique<CppParser>();
}
# Build the project (out-of-source build in ./build)
mkdir -p build && cd build
cmake ..
# cmake --build works with whichever generator cmake selected, not only Makefiles
cmake --build .
# Run on example C++ code.
# The docs_parser target is defined in src/CMakeLists.txt, so the binary is
# written to the src/ subdirectory of the build tree.
./src/docs_parser /path/to/cpp/source ./output/docs
# Test on the TUI project
./src/docs_parser /data/tui/src ./tui-docs
Create test_example.cpp:
#include <iostream>
// Sample input for the documentation parser: a class documented with a
// block-style comment whose two member functions use the line style and
// the block style respectively.
/**
* @brief A simple calculator class
* This class provides basic arithmetic operations.
*/
class Calculator {
public:
/// @brief Add two numbers
int add(int a, int b);
/**
* @brief Multiply two numbers
* @param a First number
* @param b Second number
* @return Product of a and b
*/
int multiply(int a, int b);
};
// Sample input for the documentation parser: a free function with a body,
// documented with a block-style comment.
/**
* @brief Global utility function
* This function demonstrates global documentation.
*/
void utilityFunction() {
std::cout << "Utility function called" << std::endl;
}
Run the parser:
./docs_parser . ./test-docs
Expected output structure:
test-docs/
├── index.md
├── Calculator.md
├── add.md
├── multiply.md
└── utilityFunction.md
This minimal implementation provides a working foundation that can be extended with more sophisticated parsing, better documentation extraction, and enhanced formatting to match the exact style of the /data/tui/docs structure.