parser-implementation-plan.md 35 KB

Dynamic Source Code Documentation Parser - TypeScript Implementation Plan

Overview

Create a modular source code parser in TypeScript that extracts documentation from comments and generates structured markdown files similar to the /data/tui/docs structure. The system will be language-agnostic with pluggable parsers, starting with C++, and will integrate into the existing docs-rag project as a new CLI command.

Architecture Design

1. Integration with Current Project

The parser will be integrated into the existing docs-rag TypeScript project structure:

src/
├── cli/
│   ├── index.ts                    # Existing CLI (add parser command)
│   └── parser-commands.ts          # New parser-specific commands
├── lib/
│   └── parser/                     # New parser library
│       ├── core/
│       │   ├── interfaces.ts       # Abstract parser interface
│       │   ├── documentation-generator.ts  # Markdown output generator
│       │   ├── comment-parser.ts   # Generic comment extraction
│       │   └── config.ts           # Configuration management
│       ├── parsers/
│       │   ├── base-parser.ts      # Base parser class
│       │   ├── cpp-parser.ts       # C++ implementation
│       │   ├── [future] python-parser.ts
│       │   └── [future] java-parser.ts
│       ├── ast/
│       │   ├── nodes.ts            # AST node definitions
│       │   └── visitor.ts          # Visitor pattern for traversal
│       └── utils/
│           ├── file-utils.ts       # File system operations
│           ├── string-utils.ts      # Text processing utilities
│           └── markdown-utils.ts    # Markdown generation helpers
├── services/
│   └── documentService.ts          # Existing service (extend for parser output)
├── config/
│   └── parser-config.ts            # Parser-specific configuration
└── types/
    └── parser-types.ts             # TypeScript type definitions

2. TypeScript Interface Design

Parser Interface (src/lib/parser/core/interfaces.ts)

// Core interfaces for language parsers
/**
 * Position of a construct (declaration or comment) inside a source file.
 */
export interface SourceLocation {
  /** Path of the file that contains the construct. */
  filePath: string;
  /** Line on which the construct starts (the C++ parser in this plan stores `index + 1`, i.e. 1-based). */
  line: number;
  /** Column on which the construct starts. */
  column: number;
  /** Line on which the construct ends, when known. */
  endLine?: number;
  /** Column on which the construct ends, when known. */
  endColumn?: number;
}

/**
 * A documentation comment after parsing, split into its summary, body and tags.
 */
export interface DocumentationComment {
  /** Comment flavour; `'unknown'` is what the C++ parser uses for "no comment found". */
  type: 'doxline' | 'doxyblock' | 'javadoc' | 'unknown';
  /** The comment text exactly as it appeared in the source, markers included. */
  rawContent: string;
  /** Short summary of the documented entity; empty string when there is none. */
  brief: string;
  /** Remaining free-text description after the brief; empty string when there is none. */
  detailed: string;
  /** Tags found in the comment, in the order the parser emitted them. */
  tags: DocTag[];
  /** Where the comment itself is located. */
  location: SourceLocation;
}

/**
 * A single `@tag value` entry extracted from a documentation comment.
 */
export interface DocTag {
  /** Tag name without the leading `@` (for example `param`). */
  name: string;
  /** Text that followed the tag name; empty string when the tag had no value. */
  value: string;
}

/**
 * Language-neutral description of one documented code element.
 *
 * Parsers return these nodes and the documentation generator renders them.
 */
export interface ASTNode {
  /** Kind of code element this node represents. */
  type: 'namespace' | 'class' | 'struct' | 'function' | 'method' | 
        'variable' | 'enum' | 'enum_value' | 'template' | 'module';
  /** Identifier of the element as written in the source. */
  name: string;
  /** Documentation attached to the element (an empty comment object when none was found). */
  documentation: DocumentationComment;
  /** Where the element is declared. */
  location: SourceLocation;
  /** Nested elements, e.g. the members of a class. */
  children: ASTNode[];
  // Escape hatch for per-kind extras; the code in this plan sets `signature` on
  // functions and `isStruct` on classes through it. Values are untyped (`any`).
  [key: string]: any; // Allow type-specific properties
}

/**
 * Contract every pluggable language parser implements.
 */
export interface ILanguageParser {
  /** Returns the language identifier of this parser (the C++ parser returns `'cpp'`). */
  getLanguage(): string;
  /** Returns the file extensions this parser handles, each with its leading dot. */
  getFileExtensions(): string[];
  /** Reports whether this parser is able to handle the given file. */
  canParse(filePath: string): boolean;
  /** Reads and parses one file, resolving to the nodes found in it. */
  parseFile(filePath: string): Promise<ASTNode[]>;
}

/**
 * Settings consumed by the Markdown documentation generator.
 *
 * NOTE(review): the generator shown in this plan reads only `outputDirectory`,
 * `indexTitle`, `generatorName` and `generateIndex`; the remaining fields are
 * declared but not consulted there.
 */
export interface DocumentationConfig {
  /** Root directory all generated Markdown files are written under. */
  outputDirectory: string;
  /** Text of the top-level heading of the main index page. */
  indexTitle: string;
  /** Value written to the `generator:` front-matter field of every page. */
  generatorName: string;
  /** Whether the main `index.md` is generated. */
  generateIndex: boolean;
  /** Whether per-module index pages should be generated. */
  generateModuleIndexes: boolean;
  /** Whether private members should be included in the output. */
  includePrivate: boolean;
  /** Whether pages should link back to the source. */
  includeSourceLinks: boolean;
  /** Root of the source tree; the service's default config sets it to the input path. */
  sourceRootPath?: string;
  /** Name of the output theme. */
  theme: 'material' | 'github' | 'default';
}

/**
 * Settings describing what to parse and how a parse run behaves.
 *
 * NOTE(review): no code shown in this plan reads this interface yet, so the
 * field descriptions below follow the field names only — confirm when wiring it up.
 */
export interface ParserConfig {
  /** Language identifiers to parse. */
  languages: string[];
  /** Patterns selecting files to include. */
  includePatterns: string[];
  /** Patterns selecting files to exclude. */
  excludePatterns: string[];
  /** Destination path for generated output. */
  outputPath: string;
  /** Whether to keep running and react to file changes. */
  watchMode: boolean;
  /** Whether to restrict work to changed files. */
  incremental: boolean;
}

Documentation Generator (src/lib/parser/core/documentation-generator.ts)

import { ASTNode, DocumentationConfig, SourceLocation } from './interfaces';
import { FileUtils } from '../utils/file-utils';
import { MarkdownUtils } from '../utils/markdown-utils';

/**
 * Renders parsed AST nodes into a tree of Markdown files.
 *
 * Output layout, relative to `config.outputDirectory`:
 *   - `index.md`                 main index (only when `config.generateIndex` is set)
 *   - `<module>/index.md`        one index per module
 *   - `<module>/<name>.md`       one page per top-level class, struct or function
 *
 * A node's module is its own name for namespaces, otherwise the base name of
 * the file it was declared in (extension removed).
 *
 * NOTE(review): `generateModuleIndexes`, `includePrivate`, `includeSourceLinks`,
 * `sourceRootPath` and `theme` are not consulted by this class.
 */
export class DocumentationGenerator {
  private config: DocumentationConfig;
  // Neither field is read or written by any method of this class yet.
  private moduleStack: string[] = [];
  private modules: ModuleInfo[] = [];

  /**
   * @param config Output settings; see the class comment for the fields that are used.
   */
  constructor(config: DocumentationConfig) {
    this.config = config;
  }

  /**
   * Writes the complete documentation tree for the given nodes.
   *
   * @param nodes Top-level nodes from all parsed files.
   */
  async generate(nodes: ASTNode[]): Promise<void> {
    // Create output directory structure
    await this.createDirectoryStructure();

    // Generate main index
    if (this.config.generateIndex) {
      await this.generateIndex(nodes);
    }

    // Generate module-specific documentation (this also creates each module
    // directory, which the per-node pages below are written into)
    await this.generateModuleDocumentation(nodes);

    // Generate individual node documentation
    await this.generateNodeFiles(nodes);
  }

  /** Makes sure the output root exists. */
  private async createDirectoryStructure(): Promise<void> {
    await FileUtils.ensureDirectory(this.config.outputDirectory);
  }

  /** Writes `<outputDirectory>/index.md`. */
  private async generateIndex(nodes: ASTNode[]): Promise<void> {
    const content = await this.generateIndexContent(nodes);
    const indexPath = `${this.config.outputDirectory}/index.md`;
    await FileUtils.writeFile(indexPath, content);
  }

  /** Builds the main index: page header followed by one section per module. */
  private async generateIndexContent(nodes: ASTNode[]): Promise<string> {
    const modules = this.organizeByModule(nodes);
    let content = this.pageHeader(this.config.indexTitle);

    for (const [moduleName, moduleNodes] of modules.entries()) {
      content += this.generateModuleSection(moduleName, moduleNodes);
    }

    return content;
  }

  /** Groups nodes by module name, preserving first-seen module order. */
  private organizeByModule(nodes: ASTNode[]): Map<string, ASTNode[]> {
    const modules = new Map<string, ASTNode[]>();

    for (const node of nodes) {
      const moduleName = this.extractModuleName(node);
      const bucket = modules.get(moduleName);
      if (bucket) {
        bucket.push(node);
      } else {
        modules.set(moduleName, [node]);
      }
    }

    return modules;
  }

  /**
   * Returns the module a node belongs to: the node's own name for namespaces,
   * otherwise the declaring file's base name without its extension.
   *
   * Both `/` and `\` are accepted as path separators and any final extension
   * is removed, so the result does not depend on the platform or on the
   * language of the source file.
   */
  private extractModuleName(node: ASTNode): string {
    if (node.type === 'namespace') {
      return node.name;
    }

    const fileName = node.location.filePath.split(/[\\/]/).pop() ?? '';
    const withoutExtension = fileName.replace(/\.[^.]+$/, '');

    // A dot-file such as ".h" would otherwise produce an empty module name.
    return withoutExtension || fileName;
  }

  /** Builds the main-index entry for one module: a definition-list item plus member tables. */
  private generateModuleSection(moduleName: string, nodes: ASTNode[]): string {
    const heading =
      `:material-package: [${moduleName}](${moduleName}/index.md)\n` +
      `:   ${this.generateModuleDescription(nodes)}\n\n`;

    // Links are relative to the output root, hence the module prefix.
    return heading + this.generateMemberTables(nodes, `${moduleName}/`);
  }

  /** Writes the index page of every module. */
  private async generateModuleDocumentation(nodes: ASTNode[]): Promise<void> {
    const modules = this.organizeByModule(nodes);

    for (const [moduleName, moduleNodes] of modules.entries()) {
      await this.generateModuleFile(moduleName, moduleNodes);
    }
  }

  /** Creates `<outputDirectory>/<module>/` and writes its `index.md`. */
  private async generateModuleFile(moduleName: string, nodes: ASTNode[]): Promise<void> {
    const modulePath = `${this.config.outputDirectory}/${moduleName}`;
    await FileUtils.ensureDirectory(modulePath);

    // Links are relative to the module directory, so no prefix is needed.
    const content = this.pageHeader(moduleName) + this.generateMemberTables(nodes, '');

    await FileUtils.writeFile(`${modulePath}/index.md`, content);
  }

  /** Writes one page per top-level class, struct, function or method node. */
  private async generateNodeFiles(nodes: ASTNode[]): Promise<void> {
    for (const node of nodes) {
      if (this.isTypeNode(node)) {
        await this.generateClassFile(node);
      } else if (this.isFunctionNode(node)) {
        await this.generateFunctionFile(node);
      }
    }
  }

  /** Writes the page of a class or struct, including a table and details of its function children. */
  private async generateClassFile(node: ASTNode): Promise<void> {
    const moduleName = this.extractModuleName(node);
    const filePath = `${this.config.outputDirectory}/${moduleName}/${node.name}.md`;

    let content =
      this.pageHeader(node.name) +
      `**${node.type} ${node.name}**\n\n` +
      `${this.formatDocumentation(node.documentation)}\n\n`;

    // Add functions
    const methods = node.children.filter(child => this.isFunctionNode(child));
    if (methods.length > 0) {
      content += '## Functions\n\n';
      content += this.generateFunctionTable(methods);
      content += '\n## Function Details\n\n';
      content += this.generateFunctionDetails(methods);
    }

    await FileUtils.writeFile(filePath, content);
  }

  /** Writes the page of a free function: documentation, signature and source location. */
  private async generateFunctionFile(node: ASTNode): Promise<void> {
    const moduleName = this.extractModuleName(node);
    const filePath = `${this.config.outputDirectory}/${moduleName}/${node.name}.md`;
    const fence = '```';

    const content =
      this.pageHeader(node.name) +
      `**function ${node.name}**\n\n` +
      `${this.formatDocumentation(node.documentation)}\n\n` +
      '## Signature\n\n' +
      `${fence}${this.getLanguageForSyntax()}\n` +
      `${node.signature || node.name}\n` +
      `${fence}\n\n` +
      '## Source Location\n\n' +
      `File: \`${node.location.filePath}\`:${node.location.line}\n\n`;

    await FileUtils.writeFile(filePath, content);
  }

  /** Builds a name/description table whose links point at in-page anchors. */
  private generateFunctionTable(functions: ASTNode[]): string {
    return this.generateLinkTable(functions, func => `#${this.generateAnchor(func.name)}`);
  }

  /** Builds one anchored "function" admonition per function, with its documentation indented inside. */
  private generateFunctionDetails(functions: ASTNode[]): string {
    let details = '';

    for (const func of functions) {
      const anchor = this.generateAnchor(func.name);
      // The admonition title is a double-quoted string, so embedded quotes
      // (e.g. default string arguments) must be escaped.
      const title = this.formatFunctionSignature(func).replace(/"/g, '\\"');
      details += `### ${func.name}<a name="${anchor}"></a>\n`;
      details += `!!! function "${title}"\n\n`;
      details += `    ${this.formatDocumentation(func.documentation).replace(/\n/g, '\n    ')}\n\n`;
    }

    return details;
  }

  /**
   * Formats a comment as "@brief <brief>" followed by the detailed text.
   *
   * The parameter is typed through `ASTNode['documentation']` because this
   * file's import list does not include `DocumentationComment`.
   */
  private formatDocumentation(doc: ASTNode['documentation']): string {
    if (!doc.brief && !doc.detailed) return '';

    let formatted = '';
    if (doc.brief) formatted += `@brief ${doc.brief}\n\n`;
    if (doc.detailed) formatted += `${doc.detailed}\n\n`;

    return formatted;
  }

  /** Lower-cases the name and replaces every character outside `a-z0-9` with `-`. */
  private generateAnchor(name: string): string {
    return name.toLowerCase().replace(/[^a-z0-9]/g, '-');
  }

  private getLanguageForSyntax(): string {
    return 'cpp'; // Will be dynamic based on parser language
  }

  /** Returns the node's recorded signature, or `name()` when none was recorded. */
  private formatFunctionSignature(func: ASTNode): string {
    return func.signature || `${func.name}()`;
  }

  /** Joins the first two non-empty briefs of a module's nodes, or returns a generic sentence. */
  private generateModuleDescription(nodes: ASTNode[]): string {
    const descriptions = nodes
      .map(n => n.documentation.brief)
      .filter(Boolean)
      .slice(0, 2);

    return descriptions.join(' ') || 'Module containing various components and utilities.';
  }

  /** Front matter plus the level-1 heading shared by every generated page. */
  private pageHeader(title: string): string {
    return `---\ngenerator: ${this.config.generatorName}\n---\n\n# ${title}\n\n`;
  }

  private isTypeNode(node: ASTNode): boolean {
    return node.type === 'class' || node.type === 'struct';
  }

  private isFunctionNode(node: ASTNode): boolean {
    return node.type === 'function' || node.type === 'method';
  }

  /**
   * Builds the "Types" and "Functions" tables for a set of sibling nodes.
   *
   * @param linkPrefix Prepended to `<name>.md` so the same tables work from
   *                   the main index (`<module>/`) and a module index (``).
   */
  private generateMemberTables(nodes: ASTNode[], linkPrefix: string): string {
    const types = nodes.filter(n => this.isTypeNode(n));
    const functions = nodes.filter(n => this.isFunctionNode(n));
    const linkFor = (node: ASTNode): string => `${linkPrefix}${node.name}.md`;

    let tables = '';
    if (types.length > 0) {
      tables += `## Types\n\n${this.generateLinkTable(types, linkFor)}\n`;
    }
    // Function pages are written by generateNodeFiles(); list them so they are reachable.
    if (functions.length > 0) {
      tables += `## Functions\n\n${this.generateLinkTable(functions, linkFor)}\n`;
    }
    return tables;
  }

  /** Builds a two-column Markdown table of linked names and their briefs. */
  private generateLinkTable(nodes: ASTNode[], linkFor: (node: ASTNode) => string): string {
    let table = '| Name | Description |\n| ---- | ----------- |\n';

    for (const node of nodes) {
      const desc = this.escapeTableCell(node.documentation?.brief ?? '');
      table += `| [${node.name}](${linkFor(node)}) | ${desc} |\n`;
    }

    return table;
  }

  /** Makes free text safe inside a table cell: line breaks become spaces and `|` is escaped. */
  private escapeTableCell(text: string): string {
    return text.replace(/\r?\n/g, ' ').replace(/\|/g, '\\|');
  }
}

/**
 * Summary record for one documentation module.
 *
 * NOTE(review): only referenced as the element type of the generator's
 * `modules` field, which no method populates yet; field meanings below follow
 * the names and should be confirmed once the record is actually filled in.
 */
interface ModuleInfo {
  /** Module name. */
  name: string;
  /** Path associated with the module. */
  path: string;
  /** Names of the classes in the module. */
  classes: string[];
  /** Names of the functions in the module. */
  functions: string[];
  /** Names of nested modules. */
  submodules: string[];
}

Implementation Phases

Phase 1: Foundation (Week 1)

  1. Project Integration

    • Update package.json with new dependencies
    • Create parser directory structure under src/lib/parser/
    • Add TypeScript types and interfaces
    • Configure build system for new modules
  2. Core Interfaces & Types

    • Define parser interface (src/lib/parser/core/interfaces.ts)
    • Create AST node definitions (src/lib/parser/ast/nodes.ts)
    • Implement base parser class (src/lib/parser/parsers/base-parser.ts)
    • Add configuration management (src/lib/parser/core/config.ts)
  3. Utility Components

    • File system utilities (src/lib/parser/utils/file-utils.ts)
    • String processing helpers (src/lib/parser/utils/string-utils.ts)
    • Markdown formatting utilities (src/lib/parser/utils/markdown-utils.ts)

Phase 2: C++ Parser (Week 2)

  1. C++ Comment Parser

    • Detect and extract documentation comments (/** */, ///, //!)
    • Parse common documentation tags (@brief, @param, @return, etc.)
    • Handle multi-line comments with proper formatting
  2. C++ AST Builder

    • Parse class/struct definitions using regex patterns
    • Extract function/method signatures
    • Identify inheritance relationships
    • Handle templates and namespaces
  3. Integration Testing

    • Test with sample C++ files
    • Verify AST structure correctness
    • Validate comment extraction

Phase 3: CLI Integration (Week 3)

  1. CLI Command Integration

    • Add parser commands to existing CLI (src/cli/parser-commands.ts)
    • Integrate with existing commander.js structure
    • Add configuration options and validation
  2. New CLI Commands

    docs-rag parse --languages cpp --input ./src --output ./docs
    docs-rag parse --input ./src --output ./docs --config parser-config.json
    docs-rag parse --input ./src --output ./docs --watch --incremental
    docs-rag parse-list-languages
    
  3. Configuration System

    • Parser-specific configuration (src/config/parser-config.ts)
    • Integration with existing configuration pattern
    • Support for project-specific parser settings

Phase 4: Documentation Generation (Week 4)

  1. Markdown Output Generator

    • Replicate the structure from /data/tui/docs
    • Generate index files with package references
    • Create detailed class documentation pages
    • Implement function detail sections
  2. Output Structure

    docs/
    ├── index.md                    # Main index
    ├── module1/
    │   ├── index.md               # Module index
    │   ├── Class1.md
    │   ├── Class2.md
    │   └── SubModule/
    │       └── index.md
    └── module2/
       └── ...
    
  3. Integration with DocumentService

    • Extend DocumentService to handle parser-generated markdown
    • Add parser output to existing RAG functionality
    • Enable search across generated documentation

Phase 5: Advanced Features (Week 5-6)

  1. Enhanced Comment Parsing

    • Support for custom comment tags
    • Example code block extraction
    • TODO/FIXME note extraction
    • Version change detection
  2. Quality Improvements

    • Cross-reference generation
    • Inheritance diagrams (ASCII)
    • Namespace/module organization
    • Search-friendly indexing
  3. Performance Optimization

    • Parallel file processing using worker threads
    • Incremental parsing (only changed files)
    • Memory-efficient AST building

Phase 6: Extensibility Framework (Week 7-8)

  1. Plugin Architecture

    • Dynamic parser loading
    • Configuration-driven language selection
    • Custom output formatters
  2. Future Language Support

    • Create interfaces for Python parser
    • Design Java parser structure
    • Document extension guidelines
  3. Integration Enhancements

    • Integration with existing MCP server
    • Add parser commands to MCP interface
    • Enable programmatic parser usage via API

Detailed Implementation Plan

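Package.json Updates (the `//` annotations in the snippet below are explanatory only; package.json must be strict JSON, so omit them when applying the change)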

{
  "dependencies": {
    "@types/node": "^24.10.1",
    "commander": "^14.0.2",
    "fs-extra": "^11.3.2",
    "glob": "^13.0.0",
    "typescript": "^5.9.3",
    "zod": "^4.1.12",
    "chokidar": "^4.0.1",           // For watch mode
    "typescript-eslint-parser": "^6.0.0", // For future TS parsing
    "@babel/parser": "^7.23.0",      // For future JS parsing
    "acorn": "^8.11.0"               // For future JS parsing
  },
  "scripts": {
    "build": "tsc",
    "dev": "tsx src/cli/index.ts",
    "parse": "node dist/cli/index.js parse",
    "parse:dev": "tsx src/cli/index.ts parse"
  }
}

C++ Parser Implementation (src/lib/parser/parsers/cpp-parser.ts)

import { BaseParser } from './base-parser';
import { ILanguageParser, ASTNode, DocumentationComment } from '../core/interfaces';
import { FileUtils } from '../utils/file-utils';

/**
 * Line- and regex-based documentation parser for C and C++ sources.
 *
 * The parser makes two passes over a file:
 *   1. `extractComments` collects documentation comments (`/** ... *​/` blocks
 *      and runs of `///` or `//!` lines) keyed by the line on which each
 *      comment ENDS, so a declaration can look up the comment directly above it.
 *   2. `parseCodeElements` scans every line for class/struct and function
 *      declarations and attaches the preceding comment, if any.
 *
 * Known limitations (the patterns are deliberately simple):
 *   - Declarations must fit on one line; return types containing `::`, `*`,
 *     `&` or template arguments are not recognised by the function pattern.
 *   - Class members are reported as top-level functions; `children` is always empty.
 *   - Comment markers inside string literals are not distinguished from real comments.
 */
export class CppParser extends BaseParser implements ILanguageParser {
  /** `class Name` / `struct Name` at the start of a line. */
  private static readonly CLASS_PATTERN = /^\s*(class|struct)\s+(\w+)/;
  /** `class Name;` — a forward declaration, which carries no documentation of its own. */
  private static readonly FORWARD_DECLARATION_PATTERN = /^\s*(?:class|struct)\s+\w+\s*;/;
  /** Optional leading words (group 1), a name (group 2), a parameter list, then `{` or `;`. */
  private static readonly FUNCTION_PATTERN = /^\s*((?:\w+\s+)*)(\w+)\s*\([^)]*\)\s*(?:\{|;)/;
  /** Words that may directly precede `(` but never name a function. */
  private static readonly NON_FUNCTION_NAMES = new Set([
    'if', 'for', 'while', 'switch', 'catch', 'return', 'sizeof', 'alignof', 'decltype', 'static_assert'
  ]);
  /** Leading words that mark the line as a statement rather than a declaration. */
  private static readonly STATEMENT_PREFIXES = new Set([
    'return', 'else', 'throw', 'new', 'delete', 'goto', 'case', 'co_return', 'co_await', 'co_yield'
  ]);
  /**
   * `///` or `//!` documentation line. `////` separators and the `///<` / `//!<`
   * forms (which document the PRECEDING member) are excluded.
   */
  private static readonly DOC_LINE_PATTERN = /^\/\/[\/!](?![\/<])/;

  getLanguage(): string {
    return 'cpp';
  }

  getFileExtensions(): string[] {
    return ['.cpp', '.h', '.hpp', '.cxx', '.cc', '.c'];
  }

  /**
   * Reports whether the file's extension is one of `getFileExtensions()`.
   * The comparison ignores case; a path without a dot is never parseable.
   */
  canParse(filePath: string): boolean {
    const dot = filePath.lastIndexOf('.');
    if (dot === -1) return false;
    return this.getFileExtensions().includes(filePath.substring(dot).toLowerCase());
  }

  /**
   * Reads a file and returns the documented elements found in it.
   *
   * @param filePath Path handed to `FileUtils.readFile` and recorded in every node location.
   * @returns Class/struct and function nodes in source order.
   */
  async parseFile(filePath: string): Promise<ASTNode[]> {
    const content = await FileUtils.readFile(filePath);
    // Accept both LF and CRLF line endings.
    const lines = content.split(/\r?\n/);

    // Extract comments first
    const comments = this.extractComments(lines, filePath);

    // Parse code elements
    return this.parseCodeElements(lines, comments, filePath);
  }

  /**
   * Collects documentation comments.
   *
   * @returns Map from the 1-based number of the comment's LAST line to the
   *          parsed comment. A declaration at 0-based index `d` therefore finds
   *          the comment ending on the line directly above it under key `d`.
   */
  private extractComments(lines: string[], filePath = ''): Map<number, DocumentationComment> {
    const comments = new Map<number, DocumentationComment>();

    let i = 0;
    while (i < lines.length) {
      const trimmed = lines[i].trim();
      let end = -1;
      let type: 'doxline' | 'doxyblock' = 'doxyblock';

      if (this.isDocBlockStart(trimmed)) {
        end = this.findBlockCommentEnd(lines, i);
      } else if (CppParser.DOC_LINE_PATTERN.test(trimmed)) {
        // Consecutive doc lines form a single comment.
        type = 'doxline';
        end = i;
        while (end + 1 < lines.length && CppParser.DOC_LINE_PATTERN.test(lines[end + 1].trim())) {
          end++;
        }
      }

      if (end === -1) {
        i++;
        continue;
      }

      const raw = lines.slice(i, end + 1).join('\n');
      comments.set(end + 1, this.parseCommentContent(raw, type, i + 1, filePath));
      // Resume on the line right after the comment.
      i = end + 1;
    }

    return comments;
  }

  /** `/**` opens a documentation block; `/**​/` is an empty plain comment and `/**<` documents the preceding member. */
  private isDocBlockStart(trimmed: string): boolean {
    return trimmed.startsWith('/**') && !trimmed.startsWith('/**/') && !trimmed.startsWith('/**<');
  }

  /** Returns the index of the line containing the closing `*​/`, or the last line if the block is unterminated. */
  private findBlockCommentEnd(lines: string[], startIndex: number): number {
    // On the opening line, search after the `/**` so its own asterisks cannot
    // be mistaken for the terminator.
    const opener = lines[startIndex].indexOf('/**');
    if (lines[startIndex].indexOf('*/', opener + 3) !== -1) {
      return startIndex;
    }
    for (let i = startIndex + 1; i < lines.length; i++) {
      if (lines[i].includes('*/')) return i;
    }
    return lines.length - 1;
  }

  /**
   * Turns raw comment text into a structured comment.
   *
   * @param rawContent Comment text including its markers.
   * @param type       Flavour recorded on the result.
   * @param line       1-based line on which the comment starts.
   * @param filePath   File recorded in the comment's location.
   */
  private parseCommentContent(
    rawContent: string,
    type: 'doxline' | 'doxyblock',
    line: number,
    filePath = ''
  ): DocumentationComment {
    // Strip markers only where they act as markers (start/end of a line), so
    // the same character sequences inside the text are left alone.
    const cleaned = rawContent
      .split(/\r?\n/)
      .map(rawLine =>
        rawLine
          .trim()
          .replace(/^\/\*\*+\s?/, '')    // block opener
          .replace(/\*+\/$/, '')         // block terminator
          .replace(/^\/\/[\/!]\s?/, '')  // `///` or `//!`
          .replace(/^\*+\s?/, '')        // leading `*` gutter
          .trimEnd()
      )
      .join('\n')
      .trim();

    // A tag is `@name` at the start of the text or after whitespace, so an
    // `@` inside a word (e.g. an e-mail address) is not a tag.
    const tagPattern = /(^|\s)@(\w+)/g;
    const marks: { name: string; start: number; valueStart: number }[] = [];
    let match: RegExpExecArray | null;
    while ((match = tagPattern.exec(cleaned)) !== null) {
      marks.push({
        name: match[2],
        start: match.index + match[1].length,
        valueStart: tagPattern.lastIndex
      });
    }

    // Each tag's value runs up to the next tag (or the end of the comment).
    const tags: DocumentationComment['tags'] = marks.map((mark, k) => ({
      name: mark.name,
      value: cleaned
        .slice(mark.valueStart, k + 1 < marks.length ? marks[k + 1].start : cleaned.length)
        .trim()
    }));

    const freeText = (marks.length > 0 ? cleaned.slice(0, marks[0].start) : cleaned).trim();

    let brief: string;
    let detailed: string;
    const briefTag = tags.find(tag => tag.name === 'brief');
    if (briefTag) {
      // An explicit @brief wins: its first paragraph is the brief, the rest
      // joins the free text as the detailed description.
      const [first, ...rest] = briefTag.value.split(/\n\s*\n/);
      brief = first.replace(/\s+/g, ' ').trim();
      detailed = [freeText, rest.join('\n\n').trim()].filter(Boolean).join('\n\n');
    } else {
      ({ brief, detailed } = this.splitBrief(freeText));
    }

    return {
      type,
      rawContent,
      brief,
      detailed,
      tags,
      location: { filePath, line, column: 0 }
    };
  }

  /**
   * Splits free text into a one-line brief and the remaining detail.
   *
   * The brief is the first sentence of the first paragraph when that sentence
   * is at most 100 characters long; otherwise it is the first line. A sentence
   * ends at `.`, `!` or `?` followed by whitespace or the end of the paragraph,
   * so dots inside tokens such as `v1.2` do not end it.
   */
  private splitBrief(text: string): { brief: string; detailed: string } {
    if (!text) return { brief: '', detailed: '' };

    const paragraphEnd = text.search(/\n\s*\n/);
    const firstParagraph = paragraphEnd === -1 ? text : text.slice(0, paragraphEnd);
    const sentence = /^[\s\S]*?[.!?](?=\s|$)/.exec(firstParagraph);

    let cut: number;
    if (sentence && sentence[0].length <= 100) {
      cut = sentence[0].length;
    } else {
      const newline = text.indexOf('\n');
      cut = newline === -1 ? text.length : newline;
    }

    return {
      // Collapse line breaks so the brief is always a single line.
      brief: text.slice(0, cut).replace(/\s+/g, ' ').trim(),
      detailed: text.slice(cut).trim()
    };
  }

  /** Extracts the brief from free text; see `splitBrief` for the rules. */
  private extractBrief(text: string): string {
    return this.splitBrief(text).brief;
  }

  /** Number of lines in a piece of comment text. */
  private getCommentHeight(content: string): number {
    return content.split('\n').length;
  }

  /** Scans every line for a function or class/struct declaration. */
  private parseCodeElements(
    lines: string[],
    comments: Map<number, DocumentationComment>,
    filePath: string
  ): ASTNode[] {
    const nodes: ASTNode[] = [];

    for (let i = 0; i < lines.length; i++) {
      // Functions first: `struct tm now();` declares a function, not a struct.
      const node =
        this.parseFunction(lines, i, comments, filePath) ??
        this.parseClass(lines, i, comments, filePath);
      if (node) {
        nodes.push(node);
      }
    }

    return nodes;
  }

  /** Builds a class/struct node for the line at `startIndex`, or `null` if it is not a definition. */
  private parseClass(
    lines: string[],
    startIndex: number,
    comments: Map<number, DocumentationComment>,
    filePath: string
  ): ASTNode | null {
    const line = lines[startIndex];
    const match = CppParser.CLASS_PATTERN.exec(line);

    if (!match || CppParser.FORWARD_DECLARATION_PATTERN.test(line)) return null;

    const [, type, name] = match;

    return {
      type: type as 'class' | 'struct',
      name,
      documentation:
        this.findPrecedingComment(lines, startIndex, comments) ?? this.emptyDocumentation(),
      location: { filePath, line: startIndex + 1, column: 0 },
      children: [],
      isStruct: type === 'struct'
    };
  }

  /** Builds a function node for the line at `startIndex`, or `null` if it is not a declaration. */
  private parseFunction(
    lines: string[],
    startIndex: number,
    comments: Map<number, DocumentationComment>,
    filePath: string
  ): ASTNode | null {
    const line = lines[startIndex];
    const match = CppParser.FUNCTION_PATTERN.exec(line);

    if (!match) return null;

    const [, prefix, name] = match;

    // Reject control flow (`if (...) {`) and statements (`return f();`).
    if (CppParser.NON_FUNCTION_NAMES.has(name)) return null;
    const leadingWords = prefix.trim().split(/\s+/).filter(Boolean);
    if (leadingWords.some(word => CppParser.STATEMENT_PREFIXES.has(word))) return null;

    return {
      type: 'function',
      name,
      documentation:
        this.findPrecedingComment(lines, startIndex, comments) ?? this.emptyDocumentation(),
      location: { filePath, line: startIndex + 1, column: 0 },
      children: [],
      signature: line.trim()
    };
  }

  /**
   * Finds the comment that documents the declaration at `startIndex`.
   *
   * The comment must end on the line directly above, or one line further up
   * when the line in between is blank or a `template<...>` header. Any other
   * intervening line means the comment belongs to that line instead.
   */
  private findPrecedingComment(
    lines: string[],
    startIndex: number,
    comments: Map<number, DocumentationComment>
  ): DocumentationComment | undefined {
    const direct = comments.get(startIndex);
    if (direct || startIndex < 1) return direct;

    const between = lines[startIndex - 1].trim();
    if (between === '' || /^template\s*</.test(between)) {
      return comments.get(startIndex - 1);
    }
    return undefined;
  }

  /** Placeholder used when a declaration has no documentation comment. */
  private emptyDocumentation(): DocumentationComment {
    return {
      type: 'unknown',
      rawContent: '',
      brief: '',
      detailed: '',
      tags: [],
      location: { filePath: '', line: 0, column: 0 }
    };
  }
}

CLI Integration (src/cli/parser-commands.ts)

#!/usr/bin/env node

import { Command } from 'commander';
import { ParseService } from '../services/parseService';
import { existsSync } from 'fs';
import { resolve } from 'path';

// Single service instance shared by every command registered below; it is
// constructed when this module is first imported.
const parseService = new ParseService();

/**
 * Splits a comma-separated `--languages` value into language identifiers.
 *
 * Entries are trimmed and lower-cased; empty entries (from `cpp,` or `a,,b`)
 * and duplicates are dropped so the service never receives a blank language.
 */
function parseLanguageList(raw: string): string[] {
  const names = raw
    .split(',')
    .map(name => name.trim().toLowerCase())
    .filter(name => name.length > 0);
  return [...new Set(names)];
}

/** Terminates the process with exit code 1 when the given input path does not exist. */
function requireExistingInput(inputPath: string): void {
  if (!existsSync(inputPath)) {
    console.error(`Error: Input directory '${inputPath}' does not exist`);
    process.exit(1);
  }
}

/**
 * Registers the parser commands on an existing Commander program:
 *   - `parse`                 generate documentation (or list the work with `--dry-run`)
 *   - `parse-list-languages`  print the registered parsers and their extensions
 *   - `parse-validate`        check an input directory and optional config file
 *
 * The `parse` and `parse-validate` actions are async, so the caller should run
 * the program with `parseAsync()`.
 *
 * @param program Commander program the commands are added to.
 */
export const parserCommands = (program: Command) => {
  program
    .command('parse')
    .description('Generate documentation from source code comments')
    .requiredOption('-i, --input <path>', 'Input directory containing source files')
    .requiredOption('-o, --output <path>', 'Output directory for generated documentation')
    .option('-l, --languages <languages>', 'Comma-separated list of languages to parse', 'cpp')
    .option('-c, --config <config>', 'Configuration file path')
    .option('-w, --watch', 'Watch for file changes and regenerate', false)
    .option('--incremental', 'Only process changed files', false)
    .option('--include-private', 'Include private members', false)
    .option('--dry-run', 'Show what would be parsed without generating files', false)
    .action(async (options) => {
      requireExistingInput(options.input);

      try {
        const result = await parseService.generateDocumentation({
          inputPath: resolve(options.input),
          outputPath: resolve(options.output),
          languages: parseLanguageList(options.languages),
          configPath: options.config ? resolve(options.config) : undefined,
          watch: options.watch,
          incremental: options.incremental,
          includePrivate: options.includePrivate,
          dryRun: options.dryRun
        });

        if (options.dryRun) {
          console.log('Dry run results:');
          console.log(`  Files to process: ${result.filesToProcess.length}`);
          console.log(`  Estimated output files: ${result.estimatedFiles}`);
          result.filesToProcess.forEach(file => console.log(`    - ${file}`));
        } else {
          console.log(`Documentation generated successfully!`);
          console.log(`  Processed ${result.processedFiles} files`);
          console.log(`  Generated ${result.generatedFiles} documentation files`);
          console.log(`  Output directory: ${options.output}`);

          if (result.errors.length > 0) {
            console.log(`  Warnings/Errors: ${result.errors.length}`);
            result.errors.forEach(error => console.log(`    - ${error}`));
          }
        }
      } catch (error) {
        console.error('Error:', error);
        process.exit(1);
      }
    });

  program
    .command('parse-list-languages')
    .description('List all supported parser languages')
    .action(() => {
      const languages = parseService.getSupportedLanguages();
      console.log('Supported languages:');
      languages.forEach(lang => {
        console.log(`  ${lang.name}: ${lang.description}`);
        console.log(`    Extensions: ${lang.extensions.join(', ')}`);
        console.log();
      });
    });

  program
    .command('parse-validate')
    .description('Validate configuration and source files')
    .requiredOption('-i, --input <path>', 'Input directory to validate')
    .option('-c, --config <config>', 'Configuration file to validate')
    .action(async (options) => {
      requireExistingInput(options.input);

      try {
        const validation = await parseService.validateConfiguration({
          inputPath: resolve(options.input),
          configPath: options.config ? resolve(options.config) : undefined
        });

        console.log('Validation results:');
        console.log(`  Input directory: ${validation.inputValid ? '✓' : '✗'}`);
        console.log(`  Configuration: ${validation.configValid ? '✓' : '✗'}`);
        console.log(`  Supported files: ${validation.supportedFiles.length}`);

        if (validation.unsupportedFiles.length > 0) {
          console.log(`  Unsupported files: ${validation.unsupportedFiles.length}`);
          validation.unsupportedFiles.forEach(file => console.log(`    - ${file}`));
        }

        if (validation.warnings.length > 0) {
          console.log(`  Warnings: ${validation.warnings.length}`);
          validation.warnings.forEach(warning => console.log(`    - ${warning}`));
        }
      } catch (error) {
        console.error('Error:', error);
        process.exit(1);
      }
    });
};

Parse Service (src/services/parseService.ts)

import { 
  ILanguageParser, 
  DocumentationConfig, 
  ParserConfig, 
  ASTNode 
} from '../lib/parser/core/interfaces';
import { CppParser } from '../lib/parser/parsers/cpp-parser';
import { DocumentationGenerator } from '../lib/parser/core/documentation-generator';
import { FileUtils } from '../lib/parser/utils/file-utils';
import { ConfigLoader } from '../lib/parser/core/config';
import { chokidar } from 'chokidar';
import { resolve, basename, extname } from 'path';

/**
 * Options for one documentation run, as assembled by the `parse` CLI command.
 */
interface ParseOptions {
  /** Directory that is searched for source files. */
  inputPath: string;
  /** Output directory used when the default configuration is built. */
  outputPath: string;
  /** Language identifiers to parse; files of other languages are skipped. */
  languages: string[];
  /** Configuration file to load instead of building the default configuration. */
  configPath?: string;
  /** Watch-mode flag. NOTE(review): not read by the service code in this plan. */
  watch?: boolean;
  /** Incremental-mode flag. NOTE(review): not read by the service code in this plan. */
  incremental?: boolean;
  /** Copied into `includePrivate` of the default configuration. */
  includePrivate?: boolean;
  /** When set, the run stops after listing the files that would be processed. */
  dryRun?: boolean;
}

/**
 * Outcome of a documentation run.
 */
interface ParseResult {
  /** Number of source files handled by the run (0 for a dry run). */
  processedFiles: number;
  /** Number of `.md` files found under the output directory after the run (0 for a dry run). */
  generatedFiles: number;
  /** Source files selected for the run. */
  filesToProcess: string[];
  /** Human-readable messages for files that could not be parsed. */
  errors: string[];
  /** Rough estimate of how many output files the selected sources produce. */
  estimatedFiles: number;
}

/**
 * Outcome of validating an input directory and an optional configuration file.
 */
interface ValidationResult {
  /** Whether the input path exists. */
  inputValid: boolean;
  /** Whether the configuration file (if one was given) loaded without error. */
  configValid: boolean;
  /** Files whose extension is handled by a registered parser. */
  supportedFiles: string[];
  /** All other files found under the input path. */
  unsupportedFiles: string[];
  /** Problems found during validation, e.g. configuration load errors. */
  warnings: string[];
}

/**
 * Display information about one registered parser.
 */
interface LanguageInfo {
  /** Key the parser is registered under (e.g. `cpp`). */
  name: string;
  /** One-line description shown by `parse-list-languages`. */
  description: string;
  /** File extensions the parser reports through `getFileExtensions()`. */
  extensions: string[];
}

/**
 * Orchestrates a documentation run: selects source files, dispatches each one
 * to the parser that handles it, and feeds the resulting nodes to the
 * documentation generator.
 *
 * NOTE(review): `ParseOptions.watch` and `ParseOptions.incremental` are accepted
 * but not acted on here, and when `configPath` is given the loaded configuration
 * is used as-is (the CLI's output path and `includePrivate` are not merged in).
 */
export class ParseService {
  private parsers: Map<string, ILanguageParser> = new Map();
  private configLoader: ConfigLoader;

  constructor() {
    this.configLoader = new ConfigLoader();
    this.registerDefaultParsers();
  }

  /** Registers the parsers that ship with the tool. */
  private registerDefaultParsers(): void {
    this.parsers.set('cpp', new CppParser());
    // Future: this.parsers.set('python', new PythonParser());
    // Future: this.parsers.set('java', new JavaParser());
  }

  /** Describes every registered parser for display purposes. */
  getSupportedLanguages(): LanguageInfo[] {
    return Array.from(this.parsers.entries()).map(([key, parser]) => ({
      name: key,
      description: `${key.toUpperCase()} source code parser`,
      extensions: parser.getFileExtensions()
    }));
  }

  /**
   * Parses the selected sources and writes their documentation.
   *
   * With `options.dryRun` nothing is parsed or written; the result only lists
   * the files that would be processed and an estimate of the output size.
   *
   * @returns Counts for the run. `errors` holds one message per requested
   *          language without a registered parser and one per file that failed
   *          to parse; such files are not counted in `processedFiles`.
   */
  async generateDocumentation(options: ParseOptions): Promise<ParseResult> {
    // Load configuration
    const config = await this.loadConfiguration(options);

    // A typo such as `--languages c++` would otherwise silently select no files.
    const errors: string[] = options.languages
      .filter(language => !this.isLanguageRegistered(language))
      .map(language => `Unsupported language: ${language}`);

    // Find source files
    const sourceFiles = await this.findSourceFiles(options.inputPath, options.languages);
    const estimatedFiles = this.estimateOutputFiles(sourceFiles);

    if (options.dryRun) {
      return {
        processedFiles: 0,
        generatedFiles: 0,
        filesToProcess: sourceFiles,
        errors,
        estimatedFiles
      };
    }

    // Parse files; one failing file must not abort the whole run.
    const nodes: ASTNode[] = [];
    let processedFiles = 0;

    for (const filePath of sourceFiles) {
      try {
        const fileNodes = await this.parseFile(filePath);
        nodes.push(...fileNodes);
        processedFiles++;
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        errors.push(`Failed to parse ${filePath}: ${reason}`);
      }
    }

    // Generate documentation
    const generator = new DocumentationGenerator(config);
    await generator.generate(nodes);

    const generatedFiles = await this.countGeneratedFiles(config.outputDirectory);

    return {
      processedFiles,
      generatedFiles,
      filesToProcess: sourceFiles,
      errors,
      estimatedFiles
    };
  }

  /**
   * Checks that the input path exists, that the configuration file (if any)
   * loads, and classifies the files under the input path by parser support.
   *
   * A missing input path yields empty file lists and a warning instead of
   * attempting to walk a directory that is not there.
   */
  async validateConfiguration(options: {
    inputPath: string;
    configPath?: string;
  }): Promise<ValidationResult> {
    const inputValid = await FileUtils.exists(options.inputPath);

    let configValid = true;
    const warnings: string[] = [];

    try {
      if (options.configPath) {
        await this.configLoader.loadConfig(options.configPath);
      }
    } catch (error) {
      configValid = false;
      warnings.push(`Configuration error: ${error}`);
    }

    if (!inputValid) {
      warnings.push(`Input path does not exist: ${options.inputPath}`);
    }

    const allFiles = inputValid ? await FileUtils.getAllFiles(options.inputPath) : [];

    // Single pass instead of filtering twice with a nested `includes`.
    const supportedFiles: string[] = [];
    const unsupportedFiles: string[] = [];
    for (const file of allFiles) {
      (this.getParserForFile(file) ? supportedFiles : unsupportedFiles).push(file);
    }

    return {
      inputValid,
      configValid,
      supportedFiles,
      unsupportedFiles,
      warnings
    };
  }

  /** Loads the configuration file when one is given, otherwise builds the default configuration. */
  private async loadConfiguration(options: ParseOptions): Promise<DocumentationConfig> {
    if (options.configPath) {
      return this.configLoader.loadConfig(options.configPath);
    }
    return this.createDefaultConfig(options);
  }

  /** Builds the configuration used when no configuration file is given. */
  private createDefaultConfig(options: ParseOptions): DocumentationConfig {
    return {
      outputDirectory: options.outputPath,
      indexTitle: 'Source Code Documentation',
      generatorName: 'docs-rag-parser',
      generateIndex: true,
      generateModuleIndexes: true,
      includePrivate: options.includePrivate ?? false,
      includeSourceLinks: true,
      sourceRootPath: options.inputPath,
      theme: 'material'
    };
  }

  /** Reports whether some registered parser identifies itself with the given language. */
  private isLanguageRegistered(language: string): boolean {
    return Array.from(this.parsers.values()).some(parser => parser.getLanguage() === language);
  }

  /** Lists the files under `inputPath` that belong to one of the requested languages. */
  private async findSourceFiles(inputPath: string, languages: string[]): Promise<string[]> {
    const allFiles = await FileUtils.getAllFiles(inputPath);

    return allFiles.filter(file => {
      const parser = this.getParserForFile(file);
      return parser !== null && languages.includes(parser.getLanguage());
    });
  }

  /**
   * Returns the first registered parser that accepts the file, or `null`.
   * The decision is delegated to each parser's own `canParse`, so extension
   * rules live in exactly one place.
   */
  private getParserForFile(filePath: string): ILanguageParser | null {
    for (const parser of this.parsers.values()) {
      if (parser.canParse(filePath)) {
        return parser;
      }
    }

    return null;
  }

  /**
   * Parses one file with the parser that accepts it.
   *
   * @throws Error when no registered parser accepts the file.
   */
  private async parseFile(filePath: string): Promise<ASTNode[]> {
    const parser = this.getParserForFile(filePath);
    if (!parser) {
      throw new Error(`No parser found for file: ${filePath}`);
    }

    return await parser.parseFile(filePath);
  }

  /**
   * Rough estimation: one module index per unique file base name plus an
   * assumed two documented items per source file.
   */
  private estimateOutputFiles(sourceFiles: string[]): number {
    // basename(file, extname(file)) strips whatever extension the file has,
    // so the estimate does not depend on a hard-coded extension list.
    const uniqueNames = new Set(sourceFiles.map(file => basename(file, extname(file))));
    return uniqueNames.size + sourceFiles.length * 2;
  }

  /** Counts the `.md` files under the output directory; returns 0 if it cannot be listed. */
  private async countGeneratedFiles(outputDirectory: string): Promise<number> {
    try {
      const files = await FileUtils.getAllFiles(outputDirectory);
      return files.filter(file => file.endsWith('.md')).length;
    } catch {
      return 0;
    }
  }
}

Update Main CLI (src/cli/index.ts)

#!/usr/bin/env node

import { Command } from 'commander';
import { DocumentService } from '../services/documentService';
import { existsSync } from 'fs';
import { parserCommands } from './parser-commands';

const program = new Command();
const documentService = new DocumentService();

program
  .name('docs-rag')
  .description('CLI tool for managing markdown documents in Qdrant and parsing source code')
  .version('1.0.0');

// Existing commands...
// ... (keep all existing commands)

// Add parser commands
parserCommands(program);

// The parser commands register async action handlers. Commander only awaits
// those when the program is run through parseAsync(); with parse() a rejected
// handler would surface as an unhandled promise rejection.
program.parseAsync(process.argv).catch((error: unknown) => {
  console.error('Error:', error);
  process.exit(1);
});

Success Criteria

  1. Functional Requirements

    • Parse C++ files with 95% comment extraction accuracy
    • Generate documentation matching /data/tui/docs format
    • Process projects with 1000+ files efficiently
    • Support incremental rebuilds
  2. Integration Requirements

    • Seamlessly integrate into existing CLI structure
    • Follow existing TypeScript patterns and conventions
    • Use existing dependencies where possible
    • Maintain compatibility with existing commands
  3. Quality Requirements

    • Produce syntactically correct Markdown
    • Generate cross-references between modules
    • Handle complex C++ constructs (templates, namespaces)
    • Maintain source code privacy (no code leakage in docs)

Future Enhancements

Short Term (Next 3 months)

  • Python language parser
  • Integration with DocumentService for RAG
  • HTML documentation output
  • Enhanced watch mode with better performance

Long Term (6-12 months)

  • Multi-language project support
  • Interactive documentation web UI
  • AI-powered comment generation suggestions
  • Integration with CI/CD pipelines
  • Documentation quality scoring

This updated plan fully integrates the source code parser into the existing TypeScript project while following the established patterns and conventions.