// Repository: fszontagh/stable-diffusion.cpp-rest
							#ifndef GENERATION_QUEUE_H
#define GENERATION_QUEUE_H

#include <chrono>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <future>
#include <map>
#include <memory>
#include <string>
#include <vector>

/**
 * @brief Kinds of work that can be placed on the queue
 *
 * Enumerator values are spelled out explicitly; they are the same values the
 * compiler would assign implicitly.
 */
enum class JobType {
    GENERATION = 0,  ///< Image generation job
    HASHING    = 1,  ///< Model hashing job
    CONVERSION = 2   ///< Model conversion/quantization job
};

/**
 * @brief Lifecycle states of a queued job
 *
 * Despite the name, this enum is also used as the status field of the hash
 * and conversion result structures in this header.
 */
enum class GenerationStatus {
    QUEUED        = 0,  ///< Waiting in the queue, not yet being processed
    MODEL_LOADING = 1,  ///< The model is being loaded
    PROCESSING    = 2,  ///< The request is currently being processed
    COMPLETED     = 3,  ///< The request finished successfully
    FAILED        = 4   ///< The request failed during processing
};

/**
 * @brief Sampling methods selectable for a generation (matching stable-diffusion.cpp)
 *
 * NOTE(review): the numeric values below are simply this enum's declaration
 * order. Whether they line up with the library's own enum values is not
 * visible from this header - confirm before casting between the two.
 */
enum class SamplingMethod {
    EULER         = 0,
    EULER_A       = 1,
    HEUN          = 2,
    DPM2          = 3,
    DPMPP2S_A     = 4,
    DPMPP2M       = 5,
    DPMPP2MV2     = 6,
    IPNDM         = 7,
    IPNDM_V       = 8,
    LCM           = 9,
    DDIM_TRAILING = 10,
    TCD           = 11,
    DEFAULT       = 12  ///< Use the model's default sampling method
};

/**
 * @brief Schedulers selectable for a generation (matching stable-diffusion.cpp)
 *
 * NOTE(review): the numeric values below are simply this enum's declaration
 * order. Whether they line up with the library's own enum values is not
 * visible from this header - confirm before casting between the two.
 */
enum class Scheduler {
    DISCRETE    = 0,
    KARRAS      = 1,
    EXPONENTIAL = 2,
    AYS         = 3,
    GITS        = 4,
    SMOOTHSTEP  = 5,
    SGM_UNIFORM = 6,
    SIMPLE      = 7,
    DEFAULT     = 8  ///< Use the model's default scheduler
};

/**
 * @brief Parameter bundle describing a single image generation job
 *
 * Holds the stable-diffusion.cpp options a request may set. Every scalar
 * member carries an in-class default, so a default-constructed request is
 * fully initialized; strings and vectors start out empty unless noted.
 */
struct GenerationRequest {
    /// Kind of operation a request asks for
    enum class RequestType {
        TEXT2IMG,
        IMG2IMG,
        CONTROLNET,
        UPSCALER,
        INPAINTING
    };

    // Identification and prompts
    std::string id;              ///< Unique identifier of the request
    std::string modelName;       ///< Name of the model to generate with
    std::string prompt;          ///< Text prompt driving the generation
    std::string negativePrompt;  ///< Optional negative prompt

    // Output image geometry
    int width{512};     ///< Width of the image
    int height{512};    ///< Height of the image
    int batchCount{1};  ///< How many images to generate

    // Sampler configuration
    int steps{20};                                          ///< Number of diffusion steps
    float cfgScale{7.5f};                                   ///< CFG scale
    SamplingMethod samplingMethod{SamplingMethod::DEFAULT};  ///< Sampling method
    Scheduler scheduler{Scheduler::DEFAULT};                ///< Scheduler

    // Seed handling
    std::string seed{"42"};  ///< Generation seed as text ("random" requests a random seed)

    // Explicit model component paths (advanced usage)
    std::string clipLPath;           ///< CLIP-L model path
    std::string clipGPath;           ///< CLIP-G model path
    std::string clipVisionPath;      ///< CLIP-Vision model path
    std::string t5xxlPath;           ///< T5-XXL model path
    std::string qwen2vlPath;         ///< Qwen2VL model path
    std::string qwen2vlVisionPath;   ///< Qwen2VL Vision model path
    std::string diffusionModelPath;  ///< Standalone diffusion model path
    std::string vaePath;             ///< VAE model path
    std::string taesdPath;           ///< TAESD model path
    std::string controlNetPath;      ///< ControlNet model path
    std::string embeddingDir;        ///< Embeddings directory
    std::string loraModelDir;        ///< LoRA model directory

    // Advanced tuning
    int clipSkip{-1};                     ///< CLIP skip layers
    std::vector<int> skipLayers{7, 8, 9};  ///< Layers skipped for SLG
    float strength{0.75f};                ///< img2img strength
    float controlStrength{0.9f};          ///< ControlNet strength

    // Performance switches
    int nThreads{-1};                 ///< Thread count (-1 selects automatically)
    bool offloadParamsToCpu{false};   ///< Offload parameters to the CPU
    bool clipOnCpu{false};            ///< Keep CLIP on the CPU
    bool vaeOnCpu{false};             ///< Keep the VAE on the CPU
    bool diffusionFlashAttn{false};   ///< Enable flash attention
    bool diffusionConvDirect{false};  ///< Enable direct convolution
    bool vaeConvDirect{false};        ///< Enable direct convolution in the VAE

    // Output location
    std::string outputPath;  ///< Where generated images are written

    // img2img input
    std::string initImagePath;           ///< Init image source (file path or base64)
    std::vector<uint8_t> initImageData;  ///< Decoded init image bytes
    int initImageWidth{0};               ///< Init image width
    int initImageHeight{0};              ///< Init image height
    int initImageChannels{3};            ///< Init image channel count

    // ControlNet input
    std::string controlImagePath;           ///< Control image path
    std::vector<uint8_t> controlImageData;  ///< Decoded control image bytes
    int controlImageWidth{0};               ///< Control image width
    int controlImageHeight{0};              ///< Control image height
    int controlImageChannels{3};            ///< Control image channel count

    // Upscaling
    std::string esrganPath;     ///< ESRGAN model path used for upscaling
    uint32_t upscaleFactor{4};  ///< Upscale factor (2 or 4)

    // Inpainting input
    std::string maskImagePath;           ///< Mask image path
    std::vector<uint8_t> maskImageData;  ///< Decoded mask image bytes
    int maskImageWidth{0};               ///< Mask image width
    int maskImageHeight{0};              ///< Mask image height
    int maskImageChannels{1};            ///< Mask image channel count (grayscale)

    // Selected operation; defaults to plain text-to-image
    RequestType requestType{RequestType::TEXT2IMG};

    /// Completion callback. NOTE(review): the meaning of the two string
    /// arguments is not defined in this header - confirm against the caller.
    std::function<void(const std::string&, const std::string&)> callback;
};

/**
 * @brief Outcome of a generation request
 *
 * All scalar members have in-class defaults so that a default-constructed
 * result never exposes indeterminate values. The defaults equal what
 * value-initialization (`GenerationResult{}`) already produced.
 */
struct GenerationResult {
    std::string requestId;                               ///< ID of the original request
    GenerationStatus status = GenerationStatus::QUEUED;  ///< Final status of the generation
    bool success            = false;                     ///< Whether generation was successful
    std::vector<std::string> imagePaths;                 ///< Paths to generated images (multiple for batch)
    std::string errorMessage;                            ///< Error message if generation failed
    uint64_t generationTime = 0;                         ///< Time taken for generation in milliseconds
    int64_t actualSeed      = 0;                         ///< Actual seed used for generation
};

/**
 * @brief Describes a model hashing job
 */
struct HashRequest {
    std::string id;                       ///< Unique identifier of the request
    std::vector<std::string> modelNames;  ///< Models to hash; an empty list means every unhashed model
    bool forceRehash{false};              ///< Recompute the hash even when one already exists
};

/**
 * @brief Outcome of a model hashing request
 *
 * All scalar members have in-class defaults so that a default-constructed
 * result never exposes indeterminate values. The defaults equal what
 * value-initialization (`HashResult{}`) already produced.
 */
struct HashResult {
    std::string requestId;                               ///< ID of the original request
    GenerationStatus status = GenerationStatus::QUEUED;  ///< Final status
    bool success            = false;                     ///< Whether hashing was successful
    std::map<std::string, std::string> modelHashes;      ///< Map of model names to their hashes
    std::string errorMessage;                            ///< Error message if hashing failed
    uint64_t hashingTime = 0;                            ///< Time taken for hashing in milliseconds
    int modelsHashed     = 0;                            ///< Number of models successfully hashed
};

/**
 * @brief Describes a model conversion/quantization job
 *
 * Every member is a string, so a default-constructed request is entirely
 * empty.
 */
struct ConversionRequest {
    std::string id;                ///< Unique identifier of the request
    std::string modelName;         ///< Name of the model being converted
    std::string modelPath;         ///< Full path of the source model file
    std::string outputPath;        ///< Destination path of the converted model
    std::string quantizationType;  ///< Target type: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K or q4_K
};

/**
 * @brief Outcome of a model conversion request
 *
 * All scalar members have in-class defaults so that a default-constructed
 * result never exposes indeterminate values. The defaults equal what
 * value-initialization (`ConversionResult{}`) already produced.
 */
struct ConversionResult {
    std::string requestId;                               ///< ID of the original request
    GenerationStatus status = GenerationStatus::QUEUED;  ///< Final status
    bool success            = false;                     ///< Whether conversion was successful
    std::string outputPath;                              ///< Path to converted model file
    std::string errorMessage;                            ///< Error message if conversion failed
    uint64_t conversionTime = 0;                         ///< Time taken for conversion in milliseconds
    std::string originalSize;                            ///< Original model file size
    std::string convertedSize;                           ///< Converted model file size
};

/**
 * @brief Job information for queue status
 *
 * Snapshot of one job: its lifecycle state and progress, plus a copy of the
 * parameters it was submitted with so that a generation can be repeated.
 *
 * Every scalar member has an in-class default, so a default-constructed
 * JobInfo is fully initialized. The defaults for type, status and position
 * equal what value-initialization (`JobInfo{}`) already produced.
 */
struct JobInfo {
    std::string id;                                                ///< Job ID
    JobType type            = JobType::GENERATION;                 ///< Job type (generation, hashing or conversion)
    GenerationStatus status = GenerationStatus::QUEUED;            ///< Current status
    std::string prompt;                                            ///< Job prompt (full text for generation, or model name for hashing)
    std::chrono::system_clock::time_point queuedTime;              ///< When job was queued
    std::chrono::system_clock::time_point startTime;               ///< When job started processing
    std::chrono::system_clock::time_point endTime;                 ///< When job completed/failed
    int position = 0;                                              ///< Position in queue (for queued jobs)
    std::vector<std::string> outputFiles;                          ///< Paths to generated output files
    std::string errorMessage;                                      ///< Error message if job failed
    float progress               = 0.0f;                           ///< Overall progress (0.0 to 1.0) - kept for backward compatibility
    float modelLoadProgress      = 0.0f;                           ///< Model loading progress (0.0 to 1.0)
    int currentStep              = 0;                              ///< Current step in generation
    int totalSteps               = 0;                              ///< Total steps in generation
    int64_t timeElapsed          = 0;                              ///< Time elapsed in milliseconds
    int64_t timeRemaining        = 0;                              ///< Estimated time remaining in milliseconds
    float speed                  = 0.0f;                           ///< Generation speed in steps per second
    bool firstGenerationCallback = true;                           ///< Flag to track if this is the first generation callback

    // Enhanced fields for repeatable generation
    std::string modelName;                                    ///< Name of the model used
    std::string modelHash;                                    ///< SHA256 hash of the model
    std::string modelPath;                                    ///< Full path to the model file
    std::string negativePrompt;                               ///< Negative prompt used
    int width                     = 512;                      ///< Image width
    int height                    = 512;                      ///< Image height
    int batchCount                = 1;                        ///< Number of images generated
    int steps                     = 20;                       ///< Number of diffusion steps
    float cfgScale                = 7.5f;                     ///< CFG scale
    SamplingMethod samplingMethod = SamplingMethod::DEFAULT;  ///< Sampling method used
    Scheduler scheduler           = Scheduler::DEFAULT;       ///< Scheduler used
    std::string seed;                                         ///< Seed used for generation
    int64_t actualSeed = 0;                                   ///< Actual seed that was used (for random seeds)
    std::string requestType;                                  ///< Request type: text2img, img2img, controlnet, upscaler, inpainting
    float strength           = 0.75f;                         ///< Strength for img2img
    float controlStrength    = 0.9f;                          ///< ControlNet strength
    int clipSkip             = -1;                            ///< CLIP skip layers
    int nThreads             = -1;                            ///< Number of threads used
    bool offloadParamsToCpu  = false;                         ///< Offload parameters to CPU setting
    bool clipOnCpu           = false;                         ///< Keep CLIP on CPU setting
    bool vaeOnCpu            = false;                         ///< Keep VAE on CPU setting
    bool diffusionFlashAttn  = false;                         ///< Use flash attention setting
    bool diffusionConvDirect = false;                         ///< Use direct convolution setting
    bool vaeConvDirect       = false;                         ///< Use direct VAE convolution setting
    uint64_t generationTime  = 0;                             ///< Total generation time in milliseconds

    // Image data for complex operations. Stored here as base64 text, unlike
    // GenerationRequest, which holds the decoded bytes.
    std::string initImageData;     ///< Init image data for img2img (base64)
    int initImageWidth    = 0;     ///< Init image width
    int initImageHeight   = 0;     ///< Init image height
    int initImageChannels = 3;     ///< Init image channels
    std::string controlImageData;  ///< Control image data for ControlNet (base64)
    int controlImageWidth    = 0;  ///< Control image width
    int controlImageHeight   = 0;  ///< Control image height
    int controlImageChannels = 3;  ///< Control image channels
    std::string maskImageData;     ///< Mask image data for inpainting (base64)
    int maskImageWidth    = 0;     ///< Mask image width
    int maskImageHeight   = 0;     ///< Mask image height
    int maskImageChannels = 1;     ///< Mask image channels (grayscale)

    // Model paths for advanced usage
    std::string clipLPath;       ///< Path to CLIP-L model
    std::string clipGPath;       ///< Path to CLIP-G model
    std::string vaePath;         ///< Path to VAE model
    std::string taesdPath;       ///< Path to TAESD model
    std::string controlNetPath;  ///< Path to ControlNet model
    std::string embeddingDir;    ///< Path to embeddings directory
    std::string loraModelDir;    ///< Path to LoRA model directory
    std::string esrganPath;      ///< Path to ESRGAN model for upscaling
    uint32_t upscaleFactor = 4;  ///< Upscale factor used
};

/**
 * @brief Generation queue class for managing image generation requests
 *
 * This class manages a queue of image generation requests (plus model hashing
 * and conversion requests), processes them asynchronously, and provides
 * thread-safe access to the queue and results.
 * Only one generation job is processed at a time as specified in requirements.
 *
 * The implementation lives behind a pimpl (`Impl`), so this header exposes
 * only the public interface. Instances can be neither copied nor moved.
 */
class GenerationQueue {
public:
    /**
     * @brief Construct a new Generation Queue object
     *
     * @param modelManager Pointer to the model manager
     *        (NOTE(review): ownership is not expressed in this header -
     *        presumably non-owning and expected to outlive the queue; confirm)
     * @param maxConcurrentGenerations Maximum number of concurrent generations (should be 1)
     * @param queueDir Directory to store job persistence files
     * @param outputDir Directory to store generated output files
     */
    explicit GenerationQueue(class ModelManager* modelManager, int maxConcurrentGenerations = 1, const std::string& queueDir = "./queue", const std::string& outputDir = "./output");

    /**
     * @brief Destroy the Generation Queue object
     */
    virtual ~GenerationQueue();

    // The unique_ptr pimpl already makes copying ill-formed, and the
    // user-declared destructor suppresses the implicit move operations.
    // Deleting all four explicitly states that intent and gives callers a
    // clearer diagnostic.
    GenerationQueue(const GenerationQueue&)            = delete;
    GenerationQueue& operator=(const GenerationQueue&) = delete;
    GenerationQueue(GenerationQueue&&)                 = delete;
    GenerationQueue& operator=(GenerationQueue&&)      = delete;

    /**
     * @brief Add a generation request to the queue
     *
     * @param request The generation request
     * @return std::future<GenerationResult> Future for the generation result
     */
    std::future<GenerationResult> enqueueRequest(const GenerationRequest& request);

    /**
     * @brief Add a hash request to the queue
     *
     * @param request The hash request
     * @return std::future<HashResult> Future for the hash result
     */
    std::future<HashResult> enqueueHashRequest(const HashRequest& request);

    /**
     * @brief Add a conversion request to the queue
     *
     * @param request The conversion request
     * @return std::future<ConversionResult> Future for the conversion result
     */
    std::future<ConversionResult> enqueueConversionRequest(const ConversionRequest& request);

    /**
     * @brief Get the current queue size
     *
     * @return size_t Number of requests in the queue
     */
    size_t getQueueSize() const;

    /**
     * @brief Get the number of active generations
     *
     * @return size_t Number of currently processing requests
     */
    size_t getActiveGenerations() const;

    /**
     * @brief Get detailed queue status
     *
     * @return std::vector<JobInfo> List of all jobs with their status
     */
    std::vector<JobInfo> getQueueStatus() const;

    /**
     * @brief Get job information by ID
     *
     * @param jobId The job ID to look up
     * @return JobInfo Job information, or empty if not found
     */
    JobInfo getJobInfo(const std::string& jobId) const;

    /**
     * @brief Cancel a pending job
     *
     * @param jobId The job ID to cancel
     * @return true if job was cancelled, false if not found or already processing
     */
    bool cancelJob(const std::string& jobId);

    /**
     * @brief Clear all pending requests
     */
    void clearQueue();

    /**
     * @brief Start the queue processing thread
     */
    void start();

    /**
     * @brief Stop the queue processing thread
     */
    void stop();

    /**
     * @brief Check if the queue is running
     *
     * @return true if the queue is running, false otherwise
     */
    bool isRunning() const;

    /**
     * @brief Set the maximum number of concurrent generations
     *
     * @param maxConcurrent Maximum number of concurrent generations
     */
    void setMaxConcurrentGenerations(int maxConcurrent);

private:
    class Impl;                   // Defined in the implementation file
    std::unique_ptr<Impl> pImpl;  // Pimpl idiom
};

#endif  // GENERATION_QUEUE_H