|
@@ -360,16 +360,30 @@ ModelArchitecture ModelDetector::analyzeArchitecture(
|
|
|
return ModelArchitecture::QWEN2VL;
|
|
return ModelArchitecture::QWEN2VL;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ // Check text encoder dimensions first (more reliable than UNet channel count)
|
|
|
|
|
+ if (textEncoderOutputDim == 768) {
|
|
|
|
|
+ return ModelArchitecture::SD_1_5;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (textEncoderOutputDim >= 1024 && textEncoderOutputDim < 1280) {
|
|
|
|
|
+ return ModelArchitecture::SD_2_1;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if (textEncoderOutputDim == 1280) {
|
|
|
|
|
+ return ModelArchitecture::SDXL_BASE;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // Only use UNet channel count as a last resort when text encoder dimensions are unclear
|
|
|
if (maxUNetChannels >= 2048) {
|
|
if (maxUNetChannels >= 2048) {
|
|
|
return ModelArchitecture::SDXL_BASE;
|
|
return ModelArchitecture::SDXL_BASE;
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
- // Distinguish between SD1.x and SD2.x by text encoder dimension
|
|
|
|
|
- if (textEncoderOutputDim >= 1024 || maxUNetChannels == 1280) {
|
|
|
|
|
|
|
+
|
|
|
|
|
+ // Fallback detection based on UNet channels when text encoder info is unavailable
|
|
|
|
|
+ if (maxUNetChannels == 1280) {
|
|
|
return ModelArchitecture::SD_2_1;
|
|
return ModelArchitecture::SD_2_1;
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
- if (textEncoderOutputDim == 768 || maxUNetChannels <= 1280) {
|
|
|
|
|
|
|
+
|
|
|
|
|
+ if (maxUNetChannels <= 1280) {
|
|
|
return ModelArchitecture::SD_1_5;
|
|
return ModelArchitecture::SD_1_5;
|
|
|
}
|
|
}
|
|
|
|
|
|