|
11 | 11 | #include "mtmd.h" |
12 | 12 | #include "mtmd-helper.h" |
13 | 13 |
|
14 | | -#include <limits.h> |
15 | | -#include <algorithm> |
16 | | -#include <cmath> |
17 | | -#include <cstring> |
18 | | -#include <limits> |
19 | | -#include <random> |
20 | | -#include <string> |
21 | | -#include <vector> |
22 | | - |
23 | 14 | // mime type for sending response |
24 | 15 | #define MIMETYPE_JSON "application/json; charset=utf-8" |
25 | 16 |
|
@@ -3594,7 +3585,6 @@ struct server_context { |
3594 | 3585 | } |
3595 | 3586 |
|
3596 | 3587 | // TODO: support memory-less logits computation |
3597 | | - // Allow diffusion tasks to proceed as they handle logits differently |
3598 | 3588 | if (slot.need_logits() && !llama_get_memory(ctx) && slot.task_type != SERVER_TASK_TYPE_DIFFUSION) { |
3599 | 3589 | slot.release(); |
3600 | 3590 | send_error(slot, "the current context does not support logits computation. skipping", ERROR_TYPE_SERVER);
@@ -5411,6 +5401,8 @@ int main(int argc, char ** argv) { |
5411 | 5401 | const std::function<bool()> & is_connection_closed, |
5412 | 5402 | httplib::Response & res, |
5413 | 5403 | oaicompat_type oaicompat) -> void { |
| 5404 | + |
| 5405 | + type = llama_model_is_diffusion(ctx_server.model) ? SERVER_TASK_TYPE_DIFFUSION : type; |
5414 | 5406 | GGML_ASSERT(type == SERVER_TASK_TYPE_COMPLETION || type == SERVER_TASK_TYPE_INFILL || type == SERVER_TASK_TYPE_DIFFUSION); |
5415 | 5407 |
|
5416 | 5408 | auto completion_id = gen_chatcmplid(); |
@@ -5532,17 +5524,11 @@ int main(int argc, char ** argv) { |
5532 | 5524 | OAICOMPAT_TYPE_NONE); |
5533 | 5525 | }; |
5534 | 5526 |
|
5535 | | - const auto handle_completions_oai = [&ctx_server,&handle_completions_impl](const httplib::Request & req, httplib::Response & res) { |
| 5527 | + const auto handle_completions_oai = [&handle_completions_impl](const httplib::Request & req, httplib::Response & res) { |
5536 | 5528 | json data = oaicompat_completion_params_parse(json::parse(req.body)); |
5537 | 5529 | std::vector<raw_buffer> files; // dummy |
5538 | | - |
5539 | | - // Check if this is a diffusion request by looking for diffusion-specific parameters |
5540 | | - bool is_diffusion = llama_model_is_diffusion(ctx_server.model); |
5541 | | - |
5542 | | - server_task_type task_type = is_diffusion ? SERVER_TASK_TYPE_DIFFUSION : SERVER_TASK_TYPE_COMPLETION; |
5543 | | - |
5544 | 5530 | handle_completions_impl( |
5545 | | - task_type, |
| 5531 | + SERVER_TASK_TYPE_COMPLETION, |
5546 | 5532 | data, |
5547 | 5533 | files, |
5548 | 5534 | req.is_connection_closed, |
@@ -5639,13 +5625,8 @@ int main(int argc, char ** argv) { |
5639 | 5625 | ctx_server.oai_parser_opt, |
5640 | 5626 | files); |
5641 | 5627 |
|
5642 | | - // Check if this is a diffusion request by looking for diffusion-specific parameters |
5643 | | - bool is_diffusion = llama_model_is_diffusion(ctx_server.model); |
5644 | | - |
5645 | | - server_task_type task_type = is_diffusion ? SERVER_TASK_TYPE_DIFFUSION : SERVER_TASK_TYPE_COMPLETION; |
5646 | | - |
5647 | 5628 | handle_completions_impl( |
5648 | | - task_type, |
| 5629 | + SERVER_TASK_TYPE_COMPLETION, |
5649 | 5630 | data, |
5650 | 5631 | files, |
5651 | 5632 | req.is_connection_closed, |
|
0 commit comments