|
11 | 11 | #include "mtmd.h" |
12 | 12 | #include "mtmd-helper.h" |
13 | 13 |
|
14 | | -#include <limits.h> |
15 | | -#include <algorithm> |
16 | | -#include <cmath> |
17 | | -#include <cstring> |
18 | | -#include <limits> |
19 | | -#include <random> |
20 | | -#include <string> |
21 | | -#include <vector> |
22 | | - |
23 | 14 | // mime type for sending response |
24 | 15 | #define MIMETYPE_JSON "application/json; charset=utf-8" |
25 | 16 |
|
@@ -3594,7 +3585,6 @@ struct server_context { |
3594 | 3585 | } |
3595 | 3586 |
|
3596 | 3587 | // TODO: support memory-less logits computation |
3597 | | - // Allow diffusion tasks to proceed as they handle logits differently |
3598 | 3588 | if (slot.need_logits() && !llama_get_memory(ctx) && slot.task_type != SERVER_TASK_TYPE_DIFFUSION) { |
3599 | 3589 | slot.release(); |
3600 | 3590 | send_error(slot, "the current context does not support logits computation. skipping", ERROR_TYPE_SERVER);
@@ -5411,6 +5401,8 @@ int main(int argc, char ** argv) { |
5411 | 5401 | const std::function<bool()> & is_connection_closed, |
5412 | 5402 | httplib::Response & res, |
5413 | 5403 | oaicompat_type oaicompat) -> void { |
| 5404 | + |
| 5405 | + type = llama_model_is_diffusion(ctx_server.model) ? SERVER_TASK_TYPE_DIFFUSION : type; |
5414 | 5406 | GGML_ASSERT(type == SERVER_TASK_TYPE_COMPLETION || type == SERVER_TASK_TYPE_INFILL || type == SERVER_TASK_TYPE_DIFFUSION); |
5415 | 5407 |
|
5416 | 5408 | auto completion_id = gen_chatcmplid(); |
@@ -5532,17 +5524,11 @@ int main(int argc, char ** argv) { |
5532 | 5524 | OAICOMPAT_TYPE_NONE); |
5533 | 5525 | }; |
5534 | 5526 |
|
5535 | | - const auto handle_completions_oai = [&ctx_server,&handle_completions_impl](const httplib::Request & req, httplib::Response & res) { |
| 5527 | + const auto handle_completions_oai = [&handle_completions_impl](const httplib::Request & req, httplib::Response & res) { |
5536 | 5528 | json data = oaicompat_completion_params_parse(json::parse(req.body)); |
5537 | 5529 | std::vector<raw_buffer> files; // dummy |
5538 | | - |
5539 | | - // Check if this is a diffusion request by looking for diffusion-specific parameters |
5540 | | - bool is_diffusion = llama_model_is_diffusion(ctx_server.model); |
5541 | | - |
5542 | | - server_task_type task_type = is_diffusion ? SERVER_TASK_TYPE_DIFFUSION : SERVER_TASK_TYPE_COMPLETION; |
5543 | | - |
5544 | 5530 | handle_completions_impl( |
5545 | | - task_type, |
| 5531 | + SERVER_TASK_TYPE_COMPLETION, |
5546 | 5532 | data, |
5547 | 5533 | files, |
5548 | 5534 | req.is_connection_closed, |
@@ -5639,13 +5625,8 @@ int main(int argc, char ** argv) { |
5639 | 5625 | ctx_server.oai_parser_opt, |
5640 | 5626 | files); |
5641 | 5627 |
|
5642 | | - // Check if this is a diffusion request by looking for diffusion-specific parameters |
5643 | | - bool is_diffusion = llama_model_is_diffusion(ctx_server.model); |
5644 | | - |
5645 | | - server_task_type task_type = is_diffusion ? SERVER_TASK_TYPE_DIFFUSION : SERVER_TASK_TYPE_COMPLETION; |
5646 | | - |
5647 | 5628 | handle_completions_impl( |
5648 | | - task_type, |
| 5629 | + SERVER_TASK_TYPE_COMPLETION, |
5649 | 5630 | data, |
5650 | 5631 | files, |
5651 | 5632 | req.is_connection_closed, |
|
0 commit comments