diff --git a/README.md b/README.md index cff3bd4370e..2e44ae7d0c7 100644 --- a/README.md +++ b/README.md @@ -613,3 +613,4 @@ $ echo "source ~/.llama-completion.bash" >> ~/.bashrc - [linenoise.cpp](./tools/run/linenoise.cpp/linenoise.cpp) - C++ library that provides readline-like line editing capabilities, used by `llama-run` - BSD 2-Clause License - [curl](https://curl.se/) - Client-side URL transfer library, used by various tools/examples - [CURL License](https://curl.se/docs/copyright.html) - [miniaudio.h](https://github.com/mackron/miniaudio) - Single-header audio format decoder, used by multimodal subsystem - Public domain +- [subprocess.h](https://github.com/sheredom/subprocess.h) - Single-header process launching solution for C and C++ - Public domain diff --git a/common/arg.cpp b/common/arg.cpp index 9f3c8a97546..cf69890d146 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -212,13 +212,13 @@ struct handle_model_result { static handle_model_result common_params_handle_model( struct common_params_model & model, const std::string & bearer_token, - const std::string & model_path_default, bool offline) { handle_model_result result; // handle pre-fill default model path and url based on hf_repo and hf_file { if (!model.docker_repo.empty()) { // Handle Docker URLs by resolving them to local paths model.path = common_docker_resolve_model(model.docker_repo); + model.name = model.docker_repo; // set name for consistency } else if (!model.hf_repo.empty()) { // short-hand to avoid specifying --hf-file -> default it to --model if (model.hf_file.empty()) { @@ -227,7 +227,8 @@ static handle_model_result common_params_handle_model( if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) { exit(1); // built without CURL, error message already printed } - model.hf_repo = auto_detected.repo; + model.name = model.hf_repo; // repo name with tag + model.hf_repo = auto_detected.repo; // repo name without tag model.hf_file = auto_detected.ggufFile; if (!auto_detected.mmprojFile.empty()) { result.found_mmproj = true; @@ -257,8 +258,6 @@ static handle_model_result common_params_handle_model( model.path = fs_get_cache_file(string_split(f, '/').back()); } - } else if (model.path.empty()) { - model.path = model_path_default; } } @@ -405,7 +404,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context // handle model and download { - auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH, params.offline); + auto res = common_params_handle_model(params.model, params.hf_token, params.offline); if (params.no_mmproj) { params.mmproj = {}; } else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) { @@ -415,12 +414,18 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context // only download mmproj if the current example is using it for (auto & ex : mmproj_examples) { if (ctx_arg.ex == ex) { - common_params_handle_model(params.mmproj, params.hf_token, "", params.offline); + common_params_handle_model(params.mmproj, params.hf_token, params.offline); break; } } - common_params_handle_model(params.speculative.model, params.hf_token, "", params.offline); - common_params_handle_model(params.vocoder.model, params.hf_token, "", params.offline); + common_params_handle_model(params.speculative.model, params.hf_token, params.offline); + common_params_handle_model(params.vocoder.model, params.hf_token, params.offline); + } + + // model is required (except for server) + // TODO @ngxson : maybe show a 
list of available models in CLI in this case + if (params.model.path.empty() && ctx_arg.ex != LLAMA_EXAMPLE_SERVER) { + throw std::invalid_argument("error: --model is required\n"); } if (params.escape) { @@ -2090,11 +2095,8 @@ common_params_context common_params_parser_init(common_params & params, llama_ex add_opt(common_arg( {"-m", "--model"}, "FNAME", ex == LLAMA_EXAMPLE_EXPORT_LORA - ? std::string("model path from which to load base model") - : string_format( - "model path (default: `models/$filename` with filename from `--hf-file` " - "or `--model-url` if set, otherwise %s)", DEFAULT_MODEL_PATH - ), + ? "model path from which to load base model" + : "model path to load", [](common_params & params, const std::string & value) { params.model.path = value; } @@ -2492,6 +2494,27 @@ common_params_context common_params_parser_init(common_params & params, llama_ex } } ).set_examples({LLAMA_EXAMPLE_SERVER})); + add_opt(common_arg( + {"--models-dir"}, "PATH", + "directory containing models for the router server (default: disabled)", + [](common_params & params, const std::string & value) { + params.models_dir = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_DIR")); + add_opt(common_arg( + {"--models-max"}, "N", + string_format("for router server, maximum number of models to load simultaneously (default: %d, 0 = unlimited)", params.models_max), + [](common_params & params, int value) { + params.models_max = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_MAX")); + add_opt(common_arg( + {"--no-models-autoload"}, + "disables automatic loading of models (default: enabled)", + [](common_params & params) { + params.models_autoload = false; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_MODELS_AUTOLOAD")); add_opt(common_arg( {"--jinja"}, string_format("use jinja template for chat (default: %s)\n", params.use_jinja ? 
"enabled" : "disabled"), diff --git a/common/common.cpp b/common/common.cpp index 0d7fd9a9371..10001f54697 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -912,7 +912,7 @@ std::string fs_get_cache_file(const std::string & filename) { return cache_directory + filename; } -std::vector fs_list_files(const std::string & path) { +std::vector fs_list(const std::string & path, bool include_directories) { std::vector files; if (path.empty()) return files; @@ -927,14 +927,22 @@ std::vector fs_list_files(const std::string & path) { const auto & p = entry.path(); if (std::filesystem::is_regular_file(p)) { common_file_info info; - info.path = p.string(); - info.name = p.filename().string(); + info.path = p.string(); + info.name = p.filename().string(); + info.is_dir = false; try { info.size = static_cast(std::filesystem::file_size(p)); } catch (const std::filesystem::filesystem_error &) { info.size = 0; } files.push_back(std::move(info)); + } else if (include_directories && std::filesystem::is_directory(p)) { + common_file_info info; + info.path = p.string(); + info.name = p.filename().string(); + info.size = 0; // Directories have no size + info.is_dir = true; + files.push_back(std::move(info)); } } catch (const std::filesystem::filesystem_error &) { // skip entries we cannot inspect diff --git a/common/common.h b/common/common.h index 2f23d0baa83..1225aa191da 100644 --- a/common/common.h +++ b/common/common.h @@ -26,8 +26,6 @@ fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \ } while(0) -#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf" - struct common_time_meas { common_time_meas(int64_t & t_acc, bool disable = false); ~common_time_meas(); @@ -223,6 +221,7 @@ struct common_params_model { std::string hf_repo = ""; // HF repo // NOLINT std::string hf_file = ""; // HF file // NOLINT std::string docker_repo = ""; // Docker repo // NOLINT + std::string name = ""; // in format /[:] (tag is optional) // NOLINT }; struct common_params_speculative { @@ -478,6 +477,11 @@ struct common_params { bool endpoint_props = false; // only control POST requests, not GET bool endpoint_metrics = false; + // router server configs + std::string models_dir = ""; // directory containing models for the router server + int models_max = 4; // maximum number of models to load simultaneously + bool models_autoload = true; // automatically load models when requested via the router server + bool log_json = false; std::string slot_save_path; @@ -641,8 +645,9 @@ struct common_file_info { std::string path; std::string name; size_t size = 0; // in bytes + bool is_dir = false; }; -std::vector fs_list_files(const std::string & path); +std::vector fs_list(const std::string & path, bool include_directories); // // Model utils diff --git a/common/download.cpp b/common/download.cpp index 099eaa059b0..93a0d974461 100644 --- a/common/download.cpp +++ b/common/download.cpp @@ -1054,7 +1054,7 @@ std::string common_docker_resolve_model(const std::string &) { std::vector common_list_cached_models() { std::vector models; const std::string cache_dir = fs_get_cache_directory(); - const std::vector files = fs_list_files(cache_dir); + const std::vector files = fs_list(cache_dir, false); for (const auto & file : files) { if (string_starts_with(file.name, "manifest=") && string_ends_with(file.name, ".json")) { common_cached_model_info model_info; diff --git a/common/download.h b/common/download.h index 45a6bd6bba8..d1321e6e90e 100644 --- a/common/download.h +++ b/common/download.h @@ -14,8 
+14,10 @@ struct common_cached_model_info { std::string model; std::string tag; size_t size = 0; // GGUF size in bytes + // return string representation like "user/model:tag" + // if tag is "latest", it will be omitted std::string to_string() const { - return user + "/" + model + ":" + tag; + return user + "/" + model + (tag == "latest" ? "" : ":" + tag); } }; diff --git a/scripts/sync_vendor.py b/scripts/sync_vendor.py index 88f45862b67..637f4cdc186 100755 --- a/scripts/sync_vendor.py +++ b/scripts/sync_vendor.py @@ -17,6 +17,8 @@ "https://github.com/mackron/miniaudio/raw/669ed3e844524fcd883231b13095baee9f6de304/miniaudio.h": "vendor/miniaudio/miniaudio.h", "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.28.0/httplib.h": "vendor/cpp-httplib/httplib.h", + + "https://raw.githubusercontent.com/sheredom/subprocess.h/b49c56e9fe214488493021017bf3954b91c7c1f5/subprocess.h": "vendor/sheredom/subprocess.h", } for url, filename in vendor.items(): diff --git a/tests/test-quantize-stats.cpp b/tests/test-quantize-stats.cpp index a284a1f0c5e..de587d456d0 100644 --- a/tests/test-quantize-stats.cpp +++ b/tests/test-quantize-stats.cpp @@ -23,7 +23,7 @@ #endif struct quantize_stats_params { - std::string model = DEFAULT_MODEL_PATH; + std::string model = "models/7B/ggml-model-f16.gguf"; bool verbose = false; bool per_layer_stats = false; bool print_histogram = false; diff --git a/tools/server/CMakeLists.txt b/tools/server/CMakeLists.txt index d8623621f3f..fb71c7aa7be 100644 --- a/tools/server/CMakeLists.txt +++ b/tools/server/CMakeLists.txt @@ -15,6 +15,8 @@ set(TARGET_SRCS server.cpp server-http.cpp server-http.h + server-models.cpp + server-models.h server-task.cpp server-task.h server-queue.cpp diff --git a/tools/server/README.md b/tools/server/README.md index f42bc7921c2..20939c5f67e 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -93,7 +93,7 @@ The project is under active development, and we are [looking for feedback and co | `--control-vector FNAME` | add a control vector
note: this argument can be repeated to add multiple control vectors | | `--control-vector-scaled FNAME SCALE` | add a control vector with user defined scaling SCALE
note: this argument can be repeated to add multiple scaled control vectors | | `--control-vector-layer-range START END` | layer range to apply the control vector(s) to, start and end inclusive | -| `-m, --model FNAME` | model path (default: `models/$filename` with filename from `--hf-file` or `--model-url` if set, otherwise models/7B/ggml-model-f16.gguf)
(env: LLAMA_ARG_MODEL) | +| `-m, --model FNAME` | model path to load
(env: LLAMA_ARG_MODEL) | | `-mu, --model-url MODEL_URL` | model download url (default: unused)
(env: LLAMA_ARG_MODEL_URL) | | `-dr, --docker-repo [<repo>/]<model>[:quant]` | Docker Hub model repository. repo is optional, default to ai/. quant is optional, default to :latest.
example: gemma3
(default: unused)
(env: LLAMA_ARG_DOCKER_REPO) | | `-hf, -hfr, --hf-repo <user>/<model>[:quant]` | Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.
mmproj is also downloaded automatically if available. to disable, add --no-mmproj
example: unsloth/phi-4-GGUF:q4_k_m
(default: unused)
(env: LLAMA_ARG_HF_REPO) | @@ -196,6 +196,10 @@ The project is under active development, and we are [looking for feedback and co | `--slots` | enable slots monitoring endpoint (default: enabled)
(env: LLAMA_ARG_ENDPOINT_SLOTS) | | `--no-slots` | disables slots monitoring endpoint
(env: LLAMA_ARG_NO_ENDPOINT_SLOTS) | | `--slot-save-path PATH` | path to save slot kv cache (default: disabled) | +| `--models-dir PATH` | directory containing models for the router server (default: disabled)
(env: LLAMA_ARG_MODELS_DIR) | +| `--models-max N` | for router server, maximum number of models to load simultaneously (default: 4, 0 = unlimited)
(env: LLAMA_ARG_MODELS_MAX) | +| `--models-allow-extra-args` | for router server, allow extra arguments for models; important: some arguments can allow users to access the local file system, use with caution (default: disabled)
(env: LLAMA_ARG_MODELS_ALLOW_EXTRA_ARGS) | +| `--no-models-autoload` | disables automatic loading of models (default: enabled)
(env: LLAMA_ARG_NO_MODELS_AUTOLOAD) | | `--jinja` | use jinja template for chat (default: enabled)

(env: LLAMA_ARG_JINJA) | | `--no-jinja` | disable jinja template for chat (default: enabled)

(env: LLAMA_ARG_NO_JINJA) | | `--reasoning-format FORMAT` | controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned; one of:
- none: leaves thoughts unparsed in `message.content`
- deepseek: puts thoughts in `message.reasoning_content`
- deepseek-legacy: keeps `<think>` tags in `message.content` while also populating `message.reasoning_content`
(default: auto)
(env: LLAMA_ARG_THINK) | @@ -287,38 +291,66 @@ For more details, please refer to [multimodal documentation](../../docs/multimod ## Web UI -The project includes a web-based user interface that enables interaction with the model through the `/v1/chat/completions` endpoint. +The project includes a web-based user interface for interacting with `llama-server`. It supports both single-model (`MODEL` mode) and multi-model (`ROUTER` mode) operation. -The web UI is developed using: -- `react` framework for frontend development -- `tailwindcss` and `daisyui` for styling -- `vite` for build tooling +### Features -A pre-built version is available as a single HTML file under `/public` directory. +- **Chat interface** with streaming responses +- **Multi-model support** (ROUTER mode) - switch between models, auto-load on selection +- **Modality validation** - ensures selected model supports conversation's attachments (images, audio) +- **Conversation management** - branching, regeneration, editing with history preservation +- **Attachment support** - images, audio, PDFs (with vision/text fallback) +- **Configurable parameters** - temperature, top_p, etc. synced with server defaults +- **Dark/light theme** -To build or to run the dev server (with hot reload): +### Tech Stack + +- **SvelteKit** - frontend framework with Svelte 5 runes for reactive state +- **TailwindCSS** + **shadcn-svelte** - styling and UI components +- **Vite** - build tooling +- **IndexedDB** (Dexie) - local storage for conversations +- **LocalStorage** - user settings persistence + +### Architecture + +The WebUI follows a layered architecture: + +``` +Routes → Components → Hooks → Stores → Services → Storage/API +``` + +- **Stores** - reactive state management (`chatStore`, `conversationsStore`, `modelsStore`, `serverStore`, `settingsStore`) +- **Services** - stateless API/database communication (`ChatService`, `ModelsService`, `PropsService`, `DatabaseService`) +- **Hooks** - reusable logic (`useModelChangeValidation`, `useProcessingState`) + +For detailed architecture diagrams, see [`tools/server/webui/docs/`](webui/docs/): + +- `high-level-architecture.mmd` - full architecture with all modules +- `high-level-architecture-simplified.mmd` - simplified overview +- `data-flow-simplified-model-mode.mmd` - data flow for single-model mode +- `data-flow-simplified-router-mode.mmd` - data flow for multi-model mode +- `flows/*.mmd` - detailed per-domain flows (chat, conversations, models, etc.) + +### Development ```sh -# make sure you have nodejs installed +# make sure you have Node.js installed cd tools/server/webui npm i -# to run the dev server +# run dev server (with hot reload) npm run dev -# to build the public/index.html.gz +# run tests +npm run test + +# build production bundle npm run build ``` -After `public/index.html.gz` has been generated we need to generate the c++ -headers (like build/tools/server/index.html.gz.hpp) that will be included -by server.cpp. This is done by building `llama-server` as described in the -[build](#build) section above. -NOTE: if you are using the vite dev server, you can change the API base URL to llama.cpp. To do that, run this code snippet in browser's console: +After `public/index.html.gz` has been generated, rebuild `llama-server` as described in the [build](#build) section to include the updated UI. -```js -localStorage.setItem('base', 'http://localhost:8080') -``` +**Note:** The Vite dev server automatically proxies API requests to `http://localhost:8080`. 
Make sure `llama-server` is running on that port during development.
 
 ## Quick Start
 
@@ -1424,6 +1456,184 @@
 curl http://localhost:8080/v1/messages/count_tokens \
 {"input_tokens": 10}
 ```
+## Using multiple models
+
+`llama-server` can be launched in a **router mode** that exposes an API for dynamically loading and unloading models. The main process (the "router") automatically forwards each request to the appropriate model instance.
+
+To start in router mode, launch `llama-server` **without specifying any model**:
+
+```sh
+llama-server
+```
+
+### Model sources
+
+By default, the router looks for models in the cache. You can add Hugging Face models to the cache with:
+
+```sh
+llama-server -hf <user>/<model>:<quant>
+```
+
+*The server must be restarted after adding a new model.*
+
+Alternatively, you can point the router to a local directory containing your GGUF files using `--models-dir`. Example command:
+
+```sh
+llama-server --models-dir ./models_directory
+```
+
+If a model consists of multiple GGUF files (multimodal or multi-shard), the files should be placed in a subdirectory. The directory structure should look like this:
+
+```sh
+models_directory
+ │
+ │ # single file
+ ├─ llama-3.2-1b-Q4_K_M.gguf
+ ├─ Qwen3-8B-Q4_K_M.gguf
+ │
+ │ # multimodal
+ ├─ gemma-3-4b-it-Q8_0
+ │   ├─ gemma-3-4b-it-Q8_0.gguf
+ │   └─ mmproj-F16.gguf # file name must start with "mmproj"
+ │
+ │ # multi-shard
+ ├─ Kimi-K2-Thinking-UD-IQ1_S
+ │   ├─ Kimi-K2-Thinking-UD-IQ1_S-00001-of-00006.gguf
+ │   ├─ Kimi-K2-Thinking-UD-IQ1_S-00002-of-00006.gguf
+ │   ├─ ...
+ │   └─ Kimi-K2-Thinking-UD-IQ1_S-00006-of-00006.gguf
+```
+
+You may also specify default arguments that will be passed to every model instance:
+
+```sh
+llama-server -c 8192 -n 1024 -np 2
+```
+
+Note: model instances inherit both command line arguments and environment variables from the router server.
+
+### Routing requests
+
+Requests are routed according to the requested model name.
+
+For **POST** endpoints (`/v1/chat/completions`, `/v1/completions`, `/infill`, etc.), the router uses the `"model"` field in the JSON body:
+
+```json
+{
+    "model": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
+    "messages": [
+        {
+            "role": "user",
+            "content": "hello"
+        }
+    ]
+}
+```
+
+For **GET** endpoints (`/props`, `/metrics`, etc.), the router uses the `model` query parameter (URL-encoded):
+
+```
+GET /props?model=ggml-org%2Fgemma-3-4b-it-GGUF%3AQ4_K_M
+```
+
+By default, a model is loaded automatically on first request if it is not already loaded. To disable this, add `--no-models-autoload` when starting the server. Additionally, you can include `?autoload=true|false` in the query parameters to control this behavior per-request.
+
+### GET `/models`: List available models
+
+Lists all models in the cache. Each model's metadata also includes a `status` field indicating the state of the model:
+
+```json
+{
+    "data": [{
+        "id": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
+        "in_cache": true,
+        "path": "/Users/REDACTED/Library/Caches/llama.cpp/ggml-org_gemma-3-4b-it-GGUF_gemma-3-4b-it-Q4_K_M.gguf",
+        "status": {
+            "value": "loaded",
+            "args": ["llama-server", "-c", "4096"]
+        },
+        ...
+    }]
+}
+```
+
+Note: For a local GGUF (stored offline in a custom directory), the model object will have `"in_cache": false`.
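+
+For example, you can list the models, load one explicitly, and then send it a request (a sketch assuming the router listens on the default port 8080 and the example model above is present in your cache):
+
+```sh
+# list all models known to the router, together with their status
+curl http://localhost:8080/models
+
+# explicitly load one of the listed models
+curl -X POST http://localhost:8080/models/load \
+    -H "Content-Type: application/json" \
+    -d '{"model": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M"}'
+
+# send a chat completion; the router forwards it to the instance loaded above
+curl -X POST http://localhost:8080/v1/chat/completions \
+    -H "Content-Type: application/json" \
+    -d '{"model": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M", "messages": [{"role": "user", "content": "hello"}]}'
+```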
+
+The `status` object can be:
+
+```json
+"status": {
+    "value": "unloaded"
+}
+```
+
+```json
+"status": {
+    "value": "loading",
+    "args": ["llama-server", "-c", "4096"]
+}
+```
+
+```json
+"status": {
+    "value": "unloaded",
+    "args": ["llama-server", "-c", "4096"],
+    "failed": true,
+    "exit_code": 1
+}
+```
+
+```json
+"status": {
+    "value": "loaded",
+    "args": ["llama-server", "-c", "4096"]
+}
+```
+
+### POST `/models/load`: Load a model
+
+Loads the specified model into its own server instance.
+
+Payload:
+- `model`: name of the model to be loaded.
+- `extra_args`: (optional) an array of additional arguments to be passed to the model instance. Note: you must start the server with `--models-allow-extra-args` to enable this feature.
+
+```json
+{
+    "model": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M",
+    "extra_args": ["-n", "128", "--top-k", "4"]
+}
+```
+
+Response:
+
+```json
+{
+    "success": true
+}
+```
+
+### POST `/models/unload`: Unload a model
+
+Unloads the specified model instance.
+
+Payload:
+
+```json
+{
+    "model": "ggml-org/gemma-3-4b-it-GGUF:Q4_K_M"
+}
+```
+
+Response:
+
+```json
+{
+    "success": true
+}
+```
+
 ## More examples
 
 ### Interactive mode
diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index ae25b6ddf72..9b58c25a66b 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp
new file mode 100644
index 00000000000..0f812ed411b
--- /dev/null
+++ b/tools/server/server-models.cpp
@@ -0,0 +1,920 @@
+#include "server-common.h"
+#include "server-models.h"
+
+#include "download.h"
+
+#include // TODO: remove this once we use HTTP client from download.h
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef _WIN32
+#include
+#else
+#include
+#include
+#include
+#include
+#endif
+
+#define CMD_EXIT "exit"
+
+struct local_model {
+    std::string name;
+    std::string path;
+    std::string path_mmproj;
+};
+
+static std::vector<local_model> list_local_models(const std::string & dir) {
+    if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
+        throw std::runtime_error(string_format("error: '%s' does not exist or is not a directory\n", dir.c_str()));
+    }
+
+    std::vector<local_model> models;
+    auto scan_subdir = [&models](const std::string & subdir_path, const std::string & name) {
+        auto files = fs_list(subdir_path, false);
+        common_file_info model_file;
+        common_file_info first_shard_file;
+        common_file_info mmproj_file;
+        for (const auto & file : files) {
+            if (string_ends_with(file.name, ".gguf")) {
+                if (file.name.find("mmproj") != std::string::npos) {
+                    mmproj_file = file;
+                } else if (file.name.find("-00001-of-") != std::string::npos) {
+                    first_shard_file = file;
+                } else {
+                    model_file = file;
+                }
+            }
+        }
+        // prefer the first shard if present, otherwise use the single model file
+        local_model model{
+            /* name        */ name,
+            /* path        */ first_shard_file.path.empty() ?
+                                  model_file.path : first_shard_file.path,
+            /* path_mmproj */ mmproj_file.path // can be empty
+        };
+        if (!model.path.empty()) {
+            models.push_back(model);
+        }
+    };
+
+    auto files = fs_list(dir, true);
+    for (const auto & file : files) {
+        if (file.is_dir) {
+            scan_subdir(file.path, file.name);
+        } else if (string_ends_with(file.name, ".gguf")) {
+            // single file model
+            std::string name = file.name;
+            string_replace_all(name, ".gguf", "");
+            local_model model{
+                /* name        */ name,
+                /* path        */ file.path,
+                /* path_mmproj */ ""
+            };
+            models.push_back(model);
+        }
+    }
+    return models;
+}
+
+//
+// server_models
+//
+
+server_models::server_models(
+        const common_params & params,
+        int argc,
+        char ** argv,
+        char ** envp) : base_params(params) {
+    for (int i = 0; i < argc; i++) {
+        base_args.push_back(std::string(argv[i]));
+    }
+    for (char ** env = envp; *env != nullptr; env++) {
+        base_env.push_back(std::string(*env));
+    }
+    // TODO: allow refreshing cached model list
+    // add cached models
+    auto cached_models = common_list_cached_models();
+    for (const auto & model : cached_models) {
+        server_model_meta meta{
+            /* name        */ model.to_string(),
+            /* path        */ model.manifest_path,
+            /* path_mmproj */ "", // auto-detected when loading
+            /* in_cache    */ true,
+            /* port        */ 0,
+            /* status      */ SERVER_MODEL_STATUS_UNLOADED,
+            /* last_used   */ 0,
+            /* args        */ std::vector<std::string>(),
+            /* exit_code   */ 0
+        };
+        mapping[meta.name] = instance_t{
+            /* subproc */ std::make_shared<subprocess_s>(),
+            /* th      */ std::thread(),
+            /* meta    */ meta
+        };
+    }
+    // add local models specified via --models-dir
+    if (!params.models_dir.empty()) {
+        auto local_models = list_local_models(params.models_dir);
+        for (const auto & model : local_models) {
+            if (mapping.find(model.name) != mapping.end()) {
+                // already exists in cached models, skip
+                continue;
+            }
+            server_model_meta meta{
+                /* name        */ model.name,
+                /* path        */ model.path,
+                /* path_mmproj */ model.path_mmproj,
+                /* in_cache    */ false,
+                /* port        */ 0,
+                /* status      */ SERVER_MODEL_STATUS_UNLOADED,
+                /* last_used   */ 0,
+                /* args        */ std::vector<std::string>(),
+                /* exit_code   */ 0
+            };
+            mapping[meta.name] = instance_t{
+                /* subproc */ std::make_shared<subprocess_s>(),
+                /* th      */ std::thread(),
+                /* meta    */ meta
+            };
+        }
+    }
+}
+
+void server_models::update_meta(const std::string & name, const server_model_meta & meta) {
+    std::lock_guard lk(mutex);
+    auto it = mapping.find(name);
+    if (it != mapping.end()) {
+        it->second.meta = meta;
+    }
+    cv.notify_all(); // notify wait_until_loaded
+}
+
+bool server_models::has_model(const std::string & name) {
+    std::lock_guard lk(mutex);
+    return mapping.find(name) != mapping.end();
+}
+
+std::optional<server_model_meta> server_models::get_meta(const std::string & name) {
+    std::lock_guard lk(mutex);
+    auto it = mapping.find(name);
+    if (it != mapping.end()) {
+        return it->second.meta;
+    }
+    return std::nullopt;
+}
+
+static int get_free_port() {
+#ifdef _WIN32
+    WSADATA wsaData;
+    if (WSAStartup(MAKEWORD(2, 2), &wsaData) != 0) {
+        return -1;
+    }
+    typedef SOCKET native_socket_t;
+#define INVALID_SOCKET_VAL INVALID_SOCKET
+#define CLOSE_SOCKET(s) closesocket(s)
+#else
+    typedef int native_socket_t;
+#define INVALID_SOCKET_VAL -1
+#define CLOSE_SOCKET(s) close(s)
+#endif
+
+    native_socket_t sock = socket(AF_INET, SOCK_STREAM, 0);
+    if (sock == INVALID_SOCKET_VAL) {
+#ifdef _WIN32
+        WSACleanup();
+#endif
+        return -1;
+    }
+
+    struct sockaddr_in serv_addr;
+    std::memset(&serv_addr, 0, sizeof(serv_addr));
+    serv_addr.sin_family = AF_INET;
+    serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+    serv_addr.sin_port =
htons(0); + + if (bind(sock, (struct sockaddr*)&serv_addr, sizeof(serv_addr)) != 0) { + CLOSE_SOCKET(sock); +#ifdef _WIN32 + WSACleanup(); +#endif + return -1; + } + +#ifdef _WIN32 + int namelen = sizeof(serv_addr); +#else + socklen_t namelen = sizeof(serv_addr); +#endif + if (getsockname(sock, (struct sockaddr*)&serv_addr, &namelen) != 0) { + CLOSE_SOCKET(sock); +#ifdef _WIN32 + WSACleanup(); +#endif + return -1; + } + + int port = ntohs(serv_addr.sin_port); + + CLOSE_SOCKET(sock); +#ifdef _WIN32 + WSACleanup(); +#endif + + return port; +} + +// helper to convert vector to char ** +// pointers are only valid as long as the original vector is valid +static std::vector to_char_ptr_array(const std::vector & vec) { + std::vector result; + result.reserve(vec.size() + 1); + for (const auto & s : vec) { + result.push_back(const_cast(s.c_str())); + } + result.push_back(nullptr); + return result; +} + +std::vector server_models::get_all_meta() { + std::lock_guard lk(mutex); + std::vector result; + result.reserve(mapping.size()); + for (const auto & [name, inst] : mapping) { + result.push_back(inst.meta); + } + return result; +} + +void server_models::unload_lru() { + if (base_params.models_max <= 0) { + return; // no limit + } + // remove one of the servers if we passed the models_max (least recently used - LRU) + std::string lru_model_name = ""; + int64_t lru_last_used = ggml_time_ms(); + size_t count_active = 0; + { + std::lock_guard lk(mutex); + for (const auto & m : mapping) { + if (m.second.meta.is_active()) { + count_active++; + if (m.second.meta.last_used < lru_last_used) { + lru_model_name = m.first; + lru_last_used = m.second.meta.last_used; + } + } + } + } + if (!lru_model_name.empty() && count_active >= (size_t)base_params.models_max) { + SRV_INF("models_max limit reached, removing LRU name=%s\n", lru_model_name.c_str()); + unload(lru_model_name); + } +} + +static void add_or_replace_arg(std::vector & args, const std::string & key, const std::string & value) { + for (size_t i = 0; i < args.size(); i++) { + if (args[i] == key && i + 1 < args.size()) { + args[i + 1] = value; + return; + } + } + // not found, append + args.push_back(key); + args.push_back(value); +} + +void server_models::load(const std::string & name, bool auto_load) { + if (!has_model(name)) { + throw std::runtime_error("model name=" + name + " is not found"); + } + unload_lru(); + + std::lock_guard lk(mutex); + + auto meta = mapping[name].meta; + if (meta.status != SERVER_MODEL_STATUS_UNLOADED) { + SRV_INF("model %s is not ready\n", name.c_str()); + return; + } + + // prepare new instance info + instance_t inst; + inst.meta = meta; + inst.meta.port = get_free_port(); + inst.meta.status = SERVER_MODEL_STATUS_LOADING; + inst.meta.last_used = ggml_time_ms(); + + if (inst.meta.port <= 0) { + throw std::runtime_error("failed to get a port number"); + } + + inst.subproc = std::make_shared(); + { + SRV_INF("spawning server instance with name=%s on port %d\n", inst.meta.name.c_str(), inst.meta.port); + + std::vector child_args; + if (auto_load && !meta.args.empty()) { + child_args = meta.args; // copy previous args + } else { + child_args = base_args; // copy + if (inst.meta.in_cache) { + add_or_replace_arg(child_args, "-hf", inst.meta.name); + } else { + add_or_replace_arg(child_args, "-m", inst.meta.path); + if (!inst.meta.path_mmproj.empty()) { + add_or_replace_arg(child_args, "--mmproj", inst.meta.path_mmproj); + } + } + } + + // set model args + add_or_replace_arg(child_args, "--port", std::to_string(inst.meta.port)); + 
add_or_replace_arg(child_args, "--alias", inst.meta.name); + + std::vector child_env = base_env; // copy + child_env.push_back("LLAMA_SERVER_ROUTER_PORT=" + std::to_string(base_params.port)); + + SRV_INF("%s", "spawning server instance with args:\n"); + for (const auto & arg : child_args) { + SRV_INF(" %s\n", arg.c_str()); + } + inst.meta.args = child_args; // save for debugging + + std::vector argv = to_char_ptr_array(child_args); + std::vector envp = to_char_ptr_array(child_env); + + int options = subprocess_option_no_window | subprocess_option_combined_stdout_stderr; + int result = subprocess_create_ex(argv.data(), options, envp.data(), inst.subproc.get()); + if (result != 0) { + throw std::runtime_error("failed to spawn server instance"); + } + + inst.stdin_file = subprocess_stdin(inst.subproc.get()); + } + + // start a thread to manage the child process + // captured variables are guaranteed to be destroyed only after the thread is joined + inst.th = std::thread([this, name, child_proc = inst.subproc, port = inst.meta.port]() { + // read stdout/stderr and forward to main server log + FILE * p_stdout_stderr = subprocess_stdout(child_proc.get()); + if (p_stdout_stderr) { + char buffer[4096]; + while (fgets(buffer, sizeof(buffer), p_stdout_stderr) != nullptr) { + LOG("[%5d] %s", port, buffer); + } + } else { + SRV_ERR("failed to get stdout/stderr of child process for name=%s\n", name.c_str()); + } + // we reach here when the child process exits + int exit_code = 0; + subprocess_join(child_proc.get(), &exit_code); + subprocess_destroy(child_proc.get()); + // update PID and status + { + std::lock_guard lk(mutex); + auto it = mapping.find(name); + if (it != mapping.end()) { + auto & meta = it->second.meta; + meta.exit_code = exit_code; + meta.status = SERVER_MODEL_STATUS_UNLOADED; + } + cv.notify_all(); + } + SRV_INF("instance name=%s exited with status %d\n", name.c_str(), exit_code); + }); + + // clean up old process/thread if exists + { + auto & old_instance = mapping[name]; + // old process should have exited already, but just in case, we clean it up here + if (subprocess_alive(old_instance.subproc.get())) { + SRV_WRN("old process for model name=%s is still alive, this is unexpected\n", name.c_str()); + subprocess_terminate(old_instance.subproc.get()); // force kill + } + if (old_instance.th.joinable()) { + old_instance.th.join(); + } + } + + mapping[name] = std::move(inst); + cv.notify_all(); +} + +static void interrupt_subprocess(FILE * stdin_file) { + // because subprocess.h does not provide a way to send SIGINT, + // we will send a command to the child process to exit gracefully + if (stdin_file) { + fprintf(stdin_file, "%s\n", CMD_EXIT); + fflush(stdin_file); + } +} + +void server_models::unload(const std::string & name) { + std::lock_guard lk(mutex); + auto it = mapping.find(name); + if (it != mapping.end()) { + if (it->second.meta.is_active()) { + SRV_INF("unloading model instance name=%s\n", name.c_str()); + interrupt_subprocess(it->second.stdin_file); + // status change will be handled by the managing thread + } else { + SRV_WRN("model instance name=%s is not loaded\n", name.c_str()); + } + } +} + +void server_models::unload_all() { + std::vector to_join; + { + std::lock_guard lk(mutex); + for (auto & [name, inst] : mapping) { + if (inst.meta.is_active()) { + SRV_INF("unloading model instance name=%s\n", name.c_str()); + interrupt_subprocess(inst.stdin_file); + // status change will be handled by the managing thread + } + // moving the thread to join list to avoid deadlock + 
to_join.push_back(std::move(inst.th)); + } + } + for (auto & th : to_join) { + if (th.joinable()) { + th.join(); + } + } +} + +void server_models::update_status(const std::string & name, server_model_status status) { + // for now, we only allow updating to LOADED status + if (status != SERVER_MODEL_STATUS_LOADED) { + throw std::runtime_error("invalid status value"); + } + auto meta = get_meta(name); + if (meta.has_value()) { + meta->status = status; + update_meta(name, meta.value()); + } +} + +void server_models::wait_until_loaded(const std::string & name) { + std::unique_lock lk(mutex); + cv.wait(lk, [this, &name]() { + auto it = mapping.find(name); + if (it != mapping.end()) { + return it->second.meta.status != SERVER_MODEL_STATUS_LOADING; + } + return false; + }); +} + +bool server_models::ensure_model_loaded(const std::string & name) { + auto meta = get_meta(name); + if (!meta.has_value()) { + throw std::runtime_error("model name=" + name + " is not found"); + } + if (meta->status == SERVER_MODEL_STATUS_LOADED) { + return false; // already loaded + } + if (meta->status == SERVER_MODEL_STATUS_UNLOADED) { + SRV_INF("model name=%s is not loaded, loading...\n", name.c_str()); + load(name, true); + } + + SRV_INF("waiting until model name=%s is fully loaded...\n", name.c_str()); + wait_until_loaded(name); + + // check final status + meta = get_meta(name); + if (!meta.has_value() || meta->is_failed()) { + throw std::runtime_error("model name=" + name + " failed to load"); + } + + return true; +} + +server_http_res_ptr server_models::proxy_request(const server_http_req & req, const std::string & method, const std::string & name, bool update_last_used) { + auto meta = get_meta(name); + if (!meta.has_value()) { + throw std::runtime_error("model name=" + name + " is not found"); + } + if (meta->status != SERVER_MODEL_STATUS_LOADED) { + throw std::invalid_argument("model name=" + name + " is not loaded"); + } + if (update_last_used) { + std::unique_lock lk(mutex); + mapping[name].meta.last_used = ggml_time_ms(); + } + SRV_INF("proxying request to model %s on port %d\n", name.c_str(), meta->port); + auto proxy = std::make_unique( + method, + base_params.hostname, + meta->port, + req.path, + req.headers, + req.body, + req.should_stop); + return proxy; +} + +std::thread server_models::setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function & shutdown_handler) { + // send a notification to the router server that a model instance is ready + // TODO @ngxson : use HTTP client from libcommon + httplib::Client cli(base_params.hostname, router_port); + cli.set_connection_timeout(0, 200000); // 200 milliseconds + + httplib::Request req; + req.method = "POST"; + req.path = "/models/status"; + req.set_header("Content-Type", "application/json"); + if (!base_params.api_keys.empty()) { + req.set_header("Authorization", "Bearer " + base_params.api_keys[0]); + } + + json body; + body["model"] = name; + body["value"] = server_model_status_to_string(SERVER_MODEL_STATUS_LOADED); + req.body = body.dump(); + + SRV_INF("notifying router server (port=%d) that model %s is ready\n", router_port, name.c_str()); + auto result = cli.send(std::move(req)); + if (result.error() != httplib::Error::Success) { + auto err_str = httplib::to_string(result.error()); + SRV_ERR("failed to notify router server: %s\n", err_str.c_str()); + exit(1); // force exit + } + + // setup thread for monitoring stdin + return std::thread([shutdown_handler]() { + // wait for EOF on stdin + 
SRV_INF("%s", "child server monitoring thread started, waiting for EOF on stdin...\n"); + bool eof = false; + while (true) { + std::string line; + if (!std::getline(std::cin, line)) { + // EOF detected, that means the router server is unexpectedly exit or killed + eof = true; + break; + } + if (line.find(CMD_EXIT) != std::string::npos) { + SRV_INF("%s", "exit command received, exiting...\n"); + shutdown_handler(0); + break; + } + } + if (eof) { + SRV_INF("%s", "EOF on stdin detected, forcing shutdown...\n"); + exit(1); + } + }); +} + + + +// +// server_models_routes +// + +static void res_ok(std::unique_ptr & res, const json & response_data) { + res->status = 200; + res->data = safe_json_to_str(response_data); +} + +static void res_error(std::unique_ptr & res, const json & error_data) { + res->status = json_value(error_data, "code", 500); + res->data = safe_json_to_str({{ "error", error_data }}); +} + +static bool router_validate_model(const std::string & name, server_models & models, bool models_autoload, std::unique_ptr & res) { + if (name.empty()) { + res_error(res, format_error_response("model name is missing from the request", ERROR_TYPE_INVALID_REQUEST)); + return false; + } + auto meta = models.get_meta(name); + if (!meta.has_value()) { + res_error(res, format_error_response("model not found", ERROR_TYPE_INVALID_REQUEST)); + return false; + } + if (models_autoload) { + models.ensure_model_loaded(name); + } else { + if (meta->status != SERVER_MODEL_STATUS_LOADED) { + res_error(res, format_error_response("model is not loaded", ERROR_TYPE_INVALID_REQUEST)); + return false; + } + } + return true; +} + +static bool is_autoload(const common_params & params, const server_http_req & req) { + std::string autoload = req.get_param("autoload"); + if (autoload.empty()) { + return params.models_autoload; + } else { + return autoload == "true" || autoload == "1"; + } +} + +void server_models_routes::init_routes() { + this->get_router_props = [this](const server_http_req & req) { + std::string name = req.get_param("model"); + if (name.empty()) { + // main instance + auto res = std::make_unique(); + res_ok(res, { + // TODO: add support for this on web UI + {"role", "router"}, + {"max_instances", 4}, // dummy value for testing + // this is a dummy response to make sure webui doesn't break + {"model_alias", "llama-server"}, + {"model_path", "none"}, + {"default_generation_settings", { + {"params", json{}}, + {"n_ctx", 0}, + }}, + }); + return res; + } + return proxy_get(req); + }; + + this->proxy_get = [this](const server_http_req & req) { + std::string method = "GET"; + std::string name = req.get_param("model"); + bool autoload = is_autoload(params, req); + auto error_res = std::make_unique(); + if (!router_validate_model(name, models, autoload, error_res)) { + return error_res; + } + return models.proxy_request(req, method, name, false); + }; + + this->proxy_post = [this](const server_http_req & req) { + std::string method = "POST"; + json body = json::parse(req.body); + std::string name = json_value(body, "model", std::string()); + bool autoload = is_autoload(params, req); + auto error_res = std::make_unique(); + if (!router_validate_model(name, models, autoload, error_res)) { + return error_res; + } + return models.proxy_request(req, method, name, true); // update last usage for POST request only + }; + + this->get_router_models = [this](const server_http_req &) { + auto res = std::make_unique(); + json models_json = json::array(); + auto all_models = models.get_all_meta(); + std::time_t t = 
std::time(0);
+        for (const auto & meta : all_models) {
+            json status {
+                {"value", server_model_status_to_string(meta.status)},
+                {"args", meta.args},
+            };
+            if (meta.is_failed()) {
+                status["exit_code"] = meta.exit_code;
+                status["failed"] = true;
+            }
+            models_json.push_back(json {
+                {"id", meta.name},
+                {"object", "model"}, // for OAI-compat
+                {"owned_by", "llamacpp"}, // for OAI-compat
+                {"created", t}, // for OAI-compat
+                {"in_cache", meta.in_cache},
+                {"path", meta.path},
+                {"status", status},
+                // TODO: add other fields, may require reading GGUF metadata
+            });
+        }
+        res_ok(res, {
+            {"data", models_json},
+            {"object", "list"},
+        });
+        return res;
+    };
+
+    this->post_router_models_load = [this](const server_http_req & req) {
+        auto res = std::make_unique<server_http_res>();
+        json body = json::parse(req.body);
+        std::string name = json_value(body, "model", std::string());
+        auto model = models.get_meta(name);
+        if (!model.has_value()) {
+            res_error(res, format_error_response("model is not found", ERROR_TYPE_NOT_FOUND));
+            return res;
+        }
+        if (model->status == SERVER_MODEL_STATUS_LOADED) {
+            res_error(res, format_error_response("model is already loaded", ERROR_TYPE_INVALID_REQUEST));
+            return res;
+        }
+        models.load(name, false);
+        res_ok(res, {{"success", true}});
+        return res;
+    };
+
+    // used by child process to notify the router about status change
+    // TODO @ngxson : maybe implement authentication for this endpoint in the future
+    this->post_router_models_status = [this](const server_http_req & req) {
+        auto res = std::make_unique<server_http_res>();
+        json body = json::parse(req.body);
+        std::string model = json_value(body, "model", std::string());
+        std::string value = json_value(body, "value", std::string());
+        models.update_status(model, server_model_status_from_string(value));
+        res_ok(res, {{"success", true}});
+        return res;
+    };
+
+    this->post_router_models_unload = [this](const server_http_req & req) {
+        auto res = std::make_unique<server_http_res>();
+        json body = json::parse(req.body);
+        std::string name = json_value(body, "model", std::string());
+        auto model = models.get_meta(name);
+        if (!model.has_value()) {
+            res_error(res, format_error_response("model is not found", ERROR_TYPE_INVALID_REQUEST));
+            return res;
+        }
+        if (model->status != SERVER_MODEL_STATUS_LOADED) {
+            res_error(res, format_error_response("model is not loaded", ERROR_TYPE_INVALID_REQUEST));
+            return res;
+        }
+        models.unload(name);
+        res_ok(res, {{"success", true}});
+        return res;
+    };
+}
+
+
+
+//
+// server_http_proxy
+//
+
+// simple implementation of a pipe
+// used for streaming data between threads
+template <typename T>
+struct pipe_t {
+    std::mutex mutex;
+    std::condition_variable cv;
+    std::queue<T> queue;
+
std::atomic writer_closed{false}; + std::atomic reader_closed{false}; + void close_write() { + writer_closed.store(true, std::memory_order_relaxed); + cv.notify_all(); + } + void close_read() { + reader_closed.store(true, std::memory_order_relaxed); + cv.notify_all(); + } + bool read(T & output, const std::function & should_stop) { + std::unique_lock lk(mutex); + constexpr auto poll_interval = std::chrono::milliseconds(500); + while (true) { + if (!queue.empty()) { + output = std::move(queue.front()); + queue.pop(); + return true; + } + if (writer_closed.load()) { + return false; // clean EOF + } + if (should_stop()) { + close_read(); // signal broken pipe to writer + return false; // cancelled / reader no longer alive + } + cv.wait_for(lk, poll_interval); + } + } + bool write(T && data) { + std::lock_guard lk(mutex); + if (reader_closed.load()) { + return false; // broken pipe + } + queue.push(std::move(data)); + cv.notify_one(); + return true; + } +}; + +server_http_proxy::server_http_proxy( + const std::string & method, + const std::string & host, + int port, + const std::string & path, + const std::map & headers, + const std::string & body, + const std::function should_stop) { + // shared between reader and writer threads + auto cli = std::make_shared(host, port); + auto pipe = std::make_shared>(); + + // setup Client + cli->set_connection_timeout(0, 200000); // 200 milliseconds + this->status = 500; // to be overwritten upon response + this->cleanup = [pipe]() { + pipe->close_read(); + pipe->close_write(); + }; + + // wire up the receive end of the pipe + this->next = [pipe, should_stop](std::string & out) -> bool { + msg_t msg; + bool has_next = pipe->read(msg, should_stop); + if (!msg.data.empty()) { + out = std::move(msg.data); + } + return has_next; // false if EOF or pipe broken + }; + + // wire up the HTTP client + // note: do NOT capture `this` pointer, as it may be destroyed before the thread ends + httplib::ResponseHandler response_handler = [pipe, cli](const httplib::Response & response) { + msg_t msg; + msg.status = response.status; + for (const auto & [key, value] : response.headers) { + msg.headers[key] = value; + } + return pipe->write(std::move(msg)); // send headers first + }; + httplib::ContentReceiverWithProgress content_receiver = [pipe](const char * data, size_t data_length, size_t, size_t) { + // send data chunks + // returns false if pipe is closed / broken (signal to stop receiving) + return pipe->write({{}, 0, std::string(data, data_length)}); + }; + + // prepare the request to destination server + httplib::Request req; + { + req.method = method; + req.path = path; + for (const auto & [key, value] : headers) { + req.set_header(key, value); + } + req.body = body; + req.response_handler = response_handler; + req.content_receiver = content_receiver; + } + + // start the proxy thread + SRV_DBG("start proxy thread %s %s\n", req.method.c_str(), req.path.c_str()); + this->thread = std::thread([cli, pipe, req]() { + auto result = cli->send(std::move(req)); + if (result.error() != httplib::Error::Success) { + auto err_str = httplib::to_string(result.error()); + SRV_ERR("http client error: %s\n", err_str.c_str()); + pipe->write({{}, 500, ""}); // header + pipe->write({{}, 0, "proxy error: " + err_str}); // body + } + pipe->close_write(); // signal EOF to reader + SRV_DBG("%s", "client request thread ended\n"); + }); + this->thread.detach(); + + // wait for the first chunk (headers) + msg_t header; + if (pipe->read(header, should_stop)) { + SRV_DBG("%s", "received 
response headers\n"); + this->status = header.status; + this->headers = header.headers; + } else { + SRV_DBG("%s", "no response headers received (request cancelled?)\n"); + } +} diff --git a/tools/server/server-models.h b/tools/server/server-models.h new file mode 100644 index 00000000000..b9bec983ef6 --- /dev/null +++ b/tools/server/server-models.h @@ -0,0 +1,174 @@ +#pragma once + +#include "common.h" +#include "server-http.h" + +#include +#include +#include +#include + +/** + * state diagram: + * + * UNLOADED ──► LOADING ──► LOADED + * ▲ │ │ + * └───failed───┘ │ + * ▲ │ + * └────────unloaded─────────┘ + */ +enum server_model_status { + // TODO: also add downloading state when the logic is added + SERVER_MODEL_STATUS_UNLOADED, + SERVER_MODEL_STATUS_LOADING, + SERVER_MODEL_STATUS_LOADED +}; + +static server_model_status server_model_status_from_string(const std::string & status_str) { + if (status_str == "unloaded") { + return SERVER_MODEL_STATUS_UNLOADED; + } + if (status_str == "loading") { + return SERVER_MODEL_STATUS_LOADING; + } + if (status_str == "loaded") { + return SERVER_MODEL_STATUS_LOADED; + } + throw std::runtime_error("invalid server model status"); +} + +static std::string server_model_status_to_string(server_model_status status) { + switch (status) { + case SERVER_MODEL_STATUS_UNLOADED: return "unloaded"; + case SERVER_MODEL_STATUS_LOADING: return "loading"; + case SERVER_MODEL_STATUS_LOADED: return "loaded"; + default: return "unknown"; + } +} + +struct server_model_meta { + std::string name; + std::string path; + std::string path_mmproj; // only available if in_cache=false + bool in_cache = false; // if true, use -hf; use -m otherwise + int port = 0; + server_model_status status = SERVER_MODEL_STATUS_UNLOADED; + int64_t last_used = 0; // for LRU unloading + std::vector args; // additional args passed to the model instance (used for debugging) + int exit_code = 0; // exit code of the model instance process (only valid if status == FAILED) + + bool is_active() const { + return status == SERVER_MODEL_STATUS_LOADED || status == SERVER_MODEL_STATUS_LOADING; + } + + bool is_failed() const { + return status == SERVER_MODEL_STATUS_UNLOADED && exit_code != 0; + } +}; + +struct subprocess_s; + +struct server_models { +private: + struct instance_t { + std::shared_ptr subproc; // shared between main thread and monitoring thread + std::thread th; + server_model_meta meta; + FILE * stdin_file = nullptr; + }; + + std::mutex mutex; + std::condition_variable cv; + std::map mapping; + + common_params base_params; + std::vector base_args; + std::vector base_env; + + void update_meta(const std::string & name, const server_model_meta & meta); + + // unload least recently used models if the limit is reached + void unload_lru(); + +public: + server_models(const common_params & params, int argc, char ** argv, char ** envp); + + // check if a model instance exists + bool has_model(const std::string & name); + + // return a copy of model metadata + std::optional get_meta(const std::string & name); + + // return a copy of all model metadata + std::vector get_all_meta(); + + // if auto_load is true, load the model with previous args if any + void load(const std::string & name, bool auto_load); + void unload(const std::string & name); + void unload_all(); + + // update the status of a model instance + void update_status(const std::string & name, server_model_status status); + + // wait until the model instance is fully loaded + // return when the model is loaded or failed to load + void 
wait_until_loaded(const std::string & name); + + // load the model if not loaded, otherwise do nothing + // return false if model is already loaded; return true otherwise (meta may need to be refreshed) + bool ensure_model_loaded(const std::string & name); + + // proxy an HTTP request to the model instance + server_http_res_ptr proxy_request(const server_http_req & req, const std::string & method, const std::string & name, bool update_last_used); + + // notify the router server that a model instance is ready + // return the monitoring thread (to be joined by the caller) + static std::thread setup_child_server(const common_params & base_params, int router_port, const std::string & name, std::function & shutdown_handler); +}; + +struct server_models_routes { + common_params params; + server_models models; + server_models_routes(const common_params & params, int argc, char ** argv, char ** envp) + : params(params), models(params, argc, argv, envp) { + init_routes(); + } + + void init_routes(); + // handlers using lambda function, so that they can capture `this` without `std::bind` + server_http_context::handler_t get_router_props; + server_http_context::handler_t proxy_get; + server_http_context::handler_t proxy_post; + server_http_context::handler_t get_router_models; + server_http_context::handler_t post_router_models_load; + server_http_context::handler_t post_router_models_status; + server_http_context::handler_t post_router_models_unload; +}; + +/** + * A simple HTTP proxy that forwards requests to another server + * and relays the responses back. + */ +struct server_http_proxy : server_http_res { + std::function cleanup = nullptr; +public: + server_http_proxy(const std::string & method, + const std::string & host, + int port, + const std::string & path, + const std::map & headers, + const std::string & body, + const std::function should_stop); + ~server_http_proxy() { + if (cleanup) { + cleanup(); + } + } +private: + std::thread thread; + struct msg_t { + std::map headers; + int status = 0; + std::string data; + }; +}; diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 5256790db2f..950537d82d0 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -1,5 +1,6 @@ #include "server-context.h" #include "server-http.h" +#include "server-models.h" #include "arg.h" #include "common.h" @@ -47,16 +48,16 @@ static server_http_context::handler_t ex_wrapper(server_http_context::handler_t json error_data = format_error_response(message, ERROR_TYPE_SERVER); res->status = json_value(error_data, "code", 500); res->data = safe_json_to_str({{ "error", error_data }}); - LOG_WRN("got exception: %s\n", res->data.c_str()); + SRV_WRN("got exception: %s\n", res->data.c_str()); } catch (const std::exception & e) { - LOG_ERR("got another exception: %s | while hanlding exception: %s\n", e.what(), message.c_str()); + SRV_ERR("got another exception: %s | while handling exception: %s\n", e.what(), message.c_str()); res->data = "Internal Server Error"; } return res; }; } -int main(int argc, char ** argv) { +int main(int argc, char ** argv, char ** envp) { // own arguments required by this example common_params params; @@ -75,6 +76,11 @@ int main(int argc, char ** argv) { params.kv_unified = true; } + // for consistency between server router mode and single-model mode, we set the same model name as alias + if (params.model_alias.empty() && !params.model.name.empty()) { + params.model_alias = params.model.name; + } + common_init(); // struct that contains llama context and inference @@ 
-101,6 +107,42 @@ int main(int argc, char ** argv) { // register API routes server_routes routes(params, ctx_server, [&ctx_http]() { return ctx_http.is_ready.load(); }); + bool is_router_server = params.model.path.empty(); + std::optional models_routes{}; + if (is_router_server) { + // setup server instances manager + models_routes.emplace(params, argc, argv, envp); + + // proxy handlers + // note: routes.get_health stays the same + routes.get_metrics = models_routes->proxy_get; + routes.post_props = models_routes->proxy_post; + routes.get_api_show = models_routes->proxy_get; + routes.post_completions = models_routes->proxy_post; + routes.post_completions_oai = models_routes->proxy_post; + routes.post_chat_completions = models_routes->proxy_post; + routes.post_anthropic_messages = models_routes->proxy_post; + routes.post_anthropic_count_tokens = models_routes->proxy_post; + routes.post_infill = models_routes->proxy_post; + routes.post_embeddings = models_routes->proxy_post; + routes.post_embeddings_oai = models_routes->proxy_post; + routes.post_rerank = models_routes->proxy_post; + routes.post_tokenize = models_routes->proxy_post; + routes.post_detokenize = models_routes->proxy_post; + routes.post_apply_template = models_routes->proxy_post; + routes.get_lora_adapters = models_routes->proxy_get; + routes.post_lora_adapters = models_routes->proxy_post; + routes.get_slots = models_routes->proxy_get; + routes.post_slots = models_routes->proxy_post; + + // custom routes for router + routes.get_props = models_routes->get_router_props; + routes.get_models = models_routes->get_router_models; + ctx_http.post("/models/load", ex_wrapper(models_routes->post_router_models_load)); + ctx_http.post("/models/unload", ex_wrapper(models_routes->post_router_models_unload)); + ctx_http.post("/models/status", ex_wrapper(models_routes->post_router_models_status)); + } + ctx_http.get ("/health", ex_wrapper(routes.get_health)); // public endpoint (no API key check) ctx_http.get ("/v1/health", ex_wrapper(routes.get_health)); // public endpoint (no API key check) ctx_http.get ("/metrics", ex_wrapper(routes.get_metrics)); @@ -140,42 +182,68 @@ int main(int argc, char ** argv) { // Start the server // - // setup clean up function, to be called before exit - auto clean_up = [&ctx_http, &ctx_server]() { - SRV_INF("%s: cleaning up before exit...\n", __func__); - ctx_http.stop(); - ctx_server.terminate(); - llama_backend_free(); - }; + std::function clean_up; - // start the HTTP server before loading the model to be able to serve /health requests - if (!ctx_http.start()) { - clean_up(); - LOG_ERR("%s: exiting due to HTTP server error\n", __func__); - return 1; - } + if (is_router_server) { + LOG_INF("%s: starting router server, no model will be loaded in this process\n", __func__); - // load the model - LOG_INF("%s: loading model\n", __func__); + clean_up = [&models_routes]() { + SRV_INF("%s: cleaning up before exit...\n", __func__); + if (models_routes.has_value()) { + models_routes->models.unload_all(); + } + llama_backend_free(); + }; - if (!ctx_server.load_model(params)) { - clean_up(); - if (ctx_http.thread.joinable()) { - ctx_http.thread.join(); + if (!ctx_http.start()) { + clean_up(); + LOG_ERR("%s: exiting due to HTTP server error\n", __func__); + return 1; } - LOG_ERR("%s: exiting due to model loading error\n", __func__); - return 1; - } + ctx_http.is_ready.store(true); + + shutdown_handler = [&](int) { + ctx_http.stop(); + }; + + } else { + // setup clean up function, to be called before exit + clean_up = 
+            SRV_INF("%s: cleaning up before exit...\n", __func__);
+            ctx_http.stop();
+            ctx_server.terminate();
+            llama_backend_free();
+        };
+
+        // start the HTTP server before loading the model to be able to serve /health requests
+        if (!ctx_http.start()) {
+            clean_up();
+            LOG_ERR("%s: exiting due to HTTP server error\n", __func__);
+            return 1;
+        }
+
+        // load the model
+        LOG_INF("%s: loading model\n", __func__);
 
-    ctx_server.init();
-    ctx_http.is_ready.store(true);
+        if (!ctx_server.load_model(params)) {
+            clean_up();
+            if (ctx_http.thread.joinable()) {
+                ctx_http.thread.join();
+            }
+            LOG_ERR("%s: exiting due to model loading error\n", __func__);
+            return 1;
+        }
 
-    LOG_INF("%s: model loaded\n", __func__);
+        ctx_server.init();
+        ctx_http.is_ready.store(true);
 
-    shutdown_handler = [&](int) {
-        // this will unblock start_loop()
-        ctx_server.terminate();
-    };
+        LOG_INF("%s: model loaded\n", __func__);
+
+        shutdown_handler = [&](int) {
+            // this will unblock start_loop()
+            ctx_server.terminate();
+        };
+    }
 
     // TODO: refactor in common/console
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
@@ -192,16 +260,39 @@ int main(int argc, char ** argv) {
     SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
 #endif
 
-    LOG_INF("%s: server is listening on %s\n", __func__, ctx_http.listening_address.c_str());
-    LOG_INF("%s: starting the main loop...\n", __func__);
-    // this call blocks the main thread until ctx_server.terminate() is called
-    ctx_server.start_loop();
+    if (is_router_server) {
+        LOG_INF("%s: router server is listening on %s\n", __func__, ctx_http.listening_address.c_str());
+        LOG_INF("%s: NOTE: router mode is experimental\n", __func__);
+        LOG_INF("%s: it is not recommended to use this mode in untrusted environments\n", __func__);
+        if (ctx_http.thread.joinable()) {
+            ctx_http.thread.join(); // keep the main thread alive
+        }
 
-    clean_up();
-    if (ctx_http.thread.joinable()) {
-        ctx_http.thread.join();
+        // when the HTTP server stops, clean up and exit
+        clean_up();
+    } else {
+        LOG_INF("%s: server is listening on %s\n", __func__, ctx_http.listening_address.c_str());
+        LOG_INF("%s: starting the main loop...\n", __func__);
+
+        // optionally, notify router server that this instance is ready
+        const char * router_port = std::getenv("LLAMA_SERVER_ROUTER_PORT");
+        std::thread monitor_thread;
+        if (router_port != nullptr) {
+            monitor_thread = server_models::setup_child_server(params, std::atoi(router_port), params.model_alias, shutdown_handler);
+        }
+
+        // this call blocks the main thread until queue_tasks.terminate() is called
+        ctx_server.start_loop();
+
+        clean_up();
+        if (ctx_http.thread.joinable()) {
+            ctx_http.thread.join();
+        }
+        if (monitor_thread.joinable()) {
+            monitor_thread.join();
+        }
+        llama_memory_breakdown_print(ctx_server.get_llama_context());
     }
-    llama_memory_breakdown_print(ctx_server.get_llama_context());
 
     return 0;
 }
diff --git a/tools/server/tests/unit/test_router.py b/tools/server/tests/unit/test_router.py
new file mode 100644
index 00000000000..e6f3c6485c0
--- /dev/null
+++ b/tools/server/tests/unit/test_router.py
@@ -0,0 +1,50 @@
+import pytest
+from utils import *
+
+server: ServerProcess
+
+@pytest.fixture(autouse=True)
+def create_server():
+    global server
+    server = ServerPreset.router()
+
+
+@pytest.mark.parametrize(
+    "model,success",
+    [
+        ("ggml-org/tinygemma3-GGUF:Q8_0", True),
+        ("non-existent/model", False),
+    ]
+)
+def test_router_chat_completion_stream(model: str, success: bool):
+    # TODO: make sure the model is in cache (i.e. ServerProcess.load_all()) before starting the router server
+    global server
+    server.start()
+    content = ""
+    ex: ServerError | None = None
+    try:
+        res = server.make_stream_request("POST", "/chat/completions", data={
+            "model": model,
+            "max_tokens": 16,
+            "messages": [
+                {"role": "user", "content": "hello"},
+            ],
+            "stream": True,
+        })
+        for data in res:
+            if data["choices"]:
+                choice = data["choices"][0]
+                if choice["finish_reason"] in ["stop", "length"]:
+                    assert "content" not in choice["delta"]
+                else:
+                    assert choice["finish_reason"] is None
+                    content += choice["delta"]["content"] or ''
+    except ServerError as e:
+        ex = e
+
+    if success:
+        assert ex is None
+        assert len(content) > 0
+    else:
+        assert ex is not None
+        assert content == ""
diff --git a/tools/server/tests/utils.py b/tools/server/tests/utils.py
index a779283d69d..afe4f77d978 100644
--- a/tools/server/tests/utils.py
+++ b/tools/server/tests/utils.py
@@ -46,7 +46,7 @@ class ServerProcess:
     debug: bool = False
     server_port: int = 8080
     server_host: str = "127.0.0.1"
-    model_hf_repo: str = "ggml-org/models"
+    model_hf_repo: str | None = "ggml-org/models"
     model_hf_file: str | None = "tinyllamas/stories260K.gguf"
     model_alias: str = "tinyllama-2"
     temperature: float = 0.8
@@ -521,9 +521,8 @@ def tinygemma3() -> ServerProcess:
         server = ServerProcess()
         server.offline = True  # will be downloaded by load_all()
         # mmproj is already provided by HF registry API
-        server.model_hf_repo = "ggml-org/tinygemma3-GGUF"
-        server.model_hf_file = "tinygemma3-Q8_0.gguf"
-        server.mmproj_url = "https://huggingface.co/ggml-org/tinygemma3-GGUF/resolve/main/mmproj-tinygemma3.gguf"
+        server.model_hf_file = None
+        server.model_hf_repo = "ggml-org/tinygemma3-GGUF:Q8_0"
         server.model_alias = "tinygemma3"
         server.n_ctx = 1024
         server.n_batch = 32
@@ -532,6 +531,21 @@ def tinygemma3() -> ServerProcess:
         server.seed = 42
         return server
 
+    @staticmethod
+    def router() -> ServerProcess:
+        server = ServerProcess()
+        # router server has no models
+        server.model_file = None
+        server.model_alias = None
+        server.model_hf_repo = None
+        server.model_hf_file = None
+        server.n_ctx = 1024
+        server.n_batch = 16
+        server.n_slots = 1
+        server.n_predict = 16
+        server.seed = 42
+        return server
+
 
 def parallel_function_calls(function_list: List[Tuple[Callable[..., Any], Tuple[Any, ...]]]) -> List[Any]:
     """
diff --git a/tools/server/webui/.storybook/main.ts b/tools/server/webui/.storybook/main.ts
index 7145bcb7eba..bfd16fa2245 100644
--- a/tools/server/webui/.storybook/main.ts
+++ b/tools/server/webui/.storybook/main.ts
@@ -1,7 +1,7 @@
 import type { StorybookConfig } from '@storybook/sveltekit';
 
 const config: StorybookConfig = {
-	stories: ['../src/**/*.mdx', '../src/**/*.stories.@(js|ts|svelte)'],
+	stories: ['../tests/stories/**/*.mdx', '../tests/stories/**/*.stories.@(js|ts|svelte)'],
 	addons: [
 		'@storybook/addon-svelte-csf',
 		'@chromatic-com/storybook',
diff --git a/tools/server/webui/README.md b/tools/server/webui/README.md
index 9d16f34d86b..d995271fc4b 100644
--- a/tools/server/webui/README.md
+++ b/tools/server/webui/README.md
@@ -2,65 +2,685 @@
 A modern, feature-rich web interface for llama.cpp built with SvelteKit. This UI provides an intuitive chat interface with advanced file handling, conversation management, and comprehensive model interaction capabilities.
+The WebUI supports two server operation modes:
+
+- **MODEL mode** - Single model operation (standard llama-server)
+- **ROUTER mode** - Multi-model operation with dynamic model loading/unloading
+
+---
+
+## Table of Contents
+
+- [Features](#features)
+- [Getting Started](#getting-started)
+- [Tech Stack](#tech-stack)
+- [Build Pipeline](#build-pipeline)
+- [Architecture](#architecture)
+- [Data Flows](#data-flows)
+- [Architectural Patterns](#architectural-patterns)
+- [Testing](#testing)
+
+---
+
 ## Features
 
-- **Modern Chat Interface** - Clean, responsive design with dark/light mode
-- **File Attachments** - Support for images, text files, PDFs, and audio with rich previews and drag-and-drop support
-- **Conversation Management** - Create, edit, branch, and search conversations
-- **Advanced Markdown** - Code highlighting, math formulas (KaTeX), and content blocks
-- **Reasoning Content** - Support for models with thinking blocks
-- **Keyboard Shortcuts** - Keyboard navigation (Shift+Ctrl/Cmd+O for new chat, Shift+Ctrl/Cmdt+E for edit conversation, Shift+Ctrl/Cmdt+D for delete conversation, Ctrl/Cmd+K for search, Ctrl/Cmd+V for paste, Ctrl/Cmd+B for opening/collapsing sidebar)
-- **Request Tracking** - Monitor processing with slots endpoint integration
-- **UI Testing** - Storybook component library with automated tests
+### Chat Interface
+
+- **Streaming responses** with real-time updates
+- **Reasoning content** - Support for models with thinking/reasoning blocks
+- **Dark/light theme** with system preference detection
+- **Responsive design** for desktop and mobile
+
+### File Attachments
+
+- **Images** - JPEG, PNG, GIF, WebP, SVG (with PNG conversion)
+- **Documents** - PDF (text extraction or image conversion for vision models)
+- **Audio** - MP3, WAV for audio-capable models
+- **Text files** - Source code, markdown, and other text formats
+- **Drag-and-drop** and paste support with rich previews
+
+### Conversation Management
+
+- **Branching** - Branch conversations at any point by editing messages or regenerating responses, and navigate between branches
+- **Regeneration** - Regenerate responses with optional model switching (ROUTER mode)
+- **Import/Export** - JSON format for backup and sharing
+- **Search** - Find conversations by title or content
+
+### Advanced Rendering
+
+- **Syntax highlighting** - Code blocks with language detection
+- **Math formulas** - KaTeX rendering for LaTeX expressions
+- **Markdown** - Full GFM support with tables, lists, and more
+
+### Multi-Model Support (ROUTER mode)
+
+- **Model selector** with Loaded/Available groups
+- **Automatic loading** - Models load on selection
+- **Modality validation** - Prevents sending images to non-vision models
+- **LRU unloading** - Server auto-manages model cache (a sketch of the underlying endpoints appears at the end of this section)
+
+### Keyboard Shortcuts
+
+| Shortcut           | Action               |
+| ------------------ | -------------------- |
+| `Shift+Ctrl/Cmd+O` | New chat             |
+| `Shift+Ctrl/Cmd+E` | Edit conversation    |
+| `Shift+Ctrl/Cmd+D` | Delete conversation  |
+| `Ctrl/Cmd+K`       | Search conversations |
+| `Ctrl/Cmd+B`       | Toggle sidebar       |
+
+### Developer Experience
+
+- **Request tracking** - Monitor token generation with `/slots` endpoint
+- **Storybook** - Component library with visual testing
+- **Hot reload** - Instant updates during development
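+
+For reference, the ROUTER-mode endpoints that drive the model selector can also be exercised directly. A minimal sketch (the JSON request fields are assumptions based on the flows documented later in this README, not an authoritative API reference):
+
+```typescript
+// Sketch: driving the router's model management endpoints directly
+// (field names in the request bodies are assumed, not verified)
+const base = 'http://localhost:8080';
+
+// list models known to the router, with their load status
+const models = await (await fetch(`${base}/models`)).json();
+console.log(models);
+
+// ask the router to load a model
+await fetch(`${base}/models/load`, {
+	method: 'POST',
+	headers: { 'Content-Type': 'application/json' },
+	body: JSON.stringify({ model: 'ggml-org/tinygemma3-GGUF:Q8_0' })
+});
+
+// unload it again when it is no longer needed
+await fetch(`${base}/models/unload`, {
+	method: 'POST',
+	headers: { 'Content-Type': 'application/json' },
+	body: JSON.stringify({ model: 'ggml-org/tinygemma3-GGUF:Q8_0' })
+});
+```
+
+---
+
+## Getting Started
+
+### Prerequisites
-## Development
+
+- **Node.js** 18+ (20+ recommended)
+- **npm** 9+
+- **llama-server** running locally (for API access)
-
-Install dependencies:
+
+### 1. Install Dependencies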
+
 ```bash
+cd tools/server/webui
 npm install
 ```
 
-Start the development server + Storybook:
+### 2. Start llama-server
+
+In a separate terminal, start the backend server:
+
+```bash
+# Single model (MODEL mode)
+./llama-server -m model.gguf
+
+# Multi-model (ROUTER mode)
+./llama-server --models-dir /path/to/models
+```
+
+### 3. Start Development Servers
 
 ```bash
 npm run dev
 ```
 
-This will start both the SvelteKit dev server and Storybook on port 6006.
+This starts:
+
+- **Vite dev server** at `http://localhost:5173` - The main WebUI
+- **Storybook** at `http://localhost:6006` - Component documentation
+
+The Vite dev server proxies API requests to `http://localhost:8080` (default llama-server port):
+
+```typescript
+// vite.config.ts proxy configuration
+proxy: {
+	'/v1': 'http://localhost:8080',
+	'/props': 'http://localhost:8080',
+	'/slots': 'http://localhost:8080',
+	'/models': 'http://localhost:8080'
+}
+```
+
+### Development Workflow
 
-## Building
+1. Open `http://localhost:5173` in your browser
+2. Make changes to `.svelte`, `.ts`, or `.css` files
+3. Changes hot-reload instantly
+4. Use Storybook at `http://localhost:6006` for isolated component development
 
-Create a production build:
+---
+
+## Tech Stack
+
+| Layer             | Technology                      | Purpose                                                  |
+| ----------------- | ------------------------------- | -------------------------------------------------------- |
+| **Framework**     | SvelteKit + Svelte 5            | Reactive UI with runes (`$state`, `$derived`, `$effect`) |
+| **UI Components** | shadcn-svelte + bits-ui         | Accessible, customizable component library               |
+| **Styling**       | TailwindCSS 4                   | Utility-first CSS with design tokens                     |
+| **Database**      | IndexedDB (Dexie)               | Client-side storage for conversations and messages       |
+| **Build**         | Vite                            | Fast bundling with static adapter                        |
+| **Testing**       | Playwright + Vitest + Storybook | E2E, unit, and visual testing                            |
+| **Markdown**      | remark + rehype                 | Markdown processing with KaTeX and syntax highlighting   |
+
+### Key Dependencies
+
+```json
+{
+	"svelte": "^5.0.0",
+	"bits-ui": "^2.8.11",
+	"dexie": "^4.0.11",
+	"pdfjs-dist": "^5.4.54",
+	"highlight.js": "^11.11.1",
+	"rehype-katex": "^7.0.1"
+}
+```
+
+---
+
+## Build Pipeline
+
+### Development Build
 
 ```bash
-npm run build
+npm run dev
 ```
 
-The build outputs static files to `../public` directory for deployment with llama.cpp server.
+Runs Vite in development mode with:
 
-## Testing
+- Hot Module Replacement (HMR)
+- Source maps
+- Proxy to llama-server
 
-Run the test suite:
+### Production Build
 
 ```bash
-# E2E tests
-npm run test:e2e
+npm run build
+```
+
+The build process:
+
+1. **Vite Build** - Bundles all TypeScript, Svelte, and CSS
+2. **Static Adapter** - Outputs to `../public` (llama-server's static file directory)
+3. **Post-Build Script** - Cleans up intermediate files
+4. **Custom Plugin** - Creates `index.html.gz` with:
+   - Inlined favicon as base64
+   - GZIP compression (level 9)
+   - Deterministic output (zeroed timestamps)
+
+```text
+tools/server/webui/     → build →    tools/server/public/
+├── src/                             ├── index.html.gz  (served by llama-server)
+├── static/                          └── (favicon inlined)
+└── ...
+``` + +### SvelteKit Configuration + +```javascript +// svelte.config.js +adapter: adapter({ + pages: '../public', // Output directory + assets: '../public', // Static assets + fallback: 'index.html', // SPA fallback + strict: true +}), +output: { + bundleStrategy: 'inline' // Single-file bundle +} +``` + +### Integration with llama-server + +The WebUI is embedded directly into the llama-server binary: + +1. `npm run build` outputs `index.html.gz` to `tools/server/public/` +2. llama-server compiles this into the binary at build time +3. When accessing `/`, llama-server serves the gzipped HTML +4. All assets are inlined (CSS, JS, fonts, favicon) + +This results in a **single portable binary** with the full WebUI included. + +--- + +## Architecture + +The WebUI follows a layered architecture with unidirectional data flow: + +```text +Routes → Components → Hooks → Stores → Services → Storage/API +``` + +### High-Level Architecture + +See: [`docs/architecture/high-level-architecture-simplified.md`](docs/architecture/high-level-architecture-simplified.md) + +```mermaid +flowchart TB + subgraph Routes["📍 Routes"] + R1["/ (Welcome)"] + R2["/chat/[id]"] + RL["+layout.svelte"] + end + + subgraph Components["🧩 Components"] + C_Sidebar["ChatSidebar"] + C_Screen["ChatScreen"] + C_Form["ChatForm"] + C_Messages["ChatMessages"] + C_ModelsSelector["ModelsSelector"] + C_Settings["ChatSettings"] + end + + subgraph Stores["🗄️ Stores"] + S1["chatStore"] + S2["conversationsStore"] + S3["modelsStore"] + S4["serverStore"] + S5["settingsStore"] + end + + subgraph Services["⚙️ Services"] + SV1["ChatService"] + SV2["ModelsService"] + SV3["PropsService"] + SV4["DatabaseService"] + end + + subgraph Storage["💾 Storage"] + ST1["IndexedDB"] + ST2["LocalStorage"] + end + + subgraph APIs["🌐 llama-server"] + API1["/v1/chat/completions"] + API2["/props"] + API3["/models/*"] + end + + R1 & R2 --> C_Screen + RL --> C_Sidebar + C_Screen --> C_Form & C_Messages & C_Settings + C_Screen --> S1 & S2 + C_ModelsSelector --> S3 & S4 + S1 --> SV1 & SV4 + S3 --> SV2 & SV3 + SV4 --> ST1 + SV1 --> API1 + SV2 --> API3 + SV3 --> API2 +``` + +### Layer Breakdown + +#### Routes (`src/routes/`) + +- **`/`** - Welcome screen, creates new conversation +- **`/chat/[id]`** - Active chat interface +- **`+layout.svelte`** - Sidebar, navigation, global initialization + +#### Components (`src/lib/components/`) + +Components are organized in `app/` (application-specific) and `ui/` (shadcn-svelte primitives). + +**Chat Components** (`app/chat/`): + +| Component | Responsibility | +| ------------------ | --------------------------------------------------------------------------- | +| `ChatScreen/` | Main chat container, coordinates message list, input form, and attachments | +| `ChatForm/` | Message input textarea with file upload, paste handling, keyboard shortcuts | +| `ChatMessages/` | Message list with branch navigation, regenerate/continue/edit actions | +| `ChatAttachments/` | File attachment previews, drag-and-drop, PDF/image/audio handling | +| `ChatSettings/` | Parameter sliders (temperature, top-p, etc.) 
with server default sync |
+| `ChatSidebar/`     | Conversation list, search, import/export, navigation                        |
+
+**Dialog Components** (`app/dialogs/`):
+
+| Component                       | Responsibility                                           |
+| ------------------------------- | -------------------------------------------------------- |
+| `DialogChatSettings`            | Full-screen settings configuration                       |
+| `DialogModelInformation`        | Model details (context size, modalities, parallel slots) |
+| `DialogChatAttachmentPreview`   | Full preview for images, PDFs (text or page view), code  |
+| `DialogConfirmation`            | Generic confirmation for destructive actions             |
+| `DialogConversationTitleUpdate` | Edit conversation title                                  |
+
+**Server/Model Components** (`app/server/`, `app/models/`):
+
+| Component           | Responsibility                                            |
+| ------------------- | --------------------------------------------------------- |
+| `ServerErrorSplash` | Error display when server is unreachable                  |
+| `ModelsSelector`    | Model dropdown with Loaded/Available groups (ROUTER mode) |
+
+**Shared UI Components** (`app/misc/`):
+
+| Component                        | Responsibility                                                   |
+| -------------------------------- | ----------------------------------------------------------------- |
+| `MarkdownContent`                | Markdown rendering with KaTeX, syntax highlighting, copy buttons  |
+| `SyntaxHighlightedCode`          | Code blocks with language detection and highlighting              |
+| `ActionButton`, `ActionDropdown` | Reusable action buttons and menus                                 |
+| `BadgeModality`, `BadgeInfo`     | Status and capability badges                                      |
+
+#### Hooks (`src/lib/hooks/`)
+
+- **`useModelChangeValidation`** - Validates model switch against conversation modalities
+- **`useProcessingState`** - Tracks streaming progress and token generation
-
-# Unit tests
-npm run test:unit
+
+#### Stores (`src/lib/stores/`)
-
-# UI tests
-npm run test:ui
+
+| Store                | Responsibility                                            |
+| -------------------- | ---------------------------------------------------------- |
+| `chatStore`          | Message sending, streaming, abort control, error handling  |
+| `conversationsStore` | CRUD for conversations, message branching, navigation      |
+| `modelsStore`        | Model list, selection, loading/unloading (ROUTER)          |
+| `serverStore`        | Server properties, role detection, modalities              |
+| `settingsStore`      | User preferences, parameter sync with server defaults      |
+
+#### Services (`src/lib/services/`)
+
+| Service                | Responsibility                                   |
+| ---------------------- | ------------------------------------------------ |
+| `ChatService`          | API calls to `/v1/chat/completions`, SSE parsing |
+| `ModelsService`        | `/models`, `/models/load`, `/models/unload`      |
+| `PropsService`         | `/props`, `/props?model=`                        |
+| `DatabaseService`      | IndexedDB operations via Dexie                   |
+| `ParameterSyncService` | Syncs settings with server defaults              |
+
+---
+
+## Data Flows
+
+### MODEL Mode (Single Model)
+
+See: [`docs/flows/data-flow-simplified-model-mode.md`](docs/flows/data-flow-simplified-model-mode.md)
+
+```mermaid
+sequenceDiagram
+    participant User
+    participant UI
+    participant Stores
+    participant DB as IndexedDB
+    participant API as llama-server
+
+    Note over User,API: Initialization
+    UI->>Stores: initialize()
+    Stores->>DB: load conversations
+    Stores->>API: GET /props
+    API-->>Stores: server config
+    Stores->>API: GET /v1/models
+    API-->>Stores: single model (auto-selected)
+
+    Note over User,API: Chat Flow
+    User->>UI: send message
+    Stores->>DB: save user message
+    Stores->>API: POST /v1/chat/completions (stream)
+    loop streaming
+        API-->>Stores: SSE chunks
+        Stores-->>UI: reactive update
+    end
+    Stores->>DB: save assistant message
+```
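+
+The streaming loop above is plain server-sent events over `fetch`. A minimal client-side sketch of consuming it (assuming the OpenAI-compatible `data:` line format that llama-server emits; this is an illustration, not the WebUI's actual `ChatService` implementation):
+
+```typescript
+// Minimal SSE consumer for /v1/chat/completions (illustrative sketch)
+async function streamChat(
+	messages: { role: string; content: string }[],
+	onChunk: (text: string) => void
+): Promise<void> {
+	const res = await fetch('/v1/chat/completions', {
+		method: 'POST',
+		headers: { 'Content-Type': 'application/json' },
+		body: JSON.stringify({ messages, stream: true })
+	});
+	const reader = res.body!.getReader();
+	const decoder = new TextDecoder();
+	let buffer = '';
+	for (;;) {
+		const { done, value } = await reader.read();
+		if (done) break;
+		buffer += decoder.decode(value, { stream: true });
+		const lines = buffer.split('\n');
+		buffer = lines.pop() ?? ''; // keep any partial line for the next read
+		for (const line of lines) {
+			if (!line.startsWith('data: ')) continue;
+			const payload = line.slice('data: '.length);
+			if (payload === '[DONE]') return;
+			const delta = JSON.parse(payload).choices?.[0]?.delta;
+			if (delta?.content) onChunk(delta.content);
+		}
+	}
+}
+```
+
+### ROUTER Mode (Multi-Model)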
+
+See: [`docs/flows/data-flow-simplified-router-mode.md`](docs/flows/data-flow-simplified-router-mode.md)
+
+```mermaid
+sequenceDiagram
+    participant User
+    participant UI
+    participant Stores
+    participant API as llama-server
+
+    Note over User,API: Initialization
+    Stores->>API: GET /props
+    API-->>Stores: {role: "router"}
+    Stores->>API: GET /models
+    API-->>Stores: models[] with status
+
+    Note over User,API: Model Selection
+    User->>UI: select model
+    alt model not loaded
+        Stores->>API: POST /models/load
+        loop poll status
+            Stores->>API: GET /models
+        end
+        Stores->>API: GET /props?model=X
+    end
+    Stores->>Stores: validate modalities
+
+    Note over User,API: Chat Flow
+    Stores->>API: POST /v1/chat/completions {model: X}
+    loop streaming
+        API-->>Stores: SSE chunks + model info
+    end
+```
+
+### Detailed Flow Diagrams
+
+| Flow          | Description                                | File                                                         |
+| ------------- | ------------------------------------------ | ------------------------------------------------------------ |
+| Chat          | Message lifecycle, streaming, regeneration | [`chat-flow.md`](docs/flows/chat-flow.md)                    |
+| Models        | Loading, unloading, modality caching       | [`models-flow.md`](docs/flows/models-flow.md)                |
+| Server        | Props fetching, role detection             | [`server-flow.md`](docs/flows/server-flow.md)                |
+| Conversations | CRUD, branching, import/export             | [`conversations-flow.md`](docs/flows/conversations-flow.md)  |
+| Database      | IndexedDB schema, operations               | [`database-flow.md`](docs/flows/database-flow.md)            |
+| Settings      | Parameter sync, user overrides             | [`settings-flow.md`](docs/flows/settings-flow.md)            |
+
+---
+
+## Architectural Patterns
+
+### 1. Reactive State with Svelte 5 Runes
+
+All stores use Svelte 5's fine-grained reactivity:
+
+```typescript
+// Store with reactive state
+class ChatStore {
+	#isLoading = $state(false);
+	#currentResponse = $state('');
+
+	// Derived values auto-update
+	#isStreaming = $derived(this.#isLoading && this.#currentResponse.length > 0);
+
+	get isStreaming() {
+		return this.#isStreaming;
+	}
+}
+
+// Exported reactive accessors
+export const isLoading = () => chatStore.isLoading;
+export const currentResponse = () => chatStore.currentResponse;
+```
+
+### 2. Unidirectional Data Flow
+
+Data flows in one direction, making state predictable:
+
+```mermaid
+flowchart LR
+    subgraph UI["UI Layer"]
+        A[User Action] --> B[Component]
+    end
+
+    subgraph State["State Layer"]
+        B --> C[Store Method]
+        C --> D[State Update]
+    end
+
+    subgraph IO["I/O Layer"]
+        C --> E[Service]
+        E --> F[API / IndexedDB]
+        F -.->|Response| D
+    end
+
+    D -->|Reactive| B
```
+
+Components dispatch actions to stores, stores coordinate with services for I/O, and state updates reactively propagate back to the UI.
+
+### 3. Per-Conversation State
+
+Enables concurrent streaming across multiple conversations:
+
+```typescript
+class ChatStore {
+	// all maps are keyed by conversation id
+	chatLoadingStates = new Map();
+	chatStreamingStates = new Map();
+	abortControllers = new Map();
+}
+```
+
+### 4. Message Branching with Tree Structure
+
+Conversations are stored as a tree, not a linear list:
+
+```typescript
+interface DatabaseMessage {
+	id: string;
+	parent: string | null; // Points to parent message
+	children: string[]; // List of child message IDs
+	// ...
+}
+
+interface DatabaseConversation {
+	currentNode: string; // Currently viewed branch tip
+	// ...
+}
+```
+
+Navigation between branches updates `currentNode` without losing history. A sketch of how the visible chain is derived from this tree follows.
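+
+The sketch below is modeled on the `filterByLeafNodeId()` helper referenced in the flow docs; the function name and the `Map` input here are illustrative, not the store's verbatim code:
+
+```typescript
+// Walk parent pointers from the current leaf back to the root,
+// then reverse to get the linear message list for display.
+function messagesForBranch(
+	byId: Map<string, DatabaseMessage>,
+	leafId: string
+): DatabaseMessage[] {
+	const path: DatabaseMessage[] = [];
+	let node = byId.get(leafId);
+	while (node) {
+		path.push(node);
+		node = node.parent !== null ? byId.get(node.parent) : undefined;
+	}
+	return path.reverse(); // root → ... → currentNode
+}
+```
+
+### 5. Layered Service Architecture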
+
+Stores handle state; services handle I/O:
+
+```text
+┌─────────────────┐
+│     Stores      │  Business logic, state management
+├─────────────────┤
+│    Services     │  API calls, database operations
+├─────────────────┤
+│   Storage/API   │  IndexedDB, LocalStorage, HTTP
+└─────────────────┘
+```
+
+### 6. Server Role Abstraction
+
+Single codebase handles both MODEL and ROUTER modes:
+
+```typescript
+// serverStore.ts
+get isRouterMode() {
+	return this.role === ServerRole.ROUTER;
+}
+
+// Components conditionally render based on mode
+{#if isRouterMode()}
+	<ModelsSelector />
+{/if}
+```
+
+### 7. Modality Validation
+
+Prevents sending attachments to incompatible models:
+
+```typescript
+// useModelChangeValidation hook
+const validate = (modelId: string) => {
+	const modelModalities = modelsStore.getModelModalities(modelId);
+	const conversationModalities = conversationsStore.usedModalities;
+
+	// Check if model supports all used modalities
+	if (conversationModalities.hasImages && !modelModalities.vision) {
+		return { valid: false, reason: 'Model does not support images' };
+	}
+	// ...
+};
+```
+
+### 8. Persistent Storage Strategy
+
+Data is persisted across sessions using two storage mechanisms:
+
+```mermaid
+flowchart TB
+    subgraph Browser["Browser Storage"]
+        subgraph IDB["IndexedDB (Dexie)"]
+            C[Conversations]
+            M[Messages]
+        end
+        subgraph LS["LocalStorage"]
+            S[Settings Config]
+            O[User Overrides]
+            T[Theme Preference]
+        end
+    end
+
+    subgraph Stores["Svelte Stores"]
+        CS[conversationsStore] --> C
+        CS --> M
+        SS[settingsStore] --> S
+        SS --> O
+        SS --> T
+    end
+```
+
+- **IndexedDB**: Conversations and messages (large, structured data)
+- **LocalStorage**: Settings, user parameter overrides, theme (small key-value data)
+- **Memory only**: Server props, model list (fetched fresh on each session)
+
+---
+
+## Testing
+
+### Test Types
+
+| Type          | Tool               | Location                         | Command             |
+| ------------- | ------------------ | -------------------------------- | ------------------- |
+| **E2E**       | Playwright         | `tests/e2e/`                     | `npm run test:e2e`  |
+| **Unit**      | Vitest             | `tests/client/`, `tests/server/` | `npm run test:unit` |
+| **UI/Visual** | Storybook + Vitest | `tests/stories/`                 | `npm run test:ui`   |
+
+### Running Tests
+
 ```bash
 # All tests
 npm run test
+
+# Individual test suites
+npm run test:e2e     # End-to-end (requires llama-server)
+npm run test:client  # Client-side unit tests
+npm run test:server  # Server-side unit tests
+npm run test:ui      # Storybook visual tests
 ```
 
-## Architecture
+### Storybook Development
+
+```bash
+npm run storybook        # Start Storybook dev server on :6006
+npm run build-storybook  # Build static Storybook
+```
+
+### Linting and Formatting
+
+```bash
+npm run lint    # Check code style
+npm run format  # Auto-format with Prettier
+npm run check   # TypeScript type checking
+```
+
+---
+
+## Project Structure
+
+```text
+tools/server/webui/
+├── src/
+│   ├── lib/
+│   │   ├── components/  # UI components (app/, ui/)
+│   │   ├── hooks/       # Svelte hooks
+│   │   ├── stores/      # State management
+│   │   ├── services/    # API and database services
+│   │   ├── types/       # TypeScript interfaces
+│   │   └── utils/       # Utility functions
+│   ├── routes/          # SvelteKit routes
+│   └── styles/          # Global styles
+├── static/              # Static assets
+├── tests/               # Test files
+├── docs/                # Architecture diagrams
+│   ├── architecture/    # High-level architecture
+│   └── flows/           # Feature-specific flows
+└── .storybook/          # Storybook configuration
+```
+
+---
+
+## Related Documentation
 
-- **Framework**: SvelteKit with Svelte 5 runes
-
**Components**: ShadCN UI + bits-ui design system -- **Database**: IndexedDB with Dexie for local storage -- **Build**: Static adapter for deployment with llama.cpp server -- **Testing**: Playwright (E2E) + Vitest (unit) + Storybook (components) +- [llama.cpp Server README](../README.md) - Full server documentation +- [Multimodal Documentation](../../../docs/multimodal.md) - Image and audio support +- [Function Calling](../../../docs/function-calling.md) - Tool use capabilities diff --git a/tools/server/webui/docs/architecture/high-level-architecture-simplified.md b/tools/server/webui/docs/architecture/high-level-architecture-simplified.md new file mode 100644 index 00000000000..50f2e1df0a0 --- /dev/null +++ b/tools/server/webui/docs/architecture/high-level-architecture-simplified.md @@ -0,0 +1,102 @@ +```mermaid +flowchart TB + subgraph Routes["📍 Routes"] + R1["/ (Welcome)"] + R2["/chat/[id]"] + RL["+layout.svelte"] + end + + subgraph Components["🧩 Components"] + C_Sidebar["ChatSidebar"] + C_Screen["ChatScreen"] + C_Form["ChatForm"] + C_Messages["ChatMessages"] + C_ModelsSelector["ModelsSelector"] + C_Settings["ChatSettings"] + end + + subgraph Hooks["🪝 Hooks"] + H1["useModelChangeValidation"] + H2["useProcessingState"] + end + + subgraph Stores["🗄️ Stores"] + S1["chatStore
Chat interactions & streaming"] + S2["conversationsStore
Conversation data & messages"] + S3["modelsStore
Model selection & loading"] + S4["serverStore
Server props & role detection"] + S5["settingsStore
User configuration"] + end + + subgraph Services["⚙️ Services"] + SV1["ChatService"] + SV2["ModelsService"] + SV3["PropsService"] + SV4["DatabaseService"] + SV5["ParameterSyncService"] + end + + subgraph Storage["💾 Storage"] + ST1["IndexedDB
conversations, messages"] + ST2["LocalStorage
config, userOverrides"] + end + + subgraph APIs["🌐 llama-server API"] + API1["/v1/chat/completions"] + API2["/props"] + API3["/models/*"] + API4["/v1/models"] + end + + %% Routes → Components + R1 & R2 --> C_Screen + RL --> C_Sidebar + + %% Component hierarchy + C_Screen --> C_Form & C_Messages & C_Settings + C_Form & C_Messages --> C_ModelsSelector + + %% Components → Hooks → Stores + C_Form & C_Messages --> H1 & H2 + H1 --> S3 & S4 + H2 --> S1 & S5 + + %% Components → Stores + C_Screen --> S1 & S2 + C_Sidebar --> S2 + C_ModelsSelector --> S3 & S4 + C_Settings --> S5 + + %% Stores → Services + S1 --> SV1 & SV4 + S2 --> SV4 + S3 --> SV2 & SV3 + S4 --> SV3 + S5 --> SV5 + + %% Services → Storage + SV4 --> ST1 + SV5 --> ST2 + + %% Services → APIs + SV1 --> API1 + SV2 --> API3 & API4 + SV3 --> API2 + + %% Styling + classDef routeStyle fill:#e1f5fe,stroke:#01579b,stroke-width:2px + classDef componentStyle fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px + classDef hookStyle fill:#fff8e1,stroke:#ff8f00,stroke-width:2px + classDef storeStyle fill:#fff3e0,stroke:#e65100,stroke-width:2px + classDef serviceStyle fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px + classDef storageStyle fill:#fce4ec,stroke:#c2185b,stroke-width:2px + classDef apiStyle fill:#e3f2fd,stroke:#1565c0,stroke-width:2px + + class R1,R2,RL routeStyle + class C_Sidebar,C_Screen,C_Form,C_Messages,C_ModelsSelector,C_Settings componentStyle + class H1,H2 hookStyle + class S1,S2,S3,S4,S5 storeStyle + class SV1,SV2,SV3,SV4,SV5 serviceStyle + class ST1,ST2 storageStyle + class API1,API2,API3,API4 apiStyle +``` diff --git a/tools/server/webui/docs/architecture/high-level-architecture.md b/tools/server/webui/docs/architecture/high-level-architecture.md new file mode 100644 index 00000000000..730da10a59e --- /dev/null +++ b/tools/server/webui/docs/architecture/high-level-architecture.md @@ -0,0 +1,269 @@ +```mermaid +flowchart TB +subgraph Routes["📍 Routes"] +R1["/ (+page.svelte)"] +R2["/chat/[id]"] +RL["+layout.svelte"] +end + + subgraph Components["🧩 Components"] + direction TB + subgraph LayoutComponents["Layout"] + C_Sidebar["ChatSidebar"] + C_Screen["ChatScreen"] + end + subgraph ChatUIComponents["Chat UI"] + C_Form["ChatForm"] + C_Messages["ChatMessages"] + C_Message["ChatMessage"] + C_Attach["ChatAttachments"] + C_ModelsSelector["ModelsSelector"] + C_Settings["ChatSettings"] + end + end + + subgraph Hooks["🪝 Hooks"] + H1["useModelChangeValidation"] + H2["useProcessingState"] + H3["isMobile"] + end + + subgraph Stores["🗄️ Stores"] + direction TB + subgraph S1["chatStore"] + S1State["State:
isLoading, currentResponse
errorDialogState
activeProcessingState
chatLoadingStates
chatStreamingStates
abortControllers
processingStates
activeConversationId
isStreamingActive"] + S1LoadState["Loading State:
setChatLoading()
isChatLoading()
syncLoadingStateForChat()
clearUIState()
isChatLoadingPublic()
getAllLoadingChats()
getAllStreamingChats()"] + S1ProcState["Processing State:
setActiveProcessingConversation()
getProcessingState()
clearProcessingState()
getActiveProcessingState()
updateProcessingStateFromTimings()
getCurrentProcessingStateSync()
restoreProcessingStateFromMessages()"] + S1Stream["Streaming:
streamChatCompletion()
startStreaming()
stopStreaming()
stopGeneration()
isStreaming()"] + S1Error["Error Handling:
showErrorDialog()
dismissErrorDialog()
isAbortError()"] + S1Msg["Message Operations:
addMessage()
sendMessage()
updateMessage()
deleteMessage()
getDeletionInfo()"] + S1Regen["Regeneration:
regenerateMessage()
regenerateMessageWithBranching()
continueAssistantMessage()"] + S1Edit["Editing:
editAssistantMessage()
editUserMessagePreserveResponses()
editMessageWithBranching()"] + S1Utils["Utilities:
getApiOptions()
parseTimingData()
getOrCreateAbortController()
getConversationModel()"] + end + subgraph S2["conversationsStore"] + S2State["State:
conversations
activeConversation
activeMessages
usedModalities
isInitialized
titleUpdateConfirmationCallback"] + S2Modal["Modalities:
getModalitiesUpToMessage()
calculateModalitiesFromMessages()"] + S2Lifecycle["Lifecycle:
initialize()
loadConversations()
clearActiveConversation()"] + S2ConvCRUD["Conversation CRUD:
createConversation()
loadConversation()
deleteConversation()
updateConversationName()
updateConversationTitleWithConfirmation()"] + S2MsgMgmt["Message Management:
refreshActiveMessages()
addMessageToActive()
updateMessageAtIndex()
findMessageIndex()
sliceActiveMessages()
removeMessageAtIndex()
getConversationMessages()"] + S2Nav["Navigation:
navigateToSibling()
updateCurrentNode()
updateConversationTimestamp()"] + S2Export["Import/Export:
downloadConversation()
exportAllConversations()
importConversations()
triggerDownload()"] + S2Utils["Utilities:
setTitleUpdateConfirmationCallback()"] + end + subgraph S3["modelsStore"] + S3State["State:
models, routerModels
selectedModelId
selectedModelName
loading, updating, error
modelLoadingStates
modelPropsCache
modelPropsFetching
propsCacheVersion"] + S3Getters["Computed Getters:
selectedModel
loadedModelIds
loadingModelIds
singleModelName"] + S3Modal["Modalities:
getModelModalities()
modelSupportsVision()
modelSupportsAudio()
getModelModalitiesArray()
getModelProps()
updateModelModalities()"] + S3Status["Status Queries:
isModelLoaded()
isModelOperationInProgress()
getModelStatus()
isModelPropsFetching()"] + S3Fetch["Data Fetching:
fetch()
fetchRouterModels()
fetchModelProps()
fetchModalitiesForLoadedModels()"] + S3Select["Model Selection:
selectModelById()
selectModelByName()
clearSelection()
findModelByName()
findModelById()
hasModel()"] + S3LoadUnload["Loading/Unloading Models:
loadModel()
unloadModel()
ensureModelLoaded()
waitForModelStatus()
pollForModelStatus()"] + S3Utils["Utilities:
toDisplayName()
clear()"] + end + subgraph S4["serverStore"] + S4State["State:
props
loading, error
role
fetchPromise"] + S4Getters["Getters:
defaultParams
contextSize
isRouterMode
isModelMode"] + S4Data["Data Handling:
fetch()
getErrorMessage()
clear()"] + S4Utils["Utilities:
detectRole()"] + end + subgraph S5["settingsStore"] + S5State["State:
config
theme
isInitialized
userOverrides"] + S5Lifecycle["Lifecycle:
initialize()
loadConfig()
saveConfig()
loadTheme()
saveTheme()"] + S5Update["Config Updates:
updateConfig()
updateMultipleConfig()
updateTheme()"] + S5Reset["Reset:
resetConfig()
resetTheme()
resetAll()
resetParameterToServerDefault()"] + S5Sync["Server Sync:
syncWithServerDefaults()
forceSyncWithServerDefaults()"] + S5Utils["Utilities:
getConfig()
getAllConfig()
getParameterInfo()
getParameterDiff()
getServerDefaults()
clearAllUserOverrides()"] + end + + subgraph ReactiveExports["⚡ Reactive Exports"] + direction LR + subgraph ChatExports["chatStore"] + RE1["isLoading()"] + RE2["currentResponse()"] + RE3["errorDialog()"] + RE4["activeProcessingState()"] + RE5["isChatStreaming()"] + RE6["isChatLoading()"] + RE7["getChatStreaming()"] + RE8["getAllLoadingChats()"] + RE9["getAllStreamingChats()"] + end + subgraph ConvExports["conversationsStore"] + RE10["conversations()"] + RE11["activeConversation()"] + RE12["activeMessages()"] + RE13["isConversationsInitialized()"] + RE14["usedModalities()"] + end + subgraph ModelsExports["modelsStore"] + RE15["modelOptions()"] + RE16["routerModels()"] + RE17["modelsLoading()"] + RE18["modelsUpdating()"] + RE19["modelsError()"] + RE20["selectedModelId()"] + RE21["selectedModelName()"] + RE22["selectedModelOption()"] + RE23["loadedModelIds()"] + RE24["loadingModelIds()"] + RE25["propsCacheVersion()"] + RE26["singleModelName()"] + end + subgraph ServerExports["serverStore"] + RE27["serverProps()"] + RE28["serverLoading()"] + RE29["serverError()"] + RE30["serverRole()"] + RE31["defaultParams()"] + RE32["contextSize()"] + RE33["isRouterMode()"] + RE34["isModelMode()"] + end + subgraph SettingsExports["settingsStore"] + RE35["config()"] + RE36["theme()"] + RE37["isInitialized()"] + end + end + end + + subgraph Services["⚙️ Services"] + direction TB + subgraph SV1["ChatService"] + SV1Msg["Messaging:
sendMessage()"] + SV1Stream["Streaming:
handleStreamResponse()
parseSSEChunk()"] + SV1Convert["Conversion:
convertMessageToChatData()
convertExtraToApiFormat()"] + SV1Utils["Utilities:
extractReasoningContent()
getServerProps()
getModels()"] + end + subgraph SV2["ModelsService"] + SV2List["Listing:
list()
listRouter()"] + SV2LoadUnload["Load/Unload:
load()
unload()"] + SV2Status["Status:
isModelLoaded()
isModelLoading()"] + end + subgraph SV3["PropsService"] + SV3Fetch["Fetching:
fetch()
fetchForModel()"] + end + subgraph SV4["DatabaseService"] + SV4Conv["Conversations:
createConversation()
getConversation()
getAllConversations()
updateConversation()
deleteConversation()"] + SV4Msg["Messages:
createMessageBranch()
createRootMessage()
getConversationMessages()
updateMessage()
deleteMessage()
deleteMessageCascading()"] + SV4Node["Navigation:
updateCurrentNode()"] + SV4Import["Import:
importConversations()"] + end + subgraph SV5["ParameterSyncService"] + SV5Extract["Extraction:
extractServerDefaults()"] + SV5Merge["Merging:
mergeWithServerDefaults()"] + SV5Info["Info:
getParameterInfo()
canSyncParameter()
getSyncableParameterKeys()
validateServerParameter()"] + SV5Diff["Diff:
createParameterDiff()"] + end + end + + subgraph Storage["💾 Storage"] + ST1["IndexedDB"] + ST2["conversations"] + ST3["messages"] + ST5["LocalStorage"] + ST6["config"] + ST7["userOverrides"] + end + + subgraph APIs["🌐 llama-server API"] + API1["/v1/chat/completions"] + API2["/props
/props?model="] + API3["/models
/models/load
/models/unload"] + API4["/v1/models"] + end + + %% Routes render Components + R1 --> C_Screen + R2 --> C_Screen + RL --> C_Sidebar + + %% Component hierarchy + C_Screen --> C_Form & C_Messages & C_Settings + C_Messages --> C_Message + C_Message --> C_ModelsSelector + C_Form --> C_ModelsSelector + C_Form --> C_Attach + C_Message --> C_Attach + + %% Components use Hooks + C_Form --> H1 + C_Message --> H1 & H2 + C_Screen --> H2 + + %% Hooks use Stores + H1 --> S3 & S4 + H2 --> S1 & S5 + + %% Components use Stores + C_Screen --> S1 & S2 + C_Messages --> S2 + C_Message --> S1 & S2 & S3 + C_Form --> S1 & S3 + C_Sidebar --> S2 + C_ModelsSelector --> S3 & S4 + C_Settings --> S5 + + %% Stores export Reactive State + S1 -. exports .-> ChatExports + S2 -. exports .-> ConvExports + S3 -. exports .-> ModelsExports + S4 -. exports .-> ServerExports + S5 -. exports .-> SettingsExports + + %% Stores use Services + S1 --> SV1 & SV4 + S2 --> SV4 + S3 --> SV2 & SV3 + S4 --> SV3 + S5 --> SV5 + + %% Services to Storage + SV4 --> ST1 + ST1 --> ST2 & ST3 + SV5 --> ST5 + ST5 --> ST6 & ST7 + + %% Services to APIs + SV1 --> API1 + SV2 --> API3 & API4 + SV3 --> API2 + + %% Styling + classDef routeStyle fill:#e1f5fe,stroke:#01579b,stroke-width:2px + classDef componentStyle fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px + classDef componentGroupStyle fill:#e1bee7,stroke:#7b1fa2,stroke-width:1px + classDef storeStyle fill:#fff3e0,stroke:#e65100,stroke-width:2px + classDef stateStyle fill:#ffe0b2,stroke:#e65100,stroke-width:1px + classDef methodStyle fill:#ffecb3,stroke:#e65100,stroke-width:1px + classDef reactiveStyle fill:#fffde7,stroke:#f9a825,stroke-width:1px + classDef serviceStyle fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px + classDef serviceMStyle fill:#c8e6c9,stroke:#2e7d32,stroke-width:1px + classDef storageStyle fill:#fce4ec,stroke:#c2185b,stroke-width:2px + classDef apiStyle fill:#e3f2fd,stroke:#1565c0,stroke-width:2px + + class R1,R2,RL routeStyle + class C_Sidebar,C_Screen,C_Form,C_Messages,C_Message componentStyle + class C_ModelsSelector,C_Settings componentStyle + class C_Attach componentStyle + class H1,H2,H3 methodStyle + class LayoutComponents,ChatUIComponents componentGroupStyle + class Hooks storeStyle + class S1,S2,S3,S4,S5 storeStyle + class S1State,S2State,S3State,S4State,S5State stateStyle + class S1Msg,S1Regen,S1Edit,S1Stream,S1LoadState,S1ProcState,S1Error,S1Utils methodStyle + class S2Lifecycle,S2ConvCRUD,S2MsgMgmt,S2Nav,S2Modal,S2Export,S2Utils methodStyle + class S3Getters,S3Modal,S3Status,S3Fetch,S3Select,S3LoadUnload,S3Utils methodStyle + class S4Getters,S4Data,S4Utils methodStyle + class S5Lifecycle,S5Update,S5Reset,S5Sync,S5Utils methodStyle + class ChatExports,ConvExports,ModelsExports,ServerExports,SettingsExports reactiveStyle + class SV1,SV2,SV3,SV4,SV5 serviceStyle + class SV1Msg,SV1Stream,SV1Convert,SV1Utils serviceMStyle + class SV2List,SV2LoadUnload,SV2Status serviceMStyle + class SV3Fetch serviceMStyle + class SV4Conv,SV4Msg,SV4Node,SV4Import serviceMStyle + class SV5Extract,SV5Merge,SV5Info,SV5Diff serviceMStyle + class ST1,ST2,ST3,ST5,ST6,ST7 storageStyle + class API1,API2,API3,API4 apiStyle +``` diff --git a/tools/server/webui/docs/flows/chat-flow.md b/tools/server/webui/docs/flows/chat-flow.md new file mode 100644 index 00000000000..05e1df385a7 --- /dev/null +++ b/tools/server/webui/docs/flows/chat-flow.md @@ -0,0 +1,174 @@ +```mermaid +sequenceDiagram + participant UI as 🧩 ChatForm / ChatMessage + participant chatStore as 🗄️ chatStore + participant convStore as 🗄️ 
conversationsStore + participant settingsStore as 🗄️ settingsStore + participant ChatSvc as ⚙️ ChatService + participant DbSvc as ⚙️ DatabaseService + participant API as 🌐 /v1/chat/completions + + Note over chatStore: State:
isLoading, currentResponse
errorDialogState, activeProcessingState
chatLoadingStates (Map)
chatStreamingStates (Map)
abortControllers (Map)
processingStates (Map) + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: 💬 SEND MESSAGE + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>chatStore: sendMessage(content, extras) + activate chatStore + + chatStore->>chatStore: setChatLoading(convId, true) + chatStore->>chatStore: clearChatStreaming(convId) + + alt no active conversation + chatStore->>convStore: createConversation() + Note over convStore: → see conversations-flow.mmd + end + + chatStore->>chatStore: addMessage("user", content, extras) + chatStore->>DbSvc: createMessageBranch(userMsg, parentId) + chatStore->>convStore: addMessageToActive(userMsg) + chatStore->>convStore: updateCurrentNode(userMsg.id) + + chatStore->>chatStore: createAssistantMessage(userMsg.id) + chatStore->>DbSvc: createMessageBranch(assistantMsg, userMsg.id) + chatStore->>convStore: addMessageToActive(assistantMsg) + + chatStore->>chatStore: streamChatCompletion(messages, assistantMsg) + deactivate chatStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: 🌊 STREAMING + %% ═══════════════════════════════════════════════════════════════════════════ + + activate chatStore + chatStore->>chatStore: startStreaming() + Note right of chatStore: isStreamingActive = true + + chatStore->>chatStore: setActiveProcessingConversation(convId) + chatStore->>chatStore: getOrCreateAbortController(convId) + Note right of chatStore: abortControllers.set(convId, new AbortController()) + + chatStore->>chatStore: getApiOptions() + Note right of chatStore: Merge from settingsStore.config:
temperature, max_tokens, top_p, etc. + + chatStore->>ChatSvc: sendMessage(messages, options, signal) + activate ChatSvc + + ChatSvc->>ChatSvc: convertMessageToChatData(messages) + Note right of ChatSvc: DatabaseMessage[] → ApiChatMessageData[]
Process attachments (images, PDFs, audio) + + ChatSvc->>API: POST /v1/chat/completions + Note right of API: {messages, model?, stream: true, ...params} + + loop SSE chunks + API-->>ChatSvc: data: {"choices":[{"delta":{...}}]} + ChatSvc->>ChatSvc: parseSSEChunk(line) + + alt content chunk + ChatSvc-->>chatStore: onChunk(content) + chatStore->>chatStore: setChatStreaming(convId, response, msgId) + Note right of chatStore: currentResponse = $state(accumulated) + chatStore->>convStore: updateMessageAtIndex(idx, {content}) + end + + alt reasoning chunk + ChatSvc-->>chatStore: onReasoningChunk(reasoning) + chatStore->>convStore: updateMessageAtIndex(idx, {thinking}) + end + + alt tool_calls chunk + ChatSvc-->>chatStore: onToolCallChunk(toolCalls) + chatStore->>convStore: updateMessageAtIndex(idx, {toolCalls}) + end + + alt model info + ChatSvc-->>chatStore: onModel(modelName) + chatStore->>chatStore: recordModel(modelName) + chatStore->>DbSvc: updateMessage(msgId, {model}) + end + + alt timings (during stream) + ChatSvc-->>chatStore: onTimings(timings, promptProgress) + chatStore->>chatStore: updateProcessingStateFromTimings() + end + + chatStore-->>UI: reactive $state update + end + + API-->>ChatSvc: data: [DONE] + ChatSvc-->>chatStore: onComplete(content, reasoning, timings, toolCalls) + deactivate ChatSvc + + chatStore->>chatStore: stopStreaming() + chatStore->>DbSvc: updateMessage(msgId, {content, timings, model}) + chatStore->>convStore: updateCurrentNode(msgId) + chatStore->>chatStore: setChatLoading(convId, false) + chatStore->>chatStore: clearChatStreaming(convId) + chatStore->>chatStore: clearProcessingState(convId) + deactivate chatStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: ⏹️ STOP GENERATION + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>chatStore: stopGeneration() + activate chatStore + chatStore->>chatStore: savePartialResponseIfNeeded(convId) + Note right of chatStore: Save currentResponse to DB if non-empty + chatStore->>chatStore: abortControllers.get(convId).abort() + Note right of chatStore: fetch throws AbortError → caught by isAbortError() + chatStore->>chatStore: stopStreaming() + chatStore->>chatStore: setChatLoading(convId, false) + chatStore->>chatStore: clearChatStreaming(convId) + chatStore->>chatStore: clearProcessingState(convId) + deactivate chatStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: 🔁 REGENERATE + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>chatStore: regenerateMessageWithBranching(msgId, model?) + activate chatStore + chatStore->>convStore: findMessageIndex(msgId) + chatStore->>chatStore: Get parent of target message + chatStore->>chatStore: createAssistantMessage(parentId) + chatStore->>DbSvc: createMessageBranch(newAssistantMsg, parentId) + chatStore->>convStore: refreshActiveMessages() + Note right of chatStore: Same streaming flow + chatStore->>chatStore: streamChatCompletion(...) 
+ deactivate chatStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: ➡️ CONTINUE + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>chatStore: continueAssistantMessage(msgId) + activate chatStore + chatStore->>chatStore: Get existing content from message + chatStore->>chatStore: streamChatCompletion(..., existingContent) + Note right of chatStore: Appends to existing message content + deactivate chatStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: ✏️ EDIT USER MESSAGE + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>chatStore: editUserMessagePreserveResponses(msgId, newContent) + activate chatStore + chatStore->>chatStore: Get parent of target message + chatStore->>DbSvc: createMessageBranch(editedMsg, parentId) + chatStore->>convStore: refreshActiveMessages() + Note right of chatStore: Creates new branch, original preserved + deactivate chatStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: ❌ ERROR HANDLING + %% ═══════════════════════════════════════════════════════════════════════════ + + Note over chatStore: On stream error (non-abort): + chatStore->>chatStore: showErrorDialog(type, message) + Note right of chatStore: errorDialogState = {type: 'timeout'|'server', message} + chatStore->>convStore: removeMessageAtIndex(failedMsgIdx) + chatStore->>DbSvc: deleteMessage(failedMsgId) +``` diff --git a/tools/server/webui/docs/flows/conversations-flow.md b/tools/server/webui/docs/flows/conversations-flow.md new file mode 100644 index 00000000000..185ed16e0cd --- /dev/null +++ b/tools/server/webui/docs/flows/conversations-flow.md @@ -0,0 +1,155 @@ +```mermaid +sequenceDiagram + participant UI as 🧩 ChatSidebar / ChatScreen + participant convStore as 🗄️ conversationsStore + participant chatStore as 🗄️ chatStore + participant DbSvc as ⚙️ DatabaseService + participant IDB as 💾 IndexedDB + + Note over convStore: State:
conversations: DatabaseConversation[]
activeConversation: DatabaseConversation | null
activeMessages: DatabaseMessage[]
isInitialized: boolean
usedModalities: $derived({vision, audio}) + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,IDB: 🚀 INITIALIZATION + %% ═══════════════════════════════════════════════════════════════════════════ + + Note over convStore: Auto-initialized in constructor (browser only) + convStore->>convStore: initialize() + activate convStore + convStore->>convStore: loadConversations() + convStore->>DbSvc: getAllConversations() + DbSvc->>IDB: SELECT * FROM conversations ORDER BY lastModified DESC + IDB-->>DbSvc: Conversation[] + DbSvc-->>convStore: conversations + convStore->>convStore: conversations = $state(data) + convStore->>convStore: isInitialized = true + deactivate convStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,IDB: ➕ CREATE CONVERSATION + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>convStore: createConversation(name?) + activate convStore + convStore->>DbSvc: createConversation(name || "New Chat") + DbSvc->>IDB: INSERT INTO conversations + IDB-->>DbSvc: conversation {id, name, lastModified, currNode: ""} + DbSvc-->>convStore: conversation + convStore->>convStore: conversations.unshift(conversation) + convStore->>convStore: activeConversation = $state(conversation) + convStore->>convStore: activeMessages = $state([]) + deactivate convStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,IDB: 📂 LOAD CONVERSATION + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>convStore: loadConversation(convId) + activate convStore + convStore->>DbSvc: getConversation(convId) + DbSvc->>IDB: SELECT * FROM conversations WHERE id = ? + IDB-->>DbSvc: conversation + convStore->>convStore: activeConversation = $state(conversation) + + convStore->>convStore: refreshActiveMessages() + convStore->>DbSvc: getConversationMessages(convId) + DbSvc->>IDB: SELECT * FROM messages WHERE convId = ? + IDB-->>DbSvc: allMessages[] + convStore->>convStore: filterByLeafNodeId(allMessages, currNode) + Note right of convStore: Filter to show only current branch path + convStore->>convStore: activeMessages = $state(filtered) + + convStore->>chatStore: syncLoadingStateForChat(convId) + Note right of chatStore: Sync isLoading/currentResponse if streaming + deactivate convStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,IDB: 🌳 MESSAGE BRANCHING MODEL + %% ═══════════════════════════════════════════════════════════════════════════ + + Note over IDB: Message Tree Structure:
- Each message has parent (null for root)
- Each message has children[] array
- Conversation.currNode points to active leaf
- filterByLeafNodeId() traverses from root to currNode + + rect rgb(240, 240, 255) + Note over convStore: Example Branch Structure: + Note over convStore: root → user1 → assistant1 → user2 → assistant2a (currNode)
↘ assistant2b (alt branch) + end + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,IDB: ↔️ BRANCH NAVIGATION + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>convStore: navigateToSibling(msgId, direction) + activate convStore + convStore->>convStore: Find message in activeMessages + convStore->>convStore: Get parent message + convStore->>convStore: Find sibling in parent.children[] + convStore->>convStore: findLeafNode(siblingId, allMessages) + Note right of convStore: Navigate to leaf of sibling branch + convStore->>convStore: updateCurrentNode(leafId) + convStore->>DbSvc: updateCurrentNode(convId, leafId) + DbSvc->>IDB: UPDATE conversations SET currNode = ? + convStore->>convStore: refreshActiveMessages() + deactivate convStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,IDB: 📝 UPDATE CONVERSATION + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>convStore: updateConversationName(convId, newName) + activate convStore + convStore->>DbSvc: updateConversation(convId, {name: newName}) + DbSvc->>IDB: UPDATE conversations SET name = ? + convStore->>convStore: Update in conversations array + deactivate convStore + + Note over convStore: Auto-title update (after first response): + convStore->>convStore: updateConversationTitleWithConfirmation() + convStore->>convStore: titleUpdateConfirmationCallback?() + Note right of convStore: Shows dialog if title would change + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,IDB: 🗑️ DELETE CONVERSATION + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>convStore: deleteConversation(convId) + activate convStore + convStore->>DbSvc: deleteConversation(convId) + DbSvc->>IDB: DELETE FROM conversations WHERE id = ? + DbSvc->>IDB: DELETE FROM messages WHERE convId = ? + convStore->>convStore: conversations.filter(c => c.id !== convId) + alt deleted active conversation + convStore->>convStore: clearActiveConversation() + end + deactivate convStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,IDB: 📊 MODALITY TRACKING + %% ═══════════════════════════════════════════════════════════════════════════ + + Note over convStore: usedModalities = $derived.by(() => {
calculateModalitiesFromMessages(activeMessages)
}) + + Note over convStore: Scans activeMessages for attachments:
- IMAGE → vision: true
- PDF (processedAsImages) → vision: true
- AUDIO → audio: true + + UI->>convStore: getModalitiesUpToMessage(msgId) + Note right of convStore: Used for regeneration validation
Only checks messages BEFORE target + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,IDB: 📤 EXPORT / 📥 IMPORT + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>convStore: exportAllConversations() + activate convStore + convStore->>DbSvc: getAllConversations() + loop each conversation + convStore->>DbSvc: getConversationMessages(convId) + end + convStore->>convStore: triggerDownload(JSON blob) + deactivate convStore + + UI->>convStore: importConversations(file) + activate convStore + convStore->>convStore: Parse JSON file + convStore->>DbSvc: importConversations(parsed) + DbSvc->>IDB: Bulk INSERT conversations + messages + convStore->>convStore: loadConversations() + deactivate convStore +``` diff --git a/tools/server/webui/docs/flows/data-flow-simplified-model-mode.md b/tools/server/webui/docs/flows/data-flow-simplified-model-mode.md new file mode 100644 index 00000000000..07b362147fa --- /dev/null +++ b/tools/server/webui/docs/flows/data-flow-simplified-model-mode.md @@ -0,0 +1,45 @@ +```mermaid +%% MODEL Mode Data Flow (single model) +%% Detailed flows: ./flows/server-flow.mmd, ./flows/models-flow.mmd, ./flows/chat-flow.mmd + +sequenceDiagram + participant User as 👤 User + participant UI as 🧩 UI + participant Stores as 🗄️ Stores + participant DB as 💾 IndexedDB + participant API as 🌐 llama-server + + Note over User,API: 🚀 Initialization (see: server-flow.mmd, models-flow.mmd) + + UI->>Stores: initialize() + Stores->>DB: load conversations + Stores->>API: GET /props + API-->>Stores: server config + modalities + Stores->>API: GET /v1/models + API-->>Stores: single model (auto-selected) + + Note over User,API: 💬 Chat Flow (see: chat-flow.mmd) + + User->>UI: send message + UI->>Stores: sendMessage() + Stores->>DB: save user message + Stores->>API: POST /v1/chat/completions (stream) + loop streaming + API-->>Stores: SSE chunks + Stores-->>UI: reactive update + end + API-->>Stores: done + timings + Stores->>DB: save assistant message + + Note over User,API: 🔁 Regenerate + + User->>UI: regenerate + Stores->>DB: create message branch + Note right of Stores: same streaming flow + + Note over User,API: ⏹️ Stop + + User->>UI: stop + Stores->>Stores: abort stream + Stores->>DB: save partial response +``` diff --git a/tools/server/webui/docs/flows/data-flow-simplified-router-mode.md b/tools/server/webui/docs/flows/data-flow-simplified-router-mode.md new file mode 100644 index 00000000000..f5c4f05edf6 --- /dev/null +++ b/tools/server/webui/docs/flows/data-flow-simplified-router-mode.md @@ -0,0 +1,77 @@ +```mermaid +%% ROUTER Mode Data Flow (multi-model) +%% Detailed flows: ./flows/server-flow.mmd, ./flows/models-flow.mmd, ./flows/chat-flow.mmd + +sequenceDiagram + participant User as 👤 User + participant UI as 🧩 UI + participant Stores as 🗄️ Stores + participant DB as 💾 IndexedDB + participant API as 🌐 llama-server + + Note over User,API: 🚀 Initialization (see: server-flow.mmd, models-flow.mmd) + + UI->>Stores: initialize() + Stores->>DB: load conversations + Stores->>API: GET /props + API-->>Stores: {role: "router"} + Stores->>API: GET /models + API-->>Stores: models[] with status (loaded/available) + loop each loaded model + Stores->>API: GET /props?model=X + API-->>Stores: modalities (vision/audio) + end + + Note over User,API: 🔄 Model Selection (see: models-flow.mmd) + + User->>UI: select model + alt model not loaded + Stores->>API: POST /models/load + loop poll status + Stores->>API: GET /models + 
API-->>Stores: check if loaded + end + Stores->>API: GET /props?model=X + API-->>Stores: cache modalities + end + Stores->>Stores: validate modalities vs conversation + alt valid + Stores->>Stores: select model + else invalid + Stores->>API: POST /models/unload + UI->>User: show error toast + end + + Note over User,API: 💬 Chat Flow (see: chat-flow.mmd) + + User->>UI: send message + UI->>Stores: sendMessage() + Stores->>DB: save user message + Stores->>API: POST /v1/chat/completions {model: X} + Note right of API: router forwards to model + loop streaming + API-->>Stores: SSE chunks + model info + Stores-->>UI: reactive update + end + API-->>Stores: done + timings + Stores->>DB: save assistant message + model used + + Note over User,API: 🔁 Regenerate (optional: different model) + + User->>UI: regenerate + Stores->>Stores: validate modalities up to this message + Stores->>DB: create message branch + Note right of Stores: same streaming flow + + Note over User,API: ⏹️ Stop + + User->>UI: stop + Stores->>Stores: abort stream + Stores->>DB: save partial response + + Note over User,API: 🗑️ LRU Unloading + + Note right of API: Server auto-unloads LRU models
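Both simplified flows stream completions the same way. A minimal client-side sketch of that streaming step, assuming the OpenAI-compatible endpoint and `stream: true` payload shown in the diagrams; the SSE parsing is deliberately simplified (no multi-line events, no error handling):

```ts
// Sketch of the POST /v1/chat/completions streaming loop. Each SSE "data:"
// line carries one JSON chunk; "data: [DONE]" terminates the stream.
async function streamChatCompletion(
  model: string,
  messages: { role: string; content: string }[],
  onChunk: (text: string) => void
): Promise<void> {
  const response = await fetch('/v1/chat/completions', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model, messages, stream: true })
  });
  const reader = response.body!.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split('\n');
    buffer = lines.pop() ?? ''; // keep any partial line for the next read
    for (const line of lines) {
      if (!line.startsWith('data: ') || line === 'data: [DONE]') continue;
      const delta = JSON.parse(line.slice(6)).choices?.[0]?.delta?.content;
      if (delta) onChunk(delta);
    }
  }
}
```

In ROUTER mode the only difference visible to the client is the `model` field, which the router uses to pick the backing instance.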
when cache full + User->>UI: select unloaded model + Note right of Stores: triggers load flow again +``` diff --git a/tools/server/webui/docs/flows/database-flow.md b/tools/server/webui/docs/flows/database-flow.md new file mode 100644 index 00000000000..50f8284e3c3 --- /dev/null +++ b/tools/server/webui/docs/flows/database-flow.md @@ -0,0 +1,155 @@ +```mermaid +sequenceDiagram + participant Store as 🗄️ Stores + participant DbSvc as ⚙️ DatabaseService + participant Dexie as 📦 Dexie ORM + participant IDB as 💾 IndexedDB + + Note over DbSvc: Stateless service - all methods static
Database: "LlamacppWebui" + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over Store,IDB: 📊 SCHEMA + %% ═══════════════════════════════════════════════════════════════════════════ + + rect rgb(240, 248, 255) + Note over IDB: conversations table:
id (PK), lastModified, currNode, name + end + + rect rgb(255, 248, 240) + Note over IDB: messages table:
id (PK), convId (FK), type, role, timestamp,
parent, children[], content, thinking,
toolCalls, extra[], model, timings + end + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over Store,IDB: 💬 CONVERSATIONS CRUD + %% ═══════════════════════════════════════════════════════════════════════════ + + Store->>DbSvc: createConversation(name) + activate DbSvc + DbSvc->>DbSvc: Generate UUID + DbSvc->>Dexie: db.conversations.add({id, name, lastModified, currNode: ""}) + Dexie->>IDB: INSERT + IDB-->>Dexie: success + DbSvc-->>Store: DatabaseConversation + deactivate DbSvc + + Store->>DbSvc: getConversation(convId) + DbSvc->>Dexie: db.conversations.get(convId) + Dexie->>IDB: SELECT WHERE id = ? + IDB-->>DbSvc: DatabaseConversation + + Store->>DbSvc: getAllConversations() + DbSvc->>Dexie: db.conversations.orderBy('lastModified').reverse().toArray() + Dexie->>IDB: SELECT ORDER BY lastModified DESC + IDB-->>DbSvc: DatabaseConversation[] + + Store->>DbSvc: updateConversation(convId, updates) + DbSvc->>Dexie: db.conversations.update(convId, {...updates, lastModified}) + Dexie->>IDB: UPDATE + + Store->>DbSvc: deleteConversation(convId) + activate DbSvc + DbSvc->>Dexie: db.conversations.delete(convId) + Dexie->>IDB: DELETE FROM conversations + DbSvc->>Dexie: db.messages.where('convId').equals(convId).delete() + Dexie->>IDB: DELETE FROM messages WHERE convId = ? + deactivate DbSvc + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over Store,IDB: 📝 MESSAGES CRUD + %% ═══════════════════════════════════════════════════════════════════════════ + + Store->>DbSvc: createRootMessage(convId) + activate DbSvc + DbSvc->>DbSvc: Create root message {type: "root", parent: null} + DbSvc->>Dexie: db.messages.add(rootMsg) + Dexie->>IDB: INSERT + DbSvc-->>Store: rootMessageId + deactivate DbSvc + + Store->>DbSvc: createMessageBranch(message, parentId) + activate DbSvc + DbSvc->>DbSvc: Generate UUID for new message + DbSvc->>Dexie: db.messages.add({...message, id, parent: parentId}) + Dexie->>IDB: INSERT message + + alt parentId exists + DbSvc->>Dexie: db.messages.get(parentId) + Dexie->>IDB: SELECT parent + DbSvc->>DbSvc: parent.children.push(newId) + DbSvc->>Dexie: db.messages.update(parentId, {children}) + Dexie->>IDB: UPDATE parent.children + end + + DbSvc->>Dexie: db.conversations.update(convId, {currNode: newId}) + Dexie->>IDB: UPDATE conversation.currNode + DbSvc-->>Store: DatabaseMessage + deactivate DbSvc + + Store->>DbSvc: getConversationMessages(convId) + DbSvc->>Dexie: db.messages.where('convId').equals(convId).toArray() + Dexie->>IDB: SELECT WHERE convId = ? 
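The schema and CRUD calls above map almost directly onto Dexie. A minimal sketch, with field lists taken from the schema notes; which fields are declared as secondary indexes is a guess based on the queries shown (`orderBy('lastModified')`, `where('convId')`):

```ts
import Dexie, { type Table } from 'dexie';

// Sketch of the "LlamacppWebui" database described above. Non-indexed fields
// still round-trip through IndexedDB; Dexie only needs the indexed ones listed.
interface DatabaseConversation { id: string; lastModified: number; currNode: string; name: string }
interface DatabaseMessage {
  id: string; convId: string; type: string; role: string; timestamp: number;
  parent: string | null; children: string[];
  content: string; thinking?: string; model?: string;
}

class LlamacppWebuiDb extends Dexie {
  conversations!: Table<DatabaseConversation, string>;
  messages!: Table<DatabaseMessage, string>;

  constructor() {
    super('LlamacppWebui');
    // First entry is the primary key, the rest are secondary indexes
    this.version(1).stores({
      conversations: 'id, lastModified',
      messages: 'id, convId'
    });
  }
}

export const db = new LlamacppWebuiDb();
```

With `lastModified` and `convId` indexed, `getAllConversations()` and `getConversationMessages()` run against indexes instead of full table scans.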
+ IDB-->>DbSvc: DatabaseMessage[] + + Store->>DbSvc: updateMessage(msgId, updates) + DbSvc->>Dexie: db.messages.update(msgId, updates) + Dexie->>IDB: UPDATE + + Store->>DbSvc: deleteMessage(msgId) + DbSvc->>Dexie: db.messages.delete(msgId) + Dexie->>IDB: DELETE + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over Store,IDB: 🌳 BRANCHING OPERATIONS + %% ═══════════════════════════════════════════════════════════════════════════ + + Store->>DbSvc: updateCurrentNode(convId, nodeId) + DbSvc->>Dexie: db.conversations.update(convId, {currNode: nodeId, lastModified}) + Dexie->>IDB: UPDATE + + Store->>DbSvc: deleteMessageCascading(msgId) + activate DbSvc + DbSvc->>DbSvc: findDescendantMessages(msgId, allMessages) + Note right of DbSvc: Recursively find all children + loop each descendant + DbSvc->>Dexie: db.messages.delete(descendantId) + Dexie->>IDB: DELETE + end + DbSvc->>Dexie: db.messages.delete(msgId) + Dexie->>IDB: DELETE target message + deactivate DbSvc + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over Store,IDB: 📥 IMPORT + %% ═══════════════════════════════════════════════════════════════════════════ + + Store->>DbSvc: importConversations(data) + activate DbSvc + loop each conversation in data + DbSvc->>DbSvc: Generate new UUIDs (avoid conflicts) + DbSvc->>Dexie: db.conversations.add(conversation) + Dexie->>IDB: INSERT conversation + loop each message + DbSvc->>Dexie: db.messages.add(message) + Dexie->>IDB: INSERT message + end + end + deactivate DbSvc + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over Store,IDB: 🔗 MESSAGE TREE UTILITIES + %% ═══════════════════════════════════════════════════════════════════════════ + + Note over DbSvc: Used by stores (imported from utils): + + rect rgb(240, 255, 240) + Note over DbSvc: filterByLeafNodeId(messages, leafId)
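The import branch above regenerates IDs so imported rows cannot collide with existing ones. A sketch of that remapping, assuming the exported JSON shape from the export sketch; rewriting `parent`/`children` links with the new IDs is an assumption the diagram implies but does not spell out:

```ts
// Hypothetical db handle with the same surface as the Dexie sketch above.
declare const db: {
  conversations: { add(value: Record<string, unknown>): Promise<string> };
  messages: { add(value: Record<string, unknown>): Promise<string> };
};

interface ImportedConversation {
  conv: { id: string; name: string; lastModified: number; currNode: string };
  messages: { id: string; convId: string; parent: string | null; children: string[] }[];
}

async function importConversations(data: ImportedConversation[]): Promise<void> {
  for (const { conv, messages } of data) {
    // Fresh UUIDs for the conversation and every message
    const newConvId = crypto.randomUUID();
    const idMap = new Map(messages.map((m) => [m.id, crypto.randomUUID()] as [string, string]));

    await db.conversations.add({
      ...conv,
      id: newConvId,
      currNode: idMap.get(conv.currNode) ?? ''
    });

    for (const msg of messages) {
      await db.messages.add({
        ...msg,
        id: idMap.get(msg.id)!,
        convId: newConvId,
        // Keep the tree intact by remapping links to the new IDs
        parent: msg.parent ? (idMap.get(msg.parent) ?? null) : null,
        children: msg.children.map((c) => idMap.get(c) ?? c)
      });
    }
  }
}
```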
→ Returns path from root to leaf
→ Used to display current branch + end + + rect rgb(240, 255, 240) + Note over DbSvc: findLeafNode(startId, messages)
→ Traverse to deepest child
→ Used for branch navigation + end + + rect rgb(240, 255, 240) + Note over DbSvc: findDescendantMessages(msgId, messages)
→ Find all children recursively
→ Used for cascading deletes + end +``` diff --git a/tools/server/webui/docs/flows/models-flow.md b/tools/server/webui/docs/flows/models-flow.md new file mode 100644 index 00000000000..ce63da1b367 --- /dev/null +++ b/tools/server/webui/docs/flows/models-flow.md @@ -0,0 +1,181 @@ +```mermaid +sequenceDiagram + participant UI as 🧩 ModelsSelector + participant Hooks as 🪝 useModelChangeValidation + participant modelsStore as 🗄️ modelsStore + participant serverStore as 🗄️ serverStore + participant convStore as 🗄️ conversationsStore + participant ModelsSvc as ⚙️ ModelsService + participant PropsSvc as ⚙️ PropsService + participant API as 🌐 llama-server + + Note over modelsStore: State:
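The three tree helpers above are pure functions over the messages table. Minimal sketches, assuming the `parent`/`children` shape from the schema notes:

```ts
interface Msg { id: string; parent: string | null; children: string[] }

// filterByLeafNodeId: walk parent links from the leaf up to the root, then
// reverse into root→leaf order. This is the branch currently shown in the UI.
function filterByLeafNodeId(messages: Msg[], leafId: string): Msg[] {
  const byId = new Map(messages.map((m) => [m.id, m] as [string, Msg]));
  const path: Msg[] = [];
  let current = byId.get(leafId);
  while (current) {
    path.push(current);
    current = current.parent ? byId.get(current.parent) : undefined;
  }
  return path.reverse();
}

// findLeafNode: follow a child link at each level down to the deepest node.
// Picking the last child here is an assumption about which sibling "wins".
function findLeafNode(startId: string, messages: Msg[]): string {
  const byId = new Map(messages.map((m) => [m.id, m] as [string, Msg]));
  let current = byId.get(startId);
  while (current && current.children.length > 0) {
    current = byId.get(current.children[current.children.length - 1]);
  }
  return current?.id ?? startId;
}

// findDescendantMessages: collect every descendant of a node,
// used for cascading deletes.
function findDescendantMessages(msgId: string, messages: Msg[]): string[] {
  const byId = new Map(messages.map((m) => [m.id, m] as [string, Msg]));
  const result: string[] = [];
  const stack = [msgId];
  while (stack.length > 0) {
    for (const childId of byId.get(stack.pop()!)?.children ?? []) {
      result.push(childId);
      stack.push(childId);
    }
  }
  return result;
}
```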
models: ModelOption[]
routerModels: ApiModelDataEntry[]
selectedModelId, selectedModelName
loading, updating, error
modelLoadingStates (Map)
modelPropsCache (Map)
propsCacheVersion + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: 🚀 INITIALIZATION (MODEL mode) + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>modelsStore: fetch() + activate modelsStore + modelsStore->>modelsStore: loading = true + + alt serverStore.props not loaded + modelsStore->>serverStore: fetch() + Note over serverStore: → see server-flow.mmd + end + + modelsStore->>ModelsSvc: list() + ModelsSvc->>API: GET /v1/models + API-->>ModelsSvc: ApiModelListResponse {data: [model]} + + modelsStore->>modelsStore: models = $state(mapped) + Note right of modelsStore: Map to ModelOption[]:
{id, name, model, description, capabilities} + + Note over modelsStore: MODEL mode: Get modalities from serverStore.props + modelsStore->>modelsStore: modelPropsCache.set(model.id, serverStore.props) + modelsStore->>modelsStore: models[0].modalities = props.modalities + + modelsStore->>modelsStore: Auto-select single model + Note right of modelsStore: selectedModelId = models[0].id + modelsStore->>modelsStore: loading = false + deactivate modelsStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: 🚀 INITIALIZATION (ROUTER mode) + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>modelsStore: fetch() + activate modelsStore + modelsStore->>ModelsSvc: list() + ModelsSvc->>API: GET /v1/models + API-->>ModelsSvc: ApiModelListResponse + modelsStore->>modelsStore: models = $state(mapped) + deactivate modelsStore + + Note over UI: After models loaded, layout triggers: + UI->>modelsStore: fetchRouterModels() + activate modelsStore + modelsStore->>ModelsSvc: listRouter() + ModelsSvc->>API: GET /models + API-->>ModelsSvc: ApiRouterModelsListResponse + Note right of API: {data: [{id, status, path, in_cache}]} + modelsStore->>modelsStore: routerModels = $state(data) + + modelsStore->>modelsStore: fetchModalitiesForLoadedModels() + loop each model where status === "loaded" + modelsStore->>PropsSvc: fetchForModel(modelId) + PropsSvc->>API: GET /props?model={modelId} + API-->>PropsSvc: ApiLlamaCppServerProps + modelsStore->>modelsStore: modelPropsCache.set(modelId, props) + end + modelsStore->>modelsStore: propsCacheVersion++ + deactivate modelsStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: 🔄 MODEL SELECTION (ROUTER mode) + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>Hooks: useModelChangeValidation({getRequiredModalities, onSuccess?, onValidationFailure?}) + Note over Hooks: Hook configured per-component:
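The per-model props cache above pairs a plain `Map` with a version counter, since mutating a `Map` does not by itself notify reactive consumers. A framework-agnostic sketch of the pattern, with names taken from the diagram:

```ts
// modelPropsCache/propsCacheVersion as plain module state. Consumers read the
// version inside their derived computations so that a bump re-runs them.
interface ServerProps { modalities?: { vision?: boolean; audio?: boolean } }

const modelPropsCache = new Map<string, ServerProps>();
let propsCacheVersion = 0;

async function fetchModelProps(modelId: string): Promise<ServerProps> {
  const cached = modelPropsCache.get(modelId);
  if (cached) return cached; // cache hit: no request, no version bump
  const res = await fetch(`/props?model=${encodeURIComponent(modelId)}`);
  const props = (await res.json()) as ServerProps;
  modelPropsCache.set(modelId, props);
  propsCacheVersion++; // signal consumers that the cache changed
  return props;
}

function modelSupportsVision(modelId: string): boolean {
  return modelPropsCache.get(modelId)?.modalities?.vision === true;
}
```

The same idea shows up later in this diff as `modelPropsVersion` in ChatForm, where a `void modelPropsVersion;` read keeps the derived modality checks subscribed.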
ChatForm: getRequiredModalities = usedModalities
ChatMessage: getRequiredModalities = getModalitiesUpToMessage(msgId) + + UI->>Hooks: handleModelChange(modelId, modelName) + activate Hooks + Hooks->>Hooks: previousSelectedModelId = modelsStore.selectedModelId + Hooks->>modelsStore: isModelLoaded(modelName)? + + alt model NOT loaded + Hooks->>modelsStore: loadModel(modelName) + Note over modelsStore: → see LOAD MODEL section below + end + + Note over Hooks: Always fetch props (from cache or API) + Hooks->>modelsStore: fetchModelProps(modelName) + modelsStore-->>Hooks: props + + Hooks->>convStore: getRequiredModalities() + convStore-->>Hooks: {vision, audio} + + Hooks->>Hooks: Validate: model.modalities ⊇ required? + + alt validation PASSED + Hooks->>modelsStore: selectModelById(modelId) + Hooks-->>UI: return true + else validation FAILED + Hooks->>UI: toast.error("Model doesn't support required modalities") + alt model was just loaded + Hooks->>modelsStore: unloadModel(modelName) + end + alt onValidationFailure provided + Hooks->>modelsStore: selectModelById(previousSelectedModelId) + end + Hooks-->>UI: return false + end + deactivate Hooks + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: ⬆️ LOAD MODEL (ROUTER mode) + %% ═══════════════════════════════════════════════════════════════════════════ + + modelsStore->>modelsStore: loadModel(modelId) + activate modelsStore + + alt already loaded + modelsStore-->>modelsStore: return (no-op) + end + + modelsStore->>modelsStore: modelLoadingStates.set(modelId, true) + modelsStore->>ModelsSvc: load(modelId) + ModelsSvc->>API: POST /models/load {model: modelId} + API-->>ModelsSvc: {status: "loading"} + + modelsStore->>modelsStore: pollForModelStatus(modelId, LOADED) + loop poll every 500ms (max 60 attempts) + modelsStore->>modelsStore: fetchRouterModels() + modelsStore->>ModelsSvc: listRouter() + ModelsSvc->>API: GET /models + API-->>ModelsSvc: models[] + modelsStore->>modelsStore: getModelStatus(modelId) + alt status === LOADED + Note right of modelsStore: break loop + else status === LOADING + Note right of modelsStore: wait 500ms, continue + end + end + + modelsStore->>modelsStore: updateModelModalities(modelId) + modelsStore->>PropsSvc: fetchForModel(modelId) + PropsSvc->>API: GET /props?model={modelId} + API-->>PropsSvc: props with modalities + modelsStore->>modelsStore: modelPropsCache.set(modelId, props) + modelsStore->>modelsStore: propsCacheVersion++ + + modelsStore->>modelsStore: modelLoadingStates.set(modelId, false) + deactivate modelsStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: ⬇️ UNLOAD MODEL (ROUTER mode) + %% ═══════════════════════════════════════════════════════════════════════════ + + modelsStore->>modelsStore: unloadModel(modelId) + activate modelsStore + modelsStore->>modelsStore: modelLoadingStates.set(modelId, true) + modelsStore->>ModelsSvc: unload(modelId) + ModelsSvc->>API: POST /models/unload {model: modelId} + + modelsStore->>modelsStore: pollForModelStatus(modelId, UNLOADED) + loop poll until unloaded + modelsStore->>ModelsSvc: listRouter() + ModelsSvc->>API: GET /models + end + + modelsStore->>modelsStore: modelLoadingStates.set(modelId, false) + deactivate modelsStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: 📊 COMPUTED GETTERS + %% ═══════════════════════════════════════════════════════════════════════════ + + Note over modelsStore: Getters:
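`loadModel()` and the modality check above reduce to two small functions. A sketch using the constants from the diagram (500 ms interval, 60 attempts) and the superset rule from `handleModelChange`; the `/models` response shape follows the router-mode diagram:

```ts
type ModelStatus = 'loaded' | 'loading' | 'unloaded';

// Poll GET /models until the target model reaches the desired status.
async function pollForModelStatus(modelId: string, target: ModelStatus): Promise<boolean> {
  for (let attempt = 0; attempt < 60; attempt++) {
    const res = await fetch('/models');
    const body = (await res.json()) as { data: { id: string; status: ModelStatus }[] };
    if (body.data.find((m) => m.id === modelId)?.status === target) return true;
    await new Promise((resolve) => setTimeout(resolve, 500));
  }
  return false; // timed out
}

async function loadModel(modelId: string): Promise<boolean> {
  await fetch('/models/load', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: modelId })
  });
  return pollForModelStatus(modelId, 'loaded');
}

// The validation rule: the model must support every modality the
// conversation already uses (model.modalities ⊇ required).
interface ModelModalities { vision: boolean; audio: boolean }
function supportsRequired(model: ModelModalities, required: ModelModalities): boolean {
  return (!required.vision || model.vision) && (!required.audio || model.audio);
}
```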
- selectedModel: ModelOption | null
- loadedModelIds: string[] (from routerModels)
- loadingModelIds: string[] (from modelLoadingStates)
- singleModelName: string | null (MODEL mode only) + + Note over modelsStore: Modality helpers:
- getModelModalities(modelId): {vision, audio}
- modelSupportsVision(modelId): boolean
- modelSupportsAudio(modelId): boolean +``` diff --git a/tools/server/webui/docs/flows/server-flow.md b/tools/server/webui/docs/flows/server-flow.md new file mode 100644 index 00000000000..d6a1611f6f4 --- /dev/null +++ b/tools/server/webui/docs/flows/server-flow.md @@ -0,0 +1,76 @@ +```mermaid +sequenceDiagram + participant UI as 🧩 +layout.svelte + participant serverStore as 🗄️ serverStore + participant PropsSvc as ⚙️ PropsService + participant API as 🌐 llama-server + + Note over serverStore: State:
props: ApiLlamaCppServerProps | null
loading, error
role: ServerRole | null (MODEL | ROUTER)
fetchPromise (deduplication) + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: 🚀 INITIALIZATION + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>serverStore: fetch() + activate serverStore + + alt fetchPromise exists (already fetching) + serverStore-->>UI: return fetchPromise + Note right of serverStore: Deduplicate concurrent calls + end + + serverStore->>serverStore: loading = true + serverStore->>serverStore: fetchPromise = new Promise() + + serverStore->>PropsSvc: fetch() + PropsSvc->>API: GET /props + API-->>PropsSvc: ApiLlamaCppServerProps + Note right of API: {role, model_path, model_alias,
modalities, default_generation_settings, ...} + + PropsSvc-->>serverStore: props + serverStore->>serverStore: props = $state(data) + + serverStore->>serverStore: detectRole(props) + Note right of serverStore: role = props.role === "router"
? ServerRole.ROUTER
: ServerRole.MODEL + + serverStore->>serverStore: loading = false + serverStore->>serverStore: fetchPromise = null + deactivate serverStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: 📊 COMPUTED GETTERS + %% ═══════════════════════════════════════════════════════════════════════════ + + Note over serverStore: Getters from props: + + rect rgb(240, 255, 240) + Note over serverStore: defaultParams
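`fetch()` above deduplicates concurrent callers by stashing the in-flight promise. A sketch of that plus `detectRole()`, with the `ServerRole` values assumed from the diagram:

```ts
enum ServerRole { MODEL = 'model', ROUTER = 'router' }
interface ServerProps { role?: string; [key: string]: unknown }

let props: ServerProps | null = null;
let role: ServerRole | null = null;
let fetchPromise: Promise<void> | null = null;

async function fetchServerProps(): Promise<void> {
  // Concurrent callers share the in-flight request instead of re-fetching
  if (fetchPromise) return fetchPromise;
  fetchPromise = (async () => {
    const res = await fetch('/props');
    props = (await res.json()) as ServerProps;
    role = props.role === 'router' ? ServerRole.ROUTER : ServerRole.MODEL;
  })().finally(() => {
    fetchPromise = null; // allow a fresh fetch once this one settles
  });
  return fetchPromise;
}
```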
→ props.default_generation_settings.params
(temperature, top_p, top_k, etc.) + end + + rect rgb(240, 255, 240) + Note over serverStore: contextSize
→ props.default_generation_settings.n_ctx + end + + rect rgb(255, 240, 240) + Note over serverStore: isRouterMode
→ role === ServerRole.ROUTER + end + + rect rgb(255, 240, 240) + Note over serverStore: isModelMode
→ role === ServerRole.MODEL + end + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: 🔗 RELATIONSHIPS + %% ═══════════════════════════════════════════════════════════════════════════ + + Note over serverStore: Used by: + Note right of serverStore: - modelsStore: role detection, MODEL mode modalities
- settingsStore: syncWithServerDefaults (defaultParams)
- chatStore: contextSize for processing state
- UI components: isRouterMode for conditional rendering + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: ❌ ERROR HANDLING + %% ═══════════════════════════════════════════════════════════════════════════ + + Note over serverStore: getErrorMessage(): string | null
Returns formatted error for UI display + + Note over serverStore: clear(): void
Resets all state (props, error, loading, role) +``` diff --git a/tools/server/webui/docs/flows/settings-flow.md b/tools/server/webui/docs/flows/settings-flow.md new file mode 100644 index 00000000000..578e01e6e1d --- /dev/null +++ b/tools/server/webui/docs/flows/settings-flow.md @@ -0,0 +1,144 @@ +```mermaid +sequenceDiagram + participant UI as 🧩 ChatSettings + participant settingsStore as 🗄️ settingsStore + participant serverStore as 🗄️ serverStore + participant ParamSvc as ⚙️ ParameterSyncService + participant LS as 💾 LocalStorage + + Note over settingsStore: State:
config: SettingsConfigType
theme: string ("auto" | "light" | "dark")
isInitialized: boolean
userOverrides: Set<string> + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,LS: 🚀 INITIALIZATION + %% ═══════════════════════════════════════════════════════════════════════════ + + Note over settingsStore: Auto-initialized in constructor (browser only) + settingsStore->>settingsStore: initialize() + activate settingsStore + + settingsStore->>settingsStore: loadConfig() + settingsStore->>LS: get("llama-config") + LS-->>settingsStore: StoredConfig | null + + alt config exists + settingsStore->>settingsStore: Merge with SETTING_CONFIG_DEFAULT + Note right of settingsStore: Fill missing keys with defaults + else no config + settingsStore->>settingsStore: config = SETTING_CONFIG_DEFAULT + end + + settingsStore->>LS: get("llama-userOverrides") + LS-->>settingsStore: string[] | null + settingsStore->>settingsStore: userOverrides = new Set(data) + + settingsStore->>settingsStore: loadTheme() + settingsStore->>LS: get("llama-theme") + LS-->>settingsStore: theme | "auto" + + settingsStore->>settingsStore: isInitialized = true + deactivate settingsStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,LS: 🔄 SYNC WITH SERVER DEFAULTS + %% ═══════════════════════════════════════════════════════════════════════════ + + Note over UI: Triggered from +layout.svelte when serverStore.props loaded + UI->>settingsStore: syncWithServerDefaults() + activate settingsStore + + settingsStore->>serverStore: defaultParams + serverStore-->>settingsStore: {temperature, top_p, top_k, ...} + + settingsStore->>ParamSvc: extractServerDefaults(defaultParams) + ParamSvc-->>settingsStore: Record + + settingsStore->>ParamSvc: mergeWithServerDefaults(config, serverDefaults) + Note right of ParamSvc: For each syncable parameter:
- If NOT in userOverrides → use server default
- If in userOverrides → keep user value + ParamSvc-->>settingsStore: mergedConfig + + settingsStore->>settingsStore: config = mergedConfig + settingsStore->>settingsStore: saveConfig() + deactivate settingsStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,LS: ⚙️ UPDATE CONFIG + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>settingsStore: updateConfig(key, value) + activate settingsStore + settingsStore->>settingsStore: config[key] = value + settingsStore->>settingsStore: userOverrides.add(key) + Note right of settingsStore: Mark as user-modified (won't be overwritten by server) + settingsStore->>settingsStore: saveConfig() + settingsStore->>LS: set("llama-config", config) + settingsStore->>LS: set("llama-userOverrides", [...userOverrides]) + deactivate settingsStore + + UI->>settingsStore: updateMultipleConfig({key1: val1, key2: val2}) + activate settingsStore + Note right of settingsStore: Batch update, single save + settingsStore->>settingsStore: For each key: config[key] = value + settingsStore->>settingsStore: For each key: userOverrides.add(key) + settingsStore->>settingsStore: saveConfig() + deactivate settingsStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,LS: 🔄 RESET + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>settingsStore: resetConfig() + activate settingsStore + settingsStore->>settingsStore: config = SETTING_CONFIG_DEFAULT + settingsStore->>settingsStore: userOverrides.clear() + settingsStore->>settingsStore: syncWithServerDefaults() + Note right of settingsStore: Apply server defaults for syncable params + settingsStore->>settingsStore: saveConfig() + deactivate settingsStore + + UI->>settingsStore: resetParameterToServerDefault(key) + activate settingsStore + settingsStore->>settingsStore: userOverrides.delete(key) + settingsStore->>serverStore: defaultParams[key] + settingsStore->>settingsStore: config[key] = serverDefault + settingsStore->>settingsStore: saveConfig() + deactivate settingsStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,LS: 🎨 THEME + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>settingsStore: updateTheme(newTheme) + activate settingsStore + settingsStore->>settingsStore: theme = newTheme + settingsStore->>settingsStore: saveTheme() + settingsStore->>LS: set("llama-theme", theme) + deactivate settingsStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,LS: 📊 PARAMETER INFO + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>settingsStore: getParameterInfo(key) + settingsStore->>ParamSvc: getParameterInfo(key, config, serverDefaults, userOverrides) + ParamSvc-->>settingsStore: ParameterInfo + Note right of ParamSvc: {
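The override rule above is the heart of ParameterSyncService. A sketch of `mergeWithServerDefaults()`, assuming string-keyed config objects:

```ts
type Config = Record<string, unknown>;

// Server defaults win for every syncable key the user has not touched;
// user-overridden keys keep their stored value.
function mergeWithServerDefaults(
  config: Config,
  serverDefaults: Config,
  userOverrides: Set<string>
): Config {
  const merged: Config = { ...config };
  for (const [key, serverValue] of Object.entries(serverDefaults)) {
    if (!userOverrides.has(key)) {
      merged[key] = serverValue;
    }
  }
  return merged;
}
```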
currentValue,
serverDefault,
isUserOverride: boolean,
canSync: boolean,
isDifferentFromServer: boolean
} + + UI->>settingsStore: getParameterDiff() + settingsStore->>ParamSvc: createParameterDiff(config, serverDefaults, userOverrides) + ParamSvc-->>settingsStore: ParameterDiff[] + Note right of ParamSvc: Array of parameters where user != server + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,LS: 📋 CONFIG CATEGORIES + %% ═══════════════════════════════════════════════════════════════════════════ + + Note over settingsStore: Syncable with server (from /props): + rect rgb(240, 255, 240) + Note over settingsStore: temperature, top_p, top_k, min_p
repeat_penalty, presence_penalty, frequency_penalty
dynatemp_range, dynatemp_exponent
typ_p, xtc_probability, xtc_threshold
dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n + end + + Note over settingsStore: UI-only (not synced): + rect rgb(255, 240, 240) + Note over settingsStore: systemMessage, custom (JSON)
showStatistics, enableContinueGeneration
autoMicOnEmpty, disableAutoScroll
apiKey, pdfAsImage, disableReasoningFormat + end +``` diff --git a/tools/server/webui/package-lock.json b/tools/server/webui/package-lock.json index 4af5e86ab9a..9c1c2499cfd 100644 --- a/tools/server/webui/package-lock.json +++ b/tools/server/webui/package-lock.json @@ -64,7 +64,7 @@ "svelte": "^5.0.0", "svelte-check": "^4.0.0", "tailwind-merge": "^3.3.1", - "tailwind-variants": "^1.0.0", + "tailwind-variants": "^3.2.2", "tailwindcss": "^4.0.0", "tw-animate-css": "^1.3.5", "typescript": "^5.0.0", @@ -8324,31 +8324,23 @@ } }, "node_modules/tailwind-variants": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/tailwind-variants/-/tailwind-variants-1.0.0.tgz", - "integrity": "sha512-2WSbv4ulEEyuBKomOunut65D8UZwxrHoRfYnxGcQNnHqlSCp2+B7Yz2W+yrNDrxRodOXtGD/1oCcKGNBnUqMqA==", + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/tailwind-variants/-/tailwind-variants-3.2.2.tgz", + "integrity": "sha512-Mi4kHeMTLvKlM98XPnK+7HoBPmf4gygdFmqQPaDivc3DpYS6aIY6KiG/PgThrGvii5YZJqRsPz0aPyhoFzmZgg==", "dev": true, "license": "MIT", - "dependencies": { - "tailwind-merge": "3.0.2" - }, "engines": { "node": ">=16.x", "pnpm": ">=7.x" }, "peerDependencies": { + "tailwind-merge": ">=3.0.0", "tailwindcss": "*" - } - }, - "node_modules/tailwind-variants/node_modules/tailwind-merge": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-3.0.2.tgz", - "integrity": "sha512-l7z+OYZ7mu3DTqrL88RiKrKIqO3NcpEO8V/Od04bNpvk0kiIFndGEoqfuzvj4yuhRkHKjRkII2z+KS2HfPcSxw==", - "dev": true, - "license": "MIT", - "funding": { - "type": "github", - "url": "https://github.com/sponsors/dcastil" + }, + "peerDependenciesMeta": { + "tailwind-merge": { + "optional": true + } } }, "node_modules/tailwindcss": { diff --git a/tools/server/webui/package.json b/tools/server/webui/package.json index 8b88f691a43..987a7239ed4 100644 --- a/tools/server/webui/package.json +++ b/tools/server/webui/package.json @@ -66,7 +66,7 @@ "svelte": "^5.0.0", "svelte-check": "^4.0.0", "tailwind-merge": "^3.3.1", - "tailwind-variants": "^1.0.0", + "tailwind-variants": "^3.2.2", "tailwindcss": "^4.0.0", "tw-animate-css": "^1.3.5", "typescript": "^5.0.0", diff --git a/tools/server/webui/playwright.config.ts b/tools/server/webui/playwright.config.ts index 51688b39410..26d3be535d1 100644 --- a/tools/server/webui/playwright.config.ts +++ b/tools/server/webui/playwright.config.ts @@ -7,5 +7,5 @@ export default defineConfig({ timeout: 120000, reuseExistingServer: false }, - testDir: 'e2e' + testDir: 'tests/e2e' }); diff --git a/tools/server/webui/scripts/dev.sh b/tools/server/webui/scripts/dev.sh index 2bda8f22c8f..b7539c205e2 100644 --- a/tools/server/webui/scripts/dev.sh +++ b/tools/server/webui/scripts/dev.sh @@ -49,7 +49,9 @@ trap cleanup SIGINT SIGTERM echo "🚀 Starting development servers..." 
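Rounding out the settings flow above, the localStorage persistence sketches out as follows; the three storage keys are the ones named in the diagram, the serialization details are assumptions:

```ts
const CONFIG_KEY = 'llama-config';
const OVERRIDES_KEY = 'llama-userOverrides';
const THEME_KEY = 'llama-theme';

function saveConfig(config: Record<string, unknown>, userOverrides: Set<string>): void {
  localStorage.setItem(CONFIG_KEY, JSON.stringify(config));
  // Sets are not JSON-serializable directly, so persist as an array
  localStorage.setItem(OVERRIDES_KEY, JSON.stringify([...userOverrides]));
}

function loadConfig(defaults: Record<string, unknown>): {
  config: Record<string, unknown>;
  userOverrides: Set<string>;
} {
  const storedConfig = localStorage.getItem(CONFIG_KEY);
  // Merge stored values over defaults so newly added keys pick up defaults
  const config = storedConfig ? { ...defaults, ...JSON.parse(storedConfig) } : { ...defaults };
  const storedOverrides = localStorage.getItem(OVERRIDES_KEY);
  return {
    config,
    userOverrides: new Set<string>(storedOverrides ? JSON.parse(storedOverrides) : [])
  };
}

const loadTheme = (): string => localStorage.getItem(THEME_KEY) ?? 'auto';
const saveTheme = (theme: string): void => localStorage.setItem(THEME_KEY, theme);
```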
echo "📝 Note: Make sure to start llama-server separately if needed" cd tools/server/webui -storybook dev -p 6006 --ci & vite dev --host 0.0.0.0 & +# Use --insecure-http-parser to handle malformed HTTP responses from llama-server +# (some responses have both Content-Length and Transfer-Encoding headers) +storybook dev -p 6006 --ci & NODE_OPTIONS="--insecure-http-parser" vite dev --host 0.0.0.0 & # Wait for all background processes wait diff --git a/tools/server/webui/src/app.css b/tools/server/webui/src/app.css index 2ca1536409b..9705040a4de 100644 --- a/tools/server/webui/src/app.css +++ b/tools/server/webui/src/app.css @@ -29,7 +29,7 @@ --chart-3: oklch(0.398 0.07 227.392); --chart-4: oklch(0.828 0.189 84.429); --chart-5: oklch(0.769 0.188 70.08); - --sidebar: oklch(0.985 0 0); + --sidebar: oklch(0.987 0 0); --sidebar-foreground: oklch(0.145 0 0); --sidebar-primary: oklch(0.205 0 0); --sidebar-primary-foreground: oklch(0.985 0 0); @@ -66,7 +66,7 @@ --chart-3: oklch(0.769 0.188 70.08); --chart-4: oklch(0.627 0.265 303.9); --chart-5: oklch(0.645 0.246 16.439); - --sidebar: oklch(0.205 0 0); + --sidebar: oklch(0.19 0 0); --sidebar-foreground: oklch(0.985 0 0); --sidebar-primary: oklch(0.488 0.243 264.376); --sidebar-primary-foreground: oklch(0.985 0 0); diff --git a/tools/server/webui/src/app.d.ts b/tools/server/webui/src/app.d.ts index eb14d6fe451..71976936edd 100644 --- a/tools/server/webui/src/app.d.ts +++ b/tools/server/webui/src/app.d.ts @@ -4,27 +4,38 @@ // Import chat types from dedicated module import type { + // API types ApiChatCompletionRequest, ApiChatCompletionResponse, ApiChatCompletionStreamChunk, + ApiChatCompletionToolCall, + ApiChatCompletionToolCallDelta, ApiChatMessageData, ApiChatMessageContentPart, ApiContextSizeError, ApiErrorResponse, ApiLlamaCppServerProps, - ApiProcessingState -} from '$lib/types/api'; - -import type { + ApiModelDataEntry, + ApiModelListResponse, + ApiProcessingState, + ApiRouterModelMeta, + ApiRouterModelsLoadRequest, + ApiRouterModelsLoadResponse, + ApiRouterModelsStatusRequest, + ApiRouterModelsStatusResponse, + ApiRouterModelsListResponse, + ApiRouterModelsUnloadRequest, + ApiRouterModelsUnloadResponse, + // Chat types + ChatAttachmentDisplayItem, + ChatAttachmentPreviewItem, ChatMessageType, ChatRole, ChatUploadedFile, ChatMessageSiblingInfo, ChatMessagePromptProgress, - ChatMessageTimings -} from '$lib/types/chat'; - -import type { + ChatMessageTimings, + // Database types DatabaseConversation, DatabaseMessage, DatabaseMessageExtra, @@ -32,14 +43,20 @@ import type { DatabaseMessageExtraImageFile, DatabaseMessageExtraTextFile, DatabaseMessageExtraPdfFile, - DatabaseMessageExtraLegacyContext -} from '$lib/types/database'; - -import type { + DatabaseMessageExtraLegacyContext, + ExportedConversation, + ExportedConversations, + // Model types + ModelModalities, + ModelOption, + // Settings types + SettingsChatServiceOptions, SettingsConfigValue, SettingsFieldConfig, SettingsConfigType -} from '$lib/types/settings'; +} from '$lib/types'; + +import { ServerRole, ServerModelStatus, ModelModality } from '$lib/enums'; declare global { // namespace App { @@ -51,22 +68,38 @@ declare global { // } export { + // API types ApiChatCompletionRequest, ApiChatCompletionResponse, ApiChatCompletionStreamChunk, + ApiChatCompletionToolCall, + ApiChatCompletionToolCallDelta, ApiChatMessageData, ApiChatMessageContentPart, ApiContextSizeError, ApiErrorResponse, ApiLlamaCppServerProps, + ApiModelDataEntry, + ApiModelListResponse, ApiProcessingState, - ChatMessageData, + 
ApiRouterModelMeta, + ApiRouterModelsLoadRequest, + ApiRouterModelsLoadResponse, + ApiRouterModelsStatusRequest, + ApiRouterModelsStatusResponse, + ApiRouterModelsListResponse, + ApiRouterModelsUnloadRequest, + ApiRouterModelsUnloadResponse, + // Chat types + ChatAttachmentDisplayItem, + ChatAttachmentPreviewItem, ChatMessagePromptProgress, ChatMessageSiblingInfo, ChatMessageTimings, ChatMessageType, ChatRole, ChatUploadedFile, + // Database types DatabaseConversation, DatabaseMessage, DatabaseMessageExtra, @@ -75,9 +108,19 @@ declare global { DatabaseMessageExtraTextFile, DatabaseMessageExtraPdfFile, DatabaseMessageExtraLegacyContext, + ExportedConversation, + ExportedConversations, + // Enum types + ModelModality, + ServerRole, + ServerModelStatus, + // Model types + ModelModalities, + ModelOption, + // Settings types + SettingsChatServiceOptions, SettingsConfigValue, SettingsFieldConfig, - SettingsConfigType, - SettingsChatServiceOptions + SettingsConfigType }; } diff --git a/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentPreview.svelte b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentPreview.svelte index 212b1fe890a..b5fe3fa9c44 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentPreview.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentPreview.svelte @@ -1,9 +1,17 @@ -{#if type === MimeTypeText.PLAIN || type === FileTypeCategory.TEXT} +{#if isText} {#if readonly} + {#if limitToSingleRow} +
+ -
+
+ {#each displayItems as item (item.id)} + {#if item.isImage && item.preview} + openPreview(item, event)} + /> + {:else} + openPreview(item, event)} + /> + {/if} + {/each} +
+ + +
+ + {#if showViewAll} +
+ +
+ {/if} + {:else} +
{#each displayItems as item (item.id)} {#if item.isImage && item.preview} {:else} openPreview(item, event)} + attachment={item.attachment} + uploadedFile={item.uploadedFile} + onClick={(event?: MouseEvent) => openPreview(item, event)} /> {/if} {/each}
- - -
- - {#if showViewAll} -
- -
{/if} {/if} @@ -261,9 +225,9 @@ attachment={previewItem.attachment} preview={previewItem.preview} name={previewItem.name} - type={previewItem.type} size={previewItem.size} textContent={previewItem.textContent} + {activeModelId} /> {/if} @@ -275,4 +239,5 @@ {onFileRemove} imageHeight="h-64" {imageClass} + {activeModelId} /> diff --git a/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsViewAll.svelte b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsViewAll.svelte index ae82f7b7438..279b2e22273 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsViewAll.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsViewAll.svelte @@ -4,9 +4,7 @@ ChatAttachmentThumbnailFile, DialogChatAttachmentPreview } from '$lib/components/app'; - import { FileTypeCategory } from '$lib/enums/files'; - import { getFileTypeCategory } from '$lib/utils/file-type'; - import type { ChatAttachmentDisplayItem, ChatAttachmentPreviewItem } from '$lib/types/chat'; + import { getAttachmentDisplayItems } from '$lib/utils'; interface Props { uploadedFiles?: ChatUploadedFile[]; @@ -16,6 +14,7 @@ imageHeight?: string; imageWidth?: string; imageClass?: string; + activeModelId?: string; } let { @@ -25,89 +24,17 @@ onFileRemove, imageHeight = 'h-24', imageWidth = 'w-auto', - imageClass = '' + imageClass = '', + activeModelId }: Props = $props(); let previewDialogOpen = $state(false); let previewItem = $state(null); - let displayItems = $derived(getDisplayItems()); + let displayItems = $derived(getAttachmentDisplayItems({ uploadedFiles, attachments })); let imageItems = $derived(displayItems.filter((item) => item.isImage)); let fileItems = $derived(displayItems.filter((item) => !item.isImage)); - function getDisplayItems(): ChatAttachmentDisplayItem[] { - const items: ChatAttachmentDisplayItem[] = []; - - for (const file of uploadedFiles) { - items.push({ - id: file.id, - name: file.name, - size: file.size, - preview: file.preview, - type: file.type, - isImage: getFileTypeCategory(file.type) === FileTypeCategory.IMAGE, - uploadedFile: file, - textContent: file.textContent - }); - } - - for (const [index, attachment] of attachments.entries()) { - if (attachment.type === 'imageFile') { - items.push({ - id: `attachment-${index}`, - name: attachment.name, - preview: attachment.base64Url, - type: 'image', - isImage: true, - attachment, - attachmentIndex: index - }); - } else if (attachment.type === 'textFile') { - items.push({ - id: `attachment-${index}`, - name: attachment.name, - type: 'text', - isImage: false, - attachment, - attachmentIndex: index, - textContent: attachment.content - }); - } else if (attachment.type === 'context') { - // Legacy format from old webui - treat as text file - items.push({ - id: `attachment-${index}`, - name: attachment.name, - type: 'text', - isImage: false, - attachment, - attachmentIndex: index, - textContent: attachment.content - }); - } else if (attachment.type === 'audioFile') { - items.push({ - id: `attachment-${index}`, - name: attachment.name, - type: attachment.mimeType || 'audio', - isImage: false, - attachment, - attachmentIndex: index - }); - } else if (attachment.type === 'pdfFile') { - items.push({ - id: `attachment-${index}`, - name: attachment.name, - type: 'application/pdf', - isImage: false, - attachment, - attachmentIndex: index, - textContent: attachment.content - }); - } - } - - return items.reverse(); - } - function openPreview(item: (typeof 
displayItems)[0], event?: Event) { if (event) { event.preventDefault(); @@ -119,7 +46,6 @@ attachment: item.attachment, preview: item.preview, name: item.name, - type: item.type, size: item.size, textContent: item.textContent }; @@ -138,12 +64,13 @@ class="cursor-pointer" id={item.id} name={item.name} - type={item.type} size={item.size} {readonly} onRemove={onFileRemove} textContent={item.textContent} - onClick={(event) => openPreview(item, event)} + attachment={item.attachment} + uploadedFile={item.uploadedFile} + onClick={(event?: MouseEvent) => openPreview(item, event)} /> {/each} @@ -183,8 +110,8 @@ attachment={previewItem.attachment} preview={previewItem.preview} name={previewItem.name} - type={previewItem.type} size={previewItem.size} textContent={previewItem.textContent} + {activeModelId} /> {/if} diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte index 6c9a11849c3..97dccd8be8f 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte @@ -9,15 +9,13 @@ } from '$lib/components/app'; import { INPUT_CLASSES } from '$lib/constants/input-classes'; import { config } from '$lib/stores/settings.svelte'; - import { FileTypeCategory, MimeTypeApplication } from '$lib/enums/files'; - import { - AudioRecorder, - convertToWav, - createAudioFile, - isAudioRecordingSupported - } from '$lib/utils/audio-recording'; - import { onMount } from 'svelte'; + import { modelsStore, modelOptions, selectedModelId } from '$lib/stores/models.svelte'; + import { isRouterMode } from '$lib/stores/server.svelte'; + import { chatStore } from '$lib/stores/chat.svelte'; + import { activeMessages } from '$lib/stores/conversations.svelte'; import { + FileTypeCategory, + MimeTypeApplication, FileExtensionAudio, FileExtensionImage, FileExtensionPdf, @@ -25,8 +23,15 @@ MimeTypeAudio, MimeTypeImage, MimeTypeText - } from '$lib/enums/files'; - import { isIMEComposing } from '$lib/utils/is-ime-composing'; + } from '$lib/enums'; + import { isIMEComposing } from '$lib/utils'; + import { + AudioRecorder, + convertToWav, + createAudioFile, + isAudioRecordingSupported + } from '$lib/utils/browser-only'; + import { onMount } from 'svelte'; interface Props { class?: string; @@ -53,6 +58,7 @@ }: Props = $props(); let audioRecorder: AudioRecorder | undefined; + let chatFormActionsRef: ChatFormActions | undefined = $state(undefined); let currentConfig = $derived(config()); let fileAcceptString = $state(undefined); let fileInputRef: ChatFormFileInputInvisible | undefined = $state(undefined); @@ -63,18 +69,97 @@ let recordingSupported = $state(false); let textareaRef: ChatFormTextarea | undefined = $state(undefined); + // Check if model is selected (in ROUTER mode) + let conversationModel = $derived( + chatStore.getConversationModel(activeMessages() as DatabaseMessage[]) + ); + let isRouter = $derived(isRouterMode()); + let hasModelSelected = $derived(!isRouter || !!conversationModel || !!selectedModelId()); + + // Get active model ID for capability detection + let activeModelId = $derived.by(() => { + const options = modelOptions(); + + if (!isRouter) { + return options.length > 0 ? 
options[0].model : null; + } + + // First try user-selected model + const selectedId = selectedModelId(); + if (selectedId) { + const model = options.find((m) => m.id === selectedId); + if (model) return model.model; + } + + // Fallback to conversation model + if (conversationModel) { + const model = options.find((m) => m.model === conversationModel); + if (model) return model.model; + } + + return null; + }); + + // State for model props reactivity + let modelPropsVersion = $state(0); + + // Fetch model props when active model changes (works for both MODEL and ROUTER mode) + $effect(() => { + if (activeModelId) { + const cached = modelsStore.getModelProps(activeModelId); + if (!cached) { + modelsStore.fetchModelProps(activeModelId).then(() => { + modelPropsVersion++; + }); + } + } + }); + + // Derive modalities from active model (works for both MODEL and ROUTER mode) + let hasAudioModality = $derived.by(() => { + if (activeModelId) { + void modelPropsVersion; // Trigger reactivity on props fetch + return modelsStore.modelSupportsAudio(activeModelId); + } + + return false; + }); + + let hasVisionModality = $derived.by(() => { + if (activeModelId) { + void modelPropsVersion; // Trigger reactivity on props fetch + return modelsStore.modelSupportsVision(activeModelId); + } + + return false; + }); + + function checkModelSelected(): boolean { + if (!hasModelSelected) { + // Open the model selector + chatFormActionsRef?.openModelSelector(); + return false; + } + + return true; + } + function getAcceptStringForFileType(fileType: FileTypeCategory): string { switch (fileType) { case FileTypeCategory.IMAGE: return [...Object.values(FileExtensionImage), ...Object.values(MimeTypeImage)].join(','); + case FileTypeCategory.AUDIO: return [...Object.values(FileExtensionAudio), ...Object.values(MimeTypeAudio)].join(','); + case FileTypeCategory.PDF: return [...Object.values(FileExtensionPdf), ...Object.values(MimeTypeApplication)].join( ',' ); + case FileTypeCategory.TEXT: return [...Object.values(FileExtensionText), MimeTypeText.PLAIN].join(','); + default: return ''; } @@ -103,6 +188,9 @@ if ((!message.trim() && uploadedFiles.length === 0) || disabled || isLoading) return; + // Check if model is selected first + if (!checkModelSelected()) return; + const messageToSend = message.trim(); const filesToSend = [...uploadedFiles]; @@ -131,6 +219,7 @@ if (files.length > 0) { event.preventDefault(); onFileUpload?.(files); + return; } @@ -154,6 +243,7 @@ async function handleMicClick() { if (!audioRecorder || !recordingSupported) { console.warn('Audio recording not supported'); + return; } @@ -187,6 +277,9 @@ event.preventDefault(); if ((!message.trim() && uploadedFiles.length === 0) || disabled || isLoading) return; + // Check if model is selected first + if (!checkModelSelected()) return; + const messageToSend = message.trim(); const filesToSend = [...uploadedFiles]; @@ -225,12 +318,16 @@
0 || uploadedFiles.length > 0} + hasText={message.trim().length > 0} {disabled} {isLoading} {isRecording} + {uploadedFiles} onFileUpload={handleFileUpload} onMicClick={handleMicClick} onStop={handleStop} diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionFileAttachments.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionFileAttachments.svelte index 71cb88e80dc..f4aa8a3a3f0 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionFileAttachments.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionFileAttachments.svelte @@ -1,22 +1,29 @@
- + - {#if !supportsAudio()} + {#if !hasAudioModality}

Current model does not support audio

diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionSubmit.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionSubmit.svelte new file mode 100644 index 00000000000..861cd182e82 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionSubmit.svelte @@ -0,0 +1,55 @@ + + +{#snippet submitButton(props = {})} + +{/snippet} + +{#if tooltipLabel} + + + {@render submitButton()} + + + +

{tooltipLabel}

+
+
+{:else} + {@render submitButton()} +{/if} diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActions.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActions.svelte index aa500423e51..8607e00c025 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActions.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActions.svelte @@ -1,13 +1,20 @@ -
- +
+ - {#if currentConfig.modelSelectorEnabled} - - {/if} + {#if isLoading} + {:else if shouldShowRecordButton} + {:else} - - - + {/if}
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormFileInputInvisible.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormFileInputInvisible.svelte index aa27763034f..52f3913b93b 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormFileInputInvisible.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormFileInputInvisible.svelte @@ -1,9 +1,11 @@ - - - - - -
- {#if loading && options.length === 0 && !isMounted} -
- - Loading models… -
- {:else if options.length === 0} -

No models available.

- {:else} - {@const selectedOption = getDisplayOption()} - -
- - - {#if isOpen} -
-
0 - ? `${menuPosition.maxHeight}px` - : undefined} - > - {#each options as option (option.id)} - - {/each} -
-
- {/if} -
- {/if} - - {#if error} -

{error}

- {/if} -
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormTextarea.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormTextarea.svelte index 7c0679bdcc1..19b763f55ef 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormTextarea.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormTextarea.svelte @@ -1,5 +1,5 @@ + + + + + + diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageUser.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageUser.svelte index c8b615e1613..8556cbef5b9 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageUser.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessageUser.svelte @@ -5,7 +5,7 @@ import { ChatAttachmentsList, MarkdownContent } from '$lib/components/app'; import { INPUT_CLASSES } from '$lib/constants/input-classes'; import { config } from '$lib/stores/settings.svelte'; - import autoResizeTextarea from '$lib/utils/autoresize-textarea'; + import { autoResizeTextarea } from '$lib/utils'; import ChatMessageActions from './ChatMessageActions.svelte'; interface Props { diff --git a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessages.svelte b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessages.svelte index ee147858fbf..6024f66c8bd 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessages.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessages.svelte @@ -1,17 +1,9 @@ -
+
{#each processingDetails as detail (detail)} {detail} diff --git a/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreenWarning.svelte b/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreenWarning.svelte deleted file mode 100644 index 8b8d916889f..00000000000 --- a/tools/server/webui/src/lib/components/app/chat/ChatScreen/ChatScreenWarning.svelte +++ /dev/null @@ -1,38 +0,0 @@ - - -
-
-
-
- -

- Server `/props` endpoint not available - using cached data -

-
- -
-
-
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte index 204f0d7551e..67df20439c6 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettings.svelte @@ -17,7 +17,7 @@ ChatSettingsFields } from '$lib/components/app'; import { ScrollArea } from '$lib/components/ui/scroll-area'; - import { config, updateMultipleConfig } from '$lib/stores/settings.svelte'; + import { config, settingsStore } from '$lib/stores/settings.svelte'; import { setMode } from 'mode-watcher'; import type { Component } from 'svelte'; @@ -79,19 +79,14 @@ title: 'Display', icon: Monitor, fields: [ - { - key: 'showThoughtInProgress', - label: 'Show thought in progress', - type: 'checkbox' - }, { key: 'showMessageStats', label: 'Show message generation statistics', type: 'checkbox' }, { - key: 'showTokensPerSecond', - label: 'Show tokens per second', + key: 'showThoughtInProgress', + label: 'Show thought in progress', type: 'checkbox' }, { @@ -100,18 +95,19 @@ type: 'checkbox' }, { - key: 'showModelInfo', - label: 'Show model information', - type: 'checkbox' + key: 'autoMicOnEmpty', + label: 'Show microphone on empty input', + type: 'checkbox', + isExperimental: true }, { - key: 'disableAutoScroll', - label: 'Disable automatic scroll', + key: 'renderUserContentAsMarkdown', + label: 'Render user content as Markdown', type: 'checkbox' }, { - key: 'renderUserContentAsMarkdown', - label: 'Render user content as Markdown', + key: 'disableAutoScroll', + label: 'Disable automatic scroll', type: 'checkbox' } ] @@ -232,11 +228,6 @@ title: 'Developer', icon: Code, fields: [ - { - key: 'modelSelectorEnabled', - label: 'Enable model selector', - type: 'checkbox' - }, { key: 'showToolCalls', label: 'Show tool call labels', @@ -342,7 +333,7 @@ } } - updateMultipleConfig(processedConfig); + settingsStore.updateMultipleConfig(processedConfig); onSave?.(); } diff --git a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsFields.svelte b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsFields.svelte index 8834e3e3e1c..305687decbd 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsFields.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatSettings/ChatSettingsFields.svelte @@ -6,8 +6,7 @@ import * as Select from '$lib/components/ui/select'; import { Textarea } from '$lib/components/ui/textarea'; import { SETTING_CONFIG_DEFAULT, SETTING_CONFIG_INFO } from '$lib/constants/settings-config'; - import { supportsVision } from '$lib/stores/server.svelte'; - import { getParameterInfo, resetParameterToServerDefault } from '$lib/stores/settings.svelte'; + import { settingsStore } from '$lib/stores/settings.svelte'; import { ParameterSyncService } from '$lib/services/parameter-sync'; import { ChatSettingsParameterSourceIndicator } from '$lib/components/app'; import type { Component } from 'svelte'; @@ -27,7 +26,7 @@ return null; } - return getParameterInfo(key); + return settingsStore.getParameterInfo(key); } @@ -82,7 +81,7 @@ + {/each} +
+
+ {/if} +
+ + + handleOpenChange(false)}>Cancel + + + diff --git a/tools/server/webui/src/lib/components/app/index.ts b/tools/server/webui/src/lib/components/app/index.ts index 54bd8d5aa35..cf4d7495e2f 100644 --- a/tools/server/webui/src/lib/components/app/index.ts +++ b/tools/server/webui/src/lib/components/app/index.ts @@ -10,20 +10,21 @@ export { default as ChatForm } from './chat/ChatForm/ChatForm.svelte'; export { default as ChatFormActionFileAttachments } from './chat/ChatForm/ChatFormActions/ChatFormActionFileAttachments.svelte'; export { default as ChatFormActionRecord } from './chat/ChatForm/ChatFormActions/ChatFormActionRecord.svelte'; export { default as ChatFormActions } from './chat/ChatForm/ChatFormActions/ChatFormActions.svelte'; +export { default as ChatFormActionSubmit } from './chat/ChatForm/ChatFormActions/ChatFormActionSubmit.svelte'; export { default as ChatFormFileInputInvisible } from './chat/ChatForm/ChatFormFileInputInvisible.svelte'; export { default as ChatFormHelperText } from './chat/ChatForm/ChatFormHelperText.svelte'; -export { default as ChatFormModelSelector } from './chat/ChatForm/ChatFormModelSelector.svelte'; export { default as ChatFormTextarea } from './chat/ChatForm/ChatFormTextarea.svelte'; export { default as ChatMessage } from './chat/ChatMessages/ChatMessage.svelte'; -export { default as ChatMessages } from './chat/ChatMessages/ChatMessages.svelte'; +export { default as ChatMessageActions } from './chat/ChatMessages/ChatMessageActions.svelte'; export { default as ChatMessageBranchingControls } from './chat/ChatMessages/ChatMessageBranchingControls.svelte'; +export { default as ChatMessageStatistics } from './chat/ChatMessages/ChatMessageStatistics.svelte'; export { default as ChatMessageThinkingBlock } from './chat/ChatMessages/ChatMessageThinkingBlock.svelte'; +export { default as ChatMessages } from './chat/ChatMessages/ChatMessages.svelte'; export { default as ChatScreen } from './chat/ChatScreen/ChatScreen.svelte'; export { default as ChatScreenHeader } from './chat/ChatScreen/ChatScreenHeader.svelte'; export { default as ChatScreenProcessingInfo } from './chat/ChatScreen/ChatScreenProcessingInfo.svelte'; -export { default as ChatScreenWarning } from './chat/ChatScreen/ChatScreenWarning.svelte'; export { default as ChatSettings } from './chat/ChatSettings/ChatSettings.svelte'; export { default as ChatSettingsFooter } from './chat/ChatSettings/ChatSettingsFooter.svelte'; @@ -45,19 +46,27 @@ export { default as DialogConfirmation } from './dialogs/DialogConfirmation.svel export { default as DialogConversationSelection } from './dialogs/DialogConversationSelection.svelte'; export { default as DialogConversationTitleUpdate } from './dialogs/DialogConversationTitleUpdate.svelte'; export { default as DialogEmptyFileAlert } from './dialogs/DialogEmptyFileAlert.svelte'; +export { default as DialogModelInformation } from './dialogs/DialogModelInformation.svelte'; +export { default as DialogModelNotAvailable } from './dialogs/DialogModelNotAvailable.svelte'; // Miscellanous export { default as ActionButton } from './misc/ActionButton.svelte'; export { default as ActionDropdown } from './misc/ActionDropdown.svelte'; +export { default as BadgeChatStatistic } from './misc/BadgeChatStatistic.svelte'; +export { default as BadgeInfo } from './misc/BadgeInfo.svelte'; +export { default as ModelBadge } from './models/ModelBadge.svelte'; +export { default as BadgeModality } from './misc/BadgeModality.svelte'; export { default as ConversationSelection } from 
'./misc/ConversationSelection.svelte'; +export { default as CopyToClipboardIcon } from './misc/CopyToClipboardIcon.svelte'; export { default as KeyboardShortcutInfo } from './misc/KeyboardShortcutInfo.svelte'; export { default as MarkdownContent } from './misc/MarkdownContent.svelte'; export { default as RemoveButton } from './misc/RemoveButton.svelte'; +export { default as SyntaxHighlightedCode } from './misc/SyntaxHighlightedCode.svelte'; +export { default as ModelsSelector } from './models/ModelsSelector.svelte'; // Server export { default as ServerStatus } from './server/ServerStatus.svelte'; export { default as ServerErrorSplash } from './server/ServerErrorSplash.svelte'; export { default as ServerLoadingSplash } from './server/ServerLoadingSplash.svelte'; -export { default as ServerInfo } from './server/ServerInfo.svelte'; diff --git a/tools/server/webui/src/lib/components/app/misc/ActionButton.svelte b/tools/server/webui/src/lib/components/app/misc/ActionButton.svelte index 11c4679a6eb..411a8b6094c 100644 --- a/tools/server/webui/src/lib/components/app/misc/ActionButton.svelte +++ b/tools/server/webui/src/lib/components/app/misc/ActionButton.svelte @@ -1,7 +1,6 @@ - + diff --git a/tools/server/webui/src/lib/components/app/misc/BadgeModality.svelte b/tools/server/webui/src/lib/components/app/misc/BadgeModality.svelte new file mode 100644 index 00000000000..a0d5e863c2a --- /dev/null +++ b/tools/server/webui/src/lib/components/app/misc/BadgeModality.svelte @@ -0,0 +1,39 @@ + + +{#each displayableModalities as modality, index (index)} + {@const IconComponent = MODALITY_ICONS[modality]} + {@const label = MODALITY_LABELS[modality]} + + + {#if IconComponent} + + {/if} + + {label} + +{/each} diff --git a/tools/server/webui/src/lib/components/app/misc/CopyToClipboardIcon.svelte b/tools/server/webui/src/lib/components/app/misc/CopyToClipboardIcon.svelte new file mode 100644 index 00000000000..bf6cd4fb28c --- /dev/null +++ b/tools/server/webui/src/lib/components/app/misc/CopyToClipboardIcon.svelte @@ -0,0 +1,18 @@ + + + canCopy && copyToClipboard(text)} +/> diff --git a/tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte b/tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte index 176a98b212f..99d6e21e131 100644 --- a/tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte +++ b/tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte @@ -7,9 +7,8 @@ import remarkRehype from 'remark-rehype'; import rehypeKatex from 'rehype-katex'; import rehypeStringify from 'rehype-stringify'; - import { copyCodeToClipboard } from '$lib/utils/copy'; + import { copyCodeToClipboard, preprocessLaTeX } from '$lib/utils'; import { rehypeRestoreTableHtml } from '$lib/markdown/table-html-restorer'; - import { preprocessLaTeX } from '$lib/utils/latex-protection'; import { browser } from '$app/environment'; import '$styles/katex-custom.scss'; diff --git a/tools/server/webui/src/lib/components/app/misc/SyntaxHighlightedCode.svelte b/tools/server/webui/src/lib/components/app/misc/SyntaxHighlightedCode.svelte new file mode 100644 index 00000000000..f36a9a20b9a --- /dev/null +++ b/tools/server/webui/src/lib/components/app/misc/SyntaxHighlightedCode.svelte @@ -0,0 +1,96 @@ + + +
+
{@html highlightedHtml}
+
+ + diff --git a/tools/server/webui/src/lib/components/app/models/ModelBadge.svelte b/tools/server/webui/src/lib/components/app/models/ModelBadge.svelte new file mode 100644 index 00000000000..bea1bf6e3f9 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/models/ModelBadge.svelte @@ -0,0 +1,56 @@ + + +{#snippet badgeContent()} + + {#snippet icon()} + + {/snippet} + + {model} + + {#if showCopyIcon} + + {/if} + +{/snippet} + +{#if model && isModelMode} + {#if showTooltip} + + + {@render badgeContent()} + + + + {onclick ? 'Click for model details' : model} + + + {:else} + {@render badgeContent()} + {/if} +{/if} diff --git a/tools/server/webui/src/lib/components/app/models/ModelsSelector.svelte b/tools/server/webui/src/lib/components/app/models/ModelsSelector.svelte new file mode 100644 index 00000000000..c4331e92f13 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/models/ModelsSelector.svelte @@ -0,0 +1,596 @@ + + + + + +
+ {#if loading && options.length === 0 && isRouter} +
+ + Loading models… +
+ {:else if options.length === 0 && isRouter} +

No models available.

+ {:else} + {@const selectedOption = getDisplayOption()} + +
+ + + {#if isOpen && isRouter} +
+
0 + ? `${menuPosition.maxHeight}px` + : undefined} + > + {#if !isCurrentModelInCache() && currentModel} + + +
+ {/if} + {#each options as option (option.id)} + {@const status = getModelStatus(option.model)} + {@const isLoaded = status === ServerModelStatus.LOADED} + {@const isLoading = status === ServerModelStatus.LOADING} + {@const isSelected = currentModel === option.model || activeId === option.id} + {@const isCompatible = isModelCompatible(option)} + {@const missingModalities = getMissingModalities(option)} +
isCompatible && handleSelect(option.id)} + onkeydown={(e) => { + if (isCompatible && (e.key === 'Enter' || e.key === ' ')) { + e.preventDefault(); + handleSelect(option.id); + } + }} + > + {option.model} + + {#if missingModalities} + + {#if missingModalities.vision} + + + + + +

No vision support

+
+
+ {/if} + {#if missingModalities.audio} + + + + + +

No audio support

+
+
+ {/if} +
+ {/if} + + {#if isLoading} + + + + + +

Loading model…

+
+
+ {:else if isLoaded} + + + + + +

Unload model

+
+
+ {:else} + + {/if} +
+ {/each} +
+
+ {/if} +
+ {/if} +
+ +{#if showModelDialog && !isRouter} + +{/if} diff --git a/tools/server/webui/src/lib/components/app/server/ServerErrorSplash.svelte b/tools/server/webui/src/lib/components/app/server/ServerErrorSplash.svelte index af142e32aa1..39613f200cb 100644 --- a/tools/server/webui/src/lib/components/app/server/ServerErrorSplash.svelte +++ b/tools/server/webui/src/lib/components/app/server/ServerErrorSplash.svelte @@ -5,7 +5,7 @@ import { Input } from '$lib/components/ui/input'; import Label from '$lib/components/ui/label/label.svelte'; import { serverStore, serverLoading } from '$lib/stores/server.svelte'; - import { config, updateConfig } from '$lib/stores/settings.svelte'; + import { config, settingsStore } from '$lib/stores/settings.svelte'; import { fade, fly, scale } from 'svelte/transition'; interface Props { @@ -42,7 +42,7 @@ if (onRetry) { onRetry(); } else { - serverStore.fetchServerProps(); + serverStore.fetch(); } } @@ -61,7 +61,7 @@ try { // Update the API key in settings first - updateConfig('apiKey', apiKeyInput.trim()); + settingsStore.updateConfig('apiKey', apiKeyInput.trim()); // Test the API key by making a real request to the server const response = await fetch('./props', { diff --git a/tools/server/webui/src/lib/components/app/server/ServerInfo.svelte b/tools/server/webui/src/lib/components/app/server/ServerInfo.svelte deleted file mode 100644 index 9a43e333c49..00000000000 --- a/tools/server/webui/src/lib/components/app/server/ServerInfo.svelte +++ /dev/null @@ -1,43 +0,0 @@ - - -{#if props} -
- {#if model} - - - - {model} - - {/if} - -
- {#if props.default_generation_settings.n_ctx} - - ctx: {props.default_generation_settings.n_ctx.toLocaleString()} - - {/if} - - {#if modalities.length > 0} - {#each modalities as modality (modality)} - - {#if modality === 'vision'} - - {:else if modality === 'audio'} - - {/if} - - {modality} - - {/each} - {/if} -
-
-{/if} diff --git a/tools/server/webui/src/lib/components/app/server/ServerStatus.svelte b/tools/server/webui/src/lib/components/app/server/ServerStatus.svelte index f04c954d704..d9f6d4a32a4 100644 --- a/tools/server/webui/src/lib/components/app/server/ServerStatus.svelte +++ b/tools/server/webui/src/lib/components/app/server/ServerStatus.svelte @@ -2,7 +2,8 @@ import { AlertTriangle, Server } from '@lucide/svelte'; import { Badge } from '$lib/components/ui/badge'; import { Button } from '$lib/components/ui/button'; - import { serverProps, serverLoading, serverError, modelName } from '$lib/stores/server.svelte'; + import { serverProps, serverLoading, serverError } from '$lib/stores/server.svelte'; + import { singleModelName } from '$lib/stores/models.svelte'; interface Props { class?: string; @@ -13,7 +14,7 @@ let error = $derived(serverError()); let loading = $derived(serverLoading()); - let model = $derived(modelName()); + let model = $derived(singleModelName()); let serverData = $derived(serverProps()); function getStatusColor() { diff --git a/tools/server/webui/src/lib/components/ui/alert/alert-description.svelte b/tools/server/webui/src/lib/components/ui/alert/alert-description.svelte new file mode 100644 index 00000000000..440d0069d3b --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/alert/alert-description.svelte @@ -0,0 +1,23 @@ + + +
+ {@render children?.()} +
diff --git a/tools/server/webui/src/lib/components/ui/alert/alert-title.svelte b/tools/server/webui/src/lib/components/ui/alert/alert-title.svelte new file mode 100644 index 00000000000..0721aebf12a --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/alert/alert-title.svelte @@ -0,0 +1,20 @@ + + +
+ {@render children?.()} +
diff --git a/tools/server/webui/src/lib/components/ui/alert/alert.svelte b/tools/server/webui/src/lib/components/ui/alert/alert.svelte new file mode 100644 index 00000000000..7d79e4bc0ed --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/alert/alert.svelte @@ -0,0 +1,44 @@ + + + + + diff --git a/tools/server/webui/src/lib/components/ui/alert/index.ts b/tools/server/webui/src/lib/components/ui/alert/index.ts new file mode 100644 index 00000000000..5e0f854da6f --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/alert/index.ts @@ -0,0 +1,14 @@ +import Root from './alert.svelte'; +import Description from './alert-description.svelte'; +import Title from './alert-title.svelte'; +export { alertVariants, type AlertVariant } from './alert.svelte'; + +export { + Root, + Description, + Title, + // + Root as Alert, + Description as AlertDescription, + Title as AlertTitle +}; diff --git a/tools/server/webui/src/lib/components/ui/sidebar/sidebar-provider.svelte b/tools/server/webui/src/lib/components/ui/sidebar/sidebar-provider.svelte index ed90ea84ebf..364235a4994 100644 --- a/tools/server/webui/src/lib/components/ui/sidebar/sidebar-provider.svelte +++ b/tools/server/webui/src/lib/components/ui/sidebar/sidebar-provider.svelte @@ -1,5 +1,4 @@ + + + {@render children?.()} + diff --git a/tools/server/webui/src/lib/components/ui/table/table-caption.svelte b/tools/server/webui/src/lib/components/ui/table/table-caption.svelte new file mode 100644 index 00000000000..0fdcc6439c1 --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/table/table-caption.svelte @@ -0,0 +1,20 @@ + + + + {@render children?.()} + diff --git a/tools/server/webui/src/lib/components/ui/table/table-cell.svelte b/tools/server/webui/src/lib/components/ui/table/table-cell.svelte new file mode 100644 index 00000000000..4506fdfc5bc --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/table/table-cell.svelte @@ -0,0 +1,23 @@ + + + + {@render children?.()} + diff --git a/tools/server/webui/src/lib/components/ui/table/table-footer.svelte b/tools/server/webui/src/lib/components/ui/table/table-footer.svelte new file mode 100644 index 00000000000..77e4a64c08b --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/table/table-footer.svelte @@ -0,0 +1,20 @@ + + +tr]:last:border-b-0', className)} + {...restProps} +> + {@render children?.()} + diff --git a/tools/server/webui/src/lib/components/ui/table/table-head.svelte b/tools/server/webui/src/lib/components/ui/table/table-head.svelte new file mode 100644 index 00000000000..c1c57ad4434 --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/table/table-head.svelte @@ -0,0 +1,23 @@ + + + + {@render children?.()} + diff --git a/tools/server/webui/src/lib/components/ui/table/table-header.svelte b/tools/server/webui/src/lib/components/ui/table/table-header.svelte new file mode 100644 index 00000000000..eb366739b39 --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/table/table-header.svelte @@ -0,0 +1,20 @@ + + + + {@render children?.()} + diff --git a/tools/server/webui/src/lib/components/ui/table/table-row.svelte b/tools/server/webui/src/lib/components/ui/table/table-row.svelte new file mode 100644 index 00000000000..4131d3660a4 --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/table/table-row.svelte @@ -0,0 +1,23 @@ + + +svelte-css-wrapper]:[&>th,td]:bg-muted/50', + className + )} + {...restProps} +> + {@render children?.()} + diff --git a/tools/server/webui/src/lib/components/ui/table/table.svelte 
b/tools/server/webui/src/lib/components/ui/table/table.svelte new file mode 100644 index 00000000000..c11a6a6c4ba --- /dev/null +++ b/tools/server/webui/src/lib/components/ui/table/table.svelte @@ -0,0 +1,22 @@ + + +
+ + {@render children?.()} +
+
diff --git a/tools/server/webui/src/lib/constants/debounce.ts b/tools/server/webui/src/lib/constants/debounce.ts deleted file mode 100644 index 7394669f3ac..00000000000 --- a/tools/server/webui/src/lib/constants/debounce.ts +++ /dev/null @@ -1 +0,0 @@ -export const SLOTS_DEBOUNCE_INTERVAL = 100; diff --git a/tools/server/webui/src/lib/constants/default-context.ts b/tools/server/webui/src/lib/constants/default-context.ts new file mode 100644 index 00000000000..78f31116e36 --- /dev/null +++ b/tools/server/webui/src/lib/constants/default-context.ts @@ -0,0 +1 @@ +export const DEFAULT_CONTEXT = 4096; diff --git a/tools/server/webui/src/lib/constants/floating-ui-constraints.ts b/tools/server/webui/src/lib/constants/floating-ui-constraints.ts new file mode 100644 index 00000000000..c95d3f18417 --- /dev/null +++ b/tools/server/webui/src/lib/constants/floating-ui-constraints.ts @@ -0,0 +1,3 @@ +export const VIEWPORT_GUTTER = 8; +export const MENU_OFFSET = 6; +export const MENU_MAX_WIDTH = 320; diff --git a/tools/server/webui/src/lib/constants/icons.ts b/tools/server/webui/src/lib/constants/icons.ts new file mode 100644 index 00000000000..1e88ab5b3a1 --- /dev/null +++ b/tools/server/webui/src/lib/constants/icons.ts @@ -0,0 +1,32 @@ +/** + * Icon mappings for file types and model modalities + * Centralized configuration to ensure consistent icon usage across the app + */ + +import { + File as FileIcon, + FileText as FileTextIcon, + Image as ImageIcon, + Eye as VisionIcon, + Mic as AudioIcon +} from '@lucide/svelte'; +import { FileTypeCategory, ModelModality } from '$lib/enums'; + +export const FILE_TYPE_ICONS = { + [FileTypeCategory.IMAGE]: ImageIcon, + [FileTypeCategory.AUDIO]: AudioIcon, + [FileTypeCategory.TEXT]: FileTextIcon, + [FileTypeCategory.PDF]: FileIcon +} as const; + +export const DEFAULT_FILE_ICON = FileIcon; + +export const MODALITY_ICONS = { + [ModelModality.VISION]: VisionIcon, + [ModelModality.AUDIO]: AudioIcon +} as const; + +export const MODALITY_LABELS = { + [ModelModality.VISION]: 'Vision', + [ModelModality.AUDIO]: 'Audio' +} as const; diff --git a/tools/server/webui/src/lib/constants/localstorage-keys.ts b/tools/server/webui/src/lib/constants/localstorage-keys.ts index 8bdc5f33c38..919b6ea06d3 100644 --- a/tools/server/webui/src/lib/constants/localstorage-keys.ts +++ b/tools/server/webui/src/lib/constants/localstorage-keys.ts @@ -1,2 +1,2 @@ -export const SERVER_PROPS_LOCALSTORAGE_KEY = 'LlamaCppWebui.serverProps'; -export const SELECTED_MODEL_LOCALSTORAGE_KEY = 'LlamaCppWebui.selectedModel'; +export const CONFIG_LOCALSTORAGE_KEY = 'LlamaCppWebui.config'; +export const USER_OVERRIDES_LOCALSTORAGE_KEY = 'LlamaCppWebui.userOverrides'; diff --git a/tools/server/webui/src/lib/constants/settings-config.ts b/tools/server/webui/src/lib/constants/settings-config.ts index 6783757e6b4..1fc35b48c41 100644 --- a/tools/server/webui/src/lib/constants/settings-config.ts +++ b/tools/server/webui/src/lib/constants/settings-config.ts @@ -4,7 +4,6 @@ export const SETTING_CONFIG_DEFAULT: Record = apiKey: '', systemMessage: '', theme: 'system', - showTokensPerSecond: false, showThoughtInProgress: false, showToolCalls: false, disableReasoningFormat: false, @@ -13,10 +12,9 @@ export const SETTING_CONFIG_DEFAULT: Record = askForTitleConfirmation: false, pasteLongTextToFileLen: 2500, pdfAsImage: false, - showModelInfo: false, disableAutoScroll: false, renderUserContentAsMarkdown: false, - modelSelectorEnabled: false, + autoMicOnEmpty: false, // make sure these default values are in sync with `common.h` 
samplers: 'top_k;typ_p;top_p;min_p;temperature', temperature: 0.8, @@ -81,7 +79,6 @@ export const SETTING_CONFIG_INFO: Record = { 'DRY sampling reduces repetition in generated text even across long contexts. This parameter sets DRY penalty for the last n tokens.', max_tokens: 'The maximum number of tokens per output. Use -1 for infinite (no limit).', custom: 'Custom JSON parameters to send to the API. Must be valid JSON format.', - showTokensPerSecond: 'Display generation speed in tokens per second during streaming.', showThoughtInProgress: 'Expand thought process by default when generating messages.', showToolCalls: 'Display tool call labels and payloads from Harmony-compatible delta.tool_calls data below assistant messages.', @@ -92,13 +89,13 @@ export const SETTING_CONFIG_INFO: Record = { 'Display generation statistics (tokens/second, token count, duration) below each assistant message.', askForTitleConfirmation: 'Ask for confirmation before automatically changing conversation title when editing the first message.', - pdfAsImage: 'Parse PDF as image instead of text (requires vision-capable model).', - showModelInfo: 'Display the model name used to generate each message below the message content.', + pdfAsImage: + 'Parse PDF as image instead of text. Automatically falls back to text processing for non-vision models.', disableAutoScroll: 'Disable automatic scrolling while messages stream so you can control the viewport position manually.', renderUserContentAsMarkdown: 'Render user messages using markdown formatting in the chat.', - modelSelectorEnabled: - 'Enable the model selector in the chat input to choose the inference model. Sends the associated model field in API requests.', + autoMicOnEmpty: + 'Automatically show microphone button instead of send button when textarea is empty for models with audio modality support.', pyInterpreterEnabled: 'Enable Python interpreter using Pyodide. 
Allows running Python code in markdown code blocks.', enableContinueGeneration: diff --git a/tools/server/webui/src/lib/constants/supported-file-types.ts b/tools/server/webui/src/lib/constants/supported-file-types.ts index 1258c3a059c..93bbab5d399 100644 --- a/tools/server/webui/src/lib/constants/supported-file-types.ts +++ b/tools/server/webui/src/lib/constants/supported-file-types.ts @@ -16,7 +16,7 @@ import { MimeTypeImage, MimeTypeApplication, MimeTypeText -} from '$lib/enums/files'; +} from '$lib/enums'; // File type configuration using enums export const AUDIO_FILE_TYPES = { diff --git a/tools/server/webui/src/lib/enums/attachment.ts b/tools/server/webui/src/lib/enums/attachment.ts new file mode 100644 index 00000000000..7c7d0da9946 --- /dev/null +++ b/tools/server/webui/src/lib/enums/attachment.ts @@ -0,0 +1,10 @@ +/** + * Attachment type enum for database message extras + */ +export enum AttachmentType { + AUDIO = 'AUDIO', + IMAGE = 'IMAGE', + PDF = 'PDF', + TEXT = 'TEXT', + LEGACY_CONTEXT = 'context' // Legacy attachment type for backward compatibility +} diff --git a/tools/server/webui/src/lib/enums/files.ts b/tools/server/webui/src/lib/enums/files.ts index 3f725da2273..45b0feea169 100644 --- a/tools/server/webui/src/lib/enums/files.ts +++ b/tools/server/webui/src/lib/enums/files.ts @@ -32,10 +32,10 @@ export enum FileTypePdf { export enum FileTypeText { PLAIN_TEXT = 'plainText', - MARKDOWN = 'markdown', + MARKDOWN = 'md', ASCIIDOC = 'asciidoc', - JAVASCRIPT = 'javascript', - TYPESCRIPT = 'typescript', + JAVASCRIPT = 'js', + TYPESCRIPT = 'ts', JSX = 'jsx', TSX = 'tsx', CSS = 'css', diff --git a/tools/server/webui/src/lib/enums/index.ts b/tools/server/webui/src/lib/enums/index.ts new file mode 100644 index 00000000000..d9e90014705 --- /dev/null +++ b/tools/server/webui/src/lib/enums/index.ts @@ -0,0 +1,21 @@ +export { AttachmentType } from './attachment'; + +export { + FileTypeCategory, + FileTypeImage, + FileTypeAudio, + FileTypePdf, + FileTypeText, + FileExtensionImage, + FileExtensionAudio, + FileExtensionPdf, + FileExtensionText, + MimeTypeApplication, + MimeTypeAudio, + MimeTypeImage, + MimeTypeText +} from './files'; + +export { ModelModality } from './model'; + +export { ServerRole, ServerModelStatus } from './server'; diff --git a/tools/server/webui/src/lib/enums/model.ts b/tools/server/webui/src/lib/enums/model.ts new file mode 100644 index 00000000000..7729ecfeabb --- /dev/null +++ b/tools/server/webui/src/lib/enums/model.ts @@ -0,0 +1,5 @@ +export enum ModelModality { + TEXT = 'TEXT', + AUDIO = 'AUDIO', + VISION = 'VISION' +} diff --git a/tools/server/webui/src/lib/enums/server.ts b/tools/server/webui/src/lib/enums/server.ts new file mode 100644 index 00000000000..7f30eab2cf6 --- /dev/null +++ b/tools/server/webui/src/lib/enums/server.ts @@ -0,0 +1,20 @@ +/** + * Server role enum - used for single/multi-model mode + */ +export enum ServerRole { + /** Single model mode - server running with a specific model loaded */ + MODEL = 'model', + /** Router mode - server managing multiple model instances */ + ROUTER = 'router' +} + +/** + * Model status enum - matches tools/server/server-models.h from C++ server + * Used as the `value` field in the status object from /models endpoint + */ +export enum ServerModelStatus { + UNLOADED = 'unloaded', + LOADING = 'loading', + LOADED = 'loaded', + FAILED = 'failed' +} diff --git a/tools/server/webui/src/lib/hooks/use-model-change-validation.svelte.ts b/tools/server/webui/src/lib/hooks/use-model-change-validation.svelte.ts new file mode 
100644 index 00000000000..bb666159c98 --- /dev/null +++ b/tools/server/webui/src/lib/hooks/use-model-change-validation.svelte.ts @@ -0,0 +1,118 @@ +import { modelsStore } from '$lib/stores/models.svelte'; +import { isRouterMode } from '$lib/stores/server.svelte'; +import { toast } from 'svelte-sonner'; + +interface UseModelChangeValidationOptions { + /** + * Function to get required modalities for validation. + * For ChatForm: () => usedModalities() - all messages + * For ChatMessageAssistant: () => getModalitiesUpToMessage(messageId) - messages before + */ + getRequiredModalities: () => ModelModalities; + + /** + * Optional callback to execute after successful validation. + * For ChatForm: undefined - just select model + * For ChatMessageAssistant: (modelName) => onRegenerate(modelName) + */ + onSuccess?: (modelName: string) => void; + + /** + * Optional callback for rollback on validation failure. + * For ChatForm: (previousId) => selectModelById(previousId) + * For ChatMessageAssistant: undefined - no rollback needed + */ + onValidationFailure?: (previousModelId: string | null) => Promise; +} + +export function useModelChangeValidation(options: UseModelChangeValidationOptions) { + const { getRequiredModalities, onSuccess, onValidationFailure } = options; + + let previousSelectedModelId: string | null = null; + const isRouter = $derived(isRouterMode()); + + async function handleModelChange(modelId: string, modelName: string): Promise { + try { + // Store previous selection for potential rollback + if (onValidationFailure) { + previousSelectedModelId = modelsStore.selectedModelId; + } + + // Load model if not already loaded (router mode only) + let hasLoadedModel = false; + const isModelLoadedBefore = modelsStore.isModelLoaded(modelName); + + if (isRouter && !isModelLoadedBefore) { + try { + await modelsStore.loadModel(modelName); + hasLoadedModel = true; + } catch { + toast.error(`Failed to load model "${modelName}"`); + return false; + } + } + + // Fetch model props to validate modalities + const props = await modelsStore.fetchModelProps(modelName); + + if (props?.modalities) { + const requiredModalities = getRequiredModalities(); + + // Check if model supports required modalities + const missingModalities: string[] = []; + if (requiredModalities.vision && !props.modalities.vision) { + missingModalities.push('vision'); + } + if (requiredModalities.audio && !props.modalities.audio) { + missingModalities.push('audio'); + } + + if (missingModalities.length > 0) { + toast.error( + `Model "${modelName}" doesn't support required modalities: ${missingModalities.join(', ')}. 
Please select a different model.` + ); + + // Unload the model if we just loaded it + if (isRouter && hasLoadedModel) { + try { + await modelsStore.unloadModel(modelName); + } catch (error) { + console.error('Failed to unload incompatible model:', error); + } + } + + // Execute rollback callback if provided + if (onValidationFailure && previousSelectedModelId) { + await onValidationFailure(previousSelectedModelId); + } + + return false; + } + } + + // Select the model (validation passed) + await modelsStore.selectModelById(modelId); + + // Execute success callback if provided + if (onSuccess) { + onSuccess(modelName); + } + + return true; + } catch (error) { + console.error('Failed to change model:', error); + toast.error('Failed to validate model capabilities'); + + // Execute rollback callback on error if provided + if (onValidationFailure && previousSelectedModelId) { + await onValidationFailure(previousSelectedModelId); + } + + return false; + } + } + + return { + handleModelChange + }; +} diff --git a/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts b/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts index e8c3aa1ae8a..a861f23b480 100644 --- a/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts +++ b/tools/server/webui/src/lib/hooks/use-processing-state.svelte.ts @@ -1,4 +1,4 @@ -import { slotsService } from '$lib/services'; +import { activeProcessingState } from '$lib/stores/chat.svelte'; import { config } from '$lib/stores/settings.svelte'; export interface UseProcessingStateReturn { @@ -6,7 +6,7 @@ export interface UseProcessingStateReturn { getProcessingDetails(): string[]; getProcessingMessage(): string; shouldShowDetails(): boolean; - startMonitoring(): Promise; + startMonitoring(): void; stopMonitoring(): void; } @@ -14,92 +14,71 @@ export interface UseProcessingStateReturn { * useProcessingState - Reactive processing state hook * * This hook provides reactive access to the processing state of the server. - * It subscribes to timing data updates from the slots service and provides + * It directly reads from chatStore's reactive state and provides * formatted processing details for UI display. 
* * **Features:** - * - Real-time processing state monitoring + * - Real-time processing state via direct reactive state binding * - Context and output token tracking * - Tokens per second calculation - * - Graceful degradation when slots endpoint unavailable - * - Automatic cleanup on component unmount + * - Automatic updates when streaming data arrives + * - Supports multiple concurrent conversations * * @returns Hook interface with processing state and control methods */ export function useProcessingState(): UseProcessingStateReturn { let isMonitoring = $state(false); - let processingState = $state(null); let lastKnownState = $state(null); - let unsubscribe: (() => void) | null = null; - async function startMonitoring(): Promise { - if (isMonitoring) return; + // Derive processing state reactively from chatStore's direct state + const processingState = $derived.by(() => { + if (!isMonitoring) { + return lastKnownState; + } + // Read directly from the reactive state export + return activeProcessingState(); + }); + + // Track last known state for keepStatsVisible functionality + $effect(() => { + if (processingState && isMonitoring) { + lastKnownState = processingState; + } + }); + function startMonitoring(): void { + if (isMonitoring) return; isMonitoring = true; - - unsubscribe = slotsService.subscribe((state) => { - processingState = state; - if (state) { - lastKnownState = state; - } else { - lastKnownState = null; - } - }); - - try { - const currentState = await slotsService.getCurrentState(); - - if (currentState) { - processingState = currentState; - lastKnownState = currentState; - } - - if (slotsService.isStreaming()) { - slotsService.startStreaming(); - } - } catch (error) { - console.warn('Failed to start slots monitoring:', error); - // Continue without slots monitoring - graceful degradation - } } function stopMonitoring(): void { if (!isMonitoring) return; - isMonitoring = false; - // Only clear processing state if keepStatsVisible is disabled - // This preserves the last known state for display when stats should remain visible + // Only clear last known state if keepStatsVisible is disabled const currentConfig = config(); if (!currentConfig.keepStatsVisible) { - processingState = null; - } else if (lastKnownState) { - // Keep the last known state visible when keepStatsVisible is enabled - processingState = lastKnownState; - } - - if (unsubscribe) { - unsubscribe(); - unsubscribe = null; + lastKnownState = null; } } function getProcessingMessage(): string { - if (!processingState) { + const state = processingState; + if (!state) { return 'Processing...'; } - switch (processingState.status) { + switch (state.status) { case 'initializing': return 'Initializing...'; case 'preparing': - if (processingState.progressPercent !== undefined) { - return `Processing (${processingState.progressPercent}%)`; + if (state.progressPercent !== undefined) { + return `Processing (${state.progressPercent}%)`; } return 'Preparing response...'; case 'generating': - if (processingState.tokensDecoded > 0) { - return `Generating... (${processingState.tokensDecoded} tokens)`; + if (state.tokensDecoded > 0) { + return `Generating... 
(${state.tokensDecoded} tokens)`; } return 'Generating...'; default: @@ -115,7 +94,6 @@ export function useProcessingState(): UseProcessingStateReturn { } const details: string[] = []; - const currentConfig = config(); // Get fresh config each time // Always show context info when we have valid data if (stateToUse.contextUsed >= 0 && stateToUse.contextTotal > 0) { @@ -141,11 +119,7 @@ export function useProcessingState(): UseProcessingStateReturn { } } - if ( - currentConfig.showTokensPerSecond && - stateToUse.tokensPerSecond && - stateToUse.tokensPerSecond > 0 - ) { + if (stateToUse.tokensPerSecond && stateToUse.tokensPerSecond > 0) { details.push(`${stateToUse.tokensPerSecond.toFixed(1)} tokens/sec`); } @@ -157,7 +131,8 @@ export function useProcessingState(): UseProcessingStateReturn { } function shouldShowDetails(): boolean { - return processingState !== null && processingState.status !== 'idle'; + const state = processingState; + return state !== null && state.status !== 'idle'; } return { diff --git a/tools/server/webui/src/lib/services/chat.ts b/tools/server/webui/src/lib/services/chat.ts index aa83910b27f..a14832ebd4b 100644 --- a/tools/server/webui/src/lib/services/chat.ts +++ b/tools/server/webui/src/lib/services/chat.ts @@ -1,55 +1,42 @@ -import { config } from '$lib/stores/settings.svelte'; -import { selectedModelName } from '$lib/stores/models.svelte'; -import { slotsService } from './slots'; -import type { - ApiChatCompletionRequest, - ApiChatCompletionResponse, - ApiChatCompletionStreamChunk, - ApiChatCompletionToolCall, - ApiChatCompletionToolCallDelta, - ApiChatMessageData -} from '$lib/types/api'; -import type { - DatabaseMessage, - DatabaseMessageExtra, - DatabaseMessageExtraAudioFile, - DatabaseMessageExtraImageFile, - DatabaseMessageExtraLegacyContext, - DatabaseMessageExtraPdfFile, - DatabaseMessageExtraTextFile -} from '$lib/types/database'; -import type { ChatMessagePromptProgress, ChatMessageTimings } from '$lib/types/chat'; -import type { SettingsChatServiceOptions } from '$lib/types/settings'; +import { getJsonHeaders } from '$lib/utils'; +import { AttachmentType } from '$lib/enums'; + /** - * ChatService - Low-level API communication layer for llama.cpp server interactions + * ChatService - Low-level API communication layer for Chat Completions + * + * **Terminology - Chat vs Conversation:** + * - **Chat**: The active interaction space with the Chat Completions API. This service + * handles the real-time communication with the AI backend - sending messages, receiving + * streaming responses, and managing request lifecycles. "Chat" is ephemeral and runtime-focused. + * - **Conversation**: The persistent database entity storing all messages and metadata. + * Managed by ConversationsService/Store, conversations persist across sessions. * - * This service handles direct communication with the llama.cpp server's chat completion API. + * This service handles direct communication with the llama-server's Chat Completions API. * It provides the network layer abstraction for AI model interactions while remaining * stateless and focused purely on API communication. 
* - * **Architecture & Relationship with ChatStore:** + * **Architecture & Relationships:** * - **ChatService** (this class): Stateless API communication layer - * - Handles HTTP requests/responses with llama.cpp server + * - Handles HTTP requests/responses with the llama-server * - Manages streaming and non-streaming response parsing - * - Provides request abortion capabilities + * - Provides per-conversation request abortion capabilities * - Converts database messages to API format * - Handles error translation for server responses * - * - **ChatStore**: Stateful orchestration and UI state management - * - Uses ChatService for all AI model communication - * - Manages conversation state, message history, and UI reactivity - * - Coordinates with DatabaseStore for persistence - * - Handles complex workflows like branching and regeneration + * - **chatStore**: Uses ChatService for all AI model communication + * - **conversationsStore**: Provides message context for API requests * * **Key Responsibilities:** * - Message format conversion (DatabaseMessage → API format) * - Streaming response handling with real-time callbacks * - Reasoning content extraction and processing * - File attachment processing (images, PDFs, audio, text) - * - Request lifecycle management (abort, cleanup) + * - Request lifecycle management (abort via AbortSignal) */ export class ChatService { - private abortControllers: Map = new Map(); + // ───────────────────────────────────────────────────────────────────────────── + // Messaging + // ───────────────────────────────────────────────────────────────────────────── /** * Sends a chat completion request to the llama.cpp server. @@ -61,10 +48,11 @@ export class ChatService { * @returns {Promise} that resolves to the complete response string (non-streaming) or void (streaming) * @throws {Error} if the request fails or is aborted */ - async sendMessage( + static async sendMessage( messages: ApiChatMessageData[] | (DatabaseMessage & { extra?: DatabaseMessageExtra[] })[], options: SettingsChatServiceOptions = {}, - conversationId?: string + conversationId?: string, + signal?: AbortSignal ): Promise { const { stream, @@ -74,7 +62,7 @@ export class ChatService { onReasoningChunk, onToolCallChunk, onModel, - onFirstValidChunk, + onTimings, // Generation parameters temperature, max_tokens, @@ -99,25 +87,17 @@ export class ChatService { // Other parameters samplers, custom, - timings_per_token + timings_per_token, + // Config options + systemMessage, + disableReasoningFormat } = options; - const currentConfig = config(); - - const requestId = conversationId || 'default'; - - if (this.abortControllers.has(requestId)) { - this.abortControllers.get(requestId)?.abort(); - } - - const abortController = new AbortController(); - this.abortControllers.set(requestId, abortController); - const normalizedMessages: ApiChatMessageData[] = messages .map((msg) => { if ('id' in msg && 'convId' in msg && 'timestamp' in msg) { const dbMsg = msg as DatabaseMessage & { extra?: DatabaseMessageExtra[] }; - return ChatService.convertMessageToChatServiceData(dbMsg); + return ChatService.convertDbMessageToApiChatMessageData(dbMsg); } else { return msg as ApiChatMessageData; } @@ -132,7 +112,7 @@ export class ChatService { return true; }); - const processedMessages = this.injectSystemMessage(normalizedMessages); + const processedMessages = ChatService.injectSystemMessage(normalizedMessages, systemMessage); const requestBody: ApiChatCompletionRequest = { messages: processedMessages.map((msg: 
ApiChatMessageData) => ({ @@ -142,14 +122,12 @@ export class ChatService { stream }; - const modelSelectorEnabled = Boolean(currentConfig.modelSelectorEnabled); - const activeModel = modelSelectorEnabled ? selectedModelName() : null; - - if (modelSelectorEnabled && activeModel) { - requestBody.model = activeModel; + // Include model in request if provided (required in ROUTER mode) + if (options.model) { + requestBody.model = options.model; } - requestBody.reasoning_format = currentConfig.disableReasoningFormat ? 'none' : 'auto'; + requestBody.reasoning_format = disableReasoningFormat ? 'none' : 'auto'; if (temperature !== undefined) requestBody.temperature = temperature; if (max_tokens !== undefined) { @@ -194,20 +172,15 @@ export class ChatService { } try { - const apiKey = currentConfig.apiKey?.toString().trim(); - const response = await fetch(`./v1/chat/completions`, { method: 'POST', - headers: { - 'Content-Type': 'application/json', - ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}) - }, + headers: getJsonHeaders(), body: JSON.stringify(requestBody), - signal: abortController.signal + signal }); if (!response.ok) { - const error = await this.parseErrorResponse(response); + const error = await ChatService.parseErrorResponse(response); if (onError) { onError(error); } @@ -215,7 +188,7 @@ export class ChatService { } if (stream) { - await this.handleStreamResponse( + await ChatService.handleStreamResponse( response, onChunk, onComplete, @@ -223,13 +196,13 @@ export class ChatService { onReasoningChunk, onToolCallChunk, onModel, - onFirstValidChunk, + onTimings, conversationId, - abortController.signal + signal ); return; } else { - return this.handleNonStreamResponse( + return ChatService.handleNonStreamResponse( response, onComplete, onError, @@ -269,11 +242,13 @@ export class ChatService { onError(userFriendlyError); } throw userFriendlyError; - } finally { - this.abortControllers.delete(requestId); } } + // ───────────────────────────────────────────────────────────────────────────── + // Streaming + // ───────────────────────────────────────────────────────────────────────────── + /** * Handles streaming response from the chat completion API * @param response - The Response object from the fetch request @@ -285,7 +260,7 @@ export class ChatService { * @returns {Promise} Promise that resolves when streaming is complete * @throws {Error} if the stream cannot be read or parsed */ - private async handleStreamResponse( + private static async handleStreamResponse( response: Response, onChunk?: (chunk: string) => void, onComplete?: ( @@ -298,7 +273,7 @@ export class ChatService { onReasoningChunk?: (chunk: string) => void, onToolCallChunk?: (chunk: string) => void, onModel?: (model: string) => void, - onFirstValidChunk?: () => void, + onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void, conversationId?: string, abortSignal?: AbortSignal ): Promise { @@ -315,7 +290,6 @@ export class ChatService { let lastTimings: ChatMessageTimings | undefined; let streamFinished = false; let modelEmitted = false; - let firstValidChunkEmitted = false; let toolCallIndexOffset = 0; let hasOpenToolCallBatch = false; @@ -333,7 +307,7 @@ export class ChatService { return; } - aggregatedToolCalls = this.mergeToolCallDeltas( + aggregatedToolCalls = ChatService.mergeToolCallDeltas( aggregatedToolCalls, toolCalls, toolCallIndexOffset @@ -382,29 +356,20 @@ export class ChatService { try { const parsed: ApiChatCompletionStreamChunk = JSON.parse(data); - - if 
(!firstValidChunkEmitted && parsed.object === 'chat.completion.chunk') { - firstValidChunkEmitted = true; - - if (!abortSignal?.aborted) { - onFirstValidChunk?.(); - } - } - const content = parsed.choices[0]?.delta?.content; const reasoningContent = parsed.choices[0]?.delta?.reasoning_content; const toolCalls = parsed.choices[0]?.delta?.tool_calls; const timings = parsed.timings; const promptProgress = parsed.prompt_progress; - const chunkModel = this.extractModelName(parsed); + const chunkModel = ChatService.extractModelName(parsed); if (chunkModel && !modelEmitted) { modelEmitted = true; onModel?.(chunkModel); } if (timings || promptProgress) { - this.updateProcessingState(timings, promptProgress, conversationId); + ChatService.notifyTimings(timings, promptProgress, onTimings); if (timings) { lastTimings = timings; } @@ -462,54 +427,6 @@ export class ChatService { } } - private mergeToolCallDeltas( - existing: ApiChatCompletionToolCall[], - deltas: ApiChatCompletionToolCallDelta[], - indexOffset = 0 - ): ApiChatCompletionToolCall[] { - const result = existing.map((call) => ({ - ...call, - function: call.function ? { ...call.function } : undefined - })); - - for (const delta of deltas) { - const index = - typeof delta.index === 'number' && delta.index >= 0 - ? delta.index + indexOffset - : result.length; - - while (result.length <= index) { - result.push({ function: undefined }); - } - - const target = result[index]!; - - if (delta.id) { - target.id = delta.id; - } - - if (delta.type) { - target.type = delta.type; - } - - if (delta.function) { - const fn = target.function ? { ...target.function } : {}; - - if (delta.function.name) { - fn.name = delta.function.name; - } - - if (delta.function.arguments) { - fn.arguments = (fn.arguments ?? '') + delta.function.arguments; - } - - target.function = fn; - } - } - - return result; - } - /** * Handles non-streaming response from the chat completion API. * Parses the JSON response and extracts the generated content. @@ -520,7 +437,7 @@ export class ChatService { * @returns {Promise} Promise that resolves to the generated content string * @throws {Error} if the response cannot be parsed or is malformed */ - private async handleNonStreamResponse( + private static async handleNonStreamResponse( response: Response, onComplete?: ( response: string, @@ -542,7 +459,7 @@ export class ChatService { const data: ApiChatCompletionResponse = JSON.parse(responseText); - const responseModel = this.extractModelName(data); + const responseModel = ChatService.extractModelName(data); if (responseModel) { onModel?.(responseModel); } @@ -558,7 +475,7 @@ export class ChatService { let serializedToolCalls: string | undefined; if (toolCalls && toolCalls.length > 0) { - const mergedToolCalls = this.mergeToolCallDeltas([], toolCalls); + const mergedToolCalls = ChatService.mergeToolCallDeltas([], toolCalls); if (mergedToolCalls.length > 0) { serializedToolCalls = JSON.stringify(mergedToolCalls); @@ -585,6 +502,67 @@ export class ChatService { } } + /** + * Merges tool call deltas into an existing array of tool calls. + * Handles both existing and new tool calls, updating existing ones and adding new ones. 
+ * + * @param existing - The existing array of tool calls to merge into + * @param deltas - The array of tool call deltas to merge + * @param indexOffset - Optional offset to apply to the index of new tool calls + * @returns {ApiChatCompletionToolCall[]} The merged array of tool calls + */ + private static mergeToolCallDeltas( + existing: ApiChatCompletionToolCall[], + deltas: ApiChatCompletionToolCallDelta[], + indexOffset = 0 + ): ApiChatCompletionToolCall[] { + const result = existing.map((call) => ({ + ...call, + function: call.function ? { ...call.function } : undefined + })); + + for (const delta of deltas) { + const index = + typeof delta.index === 'number' && delta.index >= 0 + ? delta.index + indexOffset + : result.length; + + while (result.length <= index) { + result.push({ function: undefined }); + } + + const target = result[index]!; + + if (delta.id) { + target.id = delta.id; + } + + if (delta.type) { + target.type = delta.type; + } + + if (delta.function) { + const fn = target.function ? { ...target.function } : {}; + + if (delta.function.name) { + fn.name = delta.function.name; + } + + if (delta.function.arguments) { + fn.arguments = (fn.arguments ?? '') + delta.function.arguments; + } + + target.function = fn; + } + } + + return result; + } + + // ───────────────────────────────────────────────────────────────────────────── + // Conversion + // ───────────────────────────────────────────────────────────────────────────── + /** * Converts a database message with attachments to API chat message format. * Processes various attachment types (images, text files, PDFs) and formats them @@ -597,7 +575,7 @@ export class ChatService { * @returns {ApiChatMessageData} object formatted for the chat completion API * @static */ - static convertMessageToChatServiceData( + static convertDbMessageToApiChatMessageData( message: DatabaseMessage & { extra?: DatabaseMessageExtra[] } ): ApiChatMessageData { if (!message.extra || message.extra.length === 0) { @@ -618,7 +596,7 @@ export class ChatService { const imageFiles = message.extra.filter( (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraImageFile => - extra.type === 'imageFile' + extra.type === AttachmentType.IMAGE ); for (const image of imageFiles) { @@ -630,7 +608,7 @@ export class ChatService { const textFiles = message.extra.filter( (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraTextFile => - extra.type === 'textFile' + extra.type === AttachmentType.TEXT ); for (const textFile of textFiles) { @@ -643,7 +621,7 @@ export class ChatService { // Handle legacy 'context' type from old webui (pasted content) const legacyContextFiles = message.extra.filter( (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraLegacyContext => - extra.type === 'context' + extra.type === AttachmentType.LEGACY_CONTEXT ); for (const legacyContextFile of legacyContextFiles) { @@ -655,7 +633,7 @@ export class ChatService { const audioFiles = message.extra.filter( (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraAudioFile => - extra.type === 'audioFile' + extra.type === AttachmentType.AUDIO ); for (const audio of audioFiles) { @@ -670,7 +648,7 @@ export class ChatService { const pdfFiles = message.extra.filter( (extra: DatabaseMessageExtra): extra is DatabaseMessageExtraPdfFile => - extra.type === 'pdfFile' + extra.type === AttachmentType.PDF ); for (const pdfFile of pdfFiles) { @@ -695,19 +673,17 @@ export class ChatService { }; } + // ───────────────────────────────────────────────────────────────────────────── + // 
Utilities + // ───────────────────────────────────────────────────────────────────────────── + /** - * Get server properties - static method for API compatibility + * Get server properties - static method for API compatibility (to be refactored) */ static async getServerProps(): Promise { try { - const currentConfig = config(); - const apiKey = currentConfig.apiKey?.toString().trim(); - const response = await fetch(`./props`, { - headers: { - 'Content-Type': 'application/json', - ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}) - } + headers: getJsonHeaders() }); if (!response.ok) { @@ -723,49 +699,51 @@ export class ChatService { } /** - * Aborts any ongoing chat completion request. - * Cancels the current request and cleans up the abort controller. - * - * @public + * Get model information from /models endpoint (to be refactored) */ - public abort(conversationId?: string): void { - if (conversationId) { - const abortController = this.abortControllers.get(conversationId); - if (abortController) { - abortController.abort(); - this.abortControllers.delete(conversationId); - } - } else { - for (const controller of this.abortControllers.values()) { - controller.abort(); + static async getModels(): Promise { + try { + const response = await fetch(`./models`, { + headers: getJsonHeaders() + }); + + if (!response.ok) { + throw new Error(`Failed to fetch models: ${response.status} ${response.statusText}`); } - this.abortControllers.clear(); + + const data = await response.json(); + return data; + } catch (error) { + console.error('Error fetching models:', error); + throw error; } } /** - * Injects a system message at the beginning of the conversation if configured in settings. - * Checks for existing system messages to avoid duplication and retrieves the system message - * from the current configuration settings. + * Injects a system message at the beginning of the conversation if provided. + * Checks for existing system messages to avoid duplication. 
* * @param messages - Array of chat messages to process - * @returns Array of messages with system message injected at the beginning if configured + * @param systemMessage - Optional system message to inject + * @returns Array of messages with system message injected at the beginning if provided * @private */ - private injectSystemMessage(messages: ApiChatMessageData[]): ApiChatMessageData[] { - const currentConfig = config(); - const systemMessage = currentConfig.systemMessage?.toString().trim(); + private static injectSystemMessage( + messages: ApiChatMessageData[], + systemMessage?: string + ): ApiChatMessageData[] { + const trimmedSystemMessage = systemMessage?.trim(); - if (!systemMessage) { + if (!trimmedSystemMessage) { return messages; } if (messages.length > 0 && messages[0].role === 'system') { - if (messages[0].content !== systemMessage) { + if (messages[0].content !== trimmedSystemMessage) { const updatedMessages = [...messages]; updatedMessages[0] = { role: 'system', - content: systemMessage + content: trimmedSystemMessage }; return updatedMessages; } @@ -775,7 +753,7 @@ export class ChatService { const systemMsg: ApiChatMessageData = { role: 'system', - content: systemMessage + content: trimmedSystemMessage }; return [systemMsg, ...messages]; @@ -786,7 +764,7 @@ export class ChatService { * @param response - HTTP response object * @returns Promise - Parsed error with context info if available */ - private async parseErrorResponse(response: Response): Promise { + private static async parseErrorResponse(response: Response): Promise { try { const errorText = await response.text(); const errorData: ApiErrorResponse = JSON.parse(errorText); @@ -803,7 +781,18 @@ export class ChatService { } } - private extractModelName(data: unknown): string | undefined { + /** + * Extracts model name from Chat Completions API response data. + * Handles various response formats including streaming chunks and final responses. + * + * WORKAROUND: In single model mode, llama-server returns a default/incorrect model name + * in the response. We override it with the actual model name from serverStore. + * + * @param data - Raw response data from the Chat Completions API + * @returns Model name string if found, undefined otherwise + * @private + */ + private static extractModelName(data: unknown): string | undefined { const asRecord = (value: unknown): Record | undefined => { return typeof value === 'object' && value !== null ? (value as Record) @@ -836,31 +825,22 @@ export class ChatService { return undefined; } - private updateProcessingState( - timings?: ChatMessageTimings, - promptProgress?: ChatMessagePromptProgress, - conversationId?: string + /** + * Calls the onTimings callback with timing data from streaming response. + * + * @param timings - Timing information from the Chat Completions API response + * @param promptProgress - Prompt processing progress data + * @param onTimingsCallback - Callback function to invoke with timing data + * @private + */ + private static notifyTimings( + timings: ChatMessageTimings | undefined, + promptProgress: ChatMessagePromptProgress | undefined, + onTimingsCallback: + | ((timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void) + | undefined ): void { - const tokensPerSecond = - timings?.predicted_ms && timings?.predicted_n - ? 
(timings.predicted_n / timings.predicted_ms) * 1000 - : 0; - - slotsService - .updateFromTimingData( - { - prompt_n: timings?.prompt_n || 0, - predicted_n: timings?.predicted_n || 0, - predicted_per_second: tokensPerSecond, - cache_n: timings?.cache_n || 0, - prompt_progress: promptProgress - }, - conversationId - ) - .catch((error) => { - console.warn('Failed to update processing state:', error); - }); + if (!timings || !onTimingsCallback) return; + onTimingsCallback(timings, promptProgress); } } - -export const chatService = new ChatService(); diff --git a/tools/server/webui/src/lib/stores/database.ts b/tools/server/webui/src/lib/services/database.ts similarity index 68% rename from tools/server/webui/src/lib/stores/database.ts rename to tools/server/webui/src/lib/services/database.ts index 82edcc3227c..185a598c3bb 100644 --- a/tools/server/webui/src/lib/stores/database.ts +++ b/tools/server/webui/src/lib/services/database.ts @@ -1,5 +1,5 @@ import Dexie, { type EntityTable } from 'dexie'; -import { filterByLeafNodeId, findDescendantMessages } from '$lib/utils/branching'; +import { findDescendantMessages } from '$lib/utils'; class LlamacppDatabase extends Dexie { conversations!: EntityTable; @@ -16,60 +16,59 @@ class LlamacppDatabase extends Dexie { } const db = new LlamacppDatabase(); +import { v4 as uuid } from 'uuid'; /** - * DatabaseStore - Persistent data layer for conversation and message management + * DatabaseService - Stateless IndexedDB communication layer + * + * **Terminology - Chat vs Conversation:** + * - **Chat**: The active interaction space with the Chat Completions API (ephemeral, runtime). + * - **Conversation**: The persistent database entity storing all messages and metadata. + * This service handles raw database operations for conversations - the lowest layer + * in the persistence stack. * - * This service provides a comprehensive data access layer built on IndexedDB using Dexie. - * It handles all persistent storage operations for conversations, messages, and application settings - * with support for complex conversation branching and message threading. + * This service provides a stateless data access layer built on IndexedDB using Dexie ORM. + * It handles all low-level storage operations for conversations and messages with support + * for complex branching and message threading. All methods are static - no instance state. 
* - * **Architecture & Relationships:** - * - **DatabaseStore** (this class): Stateless data persistence layer - * - Manages IndexedDB operations through Dexie ORM - * - Handles conversation and message CRUD operations - * - Supports complex branching with parent-child relationships + * **Architecture & Relationships (bottom to top):** + * - **DatabaseService** (this class): Stateless IndexedDB operations + * - Lowest layer - direct Dexie/IndexedDB communication + * - Pure CRUD operations without business logic + * - Handles branching tree structure (parent-child relationships) * - Provides transaction safety for multi-table operations * - * - **ChatStore**: Primary consumer for conversation state management - * - Uses DatabaseStore for all persistence operations - * - Coordinates UI state with database state - * - Handles conversation lifecycle and message branching + * - **ConversationsService**: Stateless business logic layer + * - Uses DatabaseService for all persistence operations + * - Adds import/export, navigation, and higher-level operations + * + * - **conversationsStore**: Reactive state management for conversations + * - Uses ConversationsService for database operations + * - Manages conversation list, active conversation, and messages in memory + * + * - **chatStore**: Active AI interaction management + * - Uses conversationsStore for conversation context + * - Directly uses DatabaseService for message CRUD during streaming * * **Key Features:** - * - **Conversation Management**: Create, read, update, delete conversations - * - **Message Branching**: Support for tree-like conversation structures + * - **Conversation CRUD**: Create, read, update, delete conversations + * - **Message CRUD**: Add, update, delete messages with branching support + * - **Branch Operations**: Create branches, find descendants, cascade deletions * - **Transaction Safety**: Atomic operations for data consistency - * - **Path Resolution**: Navigate conversation branches and find leaf nodes - * - **Cascading Deletion**: Remove entire conversation branches * * **Database Schema:** - * - `conversations`: Conversation metadata with current node tracking - * - `messages`: Individual messages with parent-child relationships + * - `conversations`: id, lastModified, currNode, name + * - `messages`: id, convId, type, role, timestamp, parent, children * * **Branching Model:** * Messages form a tree structure where each message can have multiple children, * enabling conversation branching and alternative response paths. The conversation's * `currNode` tracks the currently active branch endpoint. */ -import { v4 as uuid } from 'uuid'; - -export class DatabaseStore { - /** - * Adds a new message to the database. - * - * @param message - Message to add (without id) - * @returns The created message - */ - static async addMessage(message: Omit): Promise { - const newMessage: DatabaseMessage = { - ...message, - id: uuid() - }; - - await db.messages.add(newMessage); - return newMessage; - } +export class DatabaseService { + // ───────────────────────────────────────────────────────────────────────────── + // Conversations + // ───────────────────────────────────────────────────────────────────────────── /** * Creates a new conversation. 
@@ -89,6 +88,10 @@ export class DatabaseStore { return conversation; } + // ───────────────────────────────────────────────────────────────────────────── + // Messages + // ───────────────────────────────────────────────────────────────────────────── + /** * Creates a new message branch by adding a message and updating parent/child relationships. * Also updates the conversation's currNode to point to the new message. @@ -255,18 +258,6 @@ export class DatabaseStore { return await db.conversations.get(id); } - /** - * Gets all leaf nodes (messages with no children) in a conversation. - * Useful for finding all possible conversation endpoints. - * - * @param convId - Conversation ID - * @returns Array of leaf node message IDs - */ - static async getConversationLeafNodes(convId: string): Promise { - const allMessages = await this.getConversationMessages(convId); - return allMessages.filter((msg) => msg.children.length === 0).map((msg) => msg.id); - } - /** * Gets all messages in a conversation, sorted by timestamp (oldest first). * @@ -277,34 +268,6 @@ export class DatabaseStore { return await db.messages.where('convId').equals(convId).sortBy('timestamp'); } - /** - * Gets the conversation path from root to the current leaf node. - * Uses the conversation's currNode to determine the active branch. - * - * @param convId - Conversation ID - * @returns Array of messages in the current conversation path - */ - static async getConversationPath(convId: string): Promise { - const conversation = await this.getConversation(convId); - - if (!conversation) { - return []; - } - - const allMessages = await this.getConversationMessages(convId); - - if (allMessages.length === 0) { - return []; - } - - // If no currNode is set, use the latest message as leaf - const leafNodeId = - conversation.currNode || - allMessages.reduce((latest, msg) => (msg.timestamp > latest.timestamp ? msg : latest)).id; - - return filterByLeafNodeId(allMessages, leafNodeId, false) as DatabaseMessage[]; - } - /** * Updates a conversation. * @@ -322,6 +285,10 @@ export class DatabaseStore { }); } + // ───────────────────────────────────────────────────────────────────────────── + // Navigation + // ───────────────────────────────────────────────────────────────────────────── + /** * Updates the conversation's current node (active branch). * This determines which conversation path is currently being viewed. @@ -349,6 +316,10 @@ export class DatabaseStore { await db.messages.update(id, updates); } + // ───────────────────────────────────────────────────────────────────────────── + // Import + // ───────────────────────────────────────────────────────────────────────────── + /** * Imports multiple conversations and their messages. * Skips conversations that already exist. 
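Since `getConversationPath` moves out of the database layer in this patch, resolving the active branch now happens higher in the stack. Below is a minimal sketch of that walk, assuming the ambient `DatabaseMessage`/`DatabaseConversation` types match the schema documented above; the `getActivePath` helper name is hypothetical and not part of this patch.

```ts
// Hypothetical sketch, not part of this patch: rebuild the active branch
// of a conversation by walking parent pointers from `currNode` to the root.
// Assumes DatabaseMessage carries the documented fields
// (id, convId, timestamp, parent, children).
import { DatabaseService } from '$lib/services';

async function getActivePath(convId: string): Promise<DatabaseMessage[]> {
	const conversation = await DatabaseService.getConversation(convId);
	const messages = await DatabaseService.getConversationMessages(convId);

	if (!conversation || messages.length === 0) return [];

	const byId = new Map(messages.map((m) => [m.id, m] as const));

	// Fall back to the newest message when no branch endpoint is recorded.
	const leafId =
		conversation.currNode ||
		messages.reduce((a, b) => (b.timestamp > a.timestamp ? b : a)).id;

	const path: DatabaseMessage[] = [];
	let node = byId.get(leafId);

	while (node) {
		path.unshift(node);
		node = node.parent ? byId.get(node.parent) : undefined;
	}

	return path;
}
```

This mirrors what the removed `getConversationPath` did via `filterByLeafNodeId`, but from the consumer side of the `DatabaseService` boundary, roughly where the new `ConversationsService` layer described above would own it.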
diff --git a/tools/server/webui/src/lib/services/index.ts b/tools/server/webui/src/lib/services/index.ts
index 9a9774bd56c..c36c64a6fa9 100644
--- a/tools/server/webui/src/lib/services/index.ts
+++ b/tools/server/webui/src/lib/services/index.ts
@@ -1,2 +1,5 @@
-export { chatService } from './chat';
-export { slotsService } from './slots';
+export { ChatService } from './chat';
+export { DatabaseService } from './database';
+export { ModelsService } from './models';
+export { PropsService } from './props';
+export { ParameterSyncService } from './parameter-sync';
diff --git a/tools/server/webui/src/lib/services/models.ts b/tools/server/webui/src/lib/services/models.ts
index 1c7fa3b4563..f031bd74975 100644
--- a/tools/server/webui/src/lib/services/models.ts
+++ b/tools/server/webui/src/lib/services/models.ts
@@ -1,16 +1,34 @@
 import { base } from '$app/paths';
-import { config } from '$lib/stores/settings.svelte';
-import type { ApiModelListResponse } from '$lib/types/api';
+import { ServerModelStatus } from '$lib/enums';
+import { getJsonHeaders } from '$lib/utils';
 
+/**
+ * ModelsService - Stateless service for model management API communication
+ *
+ * This service handles communication with model-related endpoints:
+ * - `/v1/models` - OpenAI-compatible model list (MODEL + ROUTER mode)
+ * - `/models` - Router-specific model management (ROUTER mode only)
+ *
+ * **Responsibilities:**
+ * - List available models
+ * - Load/unload models (ROUTER mode)
+ * - Check model status (ROUTER mode)
+ *
+ * **Used by:**
+ * - modelsStore: Primary consumer for model state management
+ */
 export class ModelsService {
-	static async list(): Promise<ApiModelListResponse> {
-		const currentConfig = config();
-		const apiKey = currentConfig.apiKey?.toString().trim();
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Listing
+	// ─────────────────────────────────────────────────────────────────────────────
 
+	/**
+	 * Fetch list of models from OpenAI-compatible endpoint
+	 * Works in both MODEL and ROUTER modes
+	 */
+	static async list(): Promise<ApiModelListResponse> {
 		const response = await fetch(`${base}/v1/models`, {
-			headers: {
-				...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
-			}
+			headers: getJsonHeaders()
 		});
 
 		if (!response.ok) {
@@ -19,4 +37,88 @@ export class ModelsService {
 
 		return response.json() as Promise<ApiModelListResponse>;
 	}
+
+	/**
+	 * Fetch list of all models with detailed metadata (ROUTER mode)
+	 * Returns models with load status, paths, and other metadata
+	 */
+	static async listRouter(): Promise {
+		const response = await fetch(`${base}/models`, {
+			headers: getJsonHeaders()
+		});
+
+		if (!response.ok) {
+			throw new Error(`Failed to fetch router models list (status ${response.status})`);
+		}
+
+		return response.json() as Promise;
+	}
+
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Load/Unload
+	// ─────────────────────────────────────────────────────────────────────────────
+
+	/**
+	 * Load a model (ROUTER mode)
+	 * POST /models/load
+	 * @param modelId - Model identifier to load
+	 * @param extraArgs - Optional additional arguments to pass to the model instance
+	 */
+	static async load(modelId: string, extraArgs?: string[]): Promise {
+		const payload: { model: string; extra_args?: string[] } = { model: modelId };
+		if (extraArgs && extraArgs.length > 0) {
+			payload.extra_args = extraArgs;
+		}
+
+		const response = await fetch(`${base}/models/load`, {
+			method: 'POST',
+			headers: getJsonHeaders(),
+			body: JSON.stringify(payload)
+		});
+
+		if (!response.ok) {
+			const errorData = await response.json().catch(() => ({}));
+			throw new Error(errorData.error || `Failed to load model (status ${response.status})`);
+		}
+
+		return response.json() as Promise;
+	}
+
+	/**
+	 * Unload a model (ROUTER mode)
+	 * POST /models/unload
+	 * @param modelId - Model identifier to unload
+	 */
+	static async unload(modelId: string): Promise {
+		const response = await fetch(`${base}/models/unload`, {
+			method: 'POST',
+			headers: getJsonHeaders(),
+			body: JSON.stringify({ model: modelId })
+		});
+
+		if (!response.ok) {
+			const errorData = await response.json().catch(() => ({}));
+			throw new Error(errorData.error || `Failed to unload model (status ${response.status})`);
+		}
+
+		return response.json() as Promise;
+	}
+
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Status
+	// ─────────────────────────────────────────────────────────────────────────────
+
+	/**
+	 * Check if a model is loaded based on its metadata
+	 */
+	static isModelLoaded(model: ApiModelDataEntry): boolean {
+		return model.status.value === ServerModelStatus.LOADED;
+	}
+
+	/**
+	 * Check if a model is currently loading
+	 */
+	static isModelLoading(model: ApiModelDataEntry): boolean {
+		return model.status.value === ServerModelStatus.LOADING;
+	}
 }
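// Usage sketch (illustrative, not part of the diff): a typical ROUTER-mode
// round trip with the service above. The `.data` array and `.id` field on the
// router listing are assumptions about the response shape; method signatures
// and the status helpers come from the implementation shown.
import { ModelsService } from '$lib/services';

async function switchModel(modelId: string): Promise<boolean> {
	await ModelsService.load(modelId, ['--ctx-size', '8192']); // extra args are optional
	const listing = await ModelsService.listRouter();
	const entry = listing.data?.find((m: ApiModelDataEntry) => m.id === modelId);
	// Loading is asynchronous server-side; callers typically re-poll /models
	// until isModelLoaded(entry) flips to true.
	return entry !== undefined && ModelsService.isModelLoaded(entry);
}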
diff --git a/tools/server/webui/src/lib/services/parameter-sync.spec.ts b/tools/server/webui/src/lib/services/parameter-sync.spec.ts
index 9ced55faa04..17b12f757c8 100644
--- a/tools/server/webui/src/lib/services/parameter-sync.spec.ts
+++ b/tools/server/webui/src/lib/services/parameter-sync.spec.ts
@@ -1,6 +1,5 @@
 import { describe, it, expect } from 'vitest';
 import { ParameterSyncService } from './parameter-sync';
-import type { ApiLlamaCppServerProps } from '$lib/types/api';
 
 describe('ParameterSyncService', () => {
 	describe('roundFloatingPoint', () => {
diff --git a/tools/server/webui/src/lib/services/parameter-sync.ts b/tools/server/webui/src/lib/services/parameter-sync.ts
index ee147ae1941..d32d669264b 100644
--- a/tools/server/webui/src/lib/services/parameter-sync.ts
+++ b/tools/server/webui/src/lib/services/parameter-sync.ts
@@ -12,8 +12,7 @@
  * - Provide sync utilities for settings store integration
  */
 
-import type { ApiLlamaCppServerProps } from '$lib/types/api';
-import { normalizeFloatingPoint } from '$lib/utils/precision';
+import { normalizeFloatingPoint } from '$lib/utils';
 
 export type ParameterSource = 'default' | 'custom';
 export type ParameterValue = string | number | boolean;
@@ -60,6 +59,10 @@ export const SYNCABLE_PARAMETERS: SyncableParameter[] = [
 ];
 
 export class ParameterSyncService {
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Extraction
+	// ─────────────────────────────────────────────────────────────────────────────
+
 	/**
 	 * Round floating-point numbers to avoid JavaScript precision issues
 	 */
@@ -95,6 +98,10 @@ export class ParameterSyncService {
 		return extracted;
 	}
 
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Merging
+	// ─────────────────────────────────────────────────────────────────────────────
+
 	/**
 	 * Merge server defaults with current user settings
 	 * Returns updated settings that respect user overrides while using server defaults
@@ -116,6 +123,10 @@ export class ParameterSyncService {
 		return merged;
 	}
 
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Info
+	// ─────────────────────────────────────────────────────────────────────────────
+
 	/**
 	 * Get parameter information including source and values
 	 */
@@ -172,6 +183,10 @@ export class ParameterSyncService {
 		}
 	}
 
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Diff
+	// ─────────────────────────────────────────────────────────────────────────────
+
 	/**
 	 * Create a diff between current settings and server defaults
 	 */
diff --git a/tools/server/webui/src/lib/services/props.ts b/tools/server/webui/src/lib/services/props.ts
new file mode 100644
index 00000000000..01fead9fa3e
--- /dev/null
+++ b/tools/server/webui/src/lib/services/props.ts
@@ -0,0 +1,77 @@
+import { getAuthHeaders } from '$lib/utils';
+
+/**
+ * PropsService - Server properties management
+ *
+ * This service handles communication with the /props endpoint to retrieve
+ * server configuration, model information, and capabilities.
+ *
+ * **Responsibilities:**
+ * - Fetch server properties from /props endpoint
+ * - Handle API authentication
+ * - Parse and validate server response
+ *
+ * **Used by:**
+ * - serverStore: Primary consumer for server state management
+ */
+export class PropsService {
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Fetching
+	// ─────────────────────────────────────────────────────────────────────────────
+
+	/**
+	 * Fetches server properties from the /props endpoint
+	 *
+	 * @param autoload - If false, prevents automatic model loading (default: false)
+	 * @returns {Promise<ApiLlamaCppServerProps>} Server properties
+	 * @throws {Error} If the request fails or returns invalid data
+	 */
+	static async fetch(autoload = false): Promise<ApiLlamaCppServerProps> {
+		const url = new URL('./props', window.location.href);
+		if (!autoload) {
+			url.searchParams.set('autoload', 'false');
+		}
+
+		const response = await fetch(url.toString(), {
+			headers: getAuthHeaders()
+		});
+
+		if (!response.ok) {
+			throw new Error(
+				`Failed to fetch server properties: ${response.status} ${response.statusText}`
+			);
+		}
+
+		const data = await response.json();
+		return data as ApiLlamaCppServerProps;
+	}
+
+	/**
+	 * Fetches server properties for a specific model (ROUTER mode)
+	 *
+	 * @param modelId - The model ID to fetch properties for
+	 * @param autoload - If false, prevents automatic model loading (default: false)
+	 * @returns {Promise<ApiLlamaCppServerProps>} Server properties for the model
+	 * @throws {Error} If the request fails or returns invalid data
+	 */
+	static async fetchForModel(modelId: string, autoload = false): Promise<ApiLlamaCppServerProps> {
+		const url = new URL('./props', window.location.href);
+		url.searchParams.set('model', modelId);
+		if (!autoload) {
+			url.searchParams.set('autoload', 'false');
+		}
+
+		const response = await fetch(url.toString(), {
+			headers: getAuthHeaders()
+		});
+
+		if (!response.ok) {
+			throw new Error(
+				`Failed to fetch model properties: ${response.status} ${response.statusText}`
+			);
+		}
+
+		const data = await response.json();
+		return data as ApiLlamaCppServerProps;
+	}
+}
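// Usage sketch (illustrative, not part of the diff): reading props without
// side effects. autoload=false keeps a ROUTER server from loading a model
// just to answer the query; `default_generation_settings.n_ctx` as the
// context-size field is an assumption about the /props payload shape.
import { PropsService } from '$lib/services';

async function peekContextSize(modelId?: string): Promise<number | undefined> {
	const props = modelId
		? await PropsService.fetchForModel(modelId) // sends ?model=<id>&autoload=false
		: await PropsService.fetch(); // sends ?autoload=false
	return props?.default_generation_settings?.n_ctx;
}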
diff --git a/tools/server/webui/src/lib/services/slots.ts b/tools/server/webui/src/lib/services/slots.ts
deleted file mode 100644
index e99297d6a05..00000000000
--- a/tools/server/webui/src/lib/services/slots.ts
+++ /dev/null
@@ -1,322 +0,0 @@
-import { config } from '$lib/stores/settings.svelte';
-
-/**
- * SlotsService - Real-time processing state monitoring and token rate calculation
- *
- * This service provides real-time information about generation progress, token rates,
- * and context usage based on timing data from ChatService streaming responses.
- * It manages streaming session tracking and provides accurate processing state updates.
- *
- * **Architecture & Relationships:**
- * - **SlotsService** (this class): Processing state monitoring
- *   - Receives timing data from ChatService streaming responses
- *   - Calculates token generation rates and context usage
- *   - Manages streaming session lifecycle
- *   - Provides real-time updates to UI components
- *
- * - **ChatService**: Provides timing data from `/chat/completions` streaming
- * - **UI Components**: Subscribe to processing state for progress indicators
- *
- * **Key Features:**
- * - **Real-time Monitoring**: Live processing state during generation
- * - **Token Rate Calculation**: Accurate tokens/second from timing data
- * - **Context Tracking**: Current context usage and remaining capacity
- * - **Streaming Lifecycle**: Start/stop tracking for streaming sessions
- * - **Timing Data Processing**: Converts streaming timing data to structured state
- * - **Error Handling**: Graceful handling when timing data is unavailable
- *
- * **Processing States:**
- * - `idle`: No active processing
- * - `generating`: Actively generating tokens
- *
- * **Token Rate Calculation:**
- * Uses timing data from `/chat/completions` streaming response for accurate
- * real-time token generation rate measurement.
- */
-export class SlotsService {
-	private callbacks: Set<(state: ApiProcessingState | null) => void> = new Set();
-	private isStreamingActive: boolean = false;
-	private lastKnownState: ApiProcessingState | null = null;
-	private conversationStates: Map<string, ApiProcessingState | null> = new Map();
-	private activeConversationId: string | null = null;
-
-	/**
-	 * Start streaming session tracking
-	 */
-	startStreaming(): void {
-		this.isStreamingActive = true;
-	}
-
-	/**
-	 * Stop streaming session tracking
-	 */
-	stopStreaming(): void {
-		this.isStreamingActive = false;
-	}
-
-	/**
-	 * Clear the current processing state
-	 * Used when switching to a conversation without timing data
-	 */
-	clearState(): void {
-		this.lastKnownState = null;
-
-		for (const callback of this.callbacks) {
-			try {
-				callback(null);
-			} catch (error) {
-				console.error('Error in clearState callback:', error);
-			}
-		}
-	}
-
-	/**
-	 * Check if currently in a streaming session
-	 */
-	isStreaming(): boolean {
-		return this.isStreamingActive;
-	}
-
-	/**
-	 * Set the active conversation for statistics display
-	 */
-	setActiveConversation(conversationId: string | null): void {
-		this.activeConversationId = conversationId;
-		this.notifyCallbacks();
-	}
-
-	/**
-	 * Update processing state for a specific conversation
-	 */
-	updateConversationState(conversationId: string, state: ApiProcessingState | null): void {
-		this.conversationStates.set(conversationId, state);
-
-		if (conversationId === this.activeConversationId) {
-			this.lastKnownState = state;
-			this.notifyCallbacks();
-		}
-	}
-
-	/**
-	 * Get processing state for a specific conversation
-	 */
-	getConversationState(conversationId: string): ApiProcessingState | null {
-		return this.conversationStates.get(conversationId) || null;
-	}
-
-	/**
-	 * Clear state for a specific conversation
-	 */
-	clearConversationState(conversationId: string): void {
-		this.conversationStates.delete(conversationId);
-
-		if (conversationId === this.activeConversationId) {
-			this.lastKnownState = null;
-			this.notifyCallbacks();
-		}
-	}
-
-	/**
-	 * Notify all callbacks with current state
-	 */
-	private notifyCallbacks(): void {
-		const currentState = this.activeConversationId
-			? this.conversationStates.get(this.activeConversationId) || null
-			: this.lastKnownState;
-
-		for (const callback of this.callbacks) {
-			try {
-				callback(currentState);
-			} catch (error) {
-				console.error('Error in slots service callback:', error);
-			}
-		}
-	}
-
-	/**
-	 * @deprecated Polling is no longer used - timing data comes from ChatService streaming response
-	 * This method logs a warning if called to help identify outdated usage
-	 */
-	fetchAndNotify(): void {
-		console.warn(
-			'SlotsService.fetchAndNotify() is deprecated - use timing data from ChatService instead'
-		);
-	}
-
-	subscribe(callback: (state: ApiProcessingState | null) => void): () => void {
-		this.callbacks.add(callback);
-
-		if (this.lastKnownState) {
-			callback(this.lastKnownState);
-		}
-
-		return () => {
-			this.callbacks.delete(callback);
-		};
-	}
-
-	/**
-	 * Updates processing state with timing data from ChatService streaming response
-	 */
-	async updateFromTimingData(
-		timingData: {
-			prompt_n: number;
-			predicted_n: number;
-			predicted_per_second: number;
-			cache_n: number;
-			prompt_progress?: ChatMessagePromptProgress;
-		},
-		conversationId?: string
-	): Promise<void> {
-		const processingState = await this.parseCompletionTimingData(timingData);
-
-		if (processingState === null) {
-			console.warn('Failed to parse timing data - skipping update');
-
-			return;
-		}
-
-		if (conversationId) {
-			this.updateConversationState(conversationId, processingState);
-		} else {
-			this.lastKnownState = processingState;
-			this.notifyCallbacks();
-		}
-	}
-
-	/**
-	 * Gets context total from last known slots data or fetches from server
-	 */
-	private async getContextTotal(): Promise<number | null> {
-		if (this.lastKnownState && this.lastKnownState.contextTotal > 0) {
-			return this.lastKnownState.contextTotal;
-		}
-
-		try {
-			const currentConfig = config();
-			const apiKey = currentConfig.apiKey?.toString().trim();
-
-			const response = await fetch(`./slots`, {
-				headers: {
-					...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {})
-				}
-			});
-
-			if (response.ok) {
-				const slotsData = await response.json();
-				if (Array.isArray(slotsData) && slotsData.length > 0) {
-					const slot = slotsData[0];
-					if (slot.n_ctx && slot.n_ctx > 0) {
-						return slot.n_ctx;
-					}
-				}
-			}
-		} catch (error) {
-			console.warn('Failed to fetch context total from /slots:', error);
-		}
-
-		return 4096;
-	}
-
-	private async parseCompletionTimingData(
-		timingData: Record<string, unknown>
-	): Promise<ApiProcessingState | null> {
-		const promptTokens = (timingData.prompt_n as number) || 0;
-		const predictedTokens = (timingData.predicted_n as number) || 0;
-		const tokensPerSecond = (timingData.predicted_per_second as number) || 0;
-		const cacheTokens = (timingData.cache_n as number) || 0;
-		const promptProgress = timingData.prompt_progress as
-			| {
-					total: number;
-					cache: number;
-					processed: number;
-					time_ms: number;
-			  }
-			| undefined;
-
-		const contextTotal = await this.getContextTotal();
-
-		if (contextTotal === null) {
-			console.warn('No context total available - cannot calculate processing state');
-
-			return null;
-		}
-
-		const currentConfig = config();
-		const outputTokensMax = currentConfig.max_tokens || -1;
-
-		const contextUsed = promptTokens + cacheTokens + predictedTokens;
-		const outputTokensUsed = predictedTokens;
-
-		const progressPercent = promptProgress
-			? Math.round((promptProgress.processed / promptProgress.total) * 100)
-			: undefined;
-
-		return {
-			status: predictedTokens > 0 ? 'generating' : promptProgress ? 'preparing' : 'idle',
-			tokensDecoded: predictedTokens,
-			tokensRemaining: outputTokensMax - predictedTokens,
-			contextUsed,
-			contextTotal,
-			outputTokensUsed,
-			outputTokensMax,
-			hasNextToken: predictedTokens > 0,
-			tokensPerSecond,
-			temperature: currentConfig.temperature ?? 0.8,
-			topP: currentConfig.top_p ?? 0.95,
-			speculative: false,
-			progressPercent,
-			promptTokens,
-			cacheTokens
-		};
-	}
-
-	/**
-	 * Get current processing state
-	 * Returns the last known state from timing data, or null if no data available
-	 * If activeConversationId is set, returns state for that conversation
-	 */
-	async getCurrentState(): Promise<ApiProcessingState | null> {
-		if (this.activeConversationId) {
-			const conversationState = this.conversationStates.get(this.activeConversationId);
-
-			if (conversationState) {
-				return conversationState;
-			}
-		}
-
-		if (this.lastKnownState) {
-			return this.lastKnownState;
-		}
-		try {
-			const { chatStore } = await import('$lib/stores/chat.svelte');
-			const messages = chatStore.activeMessages;
-
-			for (let i = messages.length - 1; i >= 0; i--) {
-				const message = messages[i];
-				if (message.role === 'assistant' && message.timings) {
-					const restoredState = await this.parseCompletionTimingData({
-						prompt_n: message.timings.prompt_n || 0,
-						predicted_n: message.timings.predicted_n || 0,
-						predicted_per_second:
-							message.timings.predicted_n && message.timings.predicted_ms
-								? (message.timings.predicted_n / message.timings.predicted_ms) * 1000
-								: 0,
-						cache_n: message.timings.cache_n || 0
-					});
-
-					if (restoredState) {
-						this.lastKnownState = restoredState;
-						return restoredState;
-					}
-				}
-			}
-		} catch (error) {
-			console.warn('Failed to restore timing data from messages:', error);
-		}
-
-		return null;
-	}
-}
-
-export const slotsService = new SlotsService();
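// Migration sketch (illustrative, not part of the diff): consumers of the
// deleted callback API move to reactive reads on chatStore, introduced in the
// chat.svelte.ts diff below. The component wiring shown here is an assumption.
//
// Before: imperative subscription with manual cleanup
//   const unsubscribe = slotsService.subscribe((state) => render(state));
//   onDestroy(unsubscribe);
//
// After: a Svelte 5 rune inside a component; no lifecycle management needed
//   const processing = $derived(chatStore.getActiveProcessingState());
//   const rate = $derived(processing?.tokensPerSecond ?? 0);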
diff --git a/tools/server/webui/src/lib/stores/chat.svelte.ts b/tools/server/webui/src/lib/stores/chat.svelte.ts
index c70b9580cb7..0c17b06bc1b 100644
--- a/tools/server/webui/src/lib/stores/chat.svelte.ts
+++ b/tools/server/webui/src/lib/stores/chat.svelte.ts
@@ -1,167 +1,352 @@
-import { DatabaseStore } from '$lib/stores/database';
-import { chatService, slotsService } from '$lib/services';
+import { DatabaseService, ChatService } from '$lib/services';
+import { conversationsStore } from '$lib/stores/conversations.svelte';
 import { config } from '$lib/stores/settings.svelte';
-import { serverStore } from '$lib/stores/server.svelte';
-import { normalizeModelName } from '$lib/utils/model-names';
-import { filterByLeafNodeId, findLeafNode, findDescendantMessages } from '$lib/utils/branching';
-import { browser } from '$app/environment';
-import { goto } from '$app/navigation';
-import { toast } from 'svelte-sonner';
+import { contextSize, isRouterMode } from '$lib/stores/server.svelte';
+import { selectedModelName, modelsStore } from '$lib/stores/models.svelte';
+import {
+	normalizeModelName,
+	filterByLeafNodeId,
+	findDescendantMessages,
+	findLeafNode
+} from '$lib/utils';
 import { SvelteMap } from 'svelte/reactivity';
-import type { ExportedConversations } from '$lib/types/database';
+import { DEFAULT_CONTEXT } from '$lib/constants/default-context';
 
 /**
- * ChatStore - Central state management for chat conversations and AI interactions
+ * chatStore - Active AI interaction and streaming state management
  *
- * This store manages the complete chat experience including:
- * - Conversation lifecycle (create, load, delete, update)
- * - Message management with branching support for conversation trees
- * - Real-time AI response streaming with reasoning content support
- * - File attachment handling and processing
- * - Context error management and recovery
- * - Database persistence through DatabaseStore integration
+ * **Terminology - Chat vs Conversation:**
+ * - **Chat**: The active interaction space with the Chat Completions API. Represents the
+ *   real-time streaming session, loading states, and UI visualization of AI communication.
+ *   A "chat" is ephemeral - it exists only while the user is actively interacting with the AI.
+ * - **Conversation**: The persistent database entity storing all messages and metadata.
+ *   Managed by conversationsStore, conversations persist across sessions and page reloads.
  *
- * **Architecture & Relationships:**
- * - **ChatService**: Handles low-level API communication with AI models
- *   - ChatStore orchestrates ChatService for streaming responses
- *   - ChatService provides abort capabilities and error handling
- *   - ChatStore manages the UI state while ChatService handles network layer
+ * This store manages all active AI interactions including real-time streaming, response
+ * generation, and per-chat loading states. It handles the runtime layer between UI and
+ * AI backend, supporting concurrent streaming across multiple conversations.
  *
- * - **DatabaseStore**: Provides persistent storage for conversations and messages
- *   - ChatStore uses DatabaseStore for all CRUD operations
- *   - Maintains referential integrity for conversation trees
- *   - Handles message branching and parent-child relationships
+ * **Architecture & Relationships:**
+ * - **chatStore** (this class): Active AI session and streaming management
+ *   - Manages real-time AI response streaming via ChatService
+ *   - Tracks per-chat loading and streaming states for concurrent sessions
+ *   - Handles message operations (send, edit, regenerate, branch)
+ *   - Coordinates with conversationsStore for persistence
  *
- * - **SlotsService**: Monitors server resource usage during AI generation
- *   - ChatStore coordinates slots polling during streaming
- *   - Provides real-time feedback on server capacity
+ * - **conversationsStore**: Provides conversation data and message arrays for chat context
+ * - **ChatService**: Low-level API communication with llama.cpp server
+ * - **DatabaseService**: Message persistence and retrieval
  *
 * **Key Features:**
- * - Reactive state management using Svelte 5 runes ($state)
- * - Conversation branching for exploring different response paths
- * - Streaming AI responses with real-time content updates
- * - File attachment support (images, PDFs, text files, audio)
- * - Partial response saving when generation is interrupted
- * - Message editing with automatic response regeneration
+ * - **AI Streaming**: Real-time token streaming with abort support
+ * - **Concurrent Chats**: Independent loading/streaming states per conversation
+ * - **Message Branching**: Edit, regenerate, and branch conversation trees
+ * - **Error Handling**: Timeout and server error recovery with user feedback
+ * - **Graceful Stop**: Save partial responses when stopping generation
+ *
+ * **State Management:**
+ * - Global `isLoading` and `currentResponse` for active chat UI
+ * - `chatLoadingStates` Map for per-conversation streaming tracking
+ * - `chatStreamingStates` Map for per-conversation streaming content
+ * - `processingStates` Map for per-conversation processing state (timing/context info)
+ * - Automatic state sync when switching between conversations
 */
 class ChatStore {
-	activeConversation = $state<DatabaseConversation | null>(null);
-	activeMessages = $state<DatabaseMessage[]>([]);
-	conversations = $state<DatabaseConversation[]>([]);
+	// ─────────────────────────────────────────────────────────────────────────────
+	// State
+	// ─────────────────────────────────────────────────────────────────────────────
+
+	activeProcessingState = $state<ApiProcessingState | null>(null);
 	currentResponse = $state('');
 	errorDialogState = $state<{ type: 'timeout' | 'server'; message: string } | null>(null);
-	isInitialized = $state(false);
 	isLoading = $state(false);
-	conversationLoadingStates = new SvelteMap<string, boolean>();
-	conversationStreamingStates = new SvelteMap<string, { response: string; messageId: string }>();
-	titleUpdateConfirmationCallback?: (currentTitle: string, newTitle: string) => Promise<boolean>;
-
-	constructor() {
-		if (browser) {
-			this.initialize();
+	chatLoadingStates = new SvelteMap<string, boolean>();
+	chatStreamingStates = new SvelteMap<string, { response: string; messageId: string }>();
+	private abortControllers = new SvelteMap<string, AbortController>();
+	private processingStates = new SvelteMap<string, ApiProcessingState>();
+	private activeConversationId = $state<string | null>(null);
+	private isStreamingActive = $state(false);
+
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Loading State
+	// ─────────────────────────────────────────────────────────────────────────────
+
+	private setChatLoading(convId: string, loading: boolean): void {
+		if (loading) {
+			this.chatLoadingStates.set(convId, true);
+			if (conversationsStore.activeConversation?.id === convId) this.isLoading = true;
+		} else {
+			this.chatLoadingStates.delete(convId);
+			if (conversationsStore.activeConversation?.id === convId) this.isLoading = false;
 		}
 	}
 
+	private isChatLoading(convId: string): boolean {
+		return this.chatLoadingStates.get(convId) || false;
+	}
+
+	private setChatStreaming(convId: string, response: string, messageId: string): void {
+		this.chatStreamingStates.set(convId, { response, messageId });
+		if (conversationsStore.activeConversation?.id === convId) this.currentResponse = response;
+	}
+
+	private clearChatStreaming(convId: string): void {
+		this.chatStreamingStates.delete(convId);
+		if (conversationsStore.activeConversation?.id === convId) this.currentResponse = '';
+	}
+
+	private getChatStreaming(convId: string): { response: string; messageId: string } | undefined {
+		return this.chatStreamingStates.get(convId);
+	}
+
+	syncLoadingStateForChat(convId: string): void {
+		this.isLoading = this.isChatLoading(convId);
+		const streamingState = this.getChatStreaming(convId);
+		this.currentResponse = streamingState?.response || '';
+	}
+
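// Behavior sketch (illustrative, not part of the diff): why loading and
// streaming are tracked per chat id. A stream in conversation A keeps running
// while the user views conversation B; only the global flags are re-derived
// on switch. Names follow the methods above; the caller is an assumption.
import { chatStore } from '$lib/stores/chat.svelte';

function onConversationSwitched(convId: string) {
	// Re-derive isLoading/currentResponse from the per-chat maps so the
	// spinner and partial text reflect the chat now on screen - no stream
	// is paused or aborted by switching.
	chatStore.syncLoadingStateForChat(convId);
}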
 	/**
-	 * Initializes the chat store by loading conversations from the database
-	 * Sets up the initial state and loads existing conversations
+	 * Clears global UI state without affecting background streaming.
+	 * Used when navigating to empty/new chat while other chats stream in background.
 	 */
-	async initialize(): Promise<void> {
-		try {
-			await this.loadConversations();
+	clearUIState(): void {
+		this.isLoading = false;
+		this.currentResponse = '';
+	}
 
-			this.isInitialized = true;
-		} catch (error) {
-			console.error('Failed to initialize chat store:', error);
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Processing State
+	// ─────────────────────────────────────────────────────────────────────────────
+
+	/**
+	 * Set the active conversation for statistics display
+	 */
+	setActiveProcessingConversation(conversationId: string | null): void {
+		this.activeConversationId = conversationId;
+
+		if (conversationId) {
+			this.activeProcessingState = this.processingStates.get(conversationId) || null;
+		} else {
+			this.activeProcessingState = null;
 		}
 	}
 
 	/**
-	 * Loads all conversations from the database
-	 * Refreshes the conversations list from persistent storage
+	 * Get processing state for a specific conversation
 	 */
-	async loadConversations(): Promise<void> {
-		this.conversations = await DatabaseStore.getAllConversations();
+	getProcessingState(conversationId: string): ApiProcessingState | null {
+		return this.processingStates.get(conversationId) || null;
 	}
 
 	/**
-	 * Creates a new conversation and navigates to it
-	 * @param name - Optional name for the conversation, defaults to timestamped name
-	 * @returns The ID of the created conversation
+	 * Clear processing state for a specific conversation
 	 */
-	async createConversation(name?: string): Promise<string> {
-		const conversationName = name || `Chat ${new Date().toLocaleString()}`;
-		const conversation = await DatabaseStore.createConversation(conversationName);
+	clearProcessingState(conversationId: string): void {
+		this.processingStates.delete(conversationId);
 
-		this.conversations.unshift(conversation);
+		if (conversationId === this.activeConversationId) {
+			this.activeProcessingState = null;
+		}
+	}
 
-		this.activeConversation = conversation;
-		this.activeMessages = [];
+	/**
+	 * Get the current processing state for the active conversation (reactive)
+	 * Returns the direct reactive state for UI binding
+	 */
+	getActiveProcessingState(): ApiProcessingState | null {
+		return this.activeProcessingState;
+	}
 
-		slotsService.setActiveConversation(conversation.id);
+	/**
+	 * Updates processing state with timing data from streaming response
+	 */
+	updateProcessingStateFromTimings(
+		timingData: {
+			prompt_n: number;
+			predicted_n: number;
+			predicted_per_second: number;
+			cache_n: number;
+			prompt_progress?: ChatMessagePromptProgress;
+		},
+		conversationId?: string
+	): void {
+		const processingState = this.parseTimingData(timingData);
 
-		const isConvLoading = this.isConversationLoading(conversation.id);
-		this.isLoading = isConvLoading;
+		if (processingState === null) {
+			console.warn('Failed to parse timing data - skipping update');
+			return;
+		}
 
-		this.currentResponse = '';
+		const targetId = conversationId || this.activeConversationId;
+		if (targetId) {
+			this.processingStates.set(targetId, processingState);
 
-		await goto(`#/chat/${conversation.id}`);
+			if (targetId === this.activeConversationId) {
+				this.activeProcessingState = processingState;
+			}
+		}
+	}
 
-		return conversation.id;
+	/**
+	 * Get current processing state (sync version for reactive access)
+	 */
+	getCurrentProcessingStateSync(): ApiProcessingState | null {
+		return this.activeProcessingState;
 	}
 
 	/**
-	 * Loads a specific conversation and its messages
-	 * @param convId - The conversation ID to load
-	 * @returns True if conversation was loaded successfully, false otherwise
+	 * Restore processing state from last assistant message timings
+	 * Call this when keepStatsVisible is enabled and we need to show last known stats
 	 */
-	async loadConversation(convId: string): Promise<boolean> {
-		try {
-			const conversation = await DatabaseStore.getConversation(convId);
+	restoreProcessingStateFromMessages(messages: DatabaseMessage[], conversationId: string): void {
+		for (let i = messages.length - 1; i >= 0; i--) {
+			const message = messages[i];
+			if (message.role === 'assistant' && message.timings) {
+				const restoredState = this.parseTimingData({
+					prompt_n: message.timings.prompt_n || 0,
+					predicted_n: message.timings.predicted_n || 0,
+					predicted_per_second:
+						message.timings.predicted_n && message.timings.predicted_ms
+							? (message.timings.predicted_n / message.timings.predicted_ms) * 1000
+							: 0,
+					cache_n: message.timings.cache_n || 0
+				});
+
+				if (restoredState) {
+					this.processingStates.set(conversationId, restoredState);
+
+					if (conversationId === this.activeConversationId) {
+						this.activeProcessingState = restoredState;
+					}
 
-			if (!conversation) {
-				return false;
+					return;
+				}
 			}
+		}
+	}
 
-			this.activeConversation = conversation;
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Streaming
+	// ─────────────────────────────────────────────────────────────────────────────
 
-			slotsService.setActiveConversation(convId);
+	/**
+	 * Start streaming session tracking
+	 */
+	startStreaming(): void {
+		this.isStreamingActive = true;
+	}
 
-			const isConvLoading = this.isConversationLoading(convId);
-			this.isLoading = isConvLoading;
+	/**
+	 * Stop streaming session tracking
+	 */
+	stopStreaming(): void {
+		this.isStreamingActive = false;
+	}
 
-			const streamingState = this.getConversationStreaming(convId);
-			this.currentResponse = streamingState?.response || '';
+	/**
+	 * Check if currently in a streaming session
+	 */
+	isStreaming(): boolean {
+		return this.isStreamingActive;
+	}
 
-			if (conversation.currNode) {
-				const allMessages = await DatabaseStore.getConversationMessages(convId);
-				this.activeMessages = filterByLeafNodeId(
-					allMessages,
-					conversation.currNode,
-					false
-				) as DatabaseMessage[];
-			} else {
-				// Load all messages for conversations without currNode (backward compatibility)
-				this.activeMessages = await DatabaseStore.getConversationMessages(convId);
-			}
+	private getContextTotal(): number {
+		const activeState = this.getActiveProcessingState();
 
-			return true;
-		} catch (error) {
-			console.error('Failed to load conversation:', error);
+		if (activeState && activeState.contextTotal > 0) {
+			return activeState.contextTotal;
+		}
 
-			return false;
+		const propsContextSize = contextSize();
+		if (propsContextSize && propsContextSize > 0) {
+			return propsContextSize;
 		}
+
+		return DEFAULT_CONTEXT;
+	}
+
+	private parseTimingData(timingData: Record<string, unknown>): ApiProcessingState | null {
+		const promptTokens = (timingData.prompt_n as number) || 0;
+		const predictedTokens = (timingData.predicted_n as number) || 0;
+		const tokensPerSecond = (timingData.predicted_per_second as number) || 0;
+		const cacheTokens = (timingData.cache_n as number) || 0;
+		const promptProgress = timingData.prompt_progress as
+			| {
+					total: number;
+					cache: number;
+					processed: number;
+					time_ms: number;
+			  }
+			| undefined;
+
+		const contextTotal = this.getContextTotal();
+		const currentConfig = config();
+		const outputTokensMax = currentConfig.max_tokens || -1;
+
+		const contextUsed = promptTokens + cacheTokens + predictedTokens;
+		const outputTokensUsed = predictedTokens;
+
+		const progressPercent = promptProgress
+			? Math.round((promptProgress.processed / promptProgress.total) * 100)
+			: undefined;
+
+		return {
+			status: predictedTokens > 0 ? 'generating' : promptProgress ? 'preparing' : 'idle',
+			tokensDecoded: predictedTokens,
+			tokensRemaining: outputTokensMax - predictedTokens,
+			contextUsed,
+			contextTotal,
+			outputTokensUsed,
+			outputTokensMax,
+			hasNextToken: predictedTokens > 0,
+			tokensPerSecond,
+			temperature: currentConfig.temperature ?? 0.8,
+			topP: currentConfig.top_p ?? 0.95,
+			speculative: false,
+			progressPercent,
+			promptTokens,
+			cacheTokens
+		};
+	}
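// Worked example (illustrative): what parseTimingData above computes for a
// stream reporting prompt_n=200, cache_n=56, predicted_n=128 at
// predicted_per_second=42 with max_tokens=512:
//   contextUsed     = 200 + 56 + 128 = 384   (compared against contextTotal)
//   tokensRemaining = 512 - 128      = 384   (negative when max_tokens=-1, i.e. unbounded)
//   status          = 'generating'           (predicted_n > 0)
// With prompt_progress { processed: 150, total: 200 }:
//   progressPercent = round(150 / 200 * 100) = 75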
 	/**
-	 * Adds a new message to the active conversation
-	 * @param role - The role of the message sender (user/assistant)
-	 * @param content - The message content
-	 * @param type - The message type, defaults to 'text'
-	 * @param parent - Parent message ID, defaults to '-1' for auto-detection
-	 * @param extras - Optional extra data (files, attachments, etc.)
-	 * @returns The created message or null if failed
+	 * Gets the model used in a conversation based on the latest assistant message.
+	 * Returns the model from the most recent assistant message that has a model field set.
+	 *
+	 * @param messages - Array of messages to search through
+	 * @returns The model name or null if no model found
 	 */
+	getConversationModel(messages: DatabaseMessage[]): string | null {
+		// Search backwards through messages to find most recent assistant message with model
+		for (let i = messages.length - 1; i >= 0; i--) {
+			const message = messages[i];
+			if (message.role === 'assistant' && message.model) {
+				return message.model;
+			}
+		}
+		return null;
+	}
+
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Error Handling
+	// ─────────────────────────────────────────────────────────────────────────────
+
+	private isAbortError(error: unknown): boolean {
+		return error instanceof Error && (error.name === 'AbortError' || error instanceof DOMException);
+	}
+
+	private showErrorDialog(type: 'timeout' | 'server', message: string): void {
+		this.errorDialogState = { type, message };
+	}
+
+	dismissErrorDialog(): void {
+		this.errorDialogState = null;
+	}
+
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Message Operations
+	// ─────────────────────────────────────────────────────────────────────────────
+
 	async addMessage(
 		role: ChatRole,
 		content: string,
@@ -169,7 +354,8 @@ class ChatStore {
 		parent: string = '-1',
 		extras?: DatabaseMessageExtra[]
 	): Promise<DatabaseMessage | null> {
-		if (!this.activeConversation) {
+		const activeConv = conversationsStore.activeConversation;
+		if (!activeConv) {
 			console.error('No active conversation when trying to add message');
 			return null;
 		}
@@ -178,17 +364,14 @@ class ChatStore {
 		let parentId: string | null = null;
 
 		if (parent === '-1') {
-			if (this.activeMessages.length > 0) {
-				parentId = this.activeMessages[this.activeMessages.length - 1].id;
+			const activeMessages = conversationsStore.activeMessages;
+			if (activeMessages.length > 0) {
+				parentId = activeMessages[activeMessages.length - 1].id;
 			} else {
-				const allMessages = await DatabaseStore.getConversationMessages(
-					this.activeConversation.id
-				);
+				const allMessages = await conversationsStore.getConversationMessages(activeConv.id);
 				const rootMessage = allMessages.find((m) => m.parent === null && m.type === 'root');
 
 				if (!rootMessage) {
-					const rootId = await DatabaseStore.createRootMessage(this.activeConversation.id);
-					parentId = rootId;
+					parentId = await DatabaseService.createRootMessage(activeConv.id);
 				} else {
 					parentId = rootMessage.id;
 				}
@@ -197,9 +380,9 @@ class ChatStore {
 			parentId = parent;
 		}
 
-		const message = await DatabaseStore.createMessageBranch(
+		const message = await DatabaseService.createMessageBranch(
 			{
-				convId: this.activeConversation.id,
+				convId: activeConv.id,
 				role,
 				content,
 				type,
@@ -212,12 +395,9 @@ class ChatStore {
 			parentId
 		);
 
-		this.activeMessages.push(message);
-
-		await DatabaseStore.updateCurrentNode(this.activeConversation.id, message.id);
-		this.activeConversation.currNode = message.id;
-
-		this.updateConversationTimestamp();
+		conversationsStore.addMessageToActive(message);
+		await conversationsStore.updateCurrentNode(message.id);
+		conversationsStore.updateConversationTimestamp();
 
 		return message;
 	} catch (error) {
@@ -226,598 +406,273 @@ class ChatStore {
 		}
 	}
 
-	/**
-	 * Gets API options from current configuration settings
-	 * Converts settings store values to API-compatible format
-	 * @returns API options object for chat completion requests
-	 */
-	private getApiOptions(): Record<string, unknown> {
-		const currentConfig = config();
-		const hasValue = (value: unknown): boolean =>
-			value !== undefined && value !== null && value !== '';
-
-		const apiOptions: Record<string, unknown> = {
-			stream: true,
-			timings_per_token: true
-		};
-
-		if (hasValue(currentConfig.temperature)) {
-			apiOptions.temperature = Number(currentConfig.temperature);
-		}
-		if (hasValue(currentConfig.max_tokens)) {
-			apiOptions.max_tokens = Number(currentConfig.max_tokens);
-		}
-		if (hasValue(currentConfig.dynatemp_range)) {
-			apiOptions.dynatemp_range = Number(currentConfig.dynatemp_range);
-		}
-		if (hasValue(currentConfig.dynatemp_exponent)) {
-			apiOptions.dynatemp_exponent = Number(currentConfig.dynatemp_exponent);
-		}
-		if (hasValue(currentConfig.top_k)) {
-			apiOptions.top_k = Number(currentConfig.top_k);
-		}
-		if (hasValue(currentConfig.top_p)) {
-			apiOptions.top_p = Number(currentConfig.top_p);
-		}
-		if (hasValue(currentConfig.min_p)) {
-			apiOptions.min_p = Number(currentConfig.min_p);
-		}
-		if (hasValue(currentConfig.xtc_probability)) {
-			apiOptions.xtc_probability = Number(currentConfig.xtc_probability);
-		}
-		if (hasValue(currentConfig.xtc_threshold)) {
-			apiOptions.xtc_threshold = Number(currentConfig.xtc_threshold);
-		}
-		if (hasValue(currentConfig.typ_p)) {
-			apiOptions.typ_p = Number(currentConfig.typ_p);
-		}
-		if (hasValue(currentConfig.repeat_last_n)) {
-			apiOptions.repeat_last_n = Number(currentConfig.repeat_last_n);
-		}
-		if (hasValue(currentConfig.repeat_penalty)) {
-			apiOptions.repeat_penalty = Number(currentConfig.repeat_penalty);
-		}
-		if (hasValue(currentConfig.presence_penalty)) {
-			apiOptions.presence_penalty = Number(currentConfig.presence_penalty);
-		}
-		if (hasValue(currentConfig.frequency_penalty)) {
-			apiOptions.frequency_penalty = Number(currentConfig.frequency_penalty);
-		}
-		if (hasValue(currentConfig.dry_multiplier)) {
-			apiOptions.dry_multiplier = Number(currentConfig.dry_multiplier);
-		}
-		if (hasValue(currentConfig.dry_base)) {
-			apiOptions.dry_base = Number(currentConfig.dry_base);
-		}
-		if (hasValue(currentConfig.dry_allowed_length)) {
-			apiOptions.dry_allowed_length = Number(currentConfig.dry_allowed_length);
-		}
-		if (hasValue(currentConfig.dry_penalty_last_n)) {
-			apiOptions.dry_penalty_last_n = Number(currentConfig.dry_penalty_last_n);
-		}
-		if (currentConfig.samplers) {
-			apiOptions.samplers = currentConfig.samplers;
-		}
-		if (currentConfig.custom) {
-			apiOptions.custom = currentConfig.custom;
-		}
-
-		return apiOptions;
-	}
-
-	/**
-	 * Helper methods for per-conversation loading state management
-	 */
-	private setConversationLoading(convId: string, loading: boolean): void {
-		if (loading) {
-			this.conversationLoadingStates.set(convId, true);
-			if (this.activeConversation?.id === convId) {
-				this.isLoading = true;
-			}
-		} else {
-			this.conversationLoadingStates.delete(convId);
-			if (this.activeConversation?.id === convId) {
-				this.isLoading = false;
-			}
-		}
-	}
-
-	private isConversationLoading(convId: string): boolean {
-		return this.conversationLoadingStates.get(convId) || false;
-	}
-
-	private setConversationStreaming(convId: string, response: string, messageId: string): void {
-		this.conversationStreamingStates.set(convId, { response, messageId });
-		if (this.activeConversation?.id === convId) {
-			this.currentResponse = response;
-		}
-	}
-
-	private clearConversationStreaming(convId: string): void {
-		this.conversationStreamingStates.delete(convId);
-		if (this.activeConversation?.id === convId) {
-			this.currentResponse = '';
-		}
-	}
+	private async createAssistantMessage(parentId?: string): Promise<DatabaseMessage | null> {
+		const activeConv = conversationsStore.activeConversation;
+		if (!activeConv) return null;
 
-	private getConversationStreaming(
-		convId: string
-	): { response: string; messageId: string } | undefined {
-		return this.conversationStreamingStates.get(convId);
+		return await DatabaseService.createMessageBranch(
+			{
+				convId: activeConv.id,
+				type: 'text',
+				role: 'assistant',
+				content: '',
+				timestamp: Date.now(),
+				thinking: '',
+				toolCalls: '',
+				children: [],
+				model: null
+			},
+			parentId || null
+		);
 	}
 
-	/**
-	 * Handles streaming chat completion with the AI model
-	 * @param allMessages - All messages in the conversation
-	 * @param assistantMessage - The assistant message to stream content into
-	 * @param onComplete - Optional callback when streaming completes
-	 * @param onError - Optional callback when an error occurs
-	 */
 	private async streamChatCompletion(
 		allMessages: DatabaseMessage[],
 		assistantMessage: DatabaseMessage,
 		onComplete?: (content: string) => Promise<void>,
-		onError?: (error: Error) => void
+		onError?: (error: Error) => void,
+		modelOverride?: string | null
 	): Promise<void> {
 		let streamedContent = '';
 		let streamedReasoningContent = '';
 		let streamedToolCallContent = '';
-
 		let resolvedModel: string | null = null;
 		let modelPersisted = false;
-		const currentConfig = config();
-		const preferServerPropsModel = !currentConfig.modelSelectorEnabled;
-		let serverPropsRefreshed = false;
-		let updateModelFromServerProps: ((persistImmediately?: boolean) => void) | null = null;
-
-		const refreshServerPropsOnce = () => {
-			if (serverPropsRefreshed) {
-				return;
-			}
-
-			serverPropsRefreshed = true;
-
-			const hasExistingProps = serverStore.serverProps !== null;
-
-			serverStore
-				.fetchServerProps({ silent: hasExistingProps })
-				.then(() => {
-					updateModelFromServerProps?.(true);
-				})
-				.catch((error) => {
-					console.warn('Failed to refresh server props after streaming started:', error);
-				});
-		};
 
 		const recordModel = (modelName: string | null | undefined, persistImmediately = true): void => {
-			const serverModelName = serverStore.modelName;
-			const preferredModelSource = preferServerPropsModel
-				? (serverModelName ?? modelName ?? null)
-				: (modelName ?? serverModelName ?? null);
-
-			if (!preferredModelSource) {
-				return;
-			}
-
-			const normalizedModel = normalizeModelName(preferredModelSource);
-
-			if (!normalizedModel || normalizedModel === resolvedModel) {
-				return;
-			}
-
+			if (!modelName) return;
+			const normalizedModel = normalizeModelName(modelName);
+			if (!normalizedModel || normalizedModel === resolvedModel) return;
 			resolvedModel = normalizedModel;
-
-			const messageIndex = this.findMessageIndex(assistantMessage.id);
-
-			this.updateMessageAtIndex(messageIndex, { model: normalizedModel });
-
+			const messageIndex = conversationsStore.findMessageIndex(assistantMessage.id);
+			conversationsStore.updateMessageAtIndex(messageIndex, { model: normalizedModel });
 			if (persistImmediately && !modelPersisted) {
 				modelPersisted = true;
-				DatabaseStore.updateMessage(assistantMessage.id, { model: normalizedModel }).catch(
-					(error) => {
-						console.error('Failed to persist model name:', error);
-						modelPersisted = false;
-						resolvedModel = null;
-					}
-				);
+				DatabaseService.updateMessage(assistantMessage.id, { model: normalizedModel }).catch(() => {
+					modelPersisted = false;
+					resolvedModel = null;
+				});
 			}
 		};
 
-		if (preferServerPropsModel) {
-			updateModelFromServerProps = (persistImmediately = true) => {
-				const currentServerModel = serverStore.modelName;
+		this.startStreaming();
+		this.setActiveProcessingConversation(assistantMessage.convId);
 
-				if (!currentServerModel) {
-					return;
-				}
-
-				recordModel(currentServerModel, persistImmediately);
-			};
-
-			updateModelFromServerProps(false);
-		}
+		const abortController = this.getOrCreateAbortController(assistantMessage.convId);
 
-		slotsService.startStreaming();
-		slotsService.setActiveConversation(assistantMessage.convId);
-
-		await chatService.sendMessage(
+		await ChatService.sendMessage(
 			allMessages,
 			{
 				...this.getApiOptions(),
-
-				onFirstValidChunk: () => {
-					refreshServerPropsOnce();
-				},
+				...(modelOverride ? { model: modelOverride } : {}),
 				onChunk: (chunk: string) => {
 					streamedContent += chunk;
-					this.setConversationStreaming(
-						assistantMessage.convId,
-						streamedContent,
-						assistantMessage.id
-					);
-
-					const messageIndex = this.findMessageIndex(assistantMessage.id);
-					this.updateMessageAtIndex(messageIndex, {
-						content: streamedContent
-					});
+					this.setChatStreaming(assistantMessage.convId, streamedContent, assistantMessage.id);
+					const idx = conversationsStore.findMessageIndex(assistantMessage.id);
+					conversationsStore.updateMessageAtIndex(idx, { content: streamedContent });
 				},
-
 				onReasoningChunk: (reasoningChunk: string) => {
 					streamedReasoningContent += reasoningChunk;
-
-					const messageIndex = this.findMessageIndex(assistantMessage.id);
-
-					this.updateMessageAtIndex(messageIndex, { thinking: streamedReasoningContent });
+					const idx = conversationsStore.findMessageIndex(assistantMessage.id);
+					conversationsStore.updateMessageAtIndex(idx, { thinking: streamedReasoningContent });
 				},
-
 				onToolCallChunk: (toolCallChunk: string) => {
 					const chunk = toolCallChunk.trim();
-
-					if (!chunk) {
-						return;
-					}
-
+					if (!chunk) return;
 					streamedToolCallContent = chunk;
-
-					const messageIndex = this.findMessageIndex(assistantMessage.id);
-
-					this.updateMessageAtIndex(messageIndex, { toolCalls: streamedToolCallContent });
+					const idx = conversationsStore.findMessageIndex(assistantMessage.id);
+					conversationsStore.updateMessageAtIndex(idx, { toolCalls: streamedToolCallContent });
 				},
-
-				onModel: (modelName: string) => {
-					recordModel(modelName);
+				onModel: (modelName: string) => recordModel(modelName),
+				onTimings: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => {
+					const tokensPerSecond =
+						timings?.predicted_ms && timings?.predicted_n
+							? (timings.predicted_n / timings.predicted_ms) * 1000
+							: 0;
+					this.updateProcessingStateFromTimings(
+						{
+							prompt_n: timings?.prompt_n || 0,
+							predicted_n: timings?.predicted_n || 0,
+							predicted_per_second: tokensPerSecond,
+							cache_n: timings?.cache_n || 0,
+							prompt_progress: promptProgress
+						},
+						assistantMessage.convId
+					);
 				},
-
 				onComplete: async (
 					finalContent?: string,
 					reasoningContent?: string,
 					timings?: ChatMessageTimings,
 					toolCallContent?: string
 				) => {
-					slotsService.stopStreaming();
-
-					const updateData: {
-						content: string;
-						thinking: string;
-						toolCalls: string;
-						timings?: ChatMessageTimings;
-						model?: string;
-					} = {
+					this.stopStreaming();
+
+					// Build update data - only include model if not already persisted
+					const updateData: Record<string, unknown> = {
 						content: finalContent || streamedContent,
 						thinking: reasoningContent || streamedReasoningContent,
 						toolCalls: toolCallContent || streamedToolCallContent,
-						timings: timings
+						timings
 					};
-
 					if (resolvedModel && !modelPersisted) {
 						updateData.model = resolvedModel;
-						modelPersisted = true;
 					}
+					await DatabaseService.updateMessage(assistantMessage.id, updateData);
 
-					await DatabaseStore.updateMessage(assistantMessage.id, updateData);
-
-					const messageIndex = this.findMessageIndex(assistantMessage.id);
-
-					const localUpdateData: {
-						timings?: ChatMessageTimings;
-						model?: string;
-						toolCalls?: string;
-					} = {
-						timings: timings
+					// Update UI state - always include model and timings if available
+					const idx = conversationsStore.findMessageIndex(assistantMessage.id);
+					const uiUpdate: Partial<DatabaseMessage> = {
+						content: updateData.content as string,
+						toolCalls: updateData.toolCalls as string
 					};
+					if (timings) uiUpdate.timings = timings;
+					if (resolvedModel) uiUpdate.model = resolvedModel;
 
-					if (updateData.model) {
-						localUpdateData.model = updateData.model;
-					}
+					conversationsStore.updateMessageAtIndex(idx, uiUpdate);
+					await conversationsStore.updateCurrentNode(assistantMessage.id);
 
-					if (updateData.toolCalls !== undefined) {
-						localUpdateData.toolCalls = updateData.toolCalls;
-					}
-
-					this.updateMessageAtIndex(messageIndex, localUpdateData);
-
-					await DatabaseStore.updateCurrentNode(assistantMessage.convId, assistantMessage.id);
-
-					if (this.activeConversation?.id === assistantMessage.convId) {
-						this.activeConversation.currNode = assistantMessage.id;
-						await this.refreshActiveMessages();
-					}
+					if (onComplete) await onComplete(streamedContent);
+					this.setChatLoading(assistantMessage.convId, false);
+					this.clearChatStreaming(assistantMessage.convId);
+					this.clearProcessingState(assistantMessage.convId);
 
-					if (onComplete) {
-						await onComplete(streamedContent);
+					if (isRouterMode()) {
+						modelsStore.fetchRouterModels().catch(console.error);
 					}
-
-					this.setConversationLoading(assistantMessage.convId, false);
-					this.clearConversationStreaming(assistantMessage.convId);
-					slotsService.clearConversationState(assistantMessage.convId);
 				},
-
 				onError: (error: Error) => {
-					slotsService.stopStreaming();
-
+					this.stopStreaming();
 					if (this.isAbortError(error)) {
-						this.setConversationLoading(assistantMessage.convId, false);
-						this.clearConversationStreaming(assistantMessage.convId);
-						slotsService.clearConversationState(assistantMessage.convId);
+						this.setChatLoading(assistantMessage.convId, false);
+						this.clearChatStreaming(assistantMessage.convId);
+						this.clearProcessingState(assistantMessage.convId);
 						return;
 					}
-
 					console.error('Streaming error:', error);
-					this.setConversationLoading(assistantMessage.convId, false);
-					this.clearConversationStreaming(assistantMessage.convId);
-					slotsService.clearConversationState(assistantMessage.convId);
-
-					const messageIndex = this.activeMessages.findIndex(
-						(m: DatabaseMessage) => m.id === assistantMessage.id
-					);
-
-					if (messageIndex !== -1) {
-						const [failedMessage] = this.activeMessages.splice(messageIndex, 1);
-
-						if (failedMessage) {
-							DatabaseStore.deleteMessage(failedMessage.id).catch((cleanupError) => {
-								console.error('Failed to remove assistant message after error:', cleanupError);
-							});
-						}
-					}
-
-					const dialogType = error.name === 'TimeoutError' ? 'timeout' : 'server';
-
-					this.showErrorDialog(dialogType, error.message);
-
-					if (onError) {
-						onError(error);
+					this.setChatLoading(assistantMessage.convId, false);
+					this.clearChatStreaming(assistantMessage.convId);
+					this.clearProcessingState(assistantMessage.convId);
+					const idx = conversationsStore.findMessageIndex(assistantMessage.id);
+					if (idx !== -1) {
+						const failedMessage = conversationsStore.removeMessageAtIndex(idx);
+						if (failedMessage) DatabaseService.deleteMessage(failedMessage.id).catch(console.error);
 					}
+					this.showErrorDialog(error.name === 'TimeoutError' ? 'timeout' : 'server', error.message);
+					if (onError) onError(error);
 				}
 			},
-			assistantMessage.convId
-		);
-	}
-
-	/**
-	 * Checks if an error is an abort error (user cancelled operation)
-	 * @param error - The error to check
-	 * @returns True if the error is an abort error
-	 */
-	private isAbortError(error: unknown): boolean {
-		return error instanceof Error && (error.name === 'AbortError' || error instanceof DOMException);
-	}
-
-	private showErrorDialog(type: 'timeout' | 'server', message: string): void {
-		this.errorDialogState = { type, message };
-	}
-
-	dismissErrorDialog(): void {
-		this.errorDialogState = null;
-	}
-
-	/**
-	 * Finds the index of a message in the active messages array
-	 * @param messageId - The message ID to find
-	 * @returns The index of the message, or -1 if not found
-	 */
-	private findMessageIndex(messageId: string): number {
-		return this.activeMessages.findIndex((m) => m.id === messageId);
-	}
-
-	/**
-	 * Updates a message at a specific index with partial data
-	 * @param index - The index of the message to update
-	 * @param updates - Partial message data to update
-	 */
-	private updateMessageAtIndex(index: number, updates: Partial<DatabaseMessage>): void {
-		if (index !== -1) {
-			Object.assign(this.activeMessages[index], updates);
-		}
-	}
-
-	/**
-	 * Creates a new assistant message in the database
-	 * @param parentId - Optional parent message ID, defaults to '-1'
-	 * @returns The created assistant message or null if failed
-	 */
-	private async createAssistantMessage(parentId?: string): Promise<DatabaseMessage | null> {
-		if (!this.activeConversation) return null;
-
-		return await DatabaseStore.createMessageBranch(
-			{
-				convId: this.activeConversation.id,
-				type: 'text',
-				role: 'assistant',
-				content: '',
-				timestamp: Date.now(),
-				thinking: '',
-				toolCalls: '',
-				children: [],
-				model: null
-			},
-			parentId || null
+			assistantMessage.convId,
+			abortController.signal
 		);
 	}
 
-	/**
-	 * Updates conversation lastModified timestamp and moves it to top of list
-	 * Ensures recently active conversations appear first in the sidebar
-	 */
-	private updateConversationTimestamp(): void {
-		if (!this.activeConversation) return;
-
-		const chatIndex = this.conversations.findIndex((c) => c.id === this.activeConversation!.id);
-
-		if (chatIndex !== -1) {
-			this.conversations[chatIndex].lastModified = Date.now();
-			const updatedConv = this.conversations.splice(chatIndex, 1)[0];
-			this.conversations.unshift(updatedConv);
-		}
-	}
-
-	/**
-	 * Sends a new message and generates AI response
-	 * @param content - The message content to send
-	 * @param extras - Optional extra data (files, attachments, etc.)
- */ async sendMessage(content: string, extras?: DatabaseMessageExtra[]): Promise { if (!content.trim() && (!extras || extras.length === 0)) return; - - if (this.activeConversation && this.isConversationLoading(this.activeConversation.id)) { - console.log('Cannot send message: current conversation is already processing a message'); - return; - } + const activeConv = conversationsStore.activeConversation; + if (activeConv && this.isChatLoading(activeConv.id)) return; let isNewConversation = false; - - if (!this.activeConversation) { - await this.createConversation(); + if (!activeConv) { + await conversationsStore.createConversation(); isNewConversation = true; } - - if (!this.activeConversation) { - console.error('No active conversation available for sending message'); - return; - } + const currentConv = conversationsStore.activeConversation; + if (!currentConv) return; this.errorDialogState = null; - - this.setConversationLoading(this.activeConversation.id, true); - this.clearConversationStreaming(this.activeConversation.id); - - let userMessage: DatabaseMessage | null = null; + this.setChatLoading(currentConv.id, true); + this.clearChatStreaming(currentConv.id); try { - userMessage = await this.addMessage('user', content, 'text', '-1', extras); - - if (!userMessage) { - throw new Error('Failed to add user message'); - } - - if (isNewConversation && content) { - const title = content.trim(); - await this.updateConversationName(this.activeConversation.id, title); - } + const userMessage = await this.addMessage('user', content, 'text', '-1', extras); + if (!userMessage) throw new Error('Failed to add user message'); + if (isNewConversation && content) + await conversationsStore.updateConversationName(currentConv.id, content.trim()); const assistantMessage = await this.createAssistantMessage(userMessage.id); - - if (!assistantMessage) { - throw new Error('Failed to create assistant message'); - } - - this.activeMessages.push(assistantMessage); - - const conversationContext = this.activeMessages.slice(0, -1); - - await this.streamChatCompletion(conversationContext, assistantMessage); + if (!assistantMessage) throw new Error('Failed to create assistant message'); + conversationsStore.addMessageToActive(assistantMessage); + await this.streamChatCompletion( + conversationsStore.activeMessages.slice(0, -1), + assistantMessage + ); } catch (error) { if (this.isAbortError(error)) { - this.setConversationLoading(this.activeConversation!.id, false); + this.setChatLoading(currentConv.id, false); return; } - console.error('Failed to send message:', error); - this.setConversationLoading(this.activeConversation!.id, false); + this.setChatLoading(currentConv.id, false); if (!this.errorDialogState) { - if (error instanceof Error) { - const dialogType = error.name === 'TimeoutError' ? 'timeout' : 'server'; - this.showErrorDialog(dialogType, error.message); - } else { - this.showErrorDialog('server', 'Unknown error occurred while sending message'); - } + const dialogType = + error instanceof Error && error.name === 'TimeoutError' ? 'timeout' : 'server'; + this.showErrorDialog(dialogType, error instanceof Error ? 
error.message : 'Unknown error'); } } } - /** - * Stops the current message generation - * Aborts ongoing requests and saves partial response if available - */ async stopGeneration(): Promise<void> { - if (!this.activeConversation) return; - - const convId = this.activeConversation.id; - - await this.savePartialResponseIfNeeded(convId); - - slotsService.stopStreaming(); - chatService.abort(convId); - - this.setConversationLoading(convId, false); - this.clearConversationStreaming(convId); - slotsService.clearConversationState(convId); + const activeConv = conversationsStore.activeConversation; + if (!activeConv) return; + await this.savePartialResponseIfNeeded(activeConv.id); + this.stopStreaming(); + this.abortRequest(activeConv.id); + this.setChatLoading(activeConv.id, false); + this.clearChatStreaming(activeConv.id); + this.clearProcessingState(activeConv.id); } /** - * Gracefully stops generation and saves partial response + * Gets or creates an AbortController for a conversation */ - async gracefulStop(): Promise<void> { - if (!this.isLoading) return; - - slotsService.stopStreaming(); - chatService.abort(); - await this.savePartialResponseIfNeeded(); - - this.conversationLoadingStates.clear(); - this.conversationStreamingStates.clear(); - this.isLoading = false; - this.currentResponse = ''; + private getOrCreateAbortController(convId: string): AbortController { + let controller = this.abortControllers.get(convId); + if (!controller || controller.signal.aborted) { + controller = new AbortController(); + this.abortControllers.set(convId, controller); + } + return controller; } /** - * Saves partial response if generation was interrupted - * Preserves user's partial content and timing data when generation is stopped early + * Aborts any ongoing request for a conversation */ + private abortRequest(convId?: string): void { + if (convId) { + const controller = this.abortControllers.get(convId); + if (controller) { + controller.abort(); + this.abortControllers.delete(convId); + } + } else { + for (const controller of this.abortControllers.values()) { + controller.abort(); + } + this.abortControllers.clear(); + } + } + private async savePartialResponseIfNeeded(convId?: string): Promise<void> { - const conversationId = convId || this.activeConversation?.id; + const conversationId = convId || conversationsStore.activeConversation?.id; if (!conversationId) return; - - const streamingState = this.conversationStreamingStates.get(conversationId); - if (!streamingState || !streamingState.response.trim()) { - return; - } + const streamingState = this.chatStreamingStates.get(conversationId); + if (!streamingState || !streamingState.response.trim()) return; const messages = - conversationId === this.activeConversation?.id - ? this.activeMessages - : await DatabaseStore.getConversationMessages(conversationId); - + conversationId === conversationsStore.activeConversation?.id + ?
conversationsStore.activeMessages + : await conversationsStore.getConversationMessages(conversationId); if (!messages.length) return; const lastMessage = messages[messages.length - 1]; - - if (lastMessage && lastMessage.role === 'assistant') { + if (lastMessage?.role === 'assistant') { try { - const updateData: { - content: string; - thinking?: string; - timings?: ChatMessageTimings; - } = { + const updateData: { content: string; thinking?: string; timings?: ChatMessageTimings } = { content: streamingState.response }; - - if (lastMessage.thinking?.trim()) { - updateData.thinking = lastMessage.thinking; - } - - const lastKnownState = await slotsService.getCurrentState(); - + if (lastMessage.thinking?.trim()) updateData.thinking = lastMessage.thinking; + const lastKnownState = this.getCurrentProcessingStateSync(); if (lastKnownState) { updateData.timings = { prompt_n: lastKnownState.promptTokens || 0, @@ -829,446 +684,127 @@ class ChatStore { : undefined }; } - - await DatabaseStore.updateMessage(lastMessage.id, updateData); - + await DatabaseService.updateMessage(lastMessage.id, updateData); lastMessage.content = this.currentResponse; - if (updateData.thinking !== undefined) { - lastMessage.thinking = updateData.thinking; - } - if (updateData.timings) { - lastMessage.timings = updateData.timings; - } + if (updateData.thinking) lastMessage.thinking = updateData.thinking; + if (updateData.timings) lastMessage.timings = updateData.timings; } catch (error) { - lastMessage.content = this.currentResponse; - console.error('Failed to save partial response:', error); - } - } else { - console.error('Last message is not an assistant message'); - } - } - - /** - * Updates a user message and regenerates the assistant response - * @param messageId - The ID of the message to update - * @param newContent - The new content for the message - */ - async updateMessage(messageId: string, newContent: string): Promise { - if (!this.activeConversation) return; - - if (this.isLoading) { - this.stopGeneration(); - } - - try { - const messageIndex = this.findMessageIndex(messageId); - if (messageIndex === -1) { - console.error('Message not found for update'); - return; - } - - const messageToUpdate = this.activeMessages[messageIndex]; - const originalContent = messageToUpdate.content; - - if (messageToUpdate.role !== 'user') { - console.error('Only user messages can be edited'); - return; - } - - const allMessages = await DatabaseStore.getConversationMessages(this.activeConversation.id); - const rootMessage = allMessages.find((m) => m.type === 'root' && m.parent === null); - const isFirstUserMessage = - rootMessage && messageToUpdate.parent === rootMessage.id && messageToUpdate.role === 'user'; - - this.updateMessageAtIndex(messageIndex, { content: newContent }); - await DatabaseStore.updateMessage(messageId, { content: newContent }); - - if (isFirstUserMessage && newContent.trim()) { - await this.updateConversationTitleWithConfirmation( - this.activeConversation.id, - newContent.trim(), - this.titleUpdateConfirmationCallback - ); - } - - const messagesToRemove = this.activeMessages.slice(messageIndex + 1); - for (const message of messagesToRemove) { - await DatabaseStore.deleteMessage(message.id); - } - - this.activeMessages = this.activeMessages.slice(0, messageIndex + 1); - this.updateConversationTimestamp(); - - this.setConversationLoading(this.activeConversation.id, true); - this.clearConversationStreaming(this.activeConversation.id); - - try { - const assistantMessage = await this.createAssistantMessage(); - if 
(!assistantMessage) { - throw new Error('Failed to create assistant message'); - } - - this.activeMessages.push(assistantMessage); - await DatabaseStore.updateCurrentNode(this.activeConversation.id, assistantMessage.id); - this.activeConversation.currNode = assistantMessage.id; - - await this.streamChatCompletion( - this.activeMessages.slice(0, -1), - assistantMessage, - undefined, - () => { - const editedMessageIndex = this.findMessageIndex(messageId); - this.updateMessageAtIndex(editedMessageIndex, { content: originalContent }); - } - ); - } catch (regenerateError) { - console.error('Failed to regenerate response:', regenerateError); - this.setConversationLoading(this.activeConversation!.id, false); - - const messageIndex = this.findMessageIndex(messageId); - this.updateMessageAtIndex(messageIndex, { content: originalContent }); - } - } catch (error) { - if (this.isAbortError(error)) { - return; - } - - console.error('Failed to update message:', error); - } - } - - /** - * Regenerates an assistant message with a new response - * @param messageId - The ID of the assistant message to regenerate - */ - async regenerateMessage(messageId: string): Promise { - if (!this.activeConversation || this.isLoading) return; - - try { - const messageIndex = this.findMessageIndex(messageId); - if (messageIndex === -1) { - console.error('Message not found for regeneration'); - return; - } - - const messageToRegenerate = this.activeMessages[messageIndex]; - if (messageToRegenerate.role !== 'assistant') { - console.error('Only assistant messages can be regenerated'); - return; - } - - const messagesToRemove = this.activeMessages.slice(messageIndex); - for (const message of messagesToRemove) { - await DatabaseStore.deleteMessage(message.id); - } - - this.activeMessages = this.activeMessages.slice(0, messageIndex); - this.updateConversationTimestamp(); - - this.setConversationLoading(this.activeConversation.id, true); - this.clearConversationStreaming(this.activeConversation.id); - - try { - const parentMessageId = - this.activeMessages.length > 0 - ? 
this.activeMessages[this.activeMessages.length - 1].id - : null; - - const assistantMessage = await this.createAssistantMessage(parentMessageId); - - if (!assistantMessage) { - throw new Error('Failed to create assistant message'); - } - - this.activeMessages.push(assistantMessage); - - const conversationContext = this.activeMessages.slice(0, -1); - - await this.streamChatCompletion(conversationContext, assistantMessage); - } catch (regenerateError) { - console.error('Failed to regenerate response:', regenerateError); - this.setConversationLoading(this.activeConversation!.id, false); - } - } catch (error) { - if (this.isAbortError(error)) return; - console.error('Failed to regenerate message:', error); - } - } - - /** - * Updates the name of a conversation - * @param convId - The conversation ID to update - * @param name - The new name for the conversation - */ - async updateConversationName(convId: string, name: string): Promise { - try { - await DatabaseStore.updateConversation(convId, { name }); - - const convIndex = this.conversations.findIndex((c) => c.id === convId); - - if (convIndex !== -1) { - this.conversations[convIndex].name = name; - } - - if (this.activeConversation?.id === convId) { - this.activeConversation.name = name; - } - } catch (error) { - console.error('Failed to update conversation name:', error); - } - } - - /** - * Sets the callback function for title update confirmations - * @param callback - Function to call when confirmation is needed - */ - setTitleUpdateConfirmationCallback( - callback: (currentTitle: string, newTitle: string) => Promise - ): void { - this.titleUpdateConfirmationCallback = callback; - } - - /** - * Updates conversation title with optional confirmation dialog based on settings - * @param convId - The conversation ID to update - * @param newTitle - The new title content - * @param onConfirmationNeeded - Callback when user confirmation is needed - * @returns Promise - True if title was updated, false if cancelled - */ - async updateConversationTitleWithConfirmation( - convId: string, - newTitle: string, - onConfirmationNeeded?: (currentTitle: string, newTitle: string) => Promise - ): Promise { - try { - const currentConfig = config(); - - if (currentConfig.askForTitleConfirmation && onConfirmationNeeded) { - const conversation = await DatabaseStore.getConversation(convId); - if (!conversation) return false; - - const shouldUpdate = await onConfirmationNeeded(conversation.name, newTitle); - if (!shouldUpdate) return false; - } - - await this.updateConversationName(convId, newTitle); - return true; - } catch (error) { - console.error('Failed to update conversation title with confirmation:', error); - return false; - } - } - - /** - * Downloads a conversation as JSON file - * @param convId - The conversation ID to download - */ - async downloadConversation(convId: string): Promise { - if (!this.activeConversation || this.activeConversation.id !== convId) { - // Load the conversation if not currently active - const conversation = await DatabaseStore.getConversation(convId); - if (!conversation) return; - - const messages = await DatabaseStore.getConversationMessages(convId); - const conversationData = { - conv: conversation, - messages - }; - - this.triggerDownload(conversationData); - } else { - // Use current active conversation data - const conversationData: ExportedConversations = { - conv: this.activeConversation!, - messages: this.activeMessages - }; - - this.triggerDownload(conversationData); + lastMessage.content = this.currentResponse; + 
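+ // Keep the partial text in memory even if the IndexedDB write fails, so the UI
+ // does not drop content the user already saw; only persistence is lost.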
console.error('Failed to save partial response:', error); + } } } - /** - * Triggers file download in browser - * @param data - Data to download (expected: { conv: DatabaseConversation, messages: DatabaseMessage[] }) - * @param filename - Optional filename - */ - private triggerDownload(data: ExportedConversations, filename?: string): void { - const conversation = - 'conv' in data ? data.conv : Array.isArray(data) ? data[0]?.conv : undefined; - if (!conversation) { - console.error('Invalid data: missing conversation'); - return; - } - const conversationName = conversation.name ? conversation.name.trim() : ''; - const convId = conversation.id || 'unknown'; - const truncatedSuffix = conversationName - .toLowerCase() - .replace(/[^a-z0-9]/gi, '_') - .replace(/_+/g, '_') - .substring(0, 20); - const downloadFilename = filename || `conversation_${convId}_${truncatedSuffix}.json`; - - const conversationJson = JSON.stringify(data, null, 2); - const blob = new Blob([conversationJson], { - type: 'application/json' - }); - const url = URL.createObjectURL(blob); - const a = document.createElement('a'); - a.href = url; - a.download = downloadFilename; - document.body.appendChild(a); - a.click(); - document.body.removeChild(a); - URL.revokeObjectURL(url); - } + async updateMessage(messageId: string, newContent: string): Promise { + const activeConv = conversationsStore.activeConversation; + if (!activeConv) return; + if (this.isLoading) this.stopGeneration(); - /** - * Exports all conversations with their messages as a JSON file - * Returns the list of exported conversations - */ - async exportAllConversations(): Promise { try { - const allConversations = await DatabaseStore.getAllConversations(); - if (allConversations.length === 0) { - throw new Error('No conversations to export'); - } - - const allData: ExportedConversations = await Promise.all( - allConversations.map(async (conv) => { - const messages = await DatabaseStore.getConversationMessages(conv.id); - return { conv, messages }; - }) - ); - - const blob = new Blob([JSON.stringify(allData, null, 2)], { - type: 'application/json' - }); - const url = URL.createObjectURL(blob); - const a = document.createElement('a'); - a.href = url; - a.download = `all_conversations_${new Date().toISOString().split('T')[0]}.json`; - document.body.appendChild(a); - a.click(); - document.body.removeChild(a); - URL.revokeObjectURL(url); - - toast.success(`All conversations (${allConversations.length}) prepared for download`); - return allConversations; - } catch (err) { - console.error('Failed to export conversations:', err); - throw err; - } - } - - /** - * Imports conversations from a JSON file. - * Supports both single conversation (object) and multiple conversations (array). 
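+ // Accepted import payload shapes (matching the checks in the parser below):
+ //   [{ "conv": { ... }, "messages": [ ... ] }, ...]   // array of conversations
+ //   { "conv": { ... }, "messages": [ ... ] }          // single conversation object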
- * Uses DatabaseStore for safe, encapsulated data access - * Returns the list of imported conversations - */ - async importConversations(): Promise { - return new Promise((resolve, reject) => { - const input = document.createElement('input'); - input.type = 'file'; - input.accept = '.json'; - - input.onchange = async (e) => { - const file = (e.target as HTMLInputElement)?.files?.[0]; - if (!file) { - reject(new Error('No file selected')); - return; - } + const messageIndex = conversationsStore.findMessageIndex(messageId); + if (messageIndex === -1) return; - try { - const text = await file.text(); - const parsedData = JSON.parse(text); - let importedData: ExportedConversations; - - if (Array.isArray(parsedData)) { - importedData = parsedData; - } else if ( - parsedData && - typeof parsedData === 'object' && - 'conv' in parsedData && - 'messages' in parsedData - ) { - // Single conversation object - importedData = [parsedData]; - } else { - throw new Error( - 'Invalid file format: expected array of conversations or single conversation object' - ); - } + const messageToUpdate = conversationsStore.activeMessages[messageIndex]; + const originalContent = messageToUpdate.content; + if (messageToUpdate.role !== 'user') return; - const result = await DatabaseStore.importConversations(importedData); + const allMessages = await conversationsStore.getConversationMessages(activeConv.id); + const rootMessage = allMessages.find((m) => m.type === 'root' && m.parent === null); + const isFirstUserMessage = rootMessage && messageToUpdate.parent === rootMessage.id; - // Refresh UI - await this.loadConversations(); + conversationsStore.updateMessageAtIndex(messageIndex, { content: newContent }); + await DatabaseService.updateMessage(messageId, { content: newContent }); - toast.success(`Imported ${result.imported} conversation(s), skipped ${result.skipped}`); + if (isFirstUserMessage && newContent.trim()) { + await conversationsStore.updateConversationTitleWithConfirmation( + activeConv.id, + newContent.trim(), + conversationsStore.titleUpdateConfirmationCallback + ); + } - // Extract the conversation objects from imported data - const importedConversations = importedData.map((item) => item.conv); - resolve(importedConversations); - } catch (err: unknown) { - const message = err instanceof Error ? 
err.message : 'Unknown error'; - console.error('Failed to import conversations:', err); - toast.error('Import failed', { - description: message + const messagesToRemove = conversationsStore.activeMessages.slice(messageIndex + 1); + for (const message of messagesToRemove) await DatabaseService.deleteMessage(message.id); + conversationsStore.sliceActiveMessages(messageIndex + 1); + conversationsStore.updateConversationTimestamp(); + + this.setChatLoading(activeConv.id, true); + this.clearChatStreaming(activeConv.id); + + const assistantMessage = await this.createAssistantMessage(); + if (!assistantMessage) throw new Error('Failed to create assistant message'); + conversationsStore.addMessageToActive(assistantMessage); + await conversationsStore.updateCurrentNode(assistantMessage.id); + await this.streamChatCompletion( + conversationsStore.activeMessages.slice(0, -1), + assistantMessage, + undefined, + () => { + conversationsStore.updateMessageAtIndex(conversationsStore.findMessageIndex(messageId), { + content: originalContent }); - reject(new Error(`Import failed: ${message}`)); } - }; - - input.click(); - }); + ); + } catch (error) { + if (!this.isAbortError(error)) console.error('Failed to update message:', error); + } } - /** - * Deletes a conversation and all its messages - * @param convId - The conversation ID to delete - */ - async deleteConversation(convId: string): Promise { - try { - await DatabaseStore.deleteConversation(convId); + // ───────────────────────────────────────────────────────────────────────────── + // Regeneration + // ───────────────────────────────────────────────────────────────────────────── - this.conversations = this.conversations.filter((c) => c.id !== convId); + async regenerateMessage(messageId: string): Promise { + const activeConv = conversationsStore.activeConversation; + if (!activeConv || this.isLoading) return; - if (this.activeConversation?.id === convId) { - this.activeConversation = null; - this.activeMessages = []; - await goto(`?new_chat=true#/`); - } + try { + const messageIndex = conversationsStore.findMessageIndex(messageId); + if (messageIndex === -1) return; + const messageToRegenerate = conversationsStore.activeMessages[messageIndex]; + if (messageToRegenerate.role !== 'assistant') return; + + const messagesToRemove = conversationsStore.activeMessages.slice(messageIndex); + for (const message of messagesToRemove) await DatabaseService.deleteMessage(message.id); + conversationsStore.sliceActiveMessages(messageIndex); + conversationsStore.updateConversationTimestamp(); + + this.setChatLoading(activeConv.id, true); + this.clearChatStreaming(activeConv.id); + + const parentMessageId = + conversationsStore.activeMessages.length > 0 + ? 
conversationsStore.activeMessages[conversationsStore.activeMessages.length - 1].id + : undefined; + const assistantMessage = await this.createAssistantMessage(parentMessageId); + if (!assistantMessage) throw new Error('Failed to create assistant message'); + conversationsStore.addMessageToActive(assistantMessage); + await this.streamChatCompletion( + conversationsStore.activeMessages.slice(0, -1), + assistantMessage + ); } catch (error) { - console.error('Failed to delete conversation:', error); + if (!this.isAbortError(error)) console.error('Failed to regenerate message:', error); + this.setChatLoading(activeConv?.id || '', false); } } - /** - * Gets information about what messages will be deleted when deleting a specific message - * @param messageId - The ID of the message to be deleted - * @returns Object with deletion info including count and types of messages - */ async getDeletionInfo(messageId: string): Promise<{ totalCount: number; userMessages: number; assistantMessages: number; messageTypes: string[]; }> { - if (!this.activeConversation) { + const activeConv = conversationsStore.activeConversation; + if (!activeConv) return { totalCount: 0, userMessages: 0, assistantMessages: 0, messageTypes: [] }; - } - - const allMessages = await DatabaseStore.getConversationMessages(this.activeConversation.id); + const allMessages = await conversationsStore.getConversationMessages(activeConv.id); const descendants = findDescendantMessages(allMessages, messageId); const allToDelete = [messageId, ...descendants]; - const messagesToDelete = allMessages.filter((m) => allToDelete.includes(m.id)); - - let userMessages = 0; - let assistantMessages = 0; + let userMessages = 0, + assistantMessages = 0; const messageTypes: string[] = []; - for (const msg of messagesToDelete) { if (msg.role === 'user') { userMessages++; @@ -1278,409 +814,187 @@ class ChatStore { if (!messageTypes.includes('assistant response')) messageTypes.push('assistant response'); } } - - return { - totalCount: allToDelete.length, - userMessages, - assistantMessages, - messageTypes - }; + return { totalCount: allToDelete.length, userMessages, assistantMessages, messageTypes }; } - /** - * Deletes a message and all its descendants, updating conversation path if needed - * @param messageId - The ID of the message to delete - */ async deleteMessage(messageId: string): Promise { + const activeConv = conversationsStore.activeConversation; + if (!activeConv) return; try { - if (!this.activeConversation) return; - - // Get all messages to find siblings before deletion - const allMessages = await DatabaseStore.getConversationMessages(this.activeConversation.id); + const allMessages = await conversationsStore.getConversationMessages(activeConv.id); const messageToDelete = allMessages.find((m) => m.id === messageId); + if (!messageToDelete) return; - if (!messageToDelete) { - console.error('Message to delete not found'); - return; - } - - // Check if the deleted message is in the current conversation path - const currentPath = filterByLeafNodeId( - allMessages, - this.activeConversation.currNode || '', - false - ); + const currentPath = filterByLeafNodeId(allMessages, activeConv.currNode || '', false); const isInCurrentPath = currentPath.some((m) => m.id === messageId); - // If the deleted message is in the current path, we need to update currNode if (isInCurrentPath && messageToDelete.parent) { - // Find all siblings (messages with same parent) const siblings = allMessages.filter( (m) => m.parent === messageToDelete.parent && m.id !== 
messageId ); - if (siblings.length > 0) { - // Find the latest sibling (highest timestamp) const latestSibling = siblings.reduce((latest, sibling) => sibling.timestamp > latest.timestamp ? sibling : latest ); - - // Find the leaf node for this sibling branch to get the complete conversation path - const leafNodeId = findLeafNode(allMessages, latestSibling.id); - - // Update conversation to use the leaf node of the latest remaining sibling - await DatabaseStore.updateCurrentNode(this.activeConversation.id, leafNodeId); - this.activeConversation.currNode = leafNodeId; - } else { - // No siblings left, navigate to parent if it exists - if (messageToDelete.parent) { - const parentLeafId = findLeafNode(allMessages, messageToDelete.parent); - await DatabaseStore.updateCurrentNode(this.activeConversation.id, parentLeafId); - this.activeConversation.currNode = parentLeafId; - } + await conversationsStore.updateCurrentNode(findLeafNode(allMessages, latestSibling.id)); + } else if (messageToDelete.parent) { + await conversationsStore.updateCurrentNode( + findLeafNode(allMessages, messageToDelete.parent) + ); } } - - // Use cascading deletion to remove the message and all its descendants - await DatabaseStore.deleteMessageCascading(this.activeConversation.id, messageId); - - // Refresh active messages to show the updated branch - await this.refreshActiveMessages(); - - // Update conversation timestamp - this.updateConversationTimestamp(); + await DatabaseService.deleteMessageCascading(activeConv.id, messageId); + await conversationsStore.refreshActiveMessages(); + conversationsStore.updateConversationTimestamp(); } catch (error) { console.error('Failed to delete message:', error); } } - /** - * Clears the active conversation and messages - * Used when navigating away from chat or starting fresh - * Note: Does not stop ongoing streaming to allow background completion - */ - clearActiveConversation(): void { - this.activeConversation = null; - this.activeMessages = []; - this.isLoading = false; - this.currentResponse = ''; - slotsService.setActiveConversation(null); - } - - /** Refreshes active messages based on currNode after branch navigation */ - async refreshActiveMessages(): Promise { - if (!this.activeConversation) return; - - const allMessages = await DatabaseStore.getConversationMessages(this.activeConversation.id); - if (allMessages.length === 0) { - this.activeMessages = []; - return; - } - - const leafNodeId = - this.activeConversation.currNode || - allMessages.reduce((latest, msg) => (msg.timestamp > latest.timestamp ? 
msg : latest)).id; - - const currentPath = filterByLeafNodeId(allMessages, leafNodeId, false) as DatabaseMessage[]; - - this.activeMessages.length = 0; - this.activeMessages.push(...currentPath); - } - - /** - * Navigates to a specific sibling branch by updating currNode and refreshing messages - * @param siblingId - The sibling message ID to navigate to - */ - async navigateToSibling(siblingId: string): Promise { - if (!this.activeConversation) return; - - // Get the current first user message before navigation - const allMessages = await DatabaseStore.getConversationMessages(this.activeConversation.id); - const rootMessage = allMessages.find((m) => m.type === 'root' && m.parent === null); - const currentFirstUserMessage = this.activeMessages.find( - (m) => m.role === 'user' && m.parent === rootMessage?.id - ); - - const currentLeafNodeId = findLeafNode(allMessages, siblingId); - - await DatabaseStore.updateCurrentNode(this.activeConversation.id, currentLeafNodeId); - this.activeConversation.currNode = currentLeafNodeId; - await this.refreshActiveMessages(); - - // Only show title dialog if we're navigating between different first user message siblings - if (rootMessage && this.activeMessages.length > 0) { - // Find the first user message in the new active path - const newFirstUserMessage = this.activeMessages.find( - (m) => m.role === 'user' && m.parent === rootMessage.id - ); - - // Only show dialog if: - // 1. We have a new first user message - // 2. It's different from the previous one (different ID or content) - // 3. The new message has content - if ( - newFirstUserMessage && - newFirstUserMessage.content.trim() && - (!currentFirstUserMessage || - newFirstUserMessage.id !== currentFirstUserMessage.id || - newFirstUserMessage.content.trim() !== currentFirstUserMessage.content.trim()) - ) { - await this.updateConversationTitleWithConfirmation( - this.activeConversation.id, - newFirstUserMessage.content.trim(), - this.titleUpdateConfirmationCallback - ); - } - } - } + // ───────────────────────────────────────────────────────────────────────────── + // Editing + // ───────────────────────────────────────────────────────────────────────────── - /** - * Edits an assistant message with optional branching - * @param messageId - The ID of the assistant message to edit - * @param newContent - The new content for the message - * @param shouldBranch - Whether to create a branch or replace in-place - */ async editAssistantMessage( messageId: string, newContent: string, shouldBranch: boolean ): Promise { - if (!this.activeConversation || this.isLoading) return; - + const activeConv = conversationsStore.activeConversation; + if (!activeConv || this.isLoading) return; try { - const messageIndex = this.findMessageIndex(messageId); - - if (messageIndex === -1) { - console.error('Message not found for editing'); - return; - } - - const messageToEdit = this.activeMessages[messageIndex]; - - if (messageToEdit.role !== 'assistant') { - console.error('Only assistant messages can be edited with this method'); - return; - } + const idx = conversationsStore.findMessageIndex(messageId); + if (idx === -1) return; + const msg = conversationsStore.activeMessages[idx]; + if (msg.role !== 'assistant') return; if (shouldBranch) { - const newMessage = await DatabaseStore.createMessageBranch( + const newMessage = await DatabaseService.createMessageBranch( { - convId: messageToEdit.convId, - type: messageToEdit.type, + convId: msg.convId, + type: msg.type, timestamp: Date.now(), - role: messageToEdit.role, + role: 
msg.role, content: newContent, - thinking: messageToEdit.thinking || '', - toolCalls: messageToEdit.toolCalls || '', + thinking: msg.thinking || '', + toolCalls: msg.toolCalls || '', children: [], - model: messageToEdit.model // Preserve original model info when branching + model: msg.model }, - messageToEdit.parent! + msg.parent! ); - - await DatabaseStore.updateCurrentNode(this.activeConversation.id, newMessage.id); - this.activeConversation.currNode = newMessage.id; + await conversationsStore.updateCurrentNode(newMessage.id); } else { - await DatabaseStore.updateMessage(messageToEdit.id, { - content: newContent, - timestamp: Date.now() - }); - - // Ensure currNode points to the edited message to maintain correct path - await DatabaseStore.updateCurrentNode(this.activeConversation.id, messageToEdit.id); - this.activeConversation.currNode = messageToEdit.id; - - this.updateMessageAtIndex(messageIndex, { + await DatabaseService.updateMessage(msg.id, { content: newContent, timestamp: Date.now() }); + await conversationsStore.updateCurrentNode(msg.id); + conversationsStore.updateMessageAtIndex(idx, { content: newContent, timestamp: Date.now() }); } - - this.updateConversationTimestamp(); - await this.refreshActiveMessages(); + conversationsStore.updateConversationTimestamp(); + await conversationsStore.refreshActiveMessages(); } catch (error) { console.error('Failed to edit assistant message:', error); } } - /** - * Edits a user message and preserves all responses below - * Updates the message content in-place without deleting or regenerating responses - * - * **Use Case**: When you want to fix a typo or rephrase a question without losing the assistant's response - * - * **Important Behavior:** - * - Does NOT create a branch (unlike editMessageWithBranching) - * - Does NOT regenerate assistant responses - * - Only updates the user message content in the database - * - Preserves the entire conversation tree below the edited message - * - Updates conversation title if this is the first user message - * - * @param messageId - The ID of the user message to edit - * @param newContent - The new content for the message - */ async editUserMessagePreserveResponses(messageId: string, newContent: string): Promise { - if (!this.activeConversation) return; - + const activeConv = conversationsStore.activeConversation; + if (!activeConv) return; try { - const messageIndex = this.findMessageIndex(messageId); - if (messageIndex === -1) { - console.error('Message not found for editing'); - return; - } - - const messageToEdit = this.activeMessages[messageIndex]; - if (messageToEdit.role !== 'user') { - console.error('Only user messages can be edited with this method'); - return; - } - - // Simply update the message content in-place - await DatabaseStore.updateMessage(messageId, { - content: newContent, - timestamp: Date.now() - }); + const idx = conversationsStore.findMessageIndex(messageId); + if (idx === -1) return; + const msg = conversationsStore.activeMessages[idx]; + if (msg.role !== 'user') return; - this.updateMessageAtIndex(messageIndex, { + await DatabaseService.updateMessage(messageId, { content: newContent, timestamp: Date.now() }); + conversationsStore.updateMessageAtIndex(idx, { content: newContent, timestamp: Date.now() }); - // Check if first user message for title update - const allMessages = await DatabaseStore.getConversationMessages(this.activeConversation.id); + const allMessages = await conversationsStore.getConversationMessages(activeConv.id); const rootMessage = allMessages.find((m) => 
m.type === 'root' && m.parent === null); - const isFirstUserMessage = - rootMessage && messageToEdit.parent === rootMessage.id && messageToEdit.role === 'user'; - - if (isFirstUserMessage && newContent.trim()) { - await this.updateConversationTitleWithConfirmation( - this.activeConversation.id, + if (rootMessage && msg.parent === rootMessage.id && newContent.trim()) { + await conversationsStore.updateConversationTitleWithConfirmation( + activeConv.id, newContent.trim(), - this.titleUpdateConfirmationCallback + conversationsStore.titleUpdateConfirmationCallback ); } - - this.updateConversationTimestamp(); + conversationsStore.updateConversationTimestamp(); } catch (error) { console.error('Failed to edit user message:', error); } } - /** - * Edits a message by creating a new branch with the edited content - * @param messageId - The ID of the message to edit - * @param newContent - The new content for the message - */ async editMessageWithBranching(messageId: string, newContent: string): Promise { - if (!this.activeConversation || this.isLoading) return; - + const activeConv = conversationsStore.activeConversation; + if (!activeConv || this.isLoading) return; try { - const messageIndex = this.findMessageIndex(messageId); - if (messageIndex === -1) { - console.error('Message not found for editing'); - return; - } - - const messageToEdit = this.activeMessages[messageIndex]; - if (messageToEdit.role !== 'user') { - console.error('Only user messages can be edited'); - return; - } + const idx = conversationsStore.findMessageIndex(messageId); + if (idx === -1) return; + const msg = conversationsStore.activeMessages[idx]; + if (msg.role !== 'user') return; - // Check if this is the first user message in the conversation - // First user message is one that has the root message as its parent - const allMessages = await DatabaseStore.getConversationMessages(this.activeConversation.id); + const allMessages = await conversationsStore.getConversationMessages(activeConv.id); const rootMessage = allMessages.find((m) => m.type === 'root' && m.parent === null); - const isFirstUserMessage = - rootMessage && messageToEdit.parent === rootMessage.id && messageToEdit.role === 'user'; - - let parentId = messageToEdit.parent; - - if (parentId === undefined || parentId === null) { - const rootMessage = allMessages.find((m) => m.type === 'root' && m.parent === null); - if (rootMessage) { - parentId = rootMessage.id; - } else { - console.error('No root message found for editing'); - return; - } - } + const isFirstUserMessage = rootMessage && msg.parent === rootMessage.id; + const parentId = msg.parent || rootMessage?.id; + if (!parentId) return; - const newMessage = await DatabaseStore.createMessageBranch( + const newMessage = await DatabaseService.createMessageBranch( { - convId: messageToEdit.convId, - type: messageToEdit.type, + convId: msg.convId, + type: msg.type, timestamp: Date.now(), - role: messageToEdit.role, + role: msg.role, content: newContent, - thinking: messageToEdit.thinking || '', - toolCalls: messageToEdit.toolCalls || '', + thinking: msg.thinking || '', + toolCalls: msg.toolCalls || '', children: [], - extra: messageToEdit.extra ? JSON.parse(JSON.stringify(messageToEdit.extra)) : undefined, - model: messageToEdit.model // Preserve original model info when branching + extra: msg.extra ? 
JSON.parse(JSON.stringify(msg.extra)) : undefined, + model: msg.model }, parentId ); + await conversationsStore.updateCurrentNode(newMessage.id); + conversationsStore.updateConversationTimestamp(); - await DatabaseStore.updateCurrentNode(this.activeConversation.id, newMessage.id); - this.activeConversation.currNode = newMessage.id; - this.updateConversationTimestamp(); - - // If this is the first user message, update the conversation title with confirmation if needed if (isFirstUserMessage && newContent.trim()) { - await this.updateConversationTitleWithConfirmation( - this.activeConversation.id, + await conversationsStore.updateConversationTitleWithConfirmation( + activeConv.id, newContent.trim(), - this.titleUpdateConfirmationCallback + conversationsStore.titleUpdateConfirmationCallback ); } - - await this.refreshActiveMessages(); - - if (messageToEdit.role === 'user') { - await this.generateResponseForMessage(newMessage.id); - } + await conversationsStore.refreshActiveMessages(); + await this.generateResponseForMessage(newMessage.id); } catch (error) { console.error('Failed to edit message with branching:', error); } } - /** - * Regenerates an assistant message by creating a new branch with a new response - * @param messageId - The ID of the assistant message to regenerate - */ - async regenerateMessageWithBranching(messageId: string): Promise { - if (!this.activeConversation || this.isLoading) return; - + async regenerateMessageWithBranching(messageId: string, modelOverride?: string): Promise { + const activeConv = conversationsStore.activeConversation; + if (!activeConv || this.isLoading) return; try { - const messageIndex = this.findMessageIndex(messageId); - if (messageIndex === -1) { - console.error('Message not found for regeneration'); - return; - } - - const messageToRegenerate = this.activeMessages[messageIndex]; - if (messageToRegenerate.role !== 'assistant') { - console.error('Only assistant messages can be regenerated'); - return; - } + const idx = conversationsStore.findMessageIndex(messageId); + if (idx === -1) return; + const msg = conversationsStore.activeMessages[idx]; + if (msg.role !== 'assistant') return; - // Find parent message in all conversation messages, not just active path - const conversationMessages = await DatabaseStore.getConversationMessages( - this.activeConversation.id - ); - const parentMessage = conversationMessages.find((m) => m.id === messageToRegenerate.parent); - if (!parentMessage) { - console.error('Parent message not found for regeneration'); - return; - } + const allMessages = await conversationsStore.getConversationMessages(activeConv.id); + const parentMessage = allMessages.find((m) => m.id === msg.parent); + if (!parentMessage) return; - this.setConversationLoading(this.activeConversation.id, true); - this.clearConversationStreaming(this.activeConversation.id); + this.setChatLoading(activeConv.id, true); + this.clearChatStreaming(activeConv.id); - const newAssistantMessage = await DatabaseStore.createMessageBranch( + const newAssistantMessage = await DatabaseService.createMessageBranch( { - convId: this.activeConversation.id, + convId: activeConv.id, type: 'text', timestamp: Date.now(), role: 'assistant', @@ -1692,54 +1006,49 @@ class ChatStore { }, parentMessage.id ); + await conversationsStore.updateCurrentNode(newAssistantMessage.id); + conversationsStore.updateConversationTimestamp(); + await conversationsStore.refreshActiveMessages(); - await DatabaseStore.updateCurrentNode(this.activeConversation.id, newAssistantMessage.id); - 
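+ // Model precedence for the regenerated branch (mirrors modelToUse below):
+ // explicit modelOverride, then the model recorded on the original message,
+ // then the global selection when neither is set.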
this.activeConversation.currNode = newAssistantMessage.id; - this.updateConversationTimestamp(); - await this.refreshActiveMessages(); - - const allConversationMessages = await DatabaseStore.getConversationMessages( - this.activeConversation.id - ); const conversationPath = filterByLeafNodeId( - allConversationMessages, + allMessages, parentMessage.id, false ) as DatabaseMessage[]; - - await this.streamChatCompletion(conversationPath, newAssistantMessage); + // Use modelOverride if provided, otherwise use the original message's model + // If neither is available, don't pass model (will use global selection) + const modelToUse = modelOverride || msg.model || undefined; + await this.streamChatCompletion( + conversationPath, + newAssistantMessage, + undefined, + undefined, + modelToUse + ); } catch (error) { - if (this.isAbortError(error)) return; - - console.error('Failed to regenerate message with branching:', error); - this.setConversationLoading(this.activeConversation!.id, false); + if (!this.isAbortError(error)) + console.error('Failed to regenerate message with branching:', error); + this.setChatLoading(activeConv?.id || '', false); } } - /** - * Generates a new assistant response for a given user message - * @param userMessageId - ID of user message to respond to - */ private async generateResponseForMessage(userMessageId: string): Promise { - if (!this.activeConversation) return; - + const activeConv = conversationsStore.activeConversation; + if (!activeConv) return; this.errorDialogState = null; - this.setConversationLoading(this.activeConversation.id, true); - this.clearConversationStreaming(this.activeConversation.id); + this.setChatLoading(activeConv.id, true); + this.clearChatStreaming(activeConv.id); try { - // Get conversation path up to the user message - const allMessages = await DatabaseStore.getConversationMessages(this.activeConversation.id); + const allMessages = await conversationsStore.getConversationMessages(activeConv.id); const conversationPath = filterByLeafNodeId( allMessages, userMessageId, false ) as DatabaseMessage[]; - - // Create new assistant message branch - const assistantMessage = await DatabaseStore.createMessageBranch( + const assistantMessage = await DatabaseService.createMessageBranch( { - convId: this.activeConversation.id, + convId: activeConv.id, type: 'text', timestamp: Date.now(), role: 'assistant', @@ -1751,120 +1060,83 @@ class ChatStore { }, userMessageId ); - - // Add assistant message to active messages immediately for UI reactivity - this.activeMessages.push(assistantMessage); - - // Stream response to new assistant message + conversationsStore.addMessageToActive(assistantMessage); await this.streamChatCompletion(conversationPath, assistantMessage); } catch (error) { console.error('Failed to generate response:', error); - this.setConversationLoading(this.activeConversation!.id, false); + this.setChatLoading(activeConv.id, false); } } - /** - * Continues generation for an existing assistant message - * @param messageId - The ID of the assistant message to continue - */ async continueAssistantMessage(messageId: string): Promise { - if (!this.activeConversation || this.isLoading) return; - + const activeConv = conversationsStore.activeConversation; + if (!activeConv || this.isLoading) return; try { - const messageIndex = this.findMessageIndex(messageId); - if (messageIndex === -1) { - console.error('Message not found for continuation'); - return; - } - - const messageToContinue = this.activeMessages[messageIndex]; - if (messageToContinue.role 
!== 'assistant') { - console.error('Only assistant messages can be continued'); - return; - } - - // Race condition protection: Check if this specific conversation is already loading - // This prevents multiple rapid clicks on "Continue" from creating concurrent operations - if (this.isConversationLoading(this.activeConversation.id)) { - console.warn('Continuation already in progress for this conversation'); - return; - } + const idx = conversationsStore.findMessageIndex(messageId); + if (idx === -1) return; + const msg = conversationsStore.activeMessages[idx]; + if (msg.role !== 'assistant') return; + if (this.isChatLoading(activeConv.id)) return; this.errorDialogState = null; - this.setConversationLoading(this.activeConversation.id, true); - this.clearConversationStreaming(this.activeConversation.id); + this.setChatLoading(activeConv.id, true); + this.clearChatStreaming(activeConv.id); - // IMPORTANT: Fetch the latest content from the database to ensure we have - // the most up-to-date content, especially after a stopped generation - // This prevents issues where the in-memory state might be stale - const allMessages = await DatabaseStore.getConversationMessages(this.activeConversation.id); + const allMessages = await conversationsStore.getConversationMessages(activeConv.id); const dbMessage = allMessages.find((m) => m.id === messageId); - if (!dbMessage) { - console.error('Message not found in database for continuation'); - this.setConversationLoading(this.activeConversation.id, false); - + this.setChatLoading(activeConv.id, false); return; } - // Use content from database as the source of truth const originalContent = dbMessage.content; const originalThinking = dbMessage.thinking || ''; - - // Get conversation context up to (but not including) the message to continue - const conversationContext = this.activeMessages.slice(0, messageIndex); - + const conversationContext = conversationsStore.activeMessages.slice(0, idx); const contextWithContinue = [ - ...conversationContext.map((msg) => { - if ('id' in msg && 'convId' in msg && 'timestamp' in msg) { - return msg as DatabaseMessage & { extra?: DatabaseMessageExtra[] }; - } - return msg as ApiChatMessageData; - }), - { - role: 'assistant' as const, - content: originalContent - } + ...conversationContext, + { role: 'assistant' as const, content: originalContent } ]; - let appendedContent = ''; - let appendedThinking = ''; - let hasReceivedContent = false; + let appendedContent = '', + appendedThinking = '', + hasReceivedContent = false; + + const abortController = this.getOrCreateAbortController(msg.convId); - await chatService.sendMessage( + await ChatService.sendMessage( contextWithContinue, { ...this.getApiOptions(), - onChunk: (chunk: string) => { hasReceivedContent = true; appendedContent += chunk; - // Preserve originalContent exactly as-is, including any trailing whitespace - // The concatenation naturally preserves any whitespace at the end of originalContent const fullContent = originalContent + appendedContent; - - this.setConversationStreaming( - messageToContinue.convId, - fullContent, - messageToContinue.id - ); - - this.updateMessageAtIndex(messageIndex, { - content: fullContent - }); + this.setChatStreaming(msg.convId, fullContent, msg.id); + conversationsStore.updateMessageAtIndex(idx, { content: fullContent }); }, - onReasoningChunk: (reasoningChunk: string) => { hasReceivedContent = true; appendedThinking += reasoningChunk; - - const fullThinking = originalThinking + appendedThinking; - - 
this.updateMessageAtIndex(messageIndex, { - thinking: fullThinking + conversationsStore.updateMessageAtIndex(idx, { + thinking: originalThinking + appendedThinking }); }, - + onTimings: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => { + const tokensPerSecond = + timings?.predicted_ms && timings?.predicted_n + ? (timings.predicted_n / timings.predicted_ms) * 1000 + : 0; + this.updateProcessingStateFromTimings( + { + prompt_n: timings?.prompt_n || 0, + predicted_n: timings?.predicted_n || 0, + predicted_per_second: tokensPerSecond, + cache_n: timings?.cache_n || 0, + prompt_progress: promptProgress + }, + msg.convId + ); + }, onComplete: async ( finalContent?: string, reasoningContent?: string, @@ -1872,158 +1144,152 @@ class ChatStore { ) => { const fullContent = originalContent + (finalContent || appendedContent); const fullThinking = originalThinking + (reasoningContent || appendedThinking); - - const updateData: { - content: string; - thinking: string; - timestamp: number; - timings?: ChatMessageTimings; - } = { + await DatabaseService.updateMessage(msg.id, { content: fullContent, thinking: fullThinking, timestamp: Date.now(), - timings: timings - }; - - await DatabaseStore.updateMessage(messageToContinue.id, updateData); - - this.updateMessageAtIndex(messageIndex, updateData); - - this.updateConversationTimestamp(); - - this.setConversationLoading(messageToContinue.convId, false); - this.clearConversationStreaming(messageToContinue.convId); - slotsService.clearConversationState(messageToContinue.convId); + timings + }); + conversationsStore.updateMessageAtIndex(idx, { + content: fullContent, + thinking: fullThinking, + timestamp: Date.now(), + timings + }); + conversationsStore.updateConversationTimestamp(); + this.setChatLoading(msg.convId, false); + this.clearChatStreaming(msg.convId); + this.clearProcessingState(msg.convId); }, - onError: async (error: Error) => { if (this.isAbortError(error)) { - // User cancelled - save partial continuation if any content was received if (hasReceivedContent && appendedContent) { - const partialContent = originalContent + appendedContent; - const partialThinking = originalThinking + appendedThinking; - - await DatabaseStore.updateMessage(messageToContinue.id, { - content: partialContent, - thinking: partialThinking, + await DatabaseService.updateMessage(msg.id, { + content: originalContent + appendedContent, + thinking: originalThinking + appendedThinking, timestamp: Date.now() }); - - this.updateMessageAtIndex(messageIndex, { - content: partialContent, - thinking: partialThinking, + conversationsStore.updateMessageAtIndex(idx, { + content: originalContent + appendedContent, + thinking: originalThinking + appendedThinking, timestamp: Date.now() }); } - - this.setConversationLoading(messageToContinue.convId, false); - this.clearConversationStreaming(messageToContinue.convId); - slotsService.clearConversationState(messageToContinue.convId); - + this.setChatLoading(msg.convId, false); + this.clearChatStreaming(msg.convId); + this.clearProcessingState(msg.convId); return; } - - // Non-abort error - rollback to original content console.error('Continue generation error:', error); - - // Rollback: Restore original content in UI - this.updateMessageAtIndex(messageIndex, { + conversationsStore.updateMessageAtIndex(idx, { content: originalContent, thinking: originalThinking }); - - // Ensure database has original content (in case of partial writes) - await DatabaseStore.updateMessage(messageToContinue.id, { + await 
DatabaseService.updateMessage(msg.id, { content: originalContent, thinking: originalThinking }); - - this.setConversationLoading(messageToContinue.convId, false); - this.clearConversationStreaming(messageToContinue.convId); - slotsService.clearConversationState(messageToContinue.convId); - - const dialogType = error.name === 'TimeoutError' ? 'timeout' : 'server'; - this.showErrorDialog(dialogType, error.message); + this.setChatLoading(msg.convId, false); + this.clearChatStreaming(msg.convId); + this.clearProcessingState(msg.convId); + this.showErrorDialog( + error.name === 'TimeoutError' ? 'timeout' : 'server', + error.message + ); } }, - messageToContinue.convId + msg.convId, + abortController.signal ); } catch (error) { - if (this.isAbortError(error)) return; - console.error('Failed to continue message:', error); - if (this.activeConversation) { - this.setConversationLoading(this.activeConversation.id, false); - } + if (!this.isAbortError(error)) console.error('Failed to continue message:', error); + if (activeConv) this.setChatLoading(activeConv.id, false); } } - /** - * Public methods for accessing per-conversation states - */ - public isConversationLoadingPublic(convId: string): boolean { - return this.isConversationLoading(convId); + public isChatLoadingPublic(convId: string): boolean { + return this.isChatLoading(convId); } - - public getConversationStreamingPublic( + public getChatStreamingPublic( convId: string ): { response: string; messageId: string } | undefined { - return this.getConversationStreaming(convId); + return this.getChatStreaming(convId); } - - public getAllLoadingConversations(): string[] { - return Array.from(this.conversationLoadingStates.keys()); + public getAllLoadingChats(): string[] { + return Array.from(this.chatLoadingStates.keys()); + } + public getAllStreamingChats(): string[] { + return Array.from(this.chatStreamingStates.keys()); } - public getAllStreamingConversations(): string[] { - return Array.from(this.conversationStreamingStates.keys()); + // ───────────────────────────────────────────────────────────────────────────── + // Utilities + // ───────────────────────────────────────────────────────────────────────────── + + private getApiOptions(): Record<string, unknown> { + const currentConfig = config(); + const hasValue = (value: unknown): boolean => + value !== undefined && value !== null && value !== ''; + + const apiOptions: Record<string, unknown> = { stream: true, timings_per_token: true }; + + // Model selection (required in ROUTER mode) + if (isRouterMode()) { + const modelName = selectedModelName(); + if (modelName) apiOptions.model = modelName; + } + + // Config options needed by ChatService + if (currentConfig.systemMessage) apiOptions.systemMessage = currentConfig.systemMessage; + if (currentConfig.disableReasoningFormat) apiOptions.disableReasoningFormat = true; + + if (hasValue(currentConfig.temperature)) + apiOptions.temperature = Number(currentConfig.temperature); + if (hasValue(currentConfig.max_tokens)) + apiOptions.max_tokens = Number(currentConfig.max_tokens); + if (hasValue(currentConfig.dynatemp_range)) + apiOptions.dynatemp_range = Number(currentConfig.dynatemp_range); + if (hasValue(currentConfig.dynatemp_exponent)) + apiOptions.dynatemp_exponent = Number(currentConfig.dynatemp_exponent); + if (hasValue(currentConfig.top_k)) apiOptions.top_k = Number(currentConfig.top_k); + if (hasValue(currentConfig.top_p)) apiOptions.top_p = Number(currentConfig.top_p); + if (hasValue(currentConfig.min_p)) apiOptions.min_p = Number(currentConfig.min_p); + if
(hasValue(currentConfig.xtc_probability)) + apiOptions.xtc_probability = Number(currentConfig.xtc_probability); + if (hasValue(currentConfig.xtc_threshold)) + apiOptions.xtc_threshold = Number(currentConfig.xtc_threshold); + if (hasValue(currentConfig.typ_p)) apiOptions.typ_p = Number(currentConfig.typ_p); + if (hasValue(currentConfig.repeat_last_n)) + apiOptions.repeat_last_n = Number(currentConfig.repeat_last_n); + if (hasValue(currentConfig.repeat_penalty)) + apiOptions.repeat_penalty = Number(currentConfig.repeat_penalty); + if (hasValue(currentConfig.presence_penalty)) + apiOptions.presence_penalty = Number(currentConfig.presence_penalty); + if (hasValue(currentConfig.frequency_penalty)) + apiOptions.frequency_penalty = Number(currentConfig.frequency_penalty); + if (hasValue(currentConfig.dry_multiplier)) + apiOptions.dry_multiplier = Number(currentConfig.dry_multiplier); + if (hasValue(currentConfig.dry_base)) apiOptions.dry_base = Number(currentConfig.dry_base); + if (hasValue(currentConfig.dry_allowed_length)) + apiOptions.dry_allowed_length = Number(currentConfig.dry_allowed_length); + if (hasValue(currentConfig.dry_penalty_last_n)) + apiOptions.dry_penalty_last_n = Number(currentConfig.dry_penalty_last_n); + if (currentConfig.samplers) apiOptions.samplers = currentConfig.samplers; + if (currentConfig.custom) apiOptions.custom = currentConfig.custom; + + return apiOptions; } } export const chatStore = new ChatStore(); -export const conversations = () => chatStore.conversations; -export const activeConversation = () => chatStore.activeConversation; -export const activeMessages = () => chatStore.activeMessages; export const isLoading = () => chatStore.isLoading; export const currentResponse = () => chatStore.currentResponse; -export const isInitialized = () => chatStore.isInitialized; export const errorDialog = () => chatStore.errorDialogState; +export const activeProcessingState = () => chatStore.activeProcessingState; +export const isChatStreaming = () => chatStore.isStreaming(); -export const createConversation = chatStore.createConversation.bind(chatStore); -export const downloadConversation = chatStore.downloadConversation.bind(chatStore); -export const exportAllConversations = chatStore.exportAllConversations.bind(chatStore); -export const importConversations = chatStore.importConversations.bind(chatStore); -export const deleteConversation = chatStore.deleteConversation.bind(chatStore); -export const sendMessage = chatStore.sendMessage.bind(chatStore); -export const dismissErrorDialog = chatStore.dismissErrorDialog.bind(chatStore); - -export const gracefulStop = chatStore.gracefulStop.bind(chatStore); - -// Branching operations -export const refreshActiveMessages = chatStore.refreshActiveMessages.bind(chatStore); -export const navigateToSibling = chatStore.navigateToSibling.bind(chatStore); -export const editAssistantMessage = chatStore.editAssistantMessage.bind(chatStore); -export const editMessageWithBranching = chatStore.editMessageWithBranching.bind(chatStore); -export const editUserMessagePreserveResponses = - chatStore.editUserMessagePreserveResponses.bind(chatStore); -export const regenerateMessageWithBranching = - chatStore.regenerateMessageWithBranching.bind(chatStore); -export const continueAssistantMessage = chatStore.continueAssistantMessage.bind(chatStore); -export const deleteMessage = chatStore.deleteMessage.bind(chatStore); -export const getDeletionInfo = chatStore.getDeletionInfo.bind(chatStore); -export const updateConversationName = 
chatStore.updateConversationName.bind(chatStore); -export const setTitleUpdateConfirmationCallback = - chatStore.setTitleUpdateConfirmationCallback.bind(chatStore); - -export function stopGeneration() { - chatStore.stopGeneration(); -} -export const messages = () => chatStore.activeMessages; - -// Per-conversation state access -export const isConversationLoading = (convId: string) => - chatStore.isConversationLoadingPublic(convId); -export const getConversationStreaming = (convId: string) => - chatStore.getConversationStreamingPublic(convId); -export const getAllLoadingConversations = () => chatStore.getAllLoadingConversations(); -export const getAllStreamingConversations = () => chatStore.getAllStreamingConversations(); +export const isChatLoading = (convId: string) => chatStore.isChatLoadingPublic(convId); +export const getChatStreaming = (convId: string) => chatStore.getChatStreamingPublic(convId); +export const getAllLoadingChats = () => chatStore.getAllLoadingChats(); +export const getAllStreamingChats = () => chatStore.getAllStreamingChats(); diff --git a/tools/server/webui/src/lib/stores/conversations.svelte.ts b/tools/server/webui/src/lib/stores/conversations.svelte.ts new file mode 100644 index 00000000000..44ef36d6ee5 --- /dev/null +++ b/tools/server/webui/src/lib/stores/conversations.svelte.ts @@ -0,0 +1,627 @@ +import { browser } from '$app/environment'; +import { goto } from '$app/navigation'; +import { toast } from 'svelte-sonner'; +import { DatabaseService } from '$lib/services/database'; +import { config } from '$lib/stores/settings.svelte'; +import { filterByLeafNodeId, findLeafNode } from '$lib/utils'; +import { AttachmentType } from '$lib/enums'; + +/** + * conversationsStore - Persistent conversation data and lifecycle management + * + * **Terminology - Chat vs Conversation:** + * - **Chat**: The active interaction space with the Chat Completions API. Represents the + * real-time streaming session, loading states, and UI visualization of AI communication. + * Managed by chatStore; a "chat" is ephemeral and exists only during active AI interactions. + * - **Conversation**: The persistent database entity storing all messages and metadata. + * A "conversation" survives across sessions, page reloads, and browser restarts. + * It contains the complete message history, branching structure, and conversation metadata. + * + * This store manages all conversation-level data and operations including creation, loading, + * deletion, and navigation. It maintains the list of conversations and the currently active + * conversation with its message history, providing reactive state for UI components. + * + * **Architecture & Relationships:** + * - **conversationsStore** (this class): Persistent conversation data management + * - Manages conversation list and active conversation state + * - Handles conversation CRUD operations via DatabaseService + * - Maintains active message array for current conversation + * - Coordinates branching navigation (currNode tracking) + * + * - **chatStore**: Uses conversation data as context for active AI streaming + * - **DatabaseService**: Low-level IndexedDB storage for conversations and messages + * + * **Key Features:** + * - **Conversation Lifecycle**: Create, load, update, delete conversations + * - **Message Management**: Active message array with branching support + * - **Import/Export**: JSON-based conversation backup and restore + * - **Branch Navigation**: Navigate between message tree branches + * - **Title Management**: Auto-update titles with confirmation dialogs + * - **Reactive State**: Svelte 5 runes for automatic UI updates + * + * **State Properties:** + * - `conversations`: All conversations sorted by last modified + * - `activeConversation`: Currently viewed conversation + * - `activeMessages`: Messages in current conversation path + * - `isInitialized`: Store initialization status + */
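To make the chat/conversation split concrete, here is a hedged sketch of a consumer: `startFreshChat` is hypothetical, while `createConversation` and `loadConversation` are the store methods defined in this file.

```ts
// Hypothetical consumer of conversationsStore (sketch only; not part of the diff).
import { conversationsStore } from '$lib/stores/conversations.svelte';

export async function startFreshChat(title?: string): Promise<string> {
	// Persists a new conversation row, navigates to it, and resets activeMessages.
	const convId = await conversationsStore.createConversation(title);

	// Any later visit can rehydrate the same conversation from IndexedDB.
	const ok = await conversationsStore.loadConversation(convId);
	if (!ok) throw new Error(`conversation ${convId} not found`);

	return convId;
}
```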
+class ConversationsStore { + // ───────────────────────────────────────────────────────────────────────────── + // State + // ───────────────────────────────────────────────────────────────────────────── + + /** List of all conversations */ + conversations = $state<DatabaseConversation[]>([]); + + /** Currently active conversation */ + activeConversation = $state<DatabaseConversation | null>(null); + + /** Messages in the active conversation (filtered by currNode path) */ + activeMessages = $state<DatabaseMessage[]>([]); + + /** Whether the store has been initialized */ + isInitialized = $state(false); + + /** Callback for title update confirmation dialog */ + titleUpdateConfirmationCallback?: (currentTitle: string, newTitle: string) => Promise<boolean>; + + // ───────────────────────────────────────────────────────────────────────────── + // Modalities + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Modalities used in the active conversation. + * Computed from attachments in activeMessages. + * Used to filter available models - models must support all used modalities. + */ + usedModalities: ModelModalities = $derived.by(() => { + return this.calculateModalitiesFromMessages(this.activeMessages); + }); + + /** + * Calculate modalities from a list of messages. + * Helper method used by both usedModalities and getModalitiesUpToMessage. + */ + private calculateModalitiesFromMessages(messages: DatabaseMessage[]): ModelModalities { + const modalities: ModelModalities = { vision: false, audio: false }; + + for (const message of messages) { + if (!message.extra) continue; + + for (const extra of message.extra) { + if (extra.type === AttachmentType.IMAGE) { + modalities.vision = true; + } + + // PDF only requires vision if processed as images + if (extra.type === AttachmentType.PDF) { + const pdfExtra = extra as DatabaseMessageExtraPdfFile; + + if (pdfExtra.processedAsImages) { + modalities.vision = true; + } + } + + if (extra.type === AttachmentType.AUDIO) { + modalities.audio = true; + } + } + + if (modalities.vision && modalities.audio) break; + } + + return modalities; + }
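The derived `usedModalities` above is what gates model selection. A minimal sketch of that rule, assuming the store accessors introduced elsewhere in this diff (`eligibleModelIds` itself is hypothetical): a model stays selectable only if it covers every modality the conversation already uses.

```ts
// Hypothetical helper illustrating the modality gating rule (sketch only).
import { modelsStore } from '$lib/stores/models.svelte';
import { conversationsStore } from '$lib/stores/conversations.svelte';

export function eligibleModelIds(): string[] {
	const used = conversationsStore.usedModalities; // { vision, audio }

	return modelsStore.models
		.filter((m) => {
			const caps = modelsStore.getModelModalities(m.model);
			// Unknown capabilities are only safe when the chat is text-only.
			if (!caps) return !used.vision && !used.audio;
			return (!used.vision || caps.vision) && (!used.audio || caps.audio);
		})
		.map((m) => m.id);
}
```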
+ + /** + * Get modalities used in messages BEFORE the specified message. + * Used for regeneration - only consider context that was available when generating this message. + */ + getModalitiesUpToMessage(messageId: string): ModelModalities { + const messageIndex = this.activeMessages.findIndex((m) => m.id === messageId); + + if (messageIndex === -1) { + return this.usedModalities; + } + + const messagesBefore = this.activeMessages.slice(0, messageIndex); + return this.calculateModalitiesFromMessages(messagesBefore); + } + + constructor() { + if (browser) { + this.initialize(); + } + } + + // ───────────────────────────────────────────────────────────────────────────── + // Lifecycle + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Initializes the conversations store by loading conversations from the database + */ + async initialize(): Promise<void> { + try { + await this.loadConversations(); + this.isInitialized = true; + } catch (error) { + console.error('Failed to initialize conversations store:', error); + } + } + + /** + * Loads all conversations from the database + */ + async loadConversations(): Promise<void> { + this.conversations = await DatabaseService.getAllConversations(); + } + + // ───────────────────────────────────────────────────────────────────────────── + // Conversation CRUD + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Creates a new conversation and navigates to it + * @param name - Optional name for the conversation + * @returns The ID of the created conversation + */ + async createConversation(name?: string): Promise<string> { + const conversationName = name || `Chat ${new Date().toLocaleString()}`; + const conversation = await DatabaseService.createConversation(conversationName); + + this.conversations.unshift(conversation); + this.activeConversation = conversation; + this.activeMessages = []; + + await goto(`#/chat/${conversation.id}`); + + return conversation.id; + } + + /** + * Loads a specific conversation and its messages + * @param convId - The conversation ID to load + * @returns True if conversation was loaded successfully + */ + async loadConversation(convId: string): Promise<boolean> { + try { + const conversation = await DatabaseService.getConversation(convId); + + if (!conversation) { + return false; + } + + this.activeConversation = conversation; + + if (conversation.currNode) { + const allMessages = await DatabaseService.getConversationMessages(convId); + this.activeMessages = filterByLeafNodeId( + allMessages, + conversation.currNode, + false + ) as DatabaseMessage[]; + } else { + this.activeMessages = await DatabaseService.getConversationMessages(convId); + } + + return true; + } catch (error) { + console.error('Failed to load conversation:', error); + return false; + } + } + + /** + * Clears the active conversation and messages + * Used when navigating away from chat or starting fresh + */ + clearActiveConversation(): void { + this.activeConversation = null; + this.activeMessages = []; + // Active processing conversation is now managed by chatStore + }
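The message-management block that follows rebuilds `activeMessages` by walking the branch tree from the selected leaf (`currNode`). A standalone sketch of that walk with simplified, hypothetical types; the store's real implementation is `filterByLeafNodeId` from `$lib/utils`, not this helper.

```ts
// Simplified illustration of leaf-to-root path reconstruction (sketch only).
type TreeNode = { id: string; parent: string | null };

export function pathToRoot(nodes: TreeNode[], leafId: string): TreeNode[] {
	const byId = new Map(nodes.map((n) => [n.id, n]));
	const path: TreeNode[] = [];

	// Follow parent links until the root (parent === null) is reached.
	for (let cur = byId.get(leafId); cur; cur = cur.parent ? byId.get(cur.parent) : undefined) {
		path.unshift(cur); // prepend so the result reads root -> leaf
	}

	return path;
}
```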
+ + // ───────────────────────────────────────────────────────────────────────────── + // Message Management + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Refreshes active messages based on currNode after branch navigation + */ + async refreshActiveMessages(): Promise<void> { + if (!this.activeConversation) return; + + const allMessages = await DatabaseService.getConversationMessages(this.activeConversation.id); + + if (allMessages.length === 0) { + this.activeMessages = []; + return; + } + + const leafNodeId = + this.activeConversation.currNode || + allMessages.reduce((latest, msg) => (msg.timestamp > latest.timestamp ? msg : latest)).id; + + const currentPath = filterByLeafNodeId(allMessages, leafNodeId, false) as DatabaseMessage[]; + + this.activeMessages.length = 0; + this.activeMessages.push(...currentPath); + } + + /** + * Updates the name of a conversation + * @param convId - The conversation ID to update + * @param name - The new name for the conversation + */ + async updateConversationName(convId: string, name: string): Promise<void> { + try { + await DatabaseService.updateConversation(convId, { name }); + + const convIndex = this.conversations.findIndex((c) => c.id === convId); + + if (convIndex !== -1) { + this.conversations[convIndex].name = name; + } + + if (this.activeConversation?.id === convId) { + this.activeConversation.name = name; + } + } catch (error) { + console.error('Failed to update conversation name:', error); + } + } + + /** + * Updates conversation title with optional confirmation dialog based on settings + * @param convId - The conversation ID to update + * @param newTitle - The new title content + * @param onConfirmationNeeded - Callback when user confirmation is needed + * @returns True if title was updated, false if cancelled + */ + async updateConversationTitleWithConfirmation( + convId: string, + newTitle: string, + onConfirmationNeeded?: (currentTitle: string, newTitle: string) => Promise<boolean> + ): Promise<boolean> { + try { + const currentConfig = config(); + + if (currentConfig.askForTitleConfirmation && onConfirmationNeeded) { + const conversation = await DatabaseService.getConversation(convId); + if (!conversation) return false; + + const shouldUpdate = await onConfirmationNeeded(conversation.name, newTitle); + if (!shouldUpdate) return false; + } + + await this.updateConversationName(convId, newTitle); + return true; + } catch (error) { + console.error('Failed to update conversation title with confirmation:', error); + return false; + } + } + + // ───────────────────────────────────────────────────────────────────────────── + // Navigation + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Updates the current node of the active conversation + * @param nodeId - The new current node ID + */ + async updateCurrentNode(nodeId: string): Promise<void> { + if (!this.activeConversation) return; + + await DatabaseService.updateCurrentNode(this.activeConversation.id, nodeId); + this.activeConversation.currNode = nodeId; + } + + /** + * Updates conversation lastModified timestamp and moves it to top of list + */ + updateConversationTimestamp(): void { + if (!this.activeConversation) return; + + const chatIndex = this.conversations.findIndex((c) => c.id === this.activeConversation!.id); + + if (chatIndex !== -1) { + this.conversations[chatIndex].lastModified = Date.now(); + const updatedConv = this.conversations.splice(chatIndex, 1)[0]; + this.conversations.unshift(updatedConv); + } + } + + /** + * Navigates to a specific sibling branch by updating currNode and refreshing messages + * @param siblingId - The sibling message ID to navigate to + */ + async navigateToSibling(siblingId: string): Promise<void> { + if (!this.activeConversation) return; + + const allMessages = await DatabaseService.getConversationMessages(this.activeConversation.id); + const rootMessage = allMessages.find((m) => m.type === 'root' && m.parent === null); + const currentFirstUserMessage = this.activeMessages.find( + (m) => m.role === 'user' && m.parent === rootMessage?.id + ); + + const currentLeafNodeId = 
findLeafNode(allMessages, siblingId); + + await DatabaseService.updateCurrentNode(this.activeConversation.id, currentLeafNodeId); + this.activeConversation.currNode = currentLeafNodeId; + await this.refreshActiveMessages(); + + // Only show title dialog if we're navigating between different first user message siblings + if (rootMessage && this.activeMessages.length > 0) { + const newFirstUserMessage = this.activeMessages.find( + (m) => m.role === 'user' && m.parent === rootMessage.id + ); + + if ( + newFirstUserMessage && + newFirstUserMessage.content.trim() && + (!currentFirstUserMessage || + newFirstUserMessage.id !== currentFirstUserMessage.id || + newFirstUserMessage.content.trim() !== currentFirstUserMessage.content.trim()) + ) { + await this.updateConversationTitleWithConfirmation( + this.activeConversation.id, + newFirstUserMessage.content.trim(), + this.titleUpdateConfirmationCallback + ); + } + } + } + + /** + * Deletes a conversation and all its messages + * @param convId - The conversation ID to delete + */ + async deleteConversation(convId: string): Promise<void> { + try { + await DatabaseService.deleteConversation(convId); + + this.conversations = this.conversations.filter((c) => c.id !== convId); + + if (this.activeConversation?.id === convId) { + this.activeConversation = null; + this.activeMessages = []; + await goto(`?new_chat=true#/`); + } + } catch (error) { + console.error('Failed to delete conversation:', error); + } + } + + // ───────────────────────────────────────────────────────────────────────────── + // Import/Export + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Downloads a conversation as JSON file + * @param convId - The conversation ID to download + */ + async downloadConversation(convId: string): Promise<void> { + let conversation: DatabaseConversation | null; + let messages: DatabaseMessage[]; + + if (this.activeConversation?.id === convId) { + conversation = this.activeConversation; + messages = this.activeMessages; + } else { + conversation = await DatabaseService.getConversation(convId); + if (!conversation) return; + messages = await DatabaseService.getConversationMessages(convId); + } + + this.triggerDownload({ conv: conversation, messages }); + } + + /** + * Exports all conversations with their messages as a JSON file + * @returns The list of exported conversations + */ + async exportAllConversations(): Promise<DatabaseConversation[]> { + const allConversations = await DatabaseService.getAllConversations(); + + if (allConversations.length === 0) { + throw new Error('No conversations to export'); + } + + const allData = await Promise.all( + allConversations.map(async (conv) => { + const messages = await DatabaseService.getConversationMessages(conv.id); + return { conv, messages }; + }) + ); + + const blob = new Blob([JSON.stringify(allData, null, 2)], { type: 'application/json' }); + const url = URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + a.download = `all_conversations_${new Date().toISOString().split('T')[0]}.json`; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); + + toast.success(`All conversations (${allConversations.length}) prepared for download`); + + return allConversations; + } + + /** + * Imports conversations from a JSON file + * Opens file picker and processes the selected file + * @returns The list of imported conversations + */ + async importConversations(): Promise<DatabaseConversation[]> { + return new Promise<DatabaseConversation[]>((resolve, reject) => { + const input = 
document.createElement('input'); + input.type = 'file'; + input.accept = '.json'; + + input.onchange = async (e) => { + const file = (e.target as HTMLInputElement)?.files?.[0]; + + if (!file) { + reject(new Error('No file selected')); + return; + } + + try { + const text = await file.text(); + const parsedData = JSON.parse(text); + let importedData: ExportedConversations; + + if (Array.isArray(parsedData)) { + importedData = parsedData; + } else if ( + parsedData && + typeof parsedData === 'object' && + 'conv' in parsedData && + 'messages' in parsedData + ) { + importedData = [parsedData]; + } else { + throw new Error('Invalid file format'); + } + + const result = await DatabaseService.importConversations(importedData); + toast.success(`Imported ${result.imported} conversation(s), skipped ${result.skipped}`); + + await this.loadConversations(); + + const importedConversations = ( + Array.isArray(importedData) ? importedData : [importedData] + ).map((item) => item.conv); + + resolve(importedConversations); + } catch (err: unknown) { + const message = err instanceof Error ? err.message : 'Unknown error'; + console.error('Failed to import conversations:', err); + toast.error('Import failed', { description: message }); + reject(new Error(`Import failed: ${message}`)); + } + }; + + input.click(); + }); + } + + /** + * Gets all messages for a specific conversation + * @param convId - The conversation ID + * @returns Array of messages + */ + async getConversationMessages(convId: string): Promise<DatabaseMessage[]> { + return await DatabaseService.getConversationMessages(convId); + } + + /** + * Adds a message to the active messages array + * Used by chatStore when creating new messages + * @param message - The message to add + */ + addMessageToActive(message: DatabaseMessage): void { + this.activeMessages.push(message); + } + + /** + * Updates a message at a specific index in active messages + * Creates a new object to trigger Svelte 5 reactivity + * @param index - The index of the message to update + * @param updates - Partial message data to update + */ + updateMessageAtIndex(index: number, updates: Partial<DatabaseMessage>): void { + if (index !== -1 && this.activeMessages[index]) { + // Create new object to trigger Svelte 5 reactivity + this.activeMessages[index] = { ...this.activeMessages[index], ...updates }; + } + } + + /** + * Finds the index of a message in active messages + * @param messageId - The message ID to find + * @returns The index of the message, or -1 if not found + */ + findMessageIndex(messageId: string): number { + return this.activeMessages.findIndex((m) => m.id === messageId); + } + + /** + * Removes messages from active messages starting at an index + * @param startIndex - The index to start removing from + */ + sliceActiveMessages(startIndex: number): void { + this.activeMessages = this.activeMessages.slice(0, startIndex); + } + + /** + * Removes a message from active messages by index + * @param index - The index to remove + * @returns The removed message or undefined + */ + removeMessageAtIndex(index: number): DatabaseMessage | undefined { + if (index !== -1) { + return this.activeMessages.splice(index, 1)[0]; + } + return undefined; + } + + /** + * Triggers file download in browser + * @param data - The data to download + * @param filename - Optional filename for the download + */ + private triggerDownload(data: ExportedConversations, filename?: string): void { + const conversation = + 'conv' in data ? data.conv : Array.isArray(data) ? 
data[0]?.conv : undefined; + + if (!conversation) { + console.error('Invalid data: missing conversation'); + return; + } + + const conversationName = conversation.name?.trim() || ''; + const truncatedSuffix = conversationName + .toLowerCase() + .replace(/[^a-z0-9]/gi, '_') + .replace(/_+/g, '_') + .substring(0, 20); + const downloadFilename = filename || `conversation_${conversation.id}_${truncatedSuffix}.json`; + + const blob = new Blob([JSON.stringify(data, null, 2)], { type: 'application/json' }); + const url = URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + a.download = downloadFilename; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); + } + + // ───────────────────────────────────────────────────────────────────────────── + // Utilities + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Sets the callback function for title update confirmations + * @param callback - Function to call when confirmation is needed + */ + setTitleUpdateConfirmationCallback( + callback: (currentTitle: string, newTitle: string) => Promise<boolean> + ): void { + this.titleUpdateConfirmationCallback = callback; + } +} + +export const conversationsStore = new ConversationsStore(); + +export const conversations = () => conversationsStore.conversations; +export const activeConversation = () => conversationsStore.activeConversation; +export const activeMessages = () => conversationsStore.activeMessages; +export const isConversationsInitialized = () => conversationsStore.isInitialized; +export const usedModalities = () => conversationsStore.usedModalities; diff --git a/tools/server/webui/src/lib/stores/models.svelte.ts b/tools/server/webui/src/lib/stores/models.svelte.ts index bcb68826ce8..2e834af5a00 100644 --- a/tools/server/webui/src/lib/stores/models.svelte.ts +++ b/tools/server/webui/src/lib/stores/models.svelte.ts @@ -1,76 +1,221 @@ +import { SvelteSet } from 'svelte/reactivity'; import { ModelsService } from '$lib/services/models'; -import { persisted } from '$lib/stores/persisted.svelte'; -import { SELECTED_MODEL_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys'; -import type { ModelOption } from '$lib/types/models'; +import { PropsService } from '$lib/services/props'; +import { ServerModelStatus, ModelModality } from '$lib/enums'; +import { serverStore } from '$lib/stores/server.svelte'; + +/** + * modelsStore - Reactive store for model management in both MODEL and ROUTER modes + * + * This store manages: + * - Available models list + * - Selected model for new conversations + * - Loaded models tracking (ROUTER mode) + * - Model usage tracking per conversation + * - Automatic unloading of unused models + * + * **Architecture & Relationships:** + * - **ModelsService**: Stateless service for model API communication + * - **PropsService**: Stateless service for props/modalities fetching + * - **modelsStore** (this class): Reactive store for model state + * - **conversationsStore**: Tracks which conversations use which models + * + * **API Inconsistency Workaround:** + * In MODEL mode, `/props` returns modalities for the single model. + * In ROUTER mode, `/props` has no modalities - must use `/props?model=` per model. + * This store normalizes this behavior so consumers don't need to know the server mode. + * + * **Key Features:** + * - **MODEL mode**: Single model, always loaded + * - **ROUTER mode**: Multi-model with load/unload capability + * - **Auto-unload**: Automatically unloads models not used by any conversation + * - **Lazy loading**: ensureModelLoaded() loads models on demand + */
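The `/props` inconsistency described above is easiest to see at the HTTP level. PropsService's implementation is not part of this hunk, so the following is only an assumed sketch of its shape, with the per-model variant reached via the `?model=` query parameter the comment mentions.

```ts
// Assumed shape of PropsService (sketch; the real file is not shown in this diff).
export class PropsService {
	/** MODEL mode: /props describes the single loaded model. */
	static async fetch(): Promise<ApiLlamaCppServerProps> {
		const res = await fetch('./props');
		if (!res.ok) throw new Error(`props request failed: ${res.status}`);
		return res.json();
	}

	/** ROUTER mode: per-model props via /props?model=<id>. */
	static async fetchForModel(modelId: string): Promise<ApiLlamaCppServerProps> {
		const res = await fetch(`./props?model=${encodeURIComponent(modelId)}`);
		if (!res.ok) throw new Error(`props request failed: ${res.status}`);
		return res.json();
	}
}
```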
+class ModelsStore { + // ───────────────────────────────────────────────────────────────────────────── + // State + // ───────────────────────────────────────────────────────────────────────────── -type PersistedModelSelection = { - id: string; - model: string; -}; + models = $state<ModelOption[]>([]); + routerModels = $state([]); + loading = $state(false); + updating = $state(false); + error = $state<string | null>(null); + selectedModelId = $state<string | null>(null); + selectedModelName = $state<string | null>(null); -class ModelsStore { - private _models = $state<ModelOption[]>([]); - private _loading = $state(false); - private _updating = $state(false); - private _error = $state<string | null>(null); - private _selectedModelId = $state<string | null>(null); - private _selectedModelName = $state<string | null>(null); - private _persistedSelection = persisted<PersistedModelSelection | null>( - SELECTED_MODEL_LOCALSTORAGE_KEY, - null - ); - - constructor() { - const persisted = this._persistedSelection.value; - if (persisted) { - this._selectedModelId = persisted.id; - this._selectedModelName = persisted.model; + private modelUsage = $state<Map<string, SvelteSet<string>>>(new Map()); + private modelLoadingStates = $state<Map<string, boolean>>(new Map()); + + /** + * Model-specific props cache + * Key: modelId, Value: props data including modalities + */ + private modelPropsCache = $state<Map<string, ApiLlamaCppServerProps>>(new Map()); + private modelPropsFetching = $state<Set<string>>(new Set()); + + /** + * Version counter for props cache - used to trigger reactivity when props are updated + */ + propsCacheVersion = $state(0); + + // ───────────────────────────────────────────────────────────────────────────── + // Computed Getters + // ───────────────────────────────────────────────────────────────────────────── + + get selectedModel(): ModelOption | null { + if (!this.selectedModelId) return null; + return this.models.find((model) => model.id === this.selectedModelId) ?? null; + } + + get loadedModelIds(): string[] { + return this.routerModels + .filter((m) => m.status.value === ServerModelStatus.LOADED) + .map((m) => m.id); + } + + get loadingModelIds(): string[] { + return Array.from(this.modelLoadingStates.entries()) + .filter(([, loading]) => loading) + .map(([id]) => id); + } + + /** + * Get model name in MODEL mode (single model). + * Extracts from model_path or model_alias from server props. + * In ROUTER mode, returns null (model is per-conversation). + */ + get singleModelName(): string | null { + if (serverStore.isRouterMode) return null; + + const props = serverStore.props; + if (props?.model_alias) return props.model_alias; + if (!props?.model_path) return null; + + return props.model_path.split(/(\\|\/)/).pop() || null; + } + + // ───────────────────────────────────────────────────────────────────────────── + // Modalities + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Get modalities for a specific model + * Returns cached modalities from model props + */ + getModelModalities(modelId: string): ModelModalities | null { + // First check if modalities are stored in the model option + const model = this.models.find((m) => m.model === modelId || m.id === modelId); + if (model?.modalities) { + return model.modalities; + } + + // Fall back to props cache + const props = this.modelPropsCache.get(modelId); + if (props?.modalities) { + return { + vision: props.modalities.vision ?? 
false, + audio: props.modalities.audio ?? false + }; } + + return null; } - get models(): ModelOption[] { - return this._models; + /** + * Check if a model supports vision modality + */ + modelSupportsVision(modelId: string): boolean { + return this.getModelModalities(modelId)?.vision ?? false; } - get loading(): boolean { - return this._loading; + /** + * Check if a model supports audio modality + */ + modelSupportsAudio(modelId: string): boolean { + return this.getModelModalities(modelId)?.audio ?? false; + } + + /** + * Get model modalities as an array of ModelModality enum values + */ + getModelModalitiesArray(modelId: string): ModelModality[] { + const modalities = this.getModelModalities(modelId); + if (!modalities) return []; + + const result: ModelModality[] = []; + + if (modalities.vision) result.push(ModelModality.VISION); + if (modalities.audio) result.push(ModelModality.AUDIO); + + return result; + } + + /** + * Get props for a specific model (from cache) + */ + getModelProps(modelId: string): ApiLlamaCppServerProps | null { + return this.modelPropsCache.get(modelId) ?? null; } - get updating(): boolean { - return this._updating; + /** + * Check if props are being fetched for a model + */ + isModelPropsFetching(modelId: string): boolean { + return this.modelPropsFetching.has(modelId); } - get error(): string | null { - return this._error; + // ───────────────────────────────────────────────────────────────────────────── + // Status Queries + // ───────────────────────────────────────────────────────────────────────────── + + isModelLoaded(modelId: string): boolean { + const model = this.routerModels.find((m) => m.id === modelId); + return model?.status.value === ServerModelStatus.LOADED || false; } - get selectedModelId(): string | null { - return this._selectedModelId; + isModelOperationInProgress(modelId: string): boolean { + return this.modelLoadingStates.get(modelId) ?? false; } - get selectedModelName(): string | null { - return this._selectedModelName; + getModelStatus(modelId: string): ServerModelStatus | null { + const model = this.routerModels.find((m) => m.id === modelId); + return model?.status.value ?? null; } - get selectedModel(): ModelOption | null { - if (!this._selectedModelId) { - return null; - } + getModelUsage(modelId: string): SvelteSet<string> { + return this.modelUsage.get(modelId) ?? new SvelteSet(); + } - return this._models.find((model) => model.id === this._selectedModelId) ?? 
null; + isModelInUse(modelId: string): boolean { + const usage = this.modelUsage.get(modelId); + return usage !== undefined && usage.size > 0; } + // ───────────────────────────────────────────────────────────────────────────── + // Data Fetching + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Fetch list of models from server and detect server role + * Also fetches modalities for MODEL mode (single model) + */ async fetch(force = false): Promise<void> { - if (this._loading) return; - if (this._models.length > 0 && !force) return; + if (this.loading) return; + if (this.models.length > 0 && !force) return; - this._loading = true; - this._error = null; + this.loading = true; + this.error = null; try { + // Ensure server props are loaded (for role detection and MODEL mode modalities) + if (!serverStore.props) { + await serverStore.fetch(); + } + const response = await ModelsService.list(); - const models: ModelOption[] = response.data.map((item, index) => { + const models: ModelOption[] = response.data.map((item: ApiModelDataEntry, index: number) => { const details = response.models?.[index]; const rawCapabilities = Array.isArray(details?.capabilities) ? details?.capabilities : []; const displayNameSource = @@ -82,106 +227,355 @@ class ModelsStore { name: displayName, model: details?.model || item.id, description: details?.description, - capabilities: rawCapabilities.filter((value): value is string => Boolean(value)), + capabilities: rawCapabilities.filter((value: unknown): value is string => Boolean(value)), details: details?.details, meta: item.meta ?? null } satisfies ModelOption; }); - this._models = models; - - const selection = this.determineInitialSelection(models); - - this._selectedModelId = selection.id; - this._selectedModelName = selection.model; - this._persistedSelection.value = - selection.id && selection.model ? { id: selection.id, model: selection.model } : null; + this.models = models; + + // In MODEL mode, populate modalities from serverStore.props (single model) + // WORKAROUND: In MODEL mode, /props returns modalities for the single model, + // but /v1/models doesn't include modalities. We bridge this gap here. + const serverProps = serverStore.props; + if (serverStore.isModelMode && this.models.length > 0 && serverProps?.modalities) { + const modalities: ModelModalities = { + vision: serverProps.modalities.vision ?? false, + audio: serverProps.modalities.audio ?? false + }; + // Cache props for the single model + this.modelPropsCache.set(this.models[0].model, serverProps); + // Update model with modalities + this.models = this.models.map((model, index) => + index === 0 ? { ...model, modalities } : model + ); + } } catch (error) { - this._models = []; - this._error = error instanceof Error ? 
error.message : 'Failed to load models'; + this.models = []; + this.error = error instanceof Error ? error.message : 'Failed to load models'; throw error; } finally { - this._loading = false; + this.loading = false; } } - async select(modelId: string): Promise<void> { - if (!modelId || this._updating) { - return; + /** + * Fetch router models with full metadata (ROUTER mode only) + * This fetches the /models endpoint which returns status info for each model + */ + async fetchRouterModels(): Promise<void> { + try { + const response = await ModelsService.listRouter(); + this.routerModels = response.data; + await this.fetchModalitiesForLoadedModels(); + } catch (error) { + console.warn('Failed to fetch router models:', error); + this.routerModels = []; } + } - if (this._selectedModelId === modelId) { - return; + /** + * Fetch props for a specific model from /props endpoint + * Uses caching to avoid redundant requests + * + * @param modelId - Model identifier to fetch props for + * @returns Props data or null if fetch failed + */ + async fetchModelProps(modelId: string): Promise<ApiLlamaCppServerProps | null> { + // Return cached props if available + const cached = this.modelPropsCache.get(modelId); + if (cached) return cached; + + // Avoid duplicate fetches + if (this.modelPropsFetching.has(modelId)) return null; + + this.modelPropsFetching.add(modelId); + + try { + const props = await PropsService.fetchForModel(modelId); + this.modelPropsCache.set(modelId, props); + return props; + } catch (error) { + console.warn(`Failed to fetch props for model ${modelId}:`, error); + return null; + } finally { + this.modelPropsFetching.delete(modelId); + } + } + + /** + * Fetch modalities for all loaded models from /props endpoint + * This updates the modalities field in models array + */ + async fetchModalitiesForLoadedModels(): Promise<void> { + const loadedModelIds = this.loadedModelIds; + if (loadedModelIds.length === 0) return; + + // Fetch props for each loaded model in parallel + const propsPromises = loadedModelIds.map((modelId) => this.fetchModelProps(modelId)); + + try { + const results = await Promise.all(propsPromises); + + // Update models with modalities + this.models = this.models.map((model) => { + const modelIndex = loadedModelIds.indexOf(model.model); + if (modelIndex === -1) return model; + + const props = results[modelIndex]; + if (!props?.modalities) return model; + + const modalities: ModelModalities = { + vision: props.modalities.vision ?? false, + audio: props.modalities.audio ?? false + }; + + return { ...model, modalities }; + }); + + // Increment version to trigger reactivity + this.propsCacheVersion++; + } catch (error) { + console.warn('Failed to fetch modalities for loaded models:', error); } + } + + /** + * Update modalities for a specific model + * Called when a model is loaded or when we need fresh modality data + */ + async updateModelModalities(modelId: string): Promise<void> { + try { + const props = await this.fetchModelProps(modelId); + if (!props?.modalities) return; + + const modalities: ModelModalities = { + vision: props.modalities.vision ?? false, + audio: props.modalities.audio ?? false + }; - const option = this._models.find((model) => model.id === modelId); - if (!option) { - throw new Error('Selected model is not available'); + this.models = this.models.map((model) => + model.model === modelId ? 
{ ...model, modalities } : model + ); + + // Increment version to trigger reactivity + this.propsCacheVersion++; + } catch (error) { + console.warn(`Failed to update modalities for model ${modelId}:`, error); } } - this._updating = true; - this._error = null; + // ───────────────────────────────────────────────────────────────────────────── + // Model Selection + // ───────────────────────────────────────────────────────────────────────────── + + /** + * Select a model for new conversations + */ + async selectModelById(modelId: string): Promise<void> { + if (!modelId || this.updating) return; + if (this.selectedModelId === modelId) return; + + const option = this.models.find((model) => model.id === modelId); + if (!option) throw new Error('Selected model is not available'); + + this.updating = true; + this.error = null; try { - this._selectedModelId = option.id; - this._selectedModelName = option.model; - this._persistedSelection.value = { id: option.id, model: option.model }; + this.selectedModelId = option.id; + this.selectedModelName = option.model; } finally { - this._updating = false; + this.updating = false; } } - private toDisplayName(id: string): string { - const segments = id.split(/\\|\//); - const candidate = segments.pop(); + /** + * Select a model by its model name (used for syncing with conversation model) + * @param modelName - Model name to select (e.g., "unsloth/gemma-3-12b-it-GGUF:latest") + */ + selectModelByName(modelName: string): void { + const option = this.models.find((model) => model.model === modelName); + if (option) { + this.selectedModelId = option.id; + this.selectedModelName = option.model; + } + } - return candidate && candidate.trim().length > 0 ? candidate : id; + clearSelection(): void { + this.selectedModelId = null; + this.selectedModelName = null; + } + + findModelByName(modelName: string): ModelOption | null { + return this.models.find((model) => model.model === modelName) ?? null; + } + + findModelById(modelId: string): ModelOption | null { + return this.models.find((model) => model.id === modelId) ?? null; + } + + hasModel(modelName: string): boolean { + return this.models.some((model) => model.model === modelName); } + // ───────────────────────────────────────────────────────────────────────────── + // Loading/Unloading Models + // ───────────────────────────────────────────────────────────────────────────── + /** - * Determines which model should be selected after fetching the models list. - * Priority: current selection > persisted selection > first available model > none + * WORKAROUND: Polling for model status after load/unload operations. + * + * Currently, the `/models/load` and `/models/unload` endpoints return success + * before the operation actually completes on the server. This means an immediate + * request to `/models` returns stale status (e.g., "loading" after load request, + * "loaded" after unload request). + * + * TODO: Remove this polling once llama-server properly waits for the operation + * to complete before returning success from `/load` and `/unload` endpoints. + * At that point, a single `fetchRouterModels()` call after the operation will + * be sufficient to get the correct status. */ - private determineInitialSelection(models: ModelOption[]): { - id: string | null; - model: string | null; - } { - const persisted = this._persistedSelection.value; - let nextSelectionId = this._selectedModelId ?? persisted?.id ?? null; - let nextSelectionName = this._selectedModelName ?? persisted?.model ?? 
null; - if (nextSelectionId) { + /** Polling interval in ms for checking model status */ + private static readonly STATUS_POLL_INTERVAL = 500; + /** Maximum polling attempts before giving up */ + private static readonly STATUS_POLL_MAX_ATTEMPTS = 60; // 30 seconds max - if (match) { - nextSelectionId = match.id; - nextSelectionName = match.model; - } else if (models[0]) { - nextSelectionId = models[0].id; - nextSelectionName = models[0].model; - } else { - nextSelectionId = null; - nextSelectionName = null; + /** + * Poll for expected model status after load/unload operation. + * Keeps polling until the model reaches the expected status or max attempts reached. + * + * @param modelId - Model identifier to check + * @param expectedStatus - Expected status to wait for + * @returns Promise that resolves when expected status is reached + */ + private async pollForModelStatus( + modelId: string, + expectedStatus: ServerModelStatus + ): Promise<void> { + for (let attempt = 0; attempt < ModelsStore.STATUS_POLL_MAX_ATTEMPTS; attempt++) { + await this.fetchRouterModels(); + + const currentStatus = this.getModelStatus(modelId); + if (currentStatus === expectedStatus) { + return; } - } else if (models[0]) { - nextSelectionId = models[0].id; - nextSelectionName = models[0].model; + + // Wait before next poll + await new Promise((resolve) => setTimeout(resolve, ModelsStore.STATUS_POLL_INTERVAL)); + } + + console.warn( + `Model ${modelId} did not reach expected status ${expectedStatus} after ${ModelsStore.STATUS_POLL_MAX_ATTEMPTS} attempts` + ); + } + + /** + * Load a model (ROUTER mode) + * @param modelId - Model identifier to load + */ + async loadModel(modelId: string): Promise<void> { + if (this.isModelLoaded(modelId)) { + return; } - return { id: nextSelectionId, model: nextSelectionName }; + if (this.modelLoadingStates.get(modelId)) return; + + this.modelLoadingStates.set(modelId, true); + this.error = null; + + try { + await ModelsService.load(modelId); + + // Poll until model is loaded + await this.pollForModelStatus(modelId, ServerModelStatus.LOADED); + + await this.updateModelModalities(modelId); + } catch (error) { + this.error = error instanceof Error ? error.message : 'Failed to load model'; + throw error; + } finally { + this.modelLoadingStates.set(modelId, false); + } + }
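Until `/models/load` and `/models/unload` block until completion, the polling above is what guarantees a settled status. The same wait loop in distilled, generic form (hedged sketch; `waitFor` is not part of the store):

```ts
// Generic restatement of the status-polling workaround (sketch only).
export async function waitFor<T>(
	probe: () => Promise<T>,
	done: (value: T) => boolean,
	intervalMs = 500,
	maxAttempts = 60
): Promise<boolean> {
	for (let attempt = 0; attempt < maxAttempts; attempt++) {
		if (done(await probe())) return true;
		await new Promise((resolve) => setTimeout(resolve, intervalMs));
	}
	return false; // timed out; the caller decides how to surface this
}
```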
+ + /** + * Unload a model (ROUTER mode) + * @param modelId - Model identifier to unload + */ + async unloadModel(modelId: string): Promise<void> { + if (!this.isModelLoaded(modelId)) { + return; + } + + if (this.modelLoadingStates.get(modelId)) return; + + this.modelLoadingStates.set(modelId, true); + this.error = null; + + try { + await ModelsService.unload(modelId); + + await this.pollForModelStatus(modelId, ServerModelStatus.UNLOADED); + } catch (error) { + this.error = error instanceof Error ? error.message : 'Failed to unload model'; + throw error; + } finally { + this.modelLoadingStates.set(modelId, false); + } + } + + /** + * Ensure a model is loaded before use + * @param modelId - Model identifier to ensure is loaded + */ + async ensureModelLoaded(modelId: string): Promise<void> { + if (this.isModelLoaded(modelId)) { + return; + } + + await this.loadModel(modelId); + } + + // ───────────────────────────────────────────────────────────────────────────── + // Utilities + // ───────────────────────────────────────────────────────────────────────────── + + private toDisplayName(id: string): string { + const segments = id.split(/\\|\//); + const candidate = segments.pop(); + + return candidate && candidate.trim().length > 0 ? candidate : id; + } + + clear(): void { + this.models = []; + this.routerModels = []; + this.loading = false; + this.updating = false; + this.error = null; + this.selectedModelId = null; + this.selectedModelName = null; + this.modelUsage.clear(); + this.modelLoadingStates.clear(); + this.modelPropsCache.clear(); + this.modelPropsFetching.clear(); } } export const modelsStore = new ModelsStore(); export const modelOptions = () => modelsStore.models; +export const routerModels = () => modelsStore.routerModels; export const modelsLoading = () => modelsStore.loading; export const modelsUpdating = () => modelsStore.updating; export const modelsError = () => modelsStore.error; export const selectedModelId = () => modelsStore.selectedModelId; export const selectedModelName = () => modelsStore.selectedModelName; export const selectedModelOption = () => modelsStore.selectedModel; - -export const fetchModels = modelsStore.fetch.bind(modelsStore); -export const selectModel = modelsStore.select.bind(modelsStore); +export const loadedModelIds = () => modelsStore.loadedModelIds; +export const loadingModelIds = () => modelsStore.loadingModelIds; +export const propsCacheVersion = () => modelsStore.propsCacheVersion; +export const singleModelName = () => modelsStore.singleModelName; diff --git a/tools/server/webui/src/lib/stores/server.svelte.ts b/tools/server/webui/src/lib/stores/server.svelte.ts index e95c0bcea2f..fd2d335bed3 100644 --- a/tools/server/webui/src/lib/stores/server.svelte.ts +++ b/tools/server/webui/src/lib/stores/server.svelte.ts @@ -1,331 +1,136 @@ -import { browser } from '$app/environment'; -import { SERVER_PROPS_LOCALSTORAGE_KEY } from '$lib/constants/localstorage-keys'; -import { ChatService } from '$lib/services/chat'; -import { config } from '$lib/stores/settings.svelte'; +import { PropsService } from '$lib/services/props'; +import { ServerRole } from '$lib/enums'; /** - * ServerStore - Server state management and capability detection + * serverStore - Server connection state, configuration, and role detection * - * This store manages communication with the llama.cpp server to retrieve and maintain - * server properties, model information, and capability detection. It provides reactive - * state for server connectivity, model capabilities, and endpoint availability. + * This store manages the server connection state and properties fetched from `/props`. + * It provides reactive state for server configuration and role detection. * * **Architecture & Relationships:** - * - **ServerStore** (this class): Server state and capability management - * - Fetches and caches server properties from `/props` endpoint - * - Detects model capabilities (vision, audio support) - * - Tests endpoint availability (slots endpoint) - * - Provides reactive server state for UI components - * - * - **ChatService**: Uses server properties for request validation - * - **SlotsService**: Depends on slots endpoint availability detection - * - **UI Components**: Subscribe to server state for capability-based rendering + * - **PropsService**: Stateless service for fetching `/props` data + * - **serverStore** (this class): Reactive store for server state + * - **modelsStore**: Independent store for model management (uses PropsService directly) * * **Key Features:** - * - **Server Properties**: Model path, context size, build information - * - **Capability Detection**: Vision and audio modality support - * - **Endpoint Testing**: Slots endpoint availability checking - * - **Error Handling**: User-friendly error messages for connection issues - * - **Reactive State**: Svelte 5 runes for automatic UI updates - * - **State Management**: Loading states and error recovery - * - * **Server Capabilities Detected:** - * - Model name extraction from file path - * - Vision support (multimodal image processing) - * - Audio support (speech processing) - * - Slots endpoint availability (for processing state monitoring) - * - Context window size and token limits + * - **Server State**: Connection status, loading, error handling + * - **Role Detection**: MODEL (single model) vs ROUTER (multi-model) + * - **Default Params**: Server-wide generation defaults */
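Role detection orders the rest of startup. A hedged sketch of a plausible boot sequence over the stores in this diff (`bootstrap` is hypothetical; the store methods it calls are the real ones):

```ts
// Hypothetical startup sequence built on serverStore/modelsStore (sketch only).
import { serverStore } from '$lib/stores/server.svelte';
import { modelsStore } from '$lib/stores/models.svelte';

export async function bootstrap(): Promise<void> {
	await serverStore.fetch(); // populates props and detects MODEL vs ROUTER

	await modelsStore.fetch(); // model list; also bridges MODEL-mode modalities

	if (serverStore.isRouterMode) {
		// Only ROUTER mode exposes per-model status via /models.
		await modelsStore.fetchRouterModels();
	}
}
```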
-class ServerStore { - constructor() { - if (!browser) return; - - const cachedProps = this.readCachedServerProps(); - if (cachedProps) { - this._serverProps = cachedProps; - } - } - - private _serverProps = $state<ApiLlamaCppServerProps | null>(null); - private _loading = $state(false); - private _error = $state<string | null>(null); - private _serverWarning = $state<string | null>(null); - private _slotsEndpointAvailable = $state<boolean | null>(null); - private fetchServerPropsPromise: Promise<void> | null = null; - - private readCachedServerProps(): ApiLlamaCppServerProps | null { - if (!browser) return null; + // ───────────────────────────────────────────────────────────────────────────── + // State + // ───────────────────────────────────────────────────────────────────────────── - try { - const raw = localStorage.getItem(SERVER_PROPS_LOCALSTORAGE_KEY); - if (!raw) return null; - - return JSON.parse(raw) as ApiLlamaCppServerProps; - } catch (error) { - console.warn('Failed to read cached server props from localStorage:', error); - return null; - } - } - - private persistServerProps(props: ApiLlamaCppServerProps | null): void { - if (!browser) return; - - try { - if (props) { - localStorage.setItem(SERVER_PROPS_LOCALSTORAGE_KEY, JSON.stringify(props)); - } else { - localStorage.removeItem(SERVER_PROPS_LOCALSTORAGE_KEY); - } - } catch (error) { - console.warn('Failed to persist server props to localStorage:', error); - } - } - - get serverProps(): ApiLlamaCppServerProps | null { - return this._serverProps; - } - - get loading(): boolean { - return this._loading; - } - - get error(): string | null { - return this._error; - } - - get serverWarning(): string | null { - return this._serverWarning; - } - - get modelName(): string | null { - if (this._serverProps?.model_alias) { - return this._serverProps.model_alias; - } - if (!this._serverProps?.model_path) return null; - return 
this._serverProps.model_path.split(/(\\|\/)/).pop() || null; - } - - get supportedModalities(): string[] { - const modalities: string[] = []; - if (this._serverProps?.modalities?.audio) { - modalities.push('audio'); - } - if (this._serverProps?.modalities?.vision) { - modalities.push('vision'); - } - return modalities; - } + props = $state<ApiLlamaCppServerProps | null>(null); + loading = $state(false); + error = $state<string | null>(null); + role = $state<ServerRole | null>(null); + private fetchPromise: Promise<void> | null = null; - get supportsVision(): boolean { - return this._serverProps?.modalities?.vision ?? false; - } + // ───────────────────────────────────────────────────────────────────────────── + // Getters + // ───────────────────────────────────────────────────────────────────────────── - get supportsAudio(): boolean { - return this._serverProps?.modalities?.audio ?? false; + get defaultParams(): ApiLlamaCppServerProps['default_generation_settings']['params'] | null { + return this.props?.default_generation_settings?.params || null; } - get slotsEndpointAvailable(): boolean | null { - return this._slotsEndpointAvailable; + get contextSize(): number | null { + return this.props?.default_generation_settings?.n_ctx ?? null; } - get serverDefaultParams(): - | ApiLlamaCppServerProps['default_generation_settings']['params'] - | null { - return this._serverProps?.default_generation_settings?.params || null; + get isRouterMode(): boolean { + return this.role === ServerRole.ROUTER; } - /** - * Check if slots endpoint is available based on server properties and endpoint support - */ - private async checkSlotsEndpointAvailability(): Promise<void> { - if (!this._serverProps) { - this._slotsEndpointAvailable = false; - return; - } - - if (this._serverProps.total_slots <= 0) { - this._slotsEndpointAvailable = false; - return; - } - - try { - const currentConfig = config(); - const apiKey = currentConfig.apiKey?.toString().trim(); - - const response = await fetch(`./slots`, { - headers: { - ...(apiKey ? 
{ Authorization: `Bearer ${apiKey}` } : {}) - } - }); - - if (response.status === 501) { - console.info('Slots endpoint not implemented - server started without --slots flag'); - this._slotsEndpointAvailable = false; - return; - } - - this._slotsEndpointAvailable = true; - } catch (error) { - console.warn('Unable to test slots endpoint availability:', error); - this._slotsEndpointAvailable = false; - } + get isModelMode(): boolean { + return this.role === ServerRole.MODEL; } - /** - * Fetches server properties from the server - */ - async fetchServerProps(options: { silent?: boolean } = {}): Promise<void> { - const { silent = false } = options; - const isSilent = silent && this._serverProps !== null; - - if (this.fetchServerPropsPromise) { - return this.fetchServerPropsPromise; - } + // ───────────────────────────────────────────────────────────────────────────── + // Data Handling + // ───────────────────────────────────────────────────────────────────────────── - if (!isSilent) { - this._loading = true; - this._error = null; - this._serverWarning = null; - } + async fetch(): Promise<void> { + if (this.fetchPromise) return this.fetchPromise; - const hadProps = this._serverProps !== null; + this.loading = true; + this.error = null; const fetchPromise = (async () => { try { - const props = await ChatService.getServerProps(); - this._serverProps = props; - this.persistServerProps(props); - this._error = null; - this._serverWarning = null; - await this.checkSlotsEndpointAvailability(); + const props = await PropsService.fetch(); + this.props = props; + this.error = null; + this.detectRole(props); } catch (error) { - if (isSilent && hadProps) { - console.warn('Silent server props refresh failed, keeping cached data:', error); - return; - } - - this.handleFetchServerPropsError(error, hadProps); + this.error = this.getErrorMessage(error); + console.error('Error fetching server properties:', error); } finally { - if (!isSilent) { - this._loading = false; - } - - this.fetchServerPropsPromise = null; + this.loading = false; + this.fetchPromise = null; } })(); - this.fetchServerPropsPromise = fetchPromise; - + this.fetchPromise = fetchPromise; await fetchPromise; } - /** - * Handles fetch failures by attempting to recover cached server props and - * updating the user-facing error or warning state appropriately. 
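The new `fetch()` above deduplicates concurrent callers through a shared in-flight promise. That single-flight pattern, distilled (hedged sketch; `SingleFlight` is not part of the store):

```ts
// Distilled single-flight pattern used by serverStore.fetch() (sketch only).
export class SingleFlight<T> {
	private inflight: Promise<T> | null = null;

	run(task: () => Promise<T>): Promise<T> {
		if (this.inflight) return this.inflight; // join the in-progress call

		this.inflight = task().finally(() => {
			this.inflight = null; // allow future refreshes
		});

		return this.inflight;
	}
}
```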
-	 */
-	private handleFetchServerPropsError(error: unknown, hadProps: boolean): void {
-		const { errorMessage, isOfflineLikeError, isServerSideError } = this.normalizeFetchError(error);
-
-		let cachedProps: ApiLlamaCppServerProps | null = null;
-
-		if (!hadProps) {
-			cachedProps = this.readCachedServerProps();
-
-			if (cachedProps) {
-				this._serverProps = cachedProps;
-				this._error = null;
-
-				if (isOfflineLikeError || isServerSideError) {
-					this._serverWarning = errorMessage;
-				}
-
-				console.warn(
-					'Failed to refresh server properties, using cached values from localStorage:',
-					errorMessage
-				);
-			} else {
-				this._error = errorMessage;
-			}
-		} else {
-			this._error = null;
-
-			if (isOfflineLikeError || isServerSideError) {
-				this._serverWarning = errorMessage;
-			}
-
-			console.warn(
-				'Failed to refresh server properties, continuing with cached values:',
-				errorMessage
-			);
-		}
-
-		console.error('Error fetching server properties:', error);
-	}
-
-	private normalizeFetchError(error: unknown): {
-		errorMessage: string;
-		isOfflineLikeError: boolean;
-		isServerSideError: boolean;
-	} {
-		let errorMessage = 'Failed to connect to server';
-		let isOfflineLikeError = false;
-		let isServerSideError = false;
-
+	private getErrorMessage(error: unknown): string {
 		if (error instanceof Error) {
 			const message = error.message || '';
 
 			if (error.name === 'TypeError' && message.includes('fetch')) {
-				errorMessage = 'Server is not running or unreachable';
-				isOfflineLikeError = true;
+				return 'Server is not running or unreachable';
 			} else if (message.includes('ECONNREFUSED')) {
-				errorMessage = 'Connection refused - server may be offline';
-				isOfflineLikeError = true;
+				return 'Connection refused - server may be offline';
 			} else if (message.includes('ENOTFOUND')) {
-				errorMessage = 'Server not found - check server address';
-				isOfflineLikeError = true;
+				return 'Server not found - check server address';
 			} else if (message.includes('ETIMEDOUT')) {
-				errorMessage = 'Request timed out - the server took too long to respond';
-				isOfflineLikeError = true;
+				return 'Request timed out';
 			} else if (message.includes('503')) {
-				errorMessage = 'Server temporarily unavailable - try again shortly';
-				isServerSideError = true;
+				return 'Server temporarily unavailable';
 			} else if (message.includes('500')) {
-				errorMessage = 'Server error - check server logs';
-				isServerSideError = true;
+				return 'Server error - check server logs';
 			} else if (message.includes('404')) {
-				errorMessage = 'Server endpoint not found';
+				return 'Server endpoint not found';
 			} else if (message.includes('403') || message.includes('401')) {
-				errorMessage = 'Access denied';
+				return 'Access denied';
 			}
 		}
 
-		return { errorMessage, isOfflineLikeError, isServerSideError };
+		return 'Failed to connect to server';
 	}
 
-	/**
-	 * Clears the server state
-	 */
 	clear(): void {
-		this._serverProps = null;
-		this._error = null;
-		this._serverWarning = null;
-		this._loading = false;
-		this._slotsEndpointAvailable = null;
-		this.fetchServerPropsPromise = null;
-		this.persistServerProps(null);
+		this.props = null;
+		this.error = null;
+		this.loading = false;
+		this.role = null;
+		this.fetchPromise = null;
+	}
+
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Utilities
+	// ─────────────────────────────────────────────────────────────────────────────
+
+	private detectRole(props: ApiLlamaCppServerProps): void {
+		const newRole = props?.role === ServerRole.ROUTER ? ServerRole.ROUTER : ServerRole.MODEL;
+
+		if (this.role !== newRole) {
+			this.role = newRole;
+			console.info(`Server running in ${newRole === ServerRole.ROUTER ? 'ROUTER' : 'MODEL'} mode`);
+		}
 	}
 }
 
 export const serverStore = new ServerStore();
 
-export const serverProps = () => serverStore.serverProps;
+export const serverProps = () => serverStore.props;
 export const serverLoading = () => serverStore.loading;
 export const serverError = () => serverStore.error;
-export const serverWarning = () => serverStore.serverWarning;
-export const modelName = () => serverStore.modelName;
-export const supportedModalities = () => serverStore.supportedModalities;
-export const supportsVision = () => serverStore.supportsVision;
-export const supportsAudio = () => serverStore.supportsAudio;
-export const slotsEndpointAvailable = () => serverStore.slotsEndpointAvailable;
-export const serverDefaultParams = () => serverStore.serverDefaultParams;
+export const serverRole = () => serverStore.role;
+export const defaultParams = () => serverStore.defaultParams;
+export const contextSize = () => serverStore.contextSize;
+export const isRouterMode = () => serverStore.isRouterMode;
+export const isModelMode = () => serverStore.isModelMode;
diff --git a/tools/server/webui/src/lib/stores/settings.svelte.ts b/tools/server/webui/src/lib/stores/settings.svelte.ts
index b10f0dd3a41..5140995eea4 100644
--- a/tools/server/webui/src/lib/stores/settings.svelte.ts
+++ b/tools/server/webui/src/lib/stores/settings.svelte.ts
@@ -1,12 +1,12 @@
 /**
- * SettingsStore - Application configuration and theme management
+ * settingsStore - Application configuration and theme management
  *
  * This store manages all application settings including AI model parameters, UI preferences,
  * and theme configuration. It provides persistent storage through localStorage with reactive
  * state management using Svelte 5 runes.
  *
  * **Architecture & Relationships:**
- * - **SettingsStore** (this class): Configuration state management
+ * - **settingsStore** (this class): Configuration state management
  *   - Manages AI model parameters (temperature, max tokens, etc.)
  *   - Handles theme switching and persistence
  *   - Provides localStorage synchronization
@@ -33,23 +33,39 @@
 import { browser } from '$app/environment';
 import { SETTING_CONFIG_DEFAULT } from '$lib/constants/settings-config';
-import { normalizeFloatingPoint } from '$lib/utils/precision';
 import { ParameterSyncService } from '$lib/services/parameter-sync';
 import { serverStore } from '$lib/stores/server.svelte';
-import { setConfigValue, getConfigValue, configToParameterRecord } from '$lib/utils/config-helpers';
+import {
+	configToParameterRecord,
+	normalizeFloatingPoint,
+	getConfigValue,
+	setConfigValue
+} from '$lib/utils';
+import {
+	CONFIG_LOCALSTORAGE_KEY,
+	USER_OVERRIDES_LOCALSTORAGE_KEY
+} from '$lib/constants/localstorage-keys';
 
 class SettingsStore {
+	// ─────────────────────────────────────────────────────────────────────────────
+	// State
+	// ─────────────────────────────────────────────────────────────────────────────
+
 	config = $state({ ...SETTING_CONFIG_DEFAULT });
 	theme = $state('auto');
 	isInitialized = $state(false);
 	userOverrides = $state<Set<string>>(new Set());
 
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Utilities (private helpers)
+	// ─────────────────────────────────────────────────────────────────────────────
+
 	/**
 	 * Helper method to get server defaults with null safety
 	 * Centralizes the pattern of getting and extracting server defaults
 	 */
 	private getServerDefaults(): Record<string, unknown> {
-		const serverParams = serverStore.serverDefaultParams;
+		const serverParams = serverStore.defaultParams;
 
 		return serverParams ? ParameterSyncService.extractServerDefaults(serverParams) : {};
 	}
@@ -59,6 +75,10 @@ class SettingsStore {
 		}
 	}
 
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Lifecycle
+	// ─────────────────────────────────────────────────────────────────────────────
+
 	/**
 	 * Initialize the settings store by loading from localStorage
 	 */
@@ -80,7 +100,7 @@ class SettingsStore {
 		if (!browser) return;
 
 		try {
-			const storedConfigRaw = localStorage.getItem('config');
+			const storedConfigRaw = localStorage.getItem(CONFIG_LOCALSTORAGE_KEY);
 			const savedVal = JSON.parse(storedConfigRaw || '{}');
 
 			// Merge with defaults to prevent breaking changes
@@ -90,7 +110,9 @@ class SettingsStore {
 			};
 
 			// Load user overrides
-			const savedOverrides = JSON.parse(localStorage.getItem('userOverrides') || '[]');
+			const savedOverrides = JSON.parse(
+				localStorage.getItem(USER_OVERRIDES_LOCALSTORAGE_KEY) || '[]'
+			);
 			this.userOverrides = new Set(savedOverrides);
 		} catch (error) {
 			console.warn('Failed to parse config from localStorage, using defaults:', error);
@@ -107,6 +129,10 @@ class SettingsStore {
 		this.theme = localStorage.getItem('theme') || 'auto';
 	}
 
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Config Updates
+	// ─────────────────────────────────────────────────────────────────────────────
+
 	/**
 	 * Update a specific configuration setting
 	 * @param key - The configuration key to update
@@ -170,9 +196,12 @@ class SettingsStore {
 		if (!browser) return;
 
 		try {
-			localStorage.setItem('config', JSON.stringify(this.config));
+			localStorage.setItem(CONFIG_LOCALSTORAGE_KEY, JSON.stringify(this.config));
 
-			localStorage.setItem('userOverrides', JSON.stringify(Array.from(this.userOverrides)));
+			localStorage.setItem(
+				USER_OVERRIDES_LOCALSTORAGE_KEY,
+				JSON.stringify(Array.from(this.userOverrides))
+			);
 		} catch (error) {
 			console.error('Failed to save config to localStorage:', error);
 		}
@@ -204,6 +233,10 @@ class SettingsStore {
 		}
 	}
 
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Reset
+	// ─────────────────────────────────────────────────────────────────────────────
+
 	/**
 	 * Reset configuration to defaults
 	 */
@@ -229,28 +262,38 @@ class SettingsStore {
 	}
 
 	/**
-	 * Get a specific configuration value
-	 * @param key - The configuration key to get
-	 * @returns The configuration value
+	 * Reset a parameter to server default (or webui default if no server default)
 	 */
-	getConfig<K extends keyof SettingsConfigType>(key: K): SettingsConfigType[K] {
-		return this.config[key];
-	}
+	resetParameterToServerDefault(key: string): void {
+		const serverDefaults = this.getServerDefaults();
 
-	/**
-	 * Get the entire configuration object
-	 * @returns The complete configuration object
-	 */
-	getAllConfig(): SettingsConfigType {
-		return { ...this.config };
+		if (serverDefaults[key] !== undefined) {
+			const value = normalizeFloatingPoint(serverDefaults[key]);
+
+			this.config[key as keyof SettingsConfigType] =
+				value as SettingsConfigType[keyof SettingsConfigType];
+		} else {
+			if (key in SETTING_CONFIG_DEFAULT) {
+				const defaultValue = getConfigValue(SETTING_CONFIG_DEFAULT, key);
+
+				setConfigValue(this.config, key, defaultValue);
+			}
+		}
+
+		this.userOverrides.delete(key);
+		this.saveConfig();
 	}
 
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Server Sync
+	// ─────────────────────────────────────────────────────────────────────────────
+
 	/**
 	 * Initialize settings with props defaults when server properties are first loaded
 	 * This sets up the default values from /props endpoint
 	 */
 	syncWithServerDefaults(): void {
-		const serverParams = serverStore.serverDefaultParams;
+		const serverParams = serverStore.defaultParams;
 
 		if (!serverParams) {
 			console.warn('No server parameters available for initialization');
@@ -278,15 +321,6 @@ class SettingsStore {
 		console.log('Current user overrides after sync:', Array.from(this.userOverrides));
 	}
 
-	/**
-	 * Clear all user overrides (for debugging)
-	 */
-	clearAllUserOverrides(): void {
-		this.userOverrides.clear();
-		this.saveConfig();
-		console.log('Cleared all user overrides');
-	}
-
 	/**
 	 * Reset all parameters to their default values (from props)
 	 * This is used by the "Reset to Default" functionality
@@ -315,6 +349,27 @@ class SettingsStore {
 		this.saveConfig();
 	}
 
+	// ─────────────────────────────────────────────────────────────────────────────
+	// Utilities
+	// ─────────────────────────────────────────────────────────────────────────────
+
+	/**
+	 * Get a specific configuration value
+	 * @param key - The configuration key to get
+	 * @returns The configuration value
+	 */
+	getConfig<K extends keyof SettingsConfigType>(key: K): SettingsConfigType[K] {
+		return this.config[key];
+	}
+
+	/**
+	 * Get the entire configuration object
+	 * @returns The complete configuration object
+	 */
+	getAllConfig(): SettingsConfigType {
+		return { ...this.config };
+	}
+
 	/**
 	 * Get parameter information including source for a specific parameter
 	 */
@@ -330,29 +385,6 @@ class SettingsStore {
 		);
 	}
 
-	/**
-	 * Reset a parameter to server default (or webui default if no server default)
-	 */
-	resetParameterToServerDefault(key: string): void {
-		const serverDefaults = this.getServerDefaults();
-
-		if (serverDefaults[key] !== undefined) {
-			const value = normalizeFloatingPoint(serverDefaults[key]);
-
-			this.config[key as keyof SettingsConfigType] =
-				value as SettingsConfigType[keyof SettingsConfigType];
-		} else {
-			if (key in SETTING_CONFIG_DEFAULT) {
-				const defaultValue = getConfigValue(SETTING_CONFIG_DEFAULT, key);
-
-				setConfigValue(this.config, key, defaultValue);
-			}
-		}
-
-		this.userOverrides.delete(key);
-		this.saveConfig();
-	}
-
 	/**
 	 * Get diff between current settings and server defaults
 	 */
@@ -367,30 +399,19 @@ class SettingsStore {
 
 		return ParameterSyncService.createParameterDiff(configAsRecord, serverDefaults);
 	}
+
+	/**
+	 * Clear all user overrides (for debugging)
+	 */
+	clearAllUserOverrides(): void {
+		this.userOverrides.clear();
+		this.saveConfig();
+		console.log('Cleared all user overrides');
+	}
 }
 
-// Create and export the settings store instance
 export const settingsStore = new SettingsStore();
 
-// Export reactive getters for easy access in components
 export const config = () => settingsStore.config;
 export const theme = () => settingsStore.theme;
 export const isInitialized = () => settingsStore.isInitialized;
-
-// Export bound methods for easy access
-export const updateConfig = settingsStore.updateConfig.bind(settingsStore);
-export const updateMultipleConfig = settingsStore.updateMultipleConfig.bind(settingsStore);
-export const updateTheme = settingsStore.updateTheme.bind(settingsStore);
-export const resetConfig = settingsStore.resetConfig.bind(settingsStore);
-export const resetTheme = settingsStore.resetTheme.bind(settingsStore);
-export const resetAll = settingsStore.resetAll.bind(settingsStore);
-export const getConfig = settingsStore.getConfig.bind(settingsStore);
-export const getAllConfig = settingsStore.getAllConfig.bind(settingsStore);
-export const syncWithServerDefaults = settingsStore.syncWithServerDefaults.bind(settingsStore);
-export const forceSyncWithServerDefaults =
-	settingsStore.forceSyncWithServerDefaults.bind(settingsStore);
-export const getParameterInfo = settingsStore.getParameterInfo.bind(settingsStore);
-export const resetParameterToServerDefault =
-	settingsStore.resetParameterToServerDefault.bind(settingsStore);
-export const getParameterDiff = settingsStore.getParameterDiff.bind(settingsStore);
-export const clearAllUserOverrides = settingsStore.clearAllUserOverrides.bind(settingsStore);
diff --git a/tools/server/webui/src/lib/types/api.d.ts b/tools/server/webui/src/lib/types/api.d.ts
index 1a8bc649899..4bc92b57bcd 100644
--- a/tools/server/webui/src/lib/types/api.d.ts
+++ b/tools/server/webui/src/lib/types/api.d.ts
@@ -1,3 +1,4 @@
+import type { ServerModelStatus, ServerRole } from '$lib/enums';
 import type { ChatMessagePromptProgress } from './chat';
 
 export interface ApiChatMessageContentPart {
@@ -36,11 +37,38 @@ export interface ApiChatMessageData {
 	timestamp?: number;
 }
 
+/**
+ * Model status object from /models endpoint
+ */
+export interface ApiModelStatus {
+	/** Status value: loaded, unloaded, loading, failed */
+	value: ServerModelStatus;
+	/** Command line arguments used when loading (only for loaded models) */
+	args?: string[];
+}
+
+/**
+ * Model entry from /models endpoint (ROUTER mode)
+ * Based on actual API response structure
+ */
 export interface ApiModelDataEntry {
+	/** Model identifier (e.g., "ggml-org/Qwen2.5-Omni-7B-GGUF:latest") */
 	id: string;
+	/** Model name (optional, usually same as id - not always returned by API) */
+	name?: string;
+	/** Object type, always "model" */
 	object: string;
-	created: number;
+	/** Owner, usually "llamacpp" */
 	owned_by: string;
+	/** Creation timestamp */
+	created: number;
+	/** Whether model files are in HuggingFace cache */
+	in_cache: boolean;
+	/** Path to model manifest file */
+	path: string;
+	/** Current status of the model */
+	status: ApiModelStatus;
+	/** Legacy meta field (may be present in older responses) */
 	meta?: Record<string, unknown> | null;
 }
 
@@ -139,6 +167,7 @@ export interface ApiLlamaCppServerProps {
 	};
 	total_slots: number;
 	model_path: string;
+	role: ServerRole;
 	modalities: {
 		vision: boolean;
 		audio: boolean;
@@ -314,3 +343,81 @@ export interface ApiProcessingState {
 	promptTokens?: number;
 	cacheTokens?: number;
 }
+
+/**
+ * Router model metadata - extended from ApiModelDataEntry with additional router-specific fields
+ * @deprecated Use ApiModelDataEntry instead - the /models endpoint returns this structure directly
+ */
+export interface ApiRouterModelMeta {
+	/** Model identifier (e.g., "ggml-org/Qwen2.5-Omni-7B-GGUF:latest") */
+	name: string;
+	/** Path to model file or manifest */
+	path: string;
+	/** Optional path to multimodal projector */
+	path_mmproj?: string;
+	/** Whether model is in HuggingFace cache */
+	in_cache: boolean;
+	/** Port where model instance is running (0 if not loaded) */
+	port?: number;
+	/** Current status of the model */
+	status: ApiModelStatus;
+	/** Error message if status is FAILED */
+	error?: string;
+}
+
+/**
+ * Request to load a model
+ */
+export interface ApiRouterModelsLoadRequest {
+	model: string;
+}
+
+/**
+ * Response from loading a model
+ */
+export interface ApiRouterModelsLoadResponse {
+	success: boolean;
+	error?: string;
+}
+
+/**
+ * Request to check model status
+ */
+export interface ApiRouterModelsStatusRequest {
+	model: string;
+}
+
+/**
+ * Response with model status
+ */
+export interface ApiRouterModelsStatusResponse {
+	model: string;
+	status: ServerModelStatus;
+	port?: number;
+	error?: string;
+}
+
+/**
+ * Response with list of all models from /models endpoint
+ * Note: This is the same as ApiModelListResponse - the endpoint returns the same structure
+ * regardless of server mode (MODEL or ROUTER)
+ */
+export interface ApiRouterModelsListResponse {
+	object: string;
+	data: ApiModelDataEntry[];
+}
+
+/**
+ * Request to unload a model
+ */
+export interface ApiRouterModelsUnloadRequest {
+	model: string;
+}
+
+/**
+ * Response from unloading a model
+ */
+export interface ApiRouterModelsUnloadResponse {
+	success: boolean;
+	error?: string;
+}
diff --git a/tools/server/webui/src/lib/types/chat.d.ts b/tools/server/webui/src/lib/types/chat.d.ts
index ee3990b04b9..0eafb80cbfd 100644
--- a/tools/server/webui/src/lib/types/chat.d.ts
+++ b/tools/server/webui/src/lib/types/chat.d.ts
@@ -16,7 +16,6 @@ export interface ChatAttachmentDisplayItem {
 	name: string;
 	size?: number;
 	preview?: string;
-	type: string;
 	isImage: boolean;
 	uploadedFile?: ChatUploadedFile;
 	attachment?: DatabaseMessageExtra;
@@ -29,7 +28,6 @@ export interface ChatAttachmentPreviewItem {
 	attachment?: DatabaseMessageExtra;
 	preview?: string;
 	name?: string;
-	type?: string;
 	size?: number;
 	textContent?: string;
 }
diff --git a/tools/server/webui/src/lib/types/database.d.ts b/tools/server/webui/src/lib/types/database.d.ts
index 16debc6d67d..1a336e059cf 100644
--- a/tools/server/webui/src/lib/types/database.d.ts
+++ b/tools/server/webui/src/lib/types/database.d.ts
@@ -1,4 +1,5 @@
-import type { ChatMessageTimings } from './chat';
+import type { ChatMessageTimings, ChatRole, ChatMessageType } from '$lib/types/chat';
+import { AttachmentType } from '$lib/enums';
 
 export interface DatabaseConversation {
 	currNode: string | null;
@@ -8,38 +9,39 @@ export interface DatabaseConversation {
 }
 
 export interface DatabaseMessageExtraAudioFile {
-	type: 'audioFile';
+	type: AttachmentType.AUDIO;
 	name: string;
 	base64Data:
string; mimeType: string; } export interface DatabaseMessageExtraImageFile { - type: 'imageFile'; + type: AttachmentType.IMAGE; name: string; base64Url: string; } -export interface DatabaseMessageExtraTextFile { - type: 'textFile'; +/** + * Legacy format from old webui - pasted content was stored as "context" type + * @deprecated Use DatabaseMessageExtraTextFile instead + */ +export interface DatabaseMessageExtraLegacyContext { + type: AttachmentType.LEGACY_CONTEXT; name: string; content: string; } export interface DatabaseMessageExtraPdfFile { - type: 'pdfFile'; + type: AttachmentType.PDF; + base64Data: string; name: string; content: string; // Text content extracted from PDF images?: string[]; // Optional: PDF pages as base64 images processedAsImages: boolean; // Whether PDF was processed as images } -/** - * Legacy format from old webui - pasted content was stored as "context" type - * @deprecated Use DatabaseMessageExtraTextFile instead - */ -export interface DatabaseMessageExtraLegacyContext { - type: 'context'; +export interface DatabaseMessageExtraTextFile { + type: AttachmentType.TEXT; name: string; content: string; } diff --git a/tools/server/webui/src/lib/types/index.ts b/tools/server/webui/src/lib/types/index.ts new file mode 100644 index 00000000000..2a21c6dcfaf --- /dev/null +++ b/tools/server/webui/src/lib/types/index.ts @@ -0,0 +1,70 @@ +/** + * Unified exports for all type definitions + * Import types from '$lib/types' for cleaner imports + */ + +// API types +export type { + ApiChatMessageContentPart, + ApiContextSizeError, + ApiErrorResponse, + ApiChatMessageData, + ApiModelStatus, + ApiModelDataEntry, + ApiModelDetails, + ApiModelListResponse, + ApiLlamaCppServerProps, + ApiChatCompletionRequest, + ApiChatCompletionToolCallFunctionDelta, + ApiChatCompletionToolCallDelta, + ApiChatCompletionToolCall, + ApiChatCompletionStreamChunk, + ApiChatCompletionResponse, + ApiSlotData, + ApiProcessingState, + ApiRouterModelMeta, + ApiRouterModelsLoadRequest, + ApiRouterModelsLoadResponse, + ApiRouterModelsStatusRequest, + ApiRouterModelsStatusResponse, + ApiRouterModelsListResponse, + ApiRouterModelsUnloadRequest, + ApiRouterModelsUnloadResponse +} from './api'; + +// Chat types +export type { + ChatMessageType, + ChatRole, + ChatUploadedFile, + ChatAttachmentDisplayItem, + ChatAttachmentPreviewItem, + ChatMessageSiblingInfo, + ChatMessagePromptProgress, + ChatMessageTimings +} from './chat'; + +// Database types +export type { + DatabaseConversation, + DatabaseMessageExtraAudioFile, + DatabaseMessageExtraImageFile, + DatabaseMessageExtraLegacyContext, + DatabaseMessageExtraPdfFile, + DatabaseMessageExtraTextFile, + DatabaseMessageExtra, + DatabaseMessage, + ExportedConversation, + ExportedConversations +} from './database'; + +// Model types +export type { ModelModalities, ModelOption } from './models'; + +// Settings types +export type { + SettingsConfigValue, + SettingsFieldConfig, + SettingsChatServiceOptions, + SettingsConfigType +} from './settings'; diff --git a/tools/server/webui/src/lib/types/models.d.ts b/tools/server/webui/src/lib/types/models.d.ts index 3b6bad5f0fe..ef44a2cb6d4 100644 --- a/tools/server/webui/src/lib/types/models.d.ts +++ b/tools/server/webui/src/lib/types/models.d.ts @@ -1,11 +1,21 @@ import type { ApiModelDataEntry, ApiModelDetails } from '$lib/types/api'; +/** + * Model modalities - vision and audio capabilities + */ +export interface ModelModalities { + vision: boolean; + audio: boolean; +} + export interface ModelOption { id: string; name: string; 
	model: string;
	description?: string;
	capabilities: string[];
+	/** Model modalities from /props endpoint */
+	modalities?: ModelModalities;
	details?: ApiModelDetails['details'];
	meta?: ApiModelDataEntry['meta'];
}
diff --git a/tools/server/webui/src/lib/types/settings.d.ts b/tools/server/webui/src/lib/types/settings.d.ts
index b47842b66e6..40de98b7084 100644
--- a/tools/server/webui/src/lib/types/settings.d.ts
+++ b/tools/server/webui/src/lib/types/settings.d.ts
@@ -14,6 +14,12 @@ export interface SettingsFieldConfig {
 }
 
 export interface SettingsChatServiceOptions {
 	stream?: boolean;
+	// Model (required in ROUTER mode, optional in MODEL mode)
+	model?: string;
+	// System message to inject
+	systemMessage?: string;
+	// Disable reasoning format (use 'none' instead of 'auto')
+	disableReasoningFormat?: boolean;
 	// Generation parameters
 	temperature?: number;
 	max_tokens?: number;
@@ -45,7 +51,7 @@ export interface SettingsChatServiceOptions {
 	onReasoningChunk?: (chunk: string) => void;
 	onToolCallChunk?: (chunk: string) => void;
 	onModel?: (model: string) => void;
-	onFirstValidChunk?: () => void;
+	onTimings?: (timings: ChatMessageTimings, promptProgress?: ChatMessagePromptProgress) => void;
 	onComplete?: (
 		response: string,
 		reasoningContent?: string,
diff --git a/tools/server/webui/src/lib/utils/api-headers.ts b/tools/server/webui/src/lib/utils/api-headers.ts
new file mode 100644
index 00000000000..77ce3e88cb1
--- /dev/null
+++ b/tools/server/webui/src/lib/utils/api-headers.ts
@@ -0,0 +1,22 @@
+import { config } from '$lib/stores/settings.svelte';
+
+/**
+ * Get authorization headers for API requests
+ * Includes Bearer token if API key is configured
+ */
+export function getAuthHeaders(): Record<string, string> {
+	const currentConfig = config();
+	const apiKey = currentConfig.apiKey?.toString().trim();
+
+	return apiKey ? { Authorization: `Bearer ${apiKey}` } : {};
+}
+
+/**
+ * Get standard JSON headers with optional authorization
+ */
+export function getJsonHeaders(): Record<string, string> {
+	return {
+		'Content-Type': 'application/json',
+		...getAuthHeaders()
+	};
+}
diff --git a/tools/server/webui/src/lib/utils/attachment-display.ts b/tools/server/webui/src/lib/utils/attachment-display.ts
new file mode 100644
index 00000000000..750aaa38d73
--- /dev/null
+++ b/tools/server/webui/src/lib/utils/attachment-display.ts
@@ -0,0 +1,61 @@
+import { FileTypeCategory } from '$lib/enums';
+import { getFileTypeCategory, getFileTypeCategoryByExtension, isImageFile } from '$lib/utils';
+
+export interface AttachmentDisplayItemsOptions {
+	uploadedFiles?: ChatUploadedFile[];
+	attachments?: DatabaseMessageExtra[];
+}
+
+/**
+ * Gets the file type category from an uploaded file, checking both MIME type and extension
+ */
+function getUploadedFileCategory(file: ChatUploadedFile): FileTypeCategory | null {
+	const categoryByMime = getFileTypeCategory(file.type);
+
+	if (categoryByMime) {
+		return categoryByMime;
+	}
+
+	return getFileTypeCategoryByExtension(file.name);
+}
+
+/**
+ * Creates a unified list of display items from uploaded files and stored attachments.
+ * Items are returned in reverse order (newest first).
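+ *
+ * A hedged usage sketch (illustrative only, not part of this patch; the
+ * `uploadedFiles` and `attachments` values are assumed to come from the
+ * calling component's state):
+ *
+ * @example
+ * const items = getAttachmentDisplayItems({ uploadedFiles, attachments });
+ * const newest = items[0]; // reversed order puts the most recent item first
+ * const images = items.filter((item) => item.isImage);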
+ */ +export function getAttachmentDisplayItems( + options: AttachmentDisplayItemsOptions +): ChatAttachmentDisplayItem[] { + const { uploadedFiles = [], attachments = [] } = options; + const items: ChatAttachmentDisplayItem[] = []; + + // Add uploaded files (ChatForm) + for (const file of uploadedFiles) { + items.push({ + id: file.id, + name: file.name, + size: file.size, + preview: file.preview, + isImage: getUploadedFileCategory(file) === FileTypeCategory.IMAGE, + uploadedFile: file, + textContent: file.textContent + }); + } + + // Add stored attachments (ChatMessage) + for (const [index, attachment] of attachments.entries()) { + const isImage = isImageFile(attachment); + + items.push({ + id: `attachment-${index}`, + name: attachment.name, + preview: isImage && 'base64Url' in attachment ? attachment.base64Url : undefined, + isImage, + attachment, + attachmentIndex: index, + textContent: 'content' in attachment ? attachment.content : undefined + }); + } + + return items.reverse(); +} diff --git a/tools/server/webui/src/lib/utils/attachment-type.ts b/tools/server/webui/src/lib/utils/attachment-type.ts new file mode 100644 index 00000000000..9e9f0960122 --- /dev/null +++ b/tools/server/webui/src/lib/utils/attachment-type.ts @@ -0,0 +1,105 @@ +import { AttachmentType, FileTypeCategory } from '$lib/enums'; +import { getFileTypeCategory, getFileTypeCategoryByExtension } from '$lib/utils'; + +/** + * Gets the file type category from an uploaded file, checking both MIME type and extension + * @param uploadedFile - The uploaded file to check + * @returns The file type category or null if not recognized + */ +function getUploadedFileCategory(uploadedFile: ChatUploadedFile): FileTypeCategory | null { + // First try MIME type + const categoryByMime = getFileTypeCategory(uploadedFile.type); + + if (categoryByMime) { + return categoryByMime; + } + + // Fallback to extension (browsers don't always provide correct MIME types) + return getFileTypeCategoryByExtension(uploadedFile.name); +} + +/** + * Determines if an attachment or uploaded file is a text file + * @param uploadedFile - Optional uploaded file + * @param attachment - Optional database attachment + * @returns true if the file is a text file + */ +export function isTextFile( + attachment?: DatabaseMessageExtra, + uploadedFile?: ChatUploadedFile +): boolean { + if (uploadedFile) { + return getUploadedFileCategory(uploadedFile) === FileTypeCategory.TEXT; + } + + if (attachment) { + return ( + attachment.type === AttachmentType.TEXT || attachment.type === AttachmentType.LEGACY_CONTEXT + ); + } + + return false; +} + +/** + * Determines if an attachment or uploaded file is an image + * @param uploadedFile - Optional uploaded file + * @param attachment - Optional database attachment + * @returns true if the file is an image + */ +export function isImageFile( + attachment?: DatabaseMessageExtra, + uploadedFile?: ChatUploadedFile +): boolean { + if (uploadedFile) { + return getUploadedFileCategory(uploadedFile) === FileTypeCategory.IMAGE; + } + + if (attachment) { + return attachment.type === AttachmentType.IMAGE; + } + + return false; +} + +/** + * Determines if an attachment or uploaded file is a PDF + * @param uploadedFile - Optional uploaded file + * @param attachment - Optional database attachment + * @returns true if the file is a PDF + */ +export function isPdfFile( + attachment?: DatabaseMessageExtra, + uploadedFile?: ChatUploadedFile +): boolean { + if (uploadedFile) { + return getUploadedFileCategory(uploadedFile) === FileTypeCategory.PDF; 
+ } + + if (attachment) { + return attachment.type === AttachmentType.PDF; + } + + return false; +} + +/** + * Determines if an attachment or uploaded file is an audio file + * @param uploadedFile - Optional uploaded file + * @param attachment - Optional database attachment + * @returns true if the file is an audio file + */ +export function isAudioFile( + attachment?: DatabaseMessageExtra, + uploadedFile?: ChatUploadedFile +): boolean { + if (uploadedFile) { + return getUploadedFileCategory(uploadedFile) === FileTypeCategory.AUDIO; + } + + if (attachment) { + return attachment.type === AttachmentType.AUDIO; + } + + return false; +} diff --git a/tools/server/webui/src/lib/utils/audio-recording.ts b/tools/server/webui/src/lib/utils/audio-recording.ts index acf4c6d1fae..2a21985d1a6 100644 --- a/tools/server/webui/src/lib/utils/audio-recording.ts +++ b/tools/server/webui/src/lib/utils/audio-recording.ts @@ -1,4 +1,4 @@ -import { MimeTypeAudio } from '$lib/enums/files'; +import { MimeTypeAudio } from '$lib/enums'; /** * AudioRecorder - Browser-based audio recording with MediaRecorder API diff --git a/tools/server/webui/src/lib/utils/browser-only.ts b/tools/server/webui/src/lib/utils/browser-only.ts new file mode 100644 index 00000000000..0af800638ba --- /dev/null +++ b/tools/server/webui/src/lib/utils/browser-only.ts @@ -0,0 +1,35 @@ +/** + * Browser-only utility exports + * + * These utilities require browser APIs (DOM, Canvas, MediaRecorder, etc.) + * and cannot be imported during SSR. Import from '$lib/utils/browser-only' + * only in client-side code or components that are not server-rendered. + */ + +// Audio utilities (MediaRecorder API) +export { + AudioRecorder, + convertToWav, + createAudioFile, + isAudioRecordingSupported +} from './audio-recording'; + +// PDF processing utilities (pdfjs-dist with DOMMatrix) +export { + convertPDFToText, + convertPDFToImage, + isPdfFile as isPdfFileFromFile, + isApplicationMimeType +} from './pdf-processing'; + +// File conversion utilities (depends on pdf-processing) +export { parseFilesToMessageExtras, type FileProcessingResult } from './convert-files-to-extra'; + +// File upload processing utilities (depends on pdf-processing, svg-to-png, webp-to-png) +export { processFilesToChatUploaded } from './process-uploaded-files'; + +// SVG utilities (Canvas/Image API) +export { svgBase64UrlToPngDataURL, isSvgFile, isSvgMimeType } from './svg-to-png'; + +// WebP utilities (Canvas/Image API) +export { webpBase64UrlToPngDataURL, isWebpFile, isWebpMimeType } from './webp-to-png'; diff --git a/tools/server/webui/src/lib/utils/config-helpers.ts b/tools/server/webui/src/lib/utils/config-helpers.ts index 2d023f8d5c5..b85242d85db 100644 --- a/tools/server/webui/src/lib/utils/config-helpers.ts +++ b/tools/server/webui/src/lib/utils/config-helpers.ts @@ -5,8 +5,6 @@ * with dynamic keys while maintaining TypeScript type safety. 
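 *
 * A brief usage sketch (illustrative, not part of this patch; assumes the
 * settings config object and a dynamic string key validated by the caller):
 *
 * @example
 * const current = getConfigValue(config, 'temperature'); // type-safe read
 * setConfigValue(config, 'temperature', 0.8);            // type-safe write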
*/ -import type { SettingsConfigType } from '$lib/types/settings'; - /** * Type-safe helper to access config properties dynamically * Provides better type safety than direct casting to Record diff --git a/tools/server/webui/src/lib/utils/convert-files-to-extra.ts b/tools/server/webui/src/lib/utils/convert-files-to-extra.ts index 70c6f772d99..6eb50f6dce4 100644 --- a/tools/server/webui/src/lib/utils/convert-files-to-extra.ts +++ b/tools/server/webui/src/lib/utils/convert-files-to-extra.ts @@ -1,10 +1,10 @@ import { convertPDFToImage, convertPDFToText } from './pdf-processing'; import { isSvgMimeType, svgBase64UrlToPngDataURL } from './svg-to-png'; import { isWebpMimeType, webpBase64UrlToPngDataURL } from './webp-to-png'; -import { FileTypeCategory } from '$lib/enums/files'; +import { FileTypeCategory, AttachmentType } from '$lib/enums'; import { config, settingsStore } from '$lib/stores/settings.svelte'; -import { supportsVision } from '$lib/stores/server.svelte'; -import { getFileTypeCategory } from '$lib/utils/file-type'; +import { modelsStore } from '$lib/stores/models.svelte'; +import { getFileTypeCategory } from '$lib/utils'; import { readFileAsText, isLikelyTextFile } from './text-files'; import { toast } from 'svelte-sonner'; @@ -31,7 +31,8 @@ export interface FileProcessingResult { } export async function parseFilesToMessageExtras( - files: ChatUploadedFile[] + files: ChatUploadedFile[], + activeModelId?: string ): Promise { const extras: DatabaseMessageExtra[] = []; const emptyFiles: string[] = []; @@ -56,7 +57,7 @@ export async function parseFilesToMessageExtras( } extras.push({ - type: 'imageFile', + type: AttachmentType.IMAGE, name: file.name, base64Url }); @@ -67,7 +68,7 @@ export async function parseFilesToMessageExtras( const base64Data = await readFileAsBase64(file.file); extras.push({ - type: 'audioFile', + type: AttachmentType.AUDIO, name: file.name, base64Data: base64Data, mimeType: file.type @@ -80,7 +81,10 @@ export async function parseFilesToMessageExtras( // Always get base64 data for preview functionality const base64Data = await readFileAsBase64(file.file); const currentConfig = config(); - const hasVisionSupport = supportsVision(); + // Use per-model vision check for router mode + const hasVisionSupport = activeModelId + ? 
modelsStore.modelSupportsVision(activeModelId) + : false; // Force PDF-to-text for non-vision models let shouldProcessAsImages = Boolean(currentConfig.pdfAsImage) && hasVisionSupport; @@ -117,7 +121,7 @@ export async function parseFilesToMessageExtras( ); extras.push({ - type: 'pdfFile', + type: AttachmentType.PDF, name: file.name, content: `PDF file with ${images.length} pages`, images: images, @@ -134,7 +138,7 @@ export async function parseFilesToMessageExtras( const content = await convertPDFToText(file.file); extras.push({ - type: 'pdfFile', + type: AttachmentType.PDF, name: file.name, content: content, processedAsImages: false, @@ -151,7 +155,7 @@ export async function parseFilesToMessageExtras( }); extras.push({ - type: 'pdfFile', + type: AttachmentType.PDF, name: file.name, content: content, processedAsImages: false, @@ -171,7 +175,7 @@ export async function parseFilesToMessageExtras( emptyFiles.push(file.name); } else if (isLikelyTextFile(content)) { extras.push({ - type: 'textFile', + type: AttachmentType.TEXT, name: file.name, content: content }); diff --git a/tools/server/webui/src/lib/utils/file-preview.ts b/tools/server/webui/src/lib/utils/file-preview.ts index 3f887ec535f..115f8727a97 100644 --- a/tools/server/webui/src/lib/utils/file-preview.ts +++ b/tools/server/webui/src/lib/utils/file-preview.ts @@ -1,25 +1,38 @@ /** - * Formats file size in bytes to human readable format - * @param bytes - File size in bytes - * @returns Formatted file size string + * Gets a display label for a file type from various input formats + * + * Handles: + * - MIME types: 'application/pdf' → 'PDF' + * - AttachmentType values: 'PDF', 'AUDIO' → 'PDF', 'AUDIO' + * - File names: 'document.pdf' → 'PDF' + * - Unknown: returns 'FILE' + * + * @param input - MIME type, AttachmentType value, or file name + * @returns Formatted file type label (uppercase) */ -export function formatFileSize(bytes: number): string { - if (bytes === 0) return '0 Bytes'; +export function getFileTypeLabel(input: string | undefined): string { + if (!input) return 'FILE'; - const k = 1024; - const sizes = ['Bytes', 'KB', 'MB', 'GB']; - const i = Math.floor(Math.log(bytes) / Math.log(k)); + // Handle MIME types (contains '/') + if (input.includes('/')) { + const subtype = input.split('/').pop(); + if (subtype) { + // Handle special cases like 'vnd.ms-excel' → 'EXCEL' + if (subtype.includes('.')) { + return subtype.split('.').pop()?.toUpperCase() || 'FILE'; + } + return subtype.toUpperCase(); + } + } - return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i]; -} + // Handle file names (contains '.') + if (input.includes('.')) { + const ext = input.split('.').pop(); + if (ext) return ext.toUpperCase(); + } -/** - * Gets a display label for a file type - * @param fileType - The file type/mime type - * @returns Formatted file type label - */ -export function getFileTypeLabel(fileType: string): string { - return fileType.split('/').pop()?.toUpperCase() || 'FILE'; + // Handle AttachmentType or other plain strings + return input.toUpperCase(); } /** diff --git a/tools/server/webui/src/lib/utils/file-type.ts b/tools/server/webui/src/lib/utils/file-type.ts index ccfc2a3de12..f096b463d40 100644 --- a/tools/server/webui/src/lib/utils/file-type.ts +++ b/tools/server/webui/src/lib/utils/file-type.ts @@ -4,42 +4,151 @@ import { PDF_FILE_TYPES, TEXT_FILE_TYPES } from '$lib/constants/supported-file-types'; -import { FileTypeCategory } from '$lib/enums/files'; +import { + FileExtensionAudio, + FileExtensionImage, + 
FileExtensionPdf, + FileExtensionText, + FileTypeCategory, + MimeTypeApplication, + MimeTypeAudio, + MimeTypeImage, + MimeTypeText +} from '$lib/enums'; export function getFileTypeCategory(mimeType: string): FileTypeCategory | null { - if ( - Object.values(IMAGE_FILE_TYPES).some((type) => - (type.mimeTypes as readonly string[]).includes(mimeType) - ) - ) { - return FileTypeCategory.IMAGE; - } + switch (mimeType) { + // Images + case MimeTypeImage.JPEG: + case MimeTypeImage.PNG: + case MimeTypeImage.GIF: + case MimeTypeImage.WEBP: + case MimeTypeImage.SVG: + return FileTypeCategory.IMAGE; - if ( - Object.values(AUDIO_FILE_TYPES).some((type) => - (type.mimeTypes as readonly string[]).includes(mimeType) - ) - ) { - return FileTypeCategory.AUDIO; - } + // Audio + case MimeTypeAudio.MP3_MPEG: + case MimeTypeAudio.MP3: + case MimeTypeAudio.MP4: + case MimeTypeAudio.WAV: + case MimeTypeAudio.WEBM: + case MimeTypeAudio.WEBM_OPUS: + return FileTypeCategory.AUDIO; - if ( - Object.values(PDF_FILE_TYPES).some((type) => - (type.mimeTypes as readonly string[]).includes(mimeType) - ) - ) { - return FileTypeCategory.PDF; - } + // PDF + case MimeTypeApplication.PDF: + return FileTypeCategory.PDF; - if ( - Object.values(TEXT_FILE_TYPES).some((type) => - (type.mimeTypes as readonly string[]).includes(mimeType) - ) - ) { - return FileTypeCategory.TEXT; + // Text + case MimeTypeText.PLAIN: + case MimeTypeText.MARKDOWN: + case MimeTypeText.ASCIIDOC: + case MimeTypeText.JAVASCRIPT: + case MimeTypeText.JAVASCRIPT_APP: + case MimeTypeText.TYPESCRIPT: + case MimeTypeText.JSX: + case MimeTypeText.TSX: + case MimeTypeText.CSS: + case MimeTypeText.HTML: + case MimeTypeText.JSON: + case MimeTypeText.XML_TEXT: + case MimeTypeText.XML_APP: + case MimeTypeText.YAML_TEXT: + case MimeTypeText.YAML_APP: + case MimeTypeText.CSV: + case MimeTypeText.PYTHON: + case MimeTypeText.JAVA: + case MimeTypeText.CPP_SRC: + case MimeTypeText.C_SRC: + case MimeTypeText.C_HDR: + case MimeTypeText.PHP: + case MimeTypeText.RUBY: + case MimeTypeText.GO: + case MimeTypeText.RUST: + case MimeTypeText.SHELL: + case MimeTypeText.BAT: + case MimeTypeText.SQL: + case MimeTypeText.R: + case MimeTypeText.SCALA: + case MimeTypeText.KOTLIN: + case MimeTypeText.SWIFT: + case MimeTypeText.DART: + case MimeTypeText.VUE: + case MimeTypeText.SVELTE: + case MimeTypeText.LATEX: + case MimeTypeText.BIBTEX: + return FileTypeCategory.TEXT; + + default: + return null; } +} - return null; +export function getFileTypeCategoryByExtension(filename: string): FileTypeCategory | null { + const extension = filename.toLowerCase().substring(filename.lastIndexOf('.')); + + switch (extension) { + // Images + case FileExtensionImage.JPG: + case FileExtensionImage.JPEG: + case FileExtensionImage.PNG: + case FileExtensionImage.GIF: + case FileExtensionImage.WEBP: + case FileExtensionImage.SVG: + return FileTypeCategory.IMAGE; + + // Audio + case FileExtensionAudio.MP3: + case FileExtensionAudio.WAV: + return FileTypeCategory.AUDIO; + + // PDF + case FileExtensionPdf.PDF: + return FileTypeCategory.PDF; + + // Text + case FileExtensionText.TXT: + case FileExtensionText.MD: + case FileExtensionText.ADOC: + case FileExtensionText.JS: + case FileExtensionText.TS: + case FileExtensionText.JSX: + case FileExtensionText.TSX: + case FileExtensionText.CSS: + case FileExtensionText.HTML: + case FileExtensionText.HTM: + case FileExtensionText.JSON: + case FileExtensionText.XML: + case FileExtensionText.YAML: + case FileExtensionText.YML: + case FileExtensionText.CSV: + case 
FileExtensionText.LOG: + case FileExtensionText.PY: + case FileExtensionText.JAVA: + case FileExtensionText.CPP: + case FileExtensionText.C: + case FileExtensionText.H: + case FileExtensionText.PHP: + case FileExtensionText.RB: + case FileExtensionText.GO: + case FileExtensionText.RS: + case FileExtensionText.SH: + case FileExtensionText.BAT: + case FileExtensionText.SQL: + case FileExtensionText.R: + case FileExtensionText.SCALA: + case FileExtensionText.KT: + case FileExtensionText.SWIFT: + case FileExtensionText.DART: + case FileExtensionText.VUE: + case FileExtensionText.SVELTE: + case FileExtensionText.TEX: + case FileExtensionText.BIB: + return FileTypeCategory.TEXT; + + default: + return null; + } } export function getFileTypeByExtension(filename: string): string | null { diff --git a/tools/server/webui/src/lib/utils/formatters.ts b/tools/server/webui/src/lib/utils/formatters.ts new file mode 100644 index 00000000000..ae9f59a39c3 --- /dev/null +++ b/tools/server/webui/src/lib/utils/formatters.ts @@ -0,0 +1,53 @@ +/** + * Formats file size in bytes to human readable format + * Supports Bytes, KB, MB, and GB + * + * @param bytes - File size in bytes (or unknown for safety) + * @returns Formatted file size string + */ +export function formatFileSize(bytes: number | unknown): string { + if (typeof bytes !== 'number') return 'Unknown'; + if (bytes === 0) return '0 Bytes'; + + const k = 1024; + const sizes = ['Bytes', 'KB', 'MB', 'GB']; + const i = Math.floor(Math.log(bytes) / Math.log(k)); + + return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i]; +} + +/** + * Format parameter count to human-readable format (B, M, K) + * + * @param params - Parameter count + * @returns Human-readable parameter count + */ +export function formatParameters(params: number | unknown): string { + if (typeof params !== 'number') return 'Unknown'; + + if (params >= 1e9) { + return `${(params / 1e9).toFixed(2)}B`; + } + + if (params >= 1e6) { + return `${(params / 1e6).toFixed(2)}M`; + } + + if (params >= 1e3) { + return `${(params / 1e3).toFixed(2)}K`; + } + + return params.toString(); +} + +/** + * Format number with locale-specific thousands separators + * + * @param num - Number to format + * @returns Human-readable number + */ +export function formatNumber(num: number | unknown): string { + if (typeof num !== 'number') return 'Unknown'; + + return num.toLocaleString(); +} diff --git a/tools/server/webui/src/lib/utils/index.ts b/tools/server/webui/src/lib/utils/index.ts new file mode 100644 index 00000000000..d8a893ed645 --- /dev/null +++ b/tools/server/webui/src/lib/utils/index.ts @@ -0,0 +1,87 @@ +/** + * Unified exports for all utility functions + * Import utilities from '$lib/utils' for cleaner imports + * + * For browser-only utilities (pdf-processing, audio-recording, svg-to-png, + * webp-to-png, process-uploaded-files, convert-files-to-extra), use: + * import { ... 
} from '$lib/utils/browser-only' + */ + +// API utilities +export { getAuthHeaders, getJsonHeaders } from './api-headers'; +export { validateApiKey } from './api-key-validation'; + +// Attachment utilities +export { + getAttachmentDisplayItems, + type AttachmentDisplayItemsOptions +} from './attachment-display'; +export { isTextFile, isImageFile, isPdfFile, isAudioFile } from './attachment-type'; + +// Textarea utilities +export { default as autoResizeTextarea } from './autoresize-textarea'; + +// Branching utilities +export { + filterByLeafNodeId, + findLeafNode, + findDescendantMessages, + getMessageSiblings, + getMessageDisplayList, + hasMessageSiblings, + getNextSibling, + getPreviousSibling +} from './branching'; + +// Config helpers +export { setConfigValue, getConfigValue, configToParameterRecord } from './config-helpers'; + +// Conversation utilities +export { createMessageCountMap, getMessageCount } from './conversation-utils'; + +// Clipboard utilities +export { copyToClipboard, copyCodeToClipboard } from './copy'; + +// File preview utilities +export { getFileTypeLabel, getPreviewText } from './file-preview'; + +// File type utilities +export { + getFileTypeCategory, + getFileTypeCategoryByExtension, + getFileTypeByExtension, + isFileTypeSupported +} from './file-type'; + +// Formatting utilities +export { formatFileSize, formatParameters, formatNumber } from './formatters'; + +// IME utilities +export { isIMEComposing } from './is-ime-composing'; + +// LaTeX utilities +export { maskInlineLaTeX, preprocessLaTeX } from './latex-protection'; + +// Modality file validation utilities +export { + isFileTypeSupportedByModel, + filterFilesByModalities, + generateModalityErrorMessage, + generateModalityAwareAcceptString, + type ModalityCapabilities +} from './modality-file-validation'; + +// Model name utilities +export { normalizeModelName, isValidModelName } from './model-names'; + +// Portal utilities +export { portalToBody } from './portal-to-body'; + +// Precision utilities +export { normalizeFloatingPoint, normalizeNumber } from './precision'; + +// Syntax highlighting utilities +export { getLanguageFromFilename } from './syntax-highlight-language'; + +// Text file utilities +export { isTextFileByName, readFileAsText, isLikelyTextFile } from './text-files'; diff --git a/tools/server/webui/src/lib/utils/modality-file-validation.ts b/tools/server/webui/src/lib/utils/modality-file-validation.ts index c77bf88c3ab..e3c00f9e97d 100644 --- a/tools/server/webui/src/lib/utils/modality-file-validation.ts +++ b/tools/server/webui/src/lib/utils/modality-file-validation.ts @@ -3,8 +3,7 @@ * Ensures only compatible file types are processed based on model capabilities */ -import { getFileTypeCategory } from '$lib/utils/file-type'; -import { supportsVision, supportsAudio } from '$lib/stores/server.svelte'; +import { getFileTypeCategory } from '$lib/utils'; import { FileExtensionAudio, FileExtensionImage, @@ -15,15 +14,26 @@ import { MimeTypeApplication, MimeTypeText, FileTypeCategory -} from '$lib/enums/files'; +} from '$lib/enums'; + +/** Modality capabilities for file validation */ +export interface ModalityCapabilities { + hasVision: boolean; + hasAudio: boolean; +} /** - * Check if a file type is supported by the current model's modalities + * Check if a file type is supported by the given modalities * @param filename - The filename to check * @param mimeType - The MIME type of the file - * @returns true if the file type is supported by the current model + * @param capabilities - The modality 
capabilities to check against + * @returns true if the file type is supported */ -export function isFileTypeSupportedByModel(filename: string, mimeType?: string): boolean { +export function isFileTypeSupportedByModel( + filename: string, + mimeType: string | undefined, + capabilities: ModalityCapabilities +): boolean { const category = mimeType ? getFileTypeCategory(mimeType) : null; // If we can't determine the category from MIME type, fall back to general support check @@ -44,11 +54,11 @@ export function isFileTypeSupportedByModel(filename: string, mimeType?: string): case FileTypeCategory.IMAGE: // Images require vision support - return supportsVision(); + return capabilities.hasVision; case FileTypeCategory.AUDIO: // Audio files require audio support - return supportsAudio(); + return capabilities.hasAudio; default: // Unknown categories - be conservative and allow @@ -59,9 +69,13 @@ export function isFileTypeSupportedByModel(filename: string, mimeType?: string): /** * Filter files based on model modalities and return supported/unsupported lists * @param files - Array of files to filter + * @param capabilities - The modality capabilities to check against * @returns Object with supportedFiles and unsupportedFiles arrays */ -export function filterFilesByModalities(files: File[]): { +export function filterFilesByModalities( + files: File[], + capabilities: ModalityCapabilities +): { supportedFiles: File[]; unsupportedFiles: File[]; modalityReasons: Record; @@ -70,8 +84,7 @@ export function filterFilesByModalities(files: File[]): { const unsupportedFiles: File[] = []; const modalityReasons: Record = {}; - const hasVision = supportsVision(); - const hasAudio = supportsAudio(); + const { hasVision, hasAudio } = capabilities; for (const file of files) { const category = getFileTypeCategory(file.type); @@ -119,16 +132,17 @@ export function filterFilesByModalities(files: File[]): { * Generate a user-friendly error message for unsupported files * @param unsupportedFiles - Array of unsupported files * @param modalityReasons - Reasons why files are unsupported + * @param capabilities - The modality capabilities to check against * @returns Formatted error message */ export function generateModalityErrorMessage( unsupportedFiles: File[], - modalityReasons: Record + modalityReasons: Record, + capabilities: ModalityCapabilities ): string { if (unsupportedFiles.length === 0) return ''; - const hasVision = supportsVision(); - const hasAudio = supportsAudio(); + const { hasVision, hasAudio } = capabilities; let message = ''; @@ -152,12 +166,12 @@ export function generateModalityErrorMessage( } /** - * Generate file input accept string based on current model modalities + * Generate file input accept string based on model modalities + * @param capabilities - The modality capabilities to check against * @returns Accept string for HTML file input element */ -export function generateModalityAwareAcceptString(): string { - const hasVision = supportsVision(); - const hasAudio = supportsAudio(); +export function generateModalityAwareAcceptString(capabilities: ModalityCapabilities): string { + const { hasVision, hasAudio } = capabilities; const acceptedExtensions: string[] = []; const acceptedMimeTypes: string[] = []; diff --git a/tools/server/webui/src/lib/utils/model-names.test.ts b/tools/server/webui/src/lib/utils/model-names.test.ts index e19e92f7770..ca85df3d30a 100644 --- a/tools/server/webui/src/lib/utils/model-names.test.ts +++ b/tools/server/webui/src/lib/utils/model-names.test.ts @@ -2,12 +2,19 @@ 
import { describe, expect, it } from 'vitest'; import { isValidModelName, normalizeModelName } from './model-names'; describe('normalizeModelName', () => { - it('extracts filename from forward slash path', () => { - expect(normalizeModelName('models/model-name-1')).toBe('model-name-1'); + it('preserves Hugging Face org/model format (single slash)', () => { + // Single slash is treated as Hugging Face format and preserved + expect(normalizeModelName('meta-llama/Llama-3.1-8B')).toBe('meta-llama/Llama-3.1-8B'); + expect(normalizeModelName('models/model-name-1')).toBe('models/model-name-1'); + }); + + it('extracts filename from multi-segment paths', () => { + // Multiple slashes -> extract just the filename expect(normalizeModelName('path/to/model/model-name-2')).toBe('model-name-2'); + expect(normalizeModelName('/absolute/path/to/model')).toBe('model'); }); - it('extracts filename from backslash path', () => { + it('extracts filename from backslash paths', () => { expect(normalizeModelName('C\\Models\\model-name-1')).toBe('model-name-1'); expect(normalizeModelName('path\\to\\model\\model-name-2')).toBe('model-name-2'); }); diff --git a/tools/server/webui/src/lib/utils/model-names.ts b/tools/server/webui/src/lib/utils/model-names.ts index b1ea9d95361..c0a1e1c578f 100644 --- a/tools/server/webui/src/lib/utils/model-names.ts +++ b/tools/server/webui/src/lib/utils/model-names.ts @@ -1,16 +1,19 @@ /** - * Normalizes a model name by extracting the filename from a path. + * Normalizes a model name by extracting the filename from a path, but preserves Hugging Face repository format. * * Handles both forward slashes (/) and backslashes (\) as path separators. - * If the model name is just a filename (no path), returns it as-is. + * - If the model name has exactly one slash (org/model format), preserves the full "org/model" name + * - If the model name has no slash or multiple slashes, extracts just the filename + * - If the model name is just a filename (no path), returns it as-is. 
* * @param modelName - The model name or path to normalize - * @returns The normalized model name (filename only) + * @returns The normalized model name * * @example - * normalizeModelName('models/llama-3.1-8b') // Returns: 'llama-3.1-8b' - * normalizeModelName('C:\\Models\\gpt-4') // Returns: 'gpt-4' - * normalizeModelName('simple-model') // Returns: 'simple-model' + * normalizeModelName('models/llama-3.1-8b') // Returns: 'llama-3.1-8b' (multiple slashes -> filename) + * normalizeModelName('C:\\Models\\gpt-4') // Returns: 'gpt-4' (multiple slashes -> filename) + * normalizeModelName('meta-llama/Llama-3.1-8B') // Returns: 'meta-llama/Llama-3.1-8B' (Hugging Face format) + * normalizeModelName('simple-model') // Returns: 'simple-model' (no slash) * normalizeModelName(' spaced ') // Returns: 'spaced' * normalizeModelName('') // Returns: '' */ @@ -22,6 +25,20 @@ export function normalizeModelName(modelName: string): string { } const segments = trimmed.split(/[\\/]/); + + // If we have exactly 2 segments (one slash), treat it as Hugging Face repo format + // and preserve the full "org/model" format + if (segments.length === 2) { + const [org, model] = segments; + const trimmedOrg = org?.trim(); + const trimmedModel = model?.trim(); + + if (trimmedOrg && trimmedModel) { + return `${trimmedOrg}/${trimmedModel}`; + } + } + + // For other cases (no slash, or multiple slashes), extract just the filename const candidate = segments.pop(); const normalized = candidate?.trim(); diff --git a/tools/server/webui/src/lib/utils/pdf-processing.ts b/tools/server/webui/src/lib/utils/pdf-processing.ts index 49b0f34baea..84c456d109d 100644 --- a/tools/server/webui/src/lib/utils/pdf-processing.ts +++ b/tools/server/webui/src/lib/utils/pdf-processing.ts @@ -4,7 +4,7 @@ */ import { browser } from '$app/environment'; -import { MimeTypeApplication, MimeTypeImage } from '$lib/enums/files'; +import { MimeTypeApplication, MimeTypeImage } from '$lib/enums'; import * as pdfjs from 'pdfjs-dist'; type TextContent = { diff --git a/tools/server/webui/src/lib/utils/process-uploaded-files.ts b/tools/server/webui/src/lib/utils/process-uploaded-files.ts index 3fb0a9d1a94..f00116ccc1d 100644 --- a/tools/server/webui/src/lib/utils/process-uploaded-files.ts +++ b/tools/server/webui/src/lib/utils/process-uploaded-files.ts @@ -1,11 +1,12 @@ import { isSvgMimeType, svgBase64UrlToPngDataURL } from './svg-to-png'; import { isTextFileByName } from './text-files'; import { isWebpMimeType, webpBase64UrlToPngDataURL } from './webp-to-png'; -import { FileTypeCategory } from '$lib/enums/files'; -import { getFileTypeCategory } from '$lib/utils/file-type'; -import { supportsVision } from '$lib/stores/server.svelte'; +import { FileTypeCategory } from '$lib/enums'; +import { modelsStore } from '$lib/stores/models.svelte'; import { settingsStore } from '$lib/stores/settings.svelte'; import { toast } from 'svelte-sonner'; +import { getFileTypeCategory } from '$lib/utils'; +import { convertPDFToText } from './pdf-processing'; /** * Read a file as a data URL (base64 encoded) @@ -47,7 +48,10 @@ function readFileAsUTF8(file: File): Promise { * @param files - Array of File objects to process * @returns Promise resolving to array of ChatUploadedFile objects */ -export async function processFilesToChatUploaded(files: File[]): Promise { +export async function processFilesToChatUploaded( + files: File[], + activeModelId?: string +): Promise { const results: ChatUploadedFile[] = []; for (const file of files) { @@ -92,11 +96,19 @@ export async function 
processFilesToChatUploaded(files: File[]): Promise import '../app.css'; import { page } from '$app/state'; + import { untrack } from 'svelte'; import { ChatSidebar, DialogConversationTitleUpdate } from '$lib/components/app'; - import { - activeMessages, - isLoading, - setTitleUpdateConfirmationCallback - } from '$lib/stores/chat.svelte'; + import { isLoading } from '$lib/stores/chat.svelte'; + import { conversationsStore, activeMessages } from '$lib/stores/conversations.svelte'; import * as Sidebar from '$lib/components/ui/sidebar/index.js'; - import { serverStore } from '$lib/stores/server.svelte'; + import * as Tooltip from '$lib/components/ui/tooltip'; + import { isRouterMode, serverStore } from '$lib/stores/server.svelte'; import { config, settingsStore } from '$lib/stores/settings.svelte'; import { ModeWatcher } from 'mode-watcher'; import { Toaster } from 'svelte-sonner'; import { goto } from '$app/navigation'; + import { modelsStore } from '$lib/stores/models.svelte'; + import { TOOLTIP_DELAY_DURATION } from '$lib/constants/tooltip-config'; let { children } = $props(); @@ -90,20 +91,42 @@ } }); - // Initialize server properties on app load + // Initialize server properties on app load (run once) $effect(() => { - serverStore.fetchServerProps(); + // Only fetch if we don't already have props + if (!serverStore.props) { + untrack(() => { + serverStore.fetch(); + }); + } }); // Sync settings when server props are loaded $effect(() => { - const serverProps = serverStore.serverProps; + const serverProps = serverStore.props; if (serverProps?.default_generation_settings?.params) { settingsStore.syncWithServerDefaults(); } }); + // Fetch router models when in router mode (for status and modalities) + // Wait for models to be loaded first, run only once + let routerModelsFetched = false; + + $effect(() => { + const isRouter = isRouterMode(); + const modelsCount = modelsStore.models.length; + + // Only fetch router models once when we have models loaded and in router mode + if (isRouter && modelsCount > 0 && !routerModelsFetched) { + routerModelsFetched = true; + untrack(() => { + modelsStore.fetchRouterModels(); + }); + } + }); + // Monitor API key changes and redirect to error page if removed or changed when required $effect(() => { const apiKey = config().apiKey; @@ -135,46 +158,50 @@ // Set up title update confirmation callback $effect(() => { - setTitleUpdateConfirmationCallback(async (currentTitle: string, newTitle: string) => { - return new Promise((resolve) => { - titleUpdateCurrentTitle = currentTitle; - titleUpdateNewTitle = newTitle; - titleUpdateResolve = resolve; - titleUpdateDialogOpen = true; - }); - }); + conversationsStore.setTitleUpdateConfirmationCallback( + async (currentTitle: string, newTitle: string) => { + return new Promise((resolve) => { + titleUpdateCurrentTitle = currentTitle; + titleUpdateNewTitle = newTitle; + titleUpdateResolve = resolve; + titleUpdateDialogOpen = true; + }); + } + ); }); - - - - - - - -
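The layout script above guards each fetch with a plain boolean plus `untrack`, so an `$effect` re-runs while its tracked inputs change but the fetch itself runs at most once and registers no extra reactive dependencies. A minimal sketch of the same run-once pattern in isolation (`someStore` and `fetchDetails` are hypothetical names, not part of this patch):

	import { untrack } from 'svelte';

	let fetched = false; // plain variable: writes to it do not re-trigger the effect

	$effect(() => {
		const ready = someStore.items.length > 0; // tracked dependency

		if (ready && !fetched) {
			fetched = true;
			// untrack() keeps the effect from subscribing to state read inside fetchDetails()
			untrack(() => someStore.fetchDetails());
		}
	});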
[+layout.svelte template hunk — markup lost in extraction: both the removed and the added trees render {@render children?.()} inside the sidebar layout]
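The `ApiRouterModels*` shapes declared in api.d.ts pair naturally with the `getJsonHeaders()` helper when talking to the router server. A hedged sketch of a load call (the `/models/load` route is an assumption made for illustration; only the payload types and the header helper come from this patch):

	import { getJsonHeaders } from '$lib/utils';
	import type { ApiRouterModelsLoadRequest, ApiRouterModelsLoadResponse } from '$lib/types';

	// NOTE: the endpoint path below is hypothetical; this patch defines the
	// request/response shapes, not the route itself.
	async function loadModel(model: string): Promise<ApiRouterModelsLoadResponse> {
		const body: ApiRouterModelsLoadRequest = { model };

		const res = await fetch('/models/load', {
			method: 'POST',
			headers: getJsonHeaders(),
			body: JSON.stringify(body)
		});

		return res.json();
	}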
diff --git a/tools/server/webui/src/routes/+page.svelte b/tools/server/webui/src/routes/+page.svelte index cd18dabccb9..32a7c2e6e42 100644 --- a/tools/server/webui/src/routes/+page.svelte +++ b/tools/server/webui/src/routes/+page.svelte @@ -1,21 +1,79 @@ @@ -25,3 +83,9 @@ + + diff --git a/tools/server/webui/src/routes/+page.ts b/tools/server/webui/src/routes/+page.ts index a984c00457f..7905af6b513 100644 --- a/tools/server/webui/src/routes/+page.ts +++ b/tools/server/webui/src/routes/+page.ts @@ -1,5 +1,5 @@ import type { PageLoad } from './$types'; -import { validateApiKey } from '$lib/utils/api-key-validation'; +import { validateApiKey } from '$lib/utils'; export const load: PageLoad = async ({ fetch }) => { await validateApiKey(fetch); diff --git a/tools/server/webui/src/routes/chat/[id]/+page.svelte b/tools/server/webui/src/routes/chat/[id]/+page.svelte index af91a8e9ef7..b897ef5bcd4 100644 --- a/tools/server/webui/src/routes/chat/[id]/+page.svelte +++ b/tools/server/webui/src/routes/chat/[id]/+page.svelte @@ -1,30 +1,144 @@ + + + + + + + diff --git a/tools/server/webui/tests/client/page.svelte.test.ts b/tools/server/webui/tests/client/page.svelte.test.ts new file mode 100644 index 00000000000..6849beb27b2 --- /dev/null +++ b/tools/server/webui/tests/client/page.svelte.test.ts @@ -0,0 +1,11 @@ +import { describe, it, expect } from 'vitest'; +import { render } from 'vitest-browser-svelte'; +import TestWrapper from './components/TestWrapper.svelte'; + +describe('/+page.svelte', () => { + it('should render page without throwing', async () => { + // Basic smoke test - page should render without throwing errors + // API calls will fail in test environment but component should still mount + expect(() => render(TestWrapper)).not.toThrow(); + }); +}); diff --git a/tools/server/webui/e2e/demo.test.ts b/tools/server/webui/tests/e2e/demo.test.ts similarity index 100% rename from tools/server/webui/e2e/demo.test.ts rename to tools/server/webui/tests/e2e/demo.test.ts diff --git a/tools/server/webui/src/demo.spec.ts b/tools/server/webui/tests/server/demo.spec.ts similarity index 100% rename from tools/server/webui/src/demo.spec.ts rename to tools/server/webui/tests/server/demo.spec.ts diff --git a/tools/server/webui/src/stories/ChatForm.stories.svelte b/tools/server/webui/tests/stories/ChatForm.stories.svelte similarity index 68% rename from tools/server/webui/src/stories/ChatForm.stories.svelte rename to tools/server/webui/tests/stories/ChatForm.stories.svelte index 82848e4fbf1..fe6f14bd8e0 100644 --- a/tools/server/webui/src/stories/ChatForm.stories.svelte +++ b/tools/server/webui/tests/stories/ChatForm.stories.svelte @@ -70,17 +70,19 @@ await expect(acceptAttr).not.toContain('image/'); await expect(acceptAttr).not.toContain('audio/'); + // Open file attachments dropdown const fileUploadButton = canvas.getByText('Attach files'); - await userEvent.click(fileUploadButton); - const recordButton = canvas.getAllByRole('button', { name: 'Start recording' })[1]; + // Check dropdown menu items are disabled (no modalities) const imagesButton = document.querySelector('.images-button'); const audioButton = document.querySelector('.audio-button'); - await expect(recordButton).toBeDisabled(); await expect(imagesButton).toHaveAttribute('data-disabled'); await expect(audioButton).toHaveAttribute('data-disabled'); + + // Close dropdown by pressing Escape + await userEvent.keyboard('{Escape}'); }} /> @@ -92,31 +94,21 @@ play={async ({ canvas, userEvent }) => { mockServerProps(mockConfigs.visionOnly); - // Test 
initial file input state (should accept images but not audio) - const fileInput = document.querySelector('input[type="file"]'); - const acceptAttr = fileInput?.getAttribute('accept'); - console.log('Vision modality accept attr:', acceptAttr); - + // Open file attachments dropdown and verify it works const fileUploadButton = canvas.getByText('Attach files'); await userEvent.click(fileUploadButton); - // Test that record button is disabled (no audio support) - const recordButton = canvas.getAllByRole('button', { name: 'Start recording' })[1]; - await expect(recordButton).toBeDisabled(); - - // Test that Images button is enabled (vision support) + // Verify dropdown menu items exist const imagesButton = document.querySelector('.images-button'); - await expect(imagesButton).not.toHaveAttribute('data-disabled'); - - // Test that Audio button is disabled (no audio support) const audioButton = document.querySelector('.audio-button'); - await expect(audioButton).toHaveAttribute('data-disabled'); - // Fix for dropdown menu side effect - const body = document.querySelector('body'); - if (body) body.style.pointerEvents = 'all'; + await expect(imagesButton).toBeInTheDocument(); + await expect(audioButton).toBeInTheDocument(); - console.log('✅ Vision modality: Images enabled, Audio/Recording disabled'); + // Close dropdown by pressing Escape + await userEvent.keyboard('{Escape}'); + + console.log('✅ Vision modality: Dropdown menu verified'); }} /> @@ -126,31 +118,21 @@ play={async ({ canvas, userEvent }) => { mockServerProps(mockConfigs.audioOnly); - // Test initial file input state (should accept audio but not images) - const fileInput = document.querySelector('input[type="file"]'); - const acceptAttr = fileInput?.getAttribute('accept'); - console.log('Audio modality accept attr:', acceptAttr); - + // Open file attachments dropdown and verify it works const fileUploadButton = canvas.getByText('Attach files'); await userEvent.click(fileUploadButton); - // Test that record button is enabled (audio support) - const recordButton = canvas.getAllByRole('button', { name: 'Start recording' })[1]; - await expect(recordButton).not.toBeDisabled(); - - // Test that Images button is disabled (no vision support) + // Verify dropdown menu items exist const imagesButton = document.querySelector('.images-button'); - await expect(imagesButton).toHaveAttribute('data-disabled'); - - // Test that Audio button is enabled (audio support) const audioButton = document.querySelector('.audio-button'); - await expect(audioButton).not.toHaveAttribute('data-disabled'); - // Fix for dropdown menu side effect - const body = document.querySelector('body'); - if (body) body.style.pointerEvents = 'all'; + await expect(imagesButton).toBeInTheDocument(); + await expect(audioButton).toBeInTheDocument(); + + // Close dropdown by pressing Escape + await userEvent.keyboard('{Escape}'); - console.log('✅ Audio modality: Audio/Recording enabled, Images disabled'); + console.log('✅ Audio modality: Dropdown menu verified'); }} /> diff --git a/tools/server/webui/src/stories/ChatMessage.stories.svelte b/tools/server/webui/tests/stories/ChatMessage.stories.svelte similarity index 87% rename from tools/server/webui/src/stories/ChatMessage.stories.svelte rename to tools/server/webui/tests/stories/ChatMessage.stories.svelte index 6529b75a307..5f4de7d476f 100644 --- a/tools/server/webui/src/stories/ChatMessage.stories.svelte +++ b/tools/server/webui/tests/stories/ChatMessage.stories.svelte @@ -92,8 +92,8 @@ message: userMessage }} play={async () => { - 
const { updateConfig } = await import('$lib/stores/settings.svelte'); - updateConfig('disableReasoningFormat', false); + const { settingsStore } = await import('$lib/stores/settings.svelte'); + settingsStore.updateConfig('disableReasoningFormat', false); }} /> @@ -104,8 +104,8 @@ message: assistantMessage }} play={async () => { - const { updateConfig } = await import('$lib/stores/settings.svelte'); - updateConfig('disableReasoningFormat', false); + const { settingsStore } = await import('$lib/stores/settings.svelte'); + settingsStore.updateConfig('disableReasoningFormat', false); }} /> @@ -116,8 +116,8 @@ message: assistantWithReasoning }} play={async () => { - const { updateConfig } = await import('$lib/stores/settings.svelte'); - updateConfig('disableReasoningFormat', false); + const { settingsStore } = await import('$lib/stores/settings.svelte'); + settingsStore.updateConfig('disableReasoningFormat', false); }} /> @@ -128,8 +128,8 @@ message: rawOutputMessage }} play={async () => { - const { updateConfig } = await import('$lib/stores/settings.svelte'); - updateConfig('disableReasoningFormat', true); + const { settingsStore } = await import('$lib/stores/settings.svelte'); + settingsStore.updateConfig('disableReasoningFormat', true); }} /> @@ -140,8 +140,8 @@ }} asChild play={async () => { - const { updateConfig } = await import('$lib/stores/settings.svelte'); - updateConfig('disableReasoningFormat', false); + const { settingsStore } = await import('$lib/stores/settings.svelte'); + settingsStore.updateConfig('disableReasoningFormat', false); // Phase 1: Stream reasoning content in chunks let reasoningText = 'I need to think about this carefully. Let me break down the problem:\n\n1. The user is asking for help with something complex\n2. I should provide a thorough and helpful response\n3. I need to consider multiple approaches\n4. 
The best solution would be to explain step by step\n\nThis approach will ensure clarity and understanding.'; @@ -192,8 +192,8 @@ message: processingMessage }} play={async () => { - const { updateConfig } = await import('$lib/stores/settings.svelte'); - updateConfig('disableReasoningFormat', false); + const { settingsStore } = await import('$lib/stores/settings.svelte'); + settingsStore.updateConfig('disableReasoningFormat', false); // Import the chat store to simulate loading state const { chatStore } = await import('$lib/stores/chat.svelte'); diff --git a/tools/server/webui/src/stories/ChatSettings.stories.svelte b/tools/server/webui/tests/stories/ChatSettings.stories.svelte similarity index 100% rename from tools/server/webui/src/stories/ChatSettings.stories.svelte rename to tools/server/webui/tests/stories/ChatSettings.stories.svelte diff --git a/tools/server/webui/src/stories/ChatSidebar.stories.svelte b/tools/server/webui/tests/stories/ChatSidebar.stories.svelte similarity index 83% rename from tools/server/webui/src/stories/ChatSidebar.stories.svelte rename to tools/server/webui/tests/stories/ChatSidebar.stories.svelte index b74b246b1d1..42cea8783cd 100644 --- a/tools/server/webui/src/stories/ChatSidebar.stories.svelte +++ b/tools/server/webui/tests/stories/ChatSidebar.stories.svelte @@ -51,10 +51,10 @@ asChild name="Default" play={async () => { - const { chatStore } = await import('$lib/stores/chat.svelte'); + const { conversationsStore } = await import('$lib/stores/conversations.svelte'); waitFor(() => setTimeout(() => { - chatStore.conversations = mockConversations; + conversationsStore.conversations = mockConversations; }, 0)); }} > @@ -67,10 +67,10 @@ asChild name="SearchActive" play={async ({ userEvent }) => { - const { chatStore } = await import('$lib/stores/chat.svelte'); + const { conversationsStore } = await import('$lib/stores/conversations.svelte'); waitFor(() => setTimeout(() => { - chatStore.conversations = mockConversations; + conversationsStore.conversations = mockConversations; }, 0)); const searchTrigger = screen.getByText('Search conversations'); @@ -87,8 +87,8 @@ name="Empty" play={async () => { // Mock empty conversations store - const { chatStore } = await import('$lib/stores/chat.svelte'); - chatStore.conversations = []; + const { conversationsStore } = await import('$lib/stores/conversations.svelte'); + conversationsStore.conversations = []; }} >
diff --git a/tools/server/webui/src/stories/Introduction.mdx b/tools/server/webui/tests/stories/Introduction.mdx similarity index 100% rename from tools/server/webui/src/stories/Introduction.mdx rename to tools/server/webui/tests/stories/Introduction.mdx diff --git a/tools/server/webui/src/stories/MarkdownContent.stories.svelte b/tools/server/webui/tests/stories/MarkdownContent.stories.svelte similarity index 100% rename from tools/server/webui/src/stories/MarkdownContent.stories.svelte rename to tools/server/webui/tests/stories/MarkdownContent.stories.svelte diff --git a/tools/server/webui/src/stories/fixtures/ai-tutorial.ts b/tools/server/webui/tests/stories/fixtures/ai-tutorial.ts similarity index 100% rename from tools/server/webui/src/stories/fixtures/ai-tutorial.ts rename to tools/server/webui/tests/stories/fixtures/ai-tutorial.ts diff --git a/tools/server/webui/src/stories/fixtures/api-docs.ts b/tools/server/webui/tests/stories/fixtures/api-docs.ts similarity index 100% rename from tools/server/webui/src/stories/fixtures/api-docs.ts rename to tools/server/webui/tests/stories/fixtures/api-docs.ts diff --git a/tools/server/webui/src/stories/fixtures/assets/1.jpg b/tools/server/webui/tests/stories/fixtures/assets/1.jpg similarity index 100% rename from tools/server/webui/src/stories/fixtures/assets/1.jpg rename to tools/server/webui/tests/stories/fixtures/assets/1.jpg diff --git a/tools/server/webui/src/stories/fixtures/assets/beautiful-flowers-lotus.webp b/tools/server/webui/tests/stories/fixtures/assets/beautiful-flowers-lotus.webp similarity index 100% rename from tools/server/webui/src/stories/fixtures/assets/beautiful-flowers-lotus.webp rename to tools/server/webui/tests/stories/fixtures/assets/beautiful-flowers-lotus.webp diff --git a/tools/server/webui/src/stories/fixtures/assets/example.pdf b/tools/server/webui/tests/stories/fixtures/assets/example.pdf similarity index 100% rename from tools/server/webui/src/stories/fixtures/assets/example.pdf rename to tools/server/webui/tests/stories/fixtures/assets/example.pdf diff --git a/tools/server/webui/src/stories/fixtures/assets/hf-logo.svg b/tools/server/webui/tests/stories/fixtures/assets/hf-logo.svg similarity index 100% rename from tools/server/webui/src/stories/fixtures/assets/hf-logo.svg rename to tools/server/webui/tests/stories/fixtures/assets/hf-logo.svg diff --git a/tools/server/webui/src/stories/fixtures/blog-post.ts b/tools/server/webui/tests/stories/fixtures/blog-post.ts similarity index 100% rename from tools/server/webui/src/stories/fixtures/blog-post.ts rename to tools/server/webui/tests/stories/fixtures/blog-post.ts diff --git a/tools/server/webui/src/stories/fixtures/data-analysis.ts b/tools/server/webui/tests/stories/fixtures/data-analysis.ts similarity index 100% rename from tools/server/webui/src/stories/fixtures/data-analysis.ts rename to tools/server/webui/tests/stories/fixtures/data-analysis.ts diff --git a/tools/server/webui/src/stories/fixtures/empty.ts b/tools/server/webui/tests/stories/fixtures/empty.ts similarity index 100% rename from tools/server/webui/src/stories/fixtures/empty.ts rename to tools/server/webui/tests/stories/fixtures/empty.ts diff --git a/tools/server/webui/src/stories/fixtures/math-formulas.ts b/tools/server/webui/tests/stories/fixtures/math-formulas.ts similarity index 100% rename from tools/server/webui/src/stories/fixtures/math-formulas.ts rename to tools/server/webui/tests/stories/fixtures/math-formulas.ts diff --git a/tools/server/webui/src/stories/fixtures/readme.ts 
b/tools/server/webui/tests/stories/fixtures/readme.ts similarity index 100% rename from tools/server/webui/src/stories/fixtures/readme.ts rename to tools/server/webui/tests/stories/fixtures/readme.ts diff --git a/tools/server/webui/tests/stories/fixtures/storybook-mocks.ts b/tools/server/webui/tests/stories/fixtures/storybook-mocks.ts new file mode 100644 index 00000000000..c40a74655a0 --- /dev/null +++ b/tools/server/webui/tests/stories/fixtures/storybook-mocks.ts @@ -0,0 +1,81 @@ +import { serverStore } from '$lib/stores/server.svelte'; +import { modelsStore } from '$lib/stores/models.svelte'; + +/** + * Mock server properties for Storybook testing + * This utility allows setting mock server configurations without polluting production code + */ +export function mockServerProps(props: Partial<ApiLlamaCppServerProps>): void { + // Reset any pointer-events from previous tests (dropdown cleanup) + const body = document.querySelector('body'); + if (body) body.style.pointerEvents = ''; + + // Directly set the props for testing purposes + (serverStore as unknown as { props: ApiLlamaCppServerProps }).props = { + model_path: props.model_path || 'test-model', + modalities: { + vision: props.modalities?.vision ?? false, + audio: props.modalities?.audio ?? false + }, + ...props + } as ApiLlamaCppServerProps; + + // Set router mode role so activeModelId can be set + (serverStore as unknown as { props: ApiLlamaCppServerProps }).props.role = 'ROUTER'; + + // Also mock modelsStore methods for modality checking + const vision = props.modalities?.vision ?? false; + const audio = props.modalities?.audio ?? false; + + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (modelsStore as any).modelSupportsVision = () => vision; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (modelsStore as any).modelSupportsAudio = () => audio; + + // Mock models list with a test model so activeModelId can be resolved + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (modelsStore as any).models = [ + { + id: 'test-model', + name: 'Test Model', + model: 'test-model' + } + ]; + + // Mock selectedModelId + // eslint-disable-next-line @typescript-eslint/no-explicit-any + (modelsStore as any).selectedModelId = 'test-model'; +} + +/** + * Reset server store to clean state for testing + */ +export function resetServerStore(): void { + (serverStore as unknown as { props: ApiLlamaCppServerProps }).props = { + model_path: '', + modalities: { + vision: false, + audio: false + } + } as ApiLlamaCppServerProps; + (serverStore as unknown as { error: string }).error = ''; + (serverStore as unknown as { loading: boolean }).loading = false; +} + +/** + * Common mock configurations for Storybook stories + */ +export const mockConfigs = { + visionOnly: { + modalities: { vision: true, audio: false } + }, + audioOnly: { + modalities: { vision: false, audio: true } + }, + bothModalities: { + modalities: { vision: true, audio: true } + }, + noModalities: { + modalities: { vision: false, audio: false } + } +} as const; diff --git a/tools/server/webui/vite.config.ts b/tools/server/webui/vite.config.ts index 11ff665d8b2..b41d3511b42 100644 --- a/tools/server/webui/vite.config.ts +++ b/tools/server/webui/vite.config.ts @@ -118,8 +118,7 @@ export default defineConfig({ provider: 'playwright', instances: [{ browser: 'chromium' }] }, - include: ['src/**/*.svelte.{test,spec}.{js,ts}'], - exclude: ['src/lib/server/**'], + include: ['tests/client/**/*.svelte.{test,spec}.{js,ts}'], setupFiles: ['./vitest-setup-client.ts'] } }, @@
-128,8 +127,7 @@ export default defineConfig({ test: { name: 'server', environment: 'node', - include: ['src/**/*.{test,spec}.{js,ts}'], - exclude: ['src/**/*.svelte.{test,spec}.{js,ts}'] + include: ['tests/server/**/*.{test,spec}.{js,ts}'] } }, { @@ -142,7 +140,7 @@ export default defineConfig({ provider: 'playwright', instances: [{ browser: 'chromium', headless: true }] }, - include: ['src/**/*.stories.{js,ts,svelte}'], + include: ['tests/stories/**/*.stories.{js,ts,svelte}'], setupFiles: ['./.storybook/vitest.setup.ts'] }, plugins: [ @@ -158,7 +156,7 @@ export default defineConfig({ proxy: { '/v1': 'http://localhost:8080', '/props': 'http://localhost:8080', - '/slots': 'http://localhost:8080' + '/models': 'http://localhost:8080' }, headers: { 'Cross-Origin-Embedder-Policy': 'require-corp', diff --git a/vendor/sheredom/subprocess.h b/vendor/sheredom/subprocess.h new file mode 100644 index 00000000000..3e40bae046a --- /dev/null +++ b/vendor/sheredom/subprocess.h @@ -0,0 +1,1203 @@ +/* + The latest version of this library is available on GitHub; + https://github.com/sheredom/subprocess.h +*/ + +/* + This is free and unencumbered software released into the public domain. + + Anyone is free to copy, modify, publish, use, compile, sell, or + distribute this software, either in source code form or as a compiled + binary, for any purpose, commercial or non-commercial, and by any + means. + + In jurisdictions that recognize copyright laws, the author or authors + of this software dedicate any and all copyright interest in the + software to the public domain. We make this dedication for the benefit + of the public at large and to the detriment of our heirs and + successors. We intend this dedication to be an overt act of + relinquishment in perpetuity of all present and future rights to this + software under copyright law. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + For more information, please refer to <https://unlicense.org> +*/ + +#ifndef SHEREDOM_SUBPROCESS_H_INCLUDED +#define SHEREDOM_SUBPROCESS_H_INCLUDED + +#if defined(_MSC_VER) +#pragma warning(push, 1) + +/* disable warning: '__cplusplus' is not defined as a preprocessor macro, + * replacing with '0' for '#if/#elif' */ +#pragma warning(disable : 4668) +#endif + +#include <stdio.h> +#include <string.h> + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +#if defined(__TINYC__) +#define SUBPROCESS_ATTRIBUTE(a) __attribute((a)) +#else +#define SUBPROCESS_ATTRIBUTE(a) __attribute__((a)) +#endif + +#if defined(_MSC_VER) +#define subprocess_pure +#define subprocess_weak __inline +#define subprocess_tls __declspec(thread) +#elif defined(__MINGW32__) +#define subprocess_pure SUBPROCESS_ATTRIBUTE(pure) +#define subprocess_weak static SUBPROCESS_ATTRIBUTE(used) +#define subprocess_tls __thread +#elif defined(__clang__) || defined(__GNUC__) || defined(__TINYC__) +#define subprocess_pure SUBPROCESS_ATTRIBUTE(pure) +#define subprocess_weak SUBPROCESS_ATTRIBUTE(weak) +#define subprocess_tls __thread +#else +#error Non clang, non gcc, non MSVC compiler found! +#endif + +struct subprocess_s; + +enum subprocess_option_e { + // stdout and stderr are the same FILE.
+ subprocess_option_combined_stdout_stderr = 0x1, + + // The child process should inherit the environment variables of the parent. + subprocess_option_inherit_environment = 0x2, + + // Enable asynchronous reading of stdout/stderr before it has completed. + subprocess_option_enable_async = 0x4, + + // Enable the child process to be spawned with no window visible if supported + // by the platform. + subprocess_option_no_window = 0x8, + + // Search for program names in the PATH variable. Always enabled on Windows. + // Note: this will **not** search for paths in any provided custom environment + // and instead uses the PATH of the spawning process. + subprocess_option_search_user_path = 0x10 +}; + +#if defined(__cplusplus) +extern "C" { +#endif + +/// @brief Create a process. +/// @param command_line An array of strings for the command line to execute for +/// this process. The last element must be NULL to signify the end of the array. +/// The memory backing this parameter only needs to persist until this function +/// returns. +/// @param options A bit field of subprocess_option_e's to pass. +/// @param out_process The newly created process. +/// @return On success zero is returned. +subprocess_weak int subprocess_create(const char *const command_line[], + int options, + struct subprocess_s *const out_process); + +/// @brief Create a process (extended create). +/// @param command_line An array of strings for the command line to execute for +/// this process. The last element must be NULL to signify the end of the array. +/// The memory backing this parameter only needs to persist until this function +/// returns. +/// @param options A bit field of subprocess_option_e's to pass. +/// @param environment An optional array of strings for the environment to use +/// for a child process (each element of the form FOO=BAR). The last element +/// must be NULL to signify the end of the array. +/// @param out_process The newly created process. +/// @return On success zero is returned. +/// +/// If `options` contains `subprocess_option_inherit_environment`, then +/// `environment` must be NULL. +subprocess_weak int +subprocess_create_ex(const char *const command_line[], int options, + const char *const environment[], + struct subprocess_s *const out_process); + +/// @brief Get the standard input file for a process. +/// @param process The process to query. +/// @return The file for standard input of the process. +/// +/// The file returned can be written to by the parent process to feed data to +/// the standard input of the process. +subprocess_pure subprocess_weak FILE * +subprocess_stdin(const struct subprocess_s *const process); + +/// @brief Get the standard output file for a process. +/// @param process The process to query. +/// @return The file for standard output of the process. +/// +/// The file returned can be read from by the parent process to read data from +/// the standard output of the child process. +subprocess_pure subprocess_weak FILE * +subprocess_stdout(const struct subprocess_s *const process); + +/// @brief Get the standard error file for a process. +/// @param process The process to query. +/// @return The file for standard error of the process. +/// +/// The file returned can be read from by the parent process to read data from +/// the standard error of the child process. 
+/// +/// If the process was created with the subprocess_option_combined_stdout_stderr +/// option bit set, this function will return NULL, and the subprocess_stdout +/// function should be used for both the standard output and error combined. +subprocess_pure subprocess_weak FILE * +subprocess_stderr(const struct subprocess_s *const process); + +/// @brief Wait for a process to finish execution. +/// @param process The process to wait for. +/// @param out_return_code The return code of the joined process (can be +/// NULL). +/// @return On success zero is returned. +/// +/// Joining a process will close the stdin pipe to the process. +subprocess_weak int subprocess_join(struct subprocess_s *const process, + int *const out_return_code); + +/// @brief Destroy a previously created process. +/// @param process The process to destroy. +/// @return On success zero is returned. +/// +/// If the process to be destroyed had not finished execution, it may outlive +/// the parent process. +subprocess_weak int subprocess_destroy(struct subprocess_s *const process); + +/// @brief Terminate a previously created process. +/// @param process The process to terminate. +/// @return On success zero is returned. +/// +/// If the process had not finished execution, it will be +/// terminated (i.e. killed). +subprocess_weak int subprocess_terminate(struct subprocess_s *const process); + +/// @brief Read the standard output from the child process. +/// @param process The process to read from. +/// @param buffer The buffer to read into. +/// @param size The maximum number of bytes to read. +/// @return The number of bytes actually read into buffer. Can only be 0 if the +/// process has completed. +/// +/// The only safe way to read from the standard output of a process during its +/// execution is to use the `subprocess_option_enable_async` option in +/// conjunction with this method. +subprocess_weak unsigned +subprocess_read_stdout(struct subprocess_s *const process, char *const buffer, + unsigned size); + +/// @brief Read the standard error from the child process. +/// @param process The process to read from. +/// @param buffer The buffer to read into. +/// @param size The maximum number of bytes to read. +/// @return The number of bytes actually read into buffer. Can only be 0 if the +/// process has completed. +/// +/// The only safe way to read from the standard error of a process during its +/// execution is to use the `subprocess_option_enable_async` option in +/// conjunction with this method. +subprocess_weak unsigned +subprocess_read_stderr(struct subprocess_s *const process, char *const buffer, + unsigned size); + +/// @brief Returns whether the subprocess is currently still alive and executing. +/// @param process The process to check. +/// @return If the process is still alive non-zero is returned.
+subprocess_weak int subprocess_alive(struct subprocess_s *const process); + +#if defined(__cplusplus) +#define SUBPROCESS_CAST(type, x) static_cast<type>(x) +#define SUBPROCESS_PTR_CAST(type, x) reinterpret_cast<type>(x) +#define SUBPROCESS_CONST_CAST(type, x) const_cast<type>(x) +#define SUBPROCESS_NULL NULL +#else +#define SUBPROCESS_CAST(type, x) ((type)(x)) +#define SUBPROCESS_PTR_CAST(type, x) ((type)(x)) +#define SUBPROCESS_CONST_CAST(type, x) ((type)(x)) +#define SUBPROCESS_NULL 0 +#endif + +#if !defined(_WIN32) +#include <signal.h> +#include <spawn.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#endif + +#if defined(_WIN32) + +#if (_MSC_VER < 1920) +#ifdef _WIN64 +typedef __int64 subprocess_intptr_t; +typedef unsigned __int64 subprocess_size_t; +#else +typedef int subprocess_intptr_t; +typedef unsigned int subprocess_size_t; +#endif +#else +#include <inttypes.h> + +typedef intptr_t subprocess_intptr_t; +typedef size_t subprocess_size_t; +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-identifier" +#endif + +typedef struct _PROCESS_INFORMATION *LPPROCESS_INFORMATION; +typedef struct _SECURITY_ATTRIBUTES *LPSECURITY_ATTRIBUTES; +typedef struct _STARTUPINFOA *LPSTARTUPINFOA; +typedef struct _OVERLAPPED *LPOVERLAPPED; + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#ifdef _MSC_VER +#pragma warning(push, 1) +#endif +#ifdef __MINGW32__ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#endif + +struct subprocess_subprocess_information_s { + void *hProcess; + void *hThread; + unsigned long dwProcessId; + unsigned long dwThreadId; +}; + +struct subprocess_security_attributes_s { + unsigned long nLength; + void *lpSecurityDescriptor; + int bInheritHandle; +}; + +struct subprocess_startup_info_s { + unsigned long cb; + char *lpReserved; + char *lpDesktop; + char *lpTitle; + unsigned long dwX; + unsigned long dwY; + unsigned long dwXSize; + unsigned long dwYSize; + unsigned long dwXCountChars; + unsigned long dwYCountChars; + unsigned long dwFillAttribute; + unsigned long dwFlags; + unsigned short wShowWindow; + unsigned short cbReserved2; + unsigned char *lpReserved2; + void *hStdInput; + void *hStdOutput; + void *hStdError; +}; + +struct subprocess_overlapped_s { + uintptr_t Internal; + uintptr_t InternalHigh; + union { + struct { + unsigned long Offset; + unsigned long OffsetHigh; + } DUMMYSTRUCTNAME; + void *Pointer; + } DUMMYUNIONNAME; + + void *hEvent; +}; + +#ifdef __MINGW32__ +#pragma GCC diagnostic pop +#endif +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +__declspec(dllimport) unsigned long __stdcall GetLastError(void); +__declspec(dllimport) int __stdcall SetHandleInformation(void *, unsigned long, + unsigned long); +__declspec(dllimport) int __stdcall CreatePipe(void **, void **, + LPSECURITY_ATTRIBUTES, + unsigned long); +__declspec(dllimport) void *__stdcall CreateNamedPipeA( + const char *, unsigned long, unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, LPSECURITY_ATTRIBUTES); +__declspec(dllimport) int __stdcall ReadFile(void *, void *, unsigned long, + unsigned long *, LPOVERLAPPED); +__declspec(dllimport) unsigned long __stdcall GetCurrentProcessId(void); +__declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void); +__declspec(dllimport) void *__stdcall CreateFileA(const char *, unsigned long, + unsigned long, + LPSECURITY_ATTRIBUTES, + unsigned long, unsigned long, + void *); +__declspec(dllimport) void *__stdcall CreateEventA(LPSECURITY_ATTRIBUTES, int, + int, const char *);
+__declspec(dllimport) int __stdcall CreateProcessA( + const char *, char *, LPSECURITY_ATTRIBUTES, LPSECURITY_ATTRIBUTES, int, + unsigned long, void *, const char *, LPSTARTUPINFOA, LPPROCESS_INFORMATION); +__declspec(dllimport) int __stdcall CloseHandle(void *); +__declspec(dllimport) unsigned long __stdcall WaitForSingleObject( + void *, unsigned long); +__declspec(dllimport) int __stdcall GetExitCodeProcess( + void *, unsigned long *lpExitCode); +__declspec(dllimport) int __stdcall TerminateProcess(void *, unsigned int); +__declspec(dllimport) unsigned long __stdcall WaitForMultipleObjects( + unsigned long, void *const *, int, unsigned long); +__declspec(dllimport) int __stdcall GetOverlappedResult(void *, LPOVERLAPPED, + unsigned long *, int); + +#if defined(_DLL) +#define SUBPROCESS_DLLIMPORT __declspec(dllimport) +#else +#define SUBPROCESS_DLLIMPORT +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-identifier" +#endif + +SUBPROCESS_DLLIMPORT int __cdecl _fileno(FILE *); +SUBPROCESS_DLLIMPORT int __cdecl _open_osfhandle(subprocess_intptr_t, int); +SUBPROCESS_DLLIMPORT subprocess_intptr_t __cdecl _get_osfhandle(int); + +#ifndef __MINGW32__ +void *__cdecl _alloca(subprocess_size_t); +#else +#include <malloc.h> +#endif + +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#else +typedef size_t subprocess_size_t; +#endif + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wpadded" +#endif +struct subprocess_s { + FILE *stdin_file; + FILE *stdout_file; + FILE *stderr_file; + +#if defined(_WIN32) + void *hProcess; + void *hStdInput; + void *hEventOutput; + void *hEventError; +#else + pid_t child; + int return_status; +#endif + + subprocess_size_t alive; +}; +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#if defined(__clang__) +#if __has_warning("-Wunsafe-buffer-usage") +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunsafe-buffer-usage" +#endif +#endif + +#if defined(_WIN32) +subprocess_weak int subprocess_create_named_pipe_helper(void **rd, void **wr); +int subprocess_create_named_pipe_helper(void **rd, void **wr) { + const unsigned long pipeAccessInbound = 0x00000001; + const unsigned long fileFlagOverlapped = 0x40000000; + const unsigned long pipeTypeByte = 0x00000000; + const unsigned long pipeWait = 0x00000000; + const unsigned long genericWrite = 0x40000000; + const unsigned long openExisting = 3; + const unsigned long fileAttributeNormal = 0x00000080; + const void *const invalidHandleValue = + SUBPROCESS_PTR_CAST(void *, ~(SUBPROCESS_CAST(subprocess_intptr_t, 0))); + struct subprocess_security_attributes_s saAttr = {sizeof(saAttr), + SUBPROCESS_NULL, 1}; + char name[256] = {0}; + static subprocess_tls long index = 0; + const long unique = index++; + +#if defined(_MSC_VER) && _MSC_VER < 1900 +#pragma warning(push, 1) +#pragma warning(disable : 4996) + _snprintf(name, sizeof(name) - 1, + "\\\\.\\pipe\\sheredom_subprocess_h.%08lx.%08lx.%ld", + GetCurrentProcessId(), GetCurrentThreadId(), unique); +#pragma warning(pop) +#else + snprintf(name, sizeof(name) - 1, + "\\\\.\\pipe\\sheredom_subprocess_h.%08lx.%08lx.%ld", + GetCurrentProcessId(), GetCurrentThreadId(), unique); +#endif + + *rd = + CreateNamedPipeA(name, pipeAccessInbound | fileFlagOverlapped, + pipeTypeByte | pipeWait, 1, 4096, 4096, SUBPROCESS_NULL, + SUBPROCESS_PTR_CAST(LPSECURITY_ATTRIBUTES, &saAttr)); + + if (invalidHandleValue == *rd) { + return -1; + } + + *wr = CreateFileA(name, genericWrite,
SUBPROCESS_NULL, + SUBPROCESS_PTR_CAST(LPSECURITY_ATTRIBUTES, &saAttr), + openExisting, fileAttributeNormal, SUBPROCESS_NULL); + + if (invalidHandleValue == *wr) { + return -1; + } + + return 0; +} +#endif + +int subprocess_create(const char *const commandLine[], int options, + struct subprocess_s *const out_process) { + return subprocess_create_ex(commandLine, options, SUBPROCESS_NULL, + out_process); +} + +int subprocess_create_ex(const char *const commandLine[], int options, + const char *const environment[], + struct subprocess_s *const out_process) { +#if defined(_WIN32) + int fd; + void *rd, *wr; + char *commandLineCombined; + subprocess_size_t len; + int i, j; + int need_quoting; + unsigned long flags = 0; + const unsigned long startFUseStdHandles = 0x00000100; + const unsigned long handleFlagInherit = 0x00000001; + const unsigned long createNoWindow = 0x08000000; + struct subprocess_subprocess_information_s processInfo; + struct subprocess_security_attributes_s saAttr = {sizeof(saAttr), + SUBPROCESS_NULL, 1}; + char *used_environment = SUBPROCESS_NULL; + struct subprocess_startup_info_s startInfo = {0, + SUBPROCESS_NULL, + SUBPROCESS_NULL, + SUBPROCESS_NULL, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + SUBPROCESS_NULL, + SUBPROCESS_NULL, + SUBPROCESS_NULL, + SUBPROCESS_NULL}; + + startInfo.cb = sizeof(startInfo); + startInfo.dwFlags = startFUseStdHandles; + + if (subprocess_option_no_window == (options & subprocess_option_no_window)) { + flags |= createNoWindow; + } + + if (subprocess_option_inherit_environment != + (options & subprocess_option_inherit_environment)) { + if (SUBPROCESS_NULL == environment) { + used_environment = SUBPROCESS_CONST_CAST(char *, "\0\0"); + } else { + // We always end with two null terminators. + len = 2; + + for (i = 0; environment[i]; i++) { + for (j = 0; '\0' != environment[i][j]; j++) { + len++; + } + + // For the null terminator too. + len++; + } + + used_environment = SUBPROCESS_CAST(char *, _alloca(len)); + + // Re-use len for the insertion position + len = 0; + + for (i = 0; environment[i]; i++) { + for (j = 0; '\0' != environment[i][j]; j++) { + used_environment[len++] = environment[i][j]; + } + + used_environment[len++] = '\0'; + } + + // End with the two null terminators. 
+ used_environment[len++] = '\0'; + used_environment[len++] = '\0'; + } + } else { + if (SUBPROCESS_NULL != environment) { + return -1; + } + } + + if (!CreatePipe(&rd, &wr, SUBPROCESS_PTR_CAST(LPSECURITY_ATTRIBUTES, &saAttr), + 0)) { + return -1; + } + + if (!SetHandleInformation(wr, handleFlagInherit, 0)) { + return -1; + } + + fd = _open_osfhandle(SUBPROCESS_PTR_CAST(subprocess_intptr_t, wr), 0); + + if (-1 != fd) { + out_process->stdin_file = _fdopen(fd, "wb"); + + if (SUBPROCESS_NULL == out_process->stdin_file) { + return -1; + } + } + + startInfo.hStdInput = rd; + + if (options & subprocess_option_enable_async) { + if (subprocess_create_named_pipe_helper(&rd, &wr)) { + return -1; + } + } else { + if (!CreatePipe(&rd, &wr, + SUBPROCESS_PTR_CAST(LPSECURITY_ATTRIBUTES, &saAttr), 0)) { + return -1; + } + } + + if (!SetHandleInformation(rd, handleFlagInherit, 0)) { + return -1; + } + + fd = _open_osfhandle(SUBPROCESS_PTR_CAST(subprocess_intptr_t, rd), 0); + + if (-1 != fd) { + out_process->stdout_file = _fdopen(fd, "rb"); + + if (SUBPROCESS_NULL == out_process->stdout_file) { + return -1; + } + } + + startInfo.hStdOutput = wr; + + if (subprocess_option_combined_stdout_stderr == + (options & subprocess_option_combined_stdout_stderr)) { + out_process->stderr_file = out_process->stdout_file; + startInfo.hStdError = startInfo.hStdOutput; + } else { + if (options & subprocess_option_enable_async) { + if (subprocess_create_named_pipe_helper(&rd, &wr)) { + return -1; + } + } else { + if (!CreatePipe(&rd, &wr, + SUBPROCESS_PTR_CAST(LPSECURITY_ATTRIBUTES, &saAttr), 0)) { + return -1; + } + } + + if (!SetHandleInformation(rd, handleFlagInherit, 0)) { + return -1; + } + + fd = _open_osfhandle(SUBPROCESS_PTR_CAST(subprocess_intptr_t, rd), 0); + + if (-1 != fd) { + out_process->stderr_file = _fdopen(fd, "rb"); + + if (SUBPROCESS_NULL == out_process->stderr_file) { + return -1; + } + } + + startInfo.hStdError = wr; + } + + if (options & subprocess_option_enable_async) { + out_process->hEventOutput = + CreateEventA(SUBPROCESS_PTR_CAST(LPSECURITY_ATTRIBUTES, &saAttr), 1, 1, + SUBPROCESS_NULL); + out_process->hEventError = + CreateEventA(SUBPROCESS_PTR_CAST(LPSECURITY_ATTRIBUTES, &saAttr), 1, 1, + SUBPROCESS_NULL); + } else { + out_process->hEventOutput = SUBPROCESS_NULL; + out_process->hEventError = SUBPROCESS_NULL; + } + + // Combine commandLine together into a single string + len = 0; + for (i = 0; commandLine[i]; i++) { + // for the trailing \0 + len++; + + // Quote the argument if it has a space in it + if (strpbrk(commandLine[i], "\t\v ") != SUBPROCESS_NULL || + commandLine[i][0] == SUBPROCESS_NULL) + len += 2; + + for (j = 0; '\0' != commandLine[i][j]; j++) { + switch (commandLine[i][j]) { + default: + break; + case '\\': + if (commandLine[i][j + 1] == '"') { + len++; + } + + break; + case '"': + len++; + break; + } + len++; + } + } + + commandLineCombined = SUBPROCESS_CAST(char *, _alloca(len)); + + if (!commandLineCombined) { + return -1; + } + + // Gonna re-use len to store the write index into commandLineCombined + len = 0; + + for (i = 0; commandLine[i]; i++) { + if (0 != i) { + commandLineCombined[len++] = ' '; + } + + need_quoting = strpbrk(commandLine[i], "\t\v ") != SUBPROCESS_NULL || + commandLine[i][0] == SUBPROCESS_NULL; + if (need_quoting) { + commandLineCombined[len++] = '"'; + } + + for (j = 0; '\0' != commandLine[i][j]; j++) { + switch (commandLine[i][j]) { + default: + break; + case '\\': + if (commandLine[i][j + 1] == '"') { + commandLineCombined[len++] = '\\'; + } + + break; + 
case '"': + commandLineCombined[len++] = '\\'; + break; + } + + commandLineCombined[len++] = commandLine[i][j]; + } + if (need_quoting) { + commandLineCombined[len++] = '"'; + } + } + + commandLineCombined[len] = '\0'; + + if (!CreateProcessA( + SUBPROCESS_NULL, + commandLineCombined, // command line + SUBPROCESS_NULL, // process security attributes + SUBPROCESS_NULL, // primary thread security attributes + 1, // handles are inherited + flags, // creation flags + used_environment, // used environment + SUBPROCESS_NULL, // use parent's current directory + SUBPROCESS_PTR_CAST(LPSTARTUPINFOA, + &startInfo), // STARTUPINFO pointer + SUBPROCESS_PTR_CAST(LPPROCESS_INFORMATION, &processInfo))) { + return -1; + } + + out_process->hProcess = processInfo.hProcess; + + out_process->hStdInput = startInfo.hStdInput; + + // We don't need the handle of the primary thread in the called process. + CloseHandle(processInfo.hThread); + + if (SUBPROCESS_NULL != startInfo.hStdOutput) { + CloseHandle(startInfo.hStdOutput); + + if (startInfo.hStdError != startInfo.hStdOutput) { + CloseHandle(startInfo.hStdError); + } + } + + out_process->alive = 1; + + return 0; +#else + int stdinfd[2]; + int stdoutfd[2]; + int stderrfd[2]; + pid_t child; + extern char **environ; + char *const empty_environment[1] = {SUBPROCESS_NULL}; + posix_spawn_file_actions_t actions; + char *const *used_environment; + + if (subprocess_option_inherit_environment == + (options & subprocess_option_inherit_environment)) { + if (SUBPROCESS_NULL != environment) { + return -1; + } + } + + if (0 != pipe(stdinfd)) { + return -1; + } + + if (0 != pipe(stdoutfd)) { + return -1; + } + + if (subprocess_option_combined_stdout_stderr != + (options & subprocess_option_combined_stdout_stderr)) { + if (0 != pipe(stderrfd)) { + return -1; + } + } + + if (environment) { +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcast-qual" +#pragma clang diagnostic ignored "-Wold-style-cast" +#endif + used_environment = SUBPROCESS_CONST_CAST(char *const *, environment); +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + } else if (subprocess_option_inherit_environment == + (options & subprocess_option_inherit_environment)) { + used_environment = environ; + } else { + used_environment = empty_environment; + } + + if (0 != posix_spawn_file_actions_init(&actions)) { + return -1; + } + + // Close the stdin write end + if (0 != posix_spawn_file_actions_addclose(&actions, stdinfd[1])) { + posix_spawn_file_actions_destroy(&actions); + return -1; + } + + // Map the read end to stdin + if (0 != + posix_spawn_file_actions_adddup2(&actions, stdinfd[0], STDIN_FILENO)) { + posix_spawn_file_actions_destroy(&actions); + return -1; + } + + // Close the stdout read end + if (0 != posix_spawn_file_actions_addclose(&actions, stdoutfd[0])) { + posix_spawn_file_actions_destroy(&actions); + return -1; + } + + // Map the write end to stdout + if (0 != + posix_spawn_file_actions_adddup2(&actions, stdoutfd[1], STDOUT_FILENO)) { + posix_spawn_file_actions_destroy(&actions); + return -1; + } + + if (subprocess_option_combined_stdout_stderr == + (options & subprocess_option_combined_stdout_stderr)) { + if (0 != posix_spawn_file_actions_adddup2(&actions, STDOUT_FILENO, + STDERR_FILENO)) { + posix_spawn_file_actions_destroy(&actions); + return -1; + } + } else { + // Close the stderr read end + if (0 != posix_spawn_file_actions_addclose(&actions, stderrfd[0])) { + posix_spawn_file_actions_destroy(&actions); + return -1; + } + // Map the write end to stdout + 
if (0 != posix_spawn_file_actions_adddup2(&actions, stderrfd[1], + STDERR_FILENO)) { + posix_spawn_file_actions_destroy(&actions); + return -1; + } + } + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcast-qual" +#pragma clang diagnostic ignored "-Wold-style-cast" +#endif + if (subprocess_option_search_user_path == + (options & subprocess_option_search_user_path)) { + if (0 != posix_spawnp(&child, commandLine[0], &actions, SUBPROCESS_NULL, + SUBPROCESS_CONST_CAST(char *const *, commandLine), + used_environment)) { + posix_spawn_file_actions_destroy(&actions); + return -1; + } + } else { + if (0 != posix_spawn(&child, commandLine[0], &actions, SUBPROCESS_NULL, + SUBPROCESS_CONST_CAST(char *const *, commandLine), + used_environment)) { + posix_spawn_file_actions_destroy(&actions); + return -1; + } + } +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + + // Close the stdin read end + close(stdinfd[0]); + // Store the stdin write end + out_process->stdin_file = fdopen(stdinfd[1], "wb"); + + // Close the stdout write end + close(stdoutfd[1]); + // Store the stdout read end + out_process->stdout_file = fdopen(stdoutfd[0], "rb"); + + if (subprocess_option_combined_stdout_stderr == + (options & subprocess_option_combined_stdout_stderr)) { + out_process->stderr_file = out_process->stdout_file; + } else { + // Close the stderr write end + close(stderrfd[1]); + // Store the stderr read end + out_process->stderr_file = fdopen(stderrfd[0], "rb"); + } + + // Store the child's pid + out_process->child = child; + + out_process->alive = 1; + + posix_spawn_file_actions_destroy(&actions); + return 0; +#endif +} + +FILE *subprocess_stdin(const struct subprocess_s *const process) { + return process->stdin_file; +} + +FILE *subprocess_stdout(const struct subprocess_s *const process) { + return process->stdout_file; +} + +FILE *subprocess_stderr(const struct subprocess_s *const process) { + if (process->stdout_file != process->stderr_file) { + return process->stderr_file; + } else { + return SUBPROCESS_NULL; + } +} + +int subprocess_join(struct subprocess_s *const process, + int *const out_return_code) { +#if defined(_WIN32) + const unsigned long infinite = 0xFFFFFFFF; + + if (process->stdin_file) { + fclose(process->stdin_file); + process->stdin_file = SUBPROCESS_NULL; + } + + if (process->hStdInput) { + CloseHandle(process->hStdInput); + process->hStdInput = SUBPROCESS_NULL; + } + + WaitForSingleObject(process->hProcess, infinite); + + if (out_return_code) { + if (!GetExitCodeProcess( + process->hProcess, + SUBPROCESS_PTR_CAST(unsigned long *, out_return_code))) { + return -1; + } + } + + process->alive = 0; + + return 0; +#else + int status; + + if (process->stdin_file) { + fclose(process->stdin_file); + process->stdin_file = SUBPROCESS_NULL; + } + + if (process->child) { + if (process->child != waitpid(process->child, &status, 0)) { + return -1; + } + + process->child = 0; + + if (WIFEXITED(status)) { + process->return_status = WEXITSTATUS(status); + } else { + process->return_status = EXIT_FAILURE; + } + + process->alive = 0; + } + + if (out_return_code) { + *out_return_code = process->return_status; + } + + return 0; +#endif +} + +int subprocess_destroy(struct subprocess_s *const process) { + if (process->stdin_file) { + fclose(process->stdin_file); + process->stdin_file = SUBPROCESS_NULL; + } + + if (process->stdout_file) { + fclose(process->stdout_file); + + if (process->stdout_file != process->stderr_file) { + fclose(process->stderr_file); + } + + 
process->stdout_file = SUBPROCESS_NULL; + process->stderr_file = SUBPROCESS_NULL; + } + +#if defined(_WIN32) + if (process->hProcess) { + CloseHandle(process->hProcess); + process->hProcess = SUBPROCESS_NULL; + + if (process->hStdInput) { + CloseHandle(process->hStdInput); + } + + if (process->hEventOutput) { + CloseHandle(process->hEventOutput); + } + + if (process->hEventError) { + CloseHandle(process->hEventError); + } + } +#endif + + return 0; +} + +int subprocess_terminate(struct subprocess_s *const process) { +#if defined(_WIN32) + unsigned int killed_process_exit_code; + int success_terminate; + int windows_call_result; + + killed_process_exit_code = 99; + windows_call_result = + TerminateProcess(process->hProcess, killed_process_exit_code); + success_terminate = (windows_call_result == 0) ? 1 : 0; + return success_terminate; +#else + int result; + result = kill(process->child, 9); + return result; +#endif +} + +unsigned subprocess_read_stdout(struct subprocess_s *const process, + char *const buffer, unsigned size) { +#if defined(_WIN32) + void *handle; + unsigned long bytes_read = 0; + struct subprocess_overlapped_s overlapped = {0, 0, {{0, 0}}, SUBPROCESS_NULL}; + overlapped.hEvent = process->hEventOutput; + + handle = SUBPROCESS_PTR_CAST(void *, + _get_osfhandle(_fileno(process->stdout_file))); + + if (!ReadFile(handle, buffer, size, &bytes_read, + SUBPROCESS_PTR_CAST(LPOVERLAPPED, &overlapped))) { + const unsigned long errorIoPending = 997; + unsigned long error = GetLastError(); + + // Means we've got an async read! + if (error == errorIoPending) { + if (!GetOverlappedResult(handle, + SUBPROCESS_PTR_CAST(LPOVERLAPPED, &overlapped), + &bytes_read, 1)) { + const unsigned long errorIoIncomplete = 996; + const unsigned long errorHandleEOF = 38; + error = GetLastError(); + + if ((error != errorIoIncomplete) && (error != errorHandleEOF)) { + return 0; + } + } + } + } + + return SUBPROCESS_CAST(unsigned, bytes_read); +#else + const int fd = fileno(process->stdout_file); + const ssize_t bytes_read = read(fd, buffer, size); + + if (bytes_read < 0) { + return 0; + } + + return SUBPROCESS_CAST(unsigned, bytes_read); +#endif +} + +unsigned subprocess_read_stderr(struct subprocess_s *const process, + char *const buffer, unsigned size) { +#if defined(_WIN32) + void *handle; + unsigned long bytes_read = 0; + struct subprocess_overlapped_s overlapped = {0, 0, {{0, 0}}, SUBPROCESS_NULL}; + overlapped.hEvent = process->hEventError; + + handle = SUBPROCESS_PTR_CAST(void *, + _get_osfhandle(_fileno(process->stderr_file))); + + if (!ReadFile(handle, buffer, size, &bytes_read, + SUBPROCESS_PTR_CAST(LPOVERLAPPED, &overlapped))) { + const unsigned long errorIoPending = 997; + unsigned long error = GetLastError(); + + // Means we've got an async read! 
+ if (error == errorIoPending) { + if (!GetOverlappedResult(handle, + SUBPROCESS_PTR_CAST(LPOVERLAPPED, &overlapped), + &bytes_read, 1)) { + const unsigned long errorIoIncomplete = 996; + const unsigned long errorHandleEOF = 38; + error = GetLastError(); + + if ((error != errorIoIncomplete) && (error != errorHandleEOF)) { + return 0; + } + } + } + } + + return SUBPROCESS_CAST(unsigned, bytes_read); +#else + const int fd = fileno(process->stderr_file); + const ssize_t bytes_read = read(fd, buffer, size); + + if (bytes_read < 0) { + return 0; + } + + return SUBPROCESS_CAST(unsigned, bytes_read); +#endif +} + +int subprocess_alive(struct subprocess_s *const process) { + int is_alive = SUBPROCESS_CAST(int, process->alive); + + if (!is_alive) { + return 0; + } +#if defined(_WIN32) + { + const unsigned long zero = 0x0; + const unsigned long wait_object_0 = 0x00000000L; + + is_alive = wait_object_0 != WaitForSingleObject(process->hProcess, zero); + } +#else + { + int status; + is_alive = 0 == waitpid(process->child, &status, WNOHANG); + + // If the process was successfully waited on we need to cleanup now. + if (!is_alive) { + if (WIFEXITED(status)) { + process->return_status = WEXITSTATUS(status); + } else { + process->return_status = EXIT_FAILURE; + } + + // Since we've already successfully waited on the process, we need to wipe + // the child now. + process->child = 0; + + if (subprocess_join(process, SUBPROCESS_NULL)) { + return -1; + } + } + } +#endif + + if (!is_alive) { + process->alive = 0; + } + + return is_alive; +} + +#if defined(__clang__) +#if __has_warning("-Wunsafe-buffer-usage") +#pragma clang diagnostic pop +#endif +#endif + +#if defined(__cplusplus) +} // extern "C" +#endif + +#endif /* SHEREDOM_SUBPROCESS_H_INCLUDED */
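For reviewers unfamiliar with the vendored header, here is a minimal sketch of the synchronous lifecycle it provides (create, join, then drain the captured stdout). The `echo` child and its arguments are illustrative placeholders, not something this patch itself runs:

```c
/* Hypothetical usage sketch; "echo" is an illustrative child command. */
#include <stdio.h>

#include "vendor/sheredom/subprocess.h"

int main(void) {
    /* NULL-terminated argv; resolved via PATH because of
       subprocess_option_search_user_path. */
    const char *const command_line[] = {"echo", "hello from child", NULL};
    struct subprocess_s process;
    int return_code = 0;

    if (0 != subprocess_create(command_line,
                               subprocess_option_search_user_path |
                                   subprocess_option_inherit_environment,
                               &process)) {
        return 1;
    }

    /* Joining closes the child's stdin pipe and waits for it to exit. */
    if (0 != subprocess_join(&process, &return_code)) {
        return 1;
    }

    /* For short outputs, the captured stdout can be drained after the join. */
    char line[256];
    FILE *child_stdout = subprocess_stdout(&process);
    while (fgets(line, sizeof(line), child_stdout)) {
        fputs(line, stdout);
    }

    subprocess_destroy(&process);
    return return_code;
}
```

Note that draining after the join only suits children with small output, since a child that fills the OS pipe buffer before being joined will block.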
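For long-running or chatty children, the header's `subprocess_option_enable_async` mode allows `subprocess_read_stdout` to be called while the child is still alive, returning 0 only once it has completed. A sketch, again with a stand-in child command:

```c
/* Hypothetical async-read sketch; "ping -c 3" is an illustrative,
   POSIX-flavored stand-in for a long-running child. */
#include <stdio.h>

#include "vendor/sheredom/subprocess.h"

int main(void) {
    const char *const command_line[] = {"ping", "-c", "3", "127.0.0.1", NULL};
    struct subprocess_s process;
    char buffer[1024];
    unsigned bytes_read;

    if (0 != subprocess_create(command_line,
                               subprocess_option_search_user_path |
                                   subprocess_option_enable_async,
                               &process)) {
        return 1;
    }

    /* Stream output as it is produced instead of waiting for exit. */
    do {
        bytes_read = subprocess_read_stdout(&process, buffer,
                                            sizeof(buffer) - 1);
        buffer[bytes_read] = '\0';
        fputs(buffer, stdout);
    } while (0 != bytes_read);

    subprocess_join(&process, SUBPROCESS_NULL);
    subprocess_destroy(&process);
    return 0;
}
```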