Skip to content

Commit dd8a905

Browse files
author
firecoperana
committed
Add new webui
1 parent 7e80904 commit dd8a905

File tree

10 files changed

+1345
-33
lines changed

10 files changed

+1345
-33
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,4 @@ poetry.toml
130130

131131
# Scripts
132132
!/scripts/install-oneapi.bat
133+
/examples/server/webui_llamacpp/.gitignore

common/common.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,20 @@ int32_t cpu_get_num_math() {
201201
return cpu_get_num_physical_cores();
202202
}
203203

204+
// Translate the value passed to `--webui` into a common_webui selection.
//
//   "none"     -> COMMON_WEBUI_NONE
//   "llamacpp" -> COMMON_WEBUI_LLAMACPP
//   "auto"     -> COMMON_WEBUI_AUTO
//
// Matching is exact (case-sensitive). Any name that is not listed above is
// not rejected; it resolves to COMMON_WEBUI_AUTO as well.
common_webui common_webui_from_name(const std::string& format) {
    if (format == "none") {
        return COMMON_WEBUI_NONE;
    }
    if (format == "llamacpp") {
        return COMMON_WEBUI_LLAMACPP;
    }
    // "auto" and the unknown-name fallback share the same result.
    return COMMON_WEBUI_AUTO;
}
204218

205219
static std::string read_file(const std::string& fname) {
206220
std::ifstream file(fname);
@@ -1401,6 +1415,11 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
14011415
params.public_path = argv[i];
14021416
return true;
14031417
}
1418+
if (arg == "--webui") {
1419+
CHECK_ARG
1420+
params.webui = common_webui_from_name(std::string(argv[i]));
1421+
return true;
1422+
}
14041423
if (arg == "--api-key") {
14051424
CHECK_ARG
14061425
params.api_keys.push_back(argv[i]);
@@ -2028,6 +2047,12 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
20282047
options.push_back({ "server", " --port PORT", "port to listen (default: %d)", params.port });
20292048
options.push_back({ "server", " --path PATH", "path to serve static files from (default: %s)", params.public_path.c_str() });
20302049
options.push_back({ "server", " --embedding(s)", "restrict to only support embedding use case; use only with dedicated embedding models (default: %s)", params.embedding ? "enabled" : "disabled" });
2050+
options.push_back({ "server", " --webui NAME",
2051+
"controls which webui to server:\n"
2052+
"- none: disable webui\n"
2053+
"- auto: default webui \n"
2054+
"- llamacpp: llamacpp webui \n"
2055+
"(default: auto)", });
20312056
options.push_back({ "server", " --api-key KEY", "API key to use for authentication (default: none)" });
20322057
options.push_back({ "server", " --api-key-file FNAME", "path to file containing API keys (default: none)" });
20332058
options.push_back({ "server", " --ssl-key-file FNAME", "path to file a PEM-encoded SSL private key" });

common/common.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,14 @@ enum common_reasoning_format {
8686
COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
8787
};
8888

89+
// Selects which web UI the server exposes; filled in from the `--webui` argument
// through common_webui_from_name().
// NOTE: COMMON_WEBUI_NONE is the first (zero) enumerator and server.cpp evaluates
// `params.webui` in a boolean context, so keep NONE first when adding entries.
enum common_webui {
90+
COMMON_WEBUI_NONE, // "none": web UI is disabled
91+
COMMON_WEBUI_AUTO, // "auto": default web UI; also the result for unrecognized names
92+
COMMON_WEBUI_LLAMACPP, // "llamacpp": llamacpp web UI
93+
};
94+
95+
common_webui common_webui_from_name(const std::string& format);
96+
8997
struct gpt_params {
9098
uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
9199

@@ -265,8 +273,8 @@ struct gpt_params {
265273
std::map<std::string, std::string> default_template_kwargs;
266274

267275
// "advanced" endpoints are disabled by default for better security
268-
bool webui = true;
269-
bool endpoint_slots = false;
276+
common_webui webui = COMMON_WEBUI_AUTO;
277+
bool endpoint_slots = true;
270278
bool endpoint_props = false; // only control POST requests, not GET
271279
bool endpoint_metrics = false;
272280

examples/server/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ set(TARGET_SRCS
1717
)
1818
set(PUBLIC_ASSETS
1919
index.html.gz
20-
loading.html
20+
2121
)
2222

2323
foreach(asset ${PUBLIC_ASSETS})
@@ -29,14 +29,15 @@ foreach(asset ${PUBLIC_ASSETS})
2929
OUTPUT "${output}"
3030
COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
3131
)
32+
message("TARGET_SRCS contains: ${input}")
3233
set_source_files_properties(${output} PROPERTIES GENERATED TRUE)
3334

3435
endforeach()
3536

36-
3737
# include new llamacpp webui
3838
set(ALT_PUBLIC_ASSETS
3939
index_llamacpp.html.gz
40+
loading.html
4041
)
4142

4243
foreach(asset ${ALT_PUBLIC_ASSETS})
@@ -48,6 +49,7 @@ foreach(asset ${ALT_PUBLIC_ASSETS})
4849
OUTPUT "${output}"
4950
COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
5051
)
52+
message("TARGET_SRCS contains: ${input}")
5153
set_source_files_properties(${output} PROPERTIES GENERATED TRUE)
5254

5355
endforeach()
Lines changed: 1 addition & 0 deletions
Loading

examples/server/public_llamacpp/index_llamacpp.html

Lines changed: 1212 additions & 0 deletions
Large diffs are not rendered by default.
823 KB
Binary file not shown.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
<!-- Reloads this page every 5 seconds; presumably so the real UI replaces this notice once the model has loaded (confirm against the server's loading handler). -->
4+
<meta http-equiv="refresh" content="5">
5+
</head>
6+
<body>
7+
<div id="loading">
8+
The model is loading. Please wait.<br/>
9+
The user interface will appear soon.
10+
</div>
11+
</body>
12+
</html>

examples/server/server.cpp

Lines changed: 77 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#define JSON_ASSERT GGML_ASSERT
2424
#include "json.hpp"
2525
#include "index.html.gz.hpp"
26+
#include "index_llamacpp.html.gz.hpp"
2627
#include "loading.html.hpp"
2728

2829
#include <atomic>
@@ -4141,6 +4142,7 @@ int main(int argc, char ** argv) {
41414142
{ "chat_template", common_chat_templates_source(ctx_server.chat_templates.get()) },
41424143
{ "bos_token", llama_token_to_piece(ctx_server.ctx, llama_token_bos(ctx_server.model), /* special= */ true)},
41434144
{ "eos_token", llama_token_to_piece(ctx_server.ctx, llama_token_eos(ctx_server.model), /* special= */ true)},
4145+
{ "model_path", ctx_server.params.model },
41444146
{ "n_ctx", ctx_server.n_ctx }
41454147

41464148
};
@@ -4998,38 +5000,51 @@ int main(int argc, char ** argv) {
49985000
//
49995001
// Router
50005002
//
5001-
5002-
// register static assets routes
5003-
if (!params.public_path.empty()) {
5004-
// Set the base directory for serving static files
5005-
svr->set_base_dir(params.public_path);
5003+
if (params.webui == COMMON_WEBUI_NONE) {
5004+
LLAMA_LOG_INFO("Web UI is disabled\n");
50065005
}
5007-
5008-
{
5006+
else {
50095007
// register static assets routes
50105008
if (!params.public_path.empty()) {
50115009
// Set the base directory for serving static files
5012-
bool is_found = svr->set_mount_point("/", params.public_path);
5013-
if (!is_found) {
5014-
GGML_ABORT("%s: static assets path not found: %s\n", __func__, params.public_path.c_str());
5015-
return 1;
5016-
}
5010+
svr->set_base_dir(params.public_path);
50175011
}
5018-
else {
5019-
// using embedded static index.html
5020-
svr->Get("/", [](const httplib::Request& req, httplib::Response& res) {
5021-
if (req.get_header_value("Accept-Encoding").find("gzip") == std::string::npos) {
5022-
res.set_content("Error: gzip is not supported by this browser", "text/plain");
5023-
}
5024-
else {
5025-
res.set_header("Content-Encoding", "gzip");
5026-
// COEP and COOP headers, required by pyodide (python interpreter)
5027-
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
5028-
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
5029-
res.set_content(reinterpret_cast<const char*>(index_html_gz), index_html_gz_len, "text/html; charset=utf-8");
5012+
5013+
{
5014+
// register static assets routes
5015+
if (!params.public_path.empty()) {
5016+
// Set the base directory for serving static files
5017+
bool is_found = svr->set_mount_point("/", params.public_path);
5018+
if (!is_found) {
5019+
GGML_ABORT("%s: static assets path not found: %s\n", __func__, params.public_path.c_str());
5020+
return 1;
50305021
}
5031-
return false;
5032-
});
5022+
}
5023+
else {
5024+
5025+
// using embedded static index.html
5026+
svr->Get("/", [params](const httplib::Request& req, httplib::Response& res) {
5027+
if (req.get_header_value("Accept-Encoding").find("gzip") == std::string::npos) {
5028+
res.set_content("Error: gzip is not supported by this browser", "text/plain");
5029+
}
5030+
else {
5031+
res.set_header("Content-Encoding", "gzip");
5032+
// COEP and COOP headers, required by pyodide (python interpreter)
5033+
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
5034+
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
5035+
if (params.webui == COMMON_WEBUI_AUTO) {
5036+
res.set_content(reinterpret_cast<const char*>(index_html_gz), index_html_gz_len, "text/html; charset=utf-8");
5037+
}
5038+
else if (params.webui == COMMON_WEBUI_LLAMACPP) {
5039+
res.set_content(reinterpret_cast<const char*>(index_llamacpp_html_gz), index_llamacpp_html_gz_len, "text/html; charset=utf-8");
5040+
}
5041+
else {
5042+
res.set_content(reinterpret_cast<const char*>(index_html_gz), index_html_gz_len, "text/html; charset=utf-8");
5043+
}
5044+
}
5045+
return false;
5046+
});
5047+
}
50335048
}
50345049
}
50355050
// register API routes
@@ -5062,6 +5077,42 @@ int main(int argc, char ** argv) {
50625077
svr->Post("/rename_prompt", rename_saved_prompt);
50635078

50645079
}
5080+
// SPA fallback route - serve index.html for any route that doesn't match API endpoints
5081+
// This enables client-side routing for dynamic routes like /chat/[id]
5082+
if (params.webui && params.public_path.empty()) {
5083+
// Only add fallback when using embedded static files
5084+
svr->Get(".*", [](const httplib::Request& req, httplib::Response& res) {
5085+
// Skip API routes - they should have been handled above
5086+
if (req.path.find("/v1/") != std::string::npos ||
5087+
req.path.find("/health") != std::string::npos ||
5088+
req.path.find("/metrics") != std::string::npos ||
5089+
req.path.find("/props") != std::string::npos ||
5090+
req.path.find("/models") != std::string::npos ||
5091+
req.path.find("/api/tags") != std::string::npos ||
5092+
req.path.find("/completions") != std::string::npos ||
5093+
req.path.find("/chat/completions") != std::string::npos ||
5094+
req.path.find("/embeddings") != std::string::npos ||
5095+
req.path.find("/tokenize") != std::string::npos ||
5096+
req.path.find("/detokenize") != std::string::npos ||
5097+
req.path.find("/lora-adapters") != std::string::npos ||
5098+
req.path.find("/slots") != std::string::npos) {
5099+
return false; // Let other handlers process API routes
5100+
}
5101+
5102+
// Serve index.html for all other routes (SPA fallback)
5103+
if (req.get_header_value("Accept-Encoding").find("gzip") == std::string::npos) {
5104+
res.set_content("Error: gzip is not supported by this browser", "text/plain");
5105+
}
5106+
else {
5107+
res.set_header("Content-Encoding", "gzip");
5108+
// COEP and COOP headers, required by pyodide (python interpreter)
5109+
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
5110+
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
5111+
res.set_content(reinterpret_cast<const char*>(index_html_gz), index_html_gz_len, "text/html; charset=utf-8");
5112+
}
5113+
return false;
5114+
});
5115+
}
50655116
svr->Get ("/version", handle_version);
50665117
if (!params.sql_save_file.empty()) {
50675118
// these endpoints rely on sql_save_file existing

examples/server/webui_llamacpp/vite.config.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ function llamaCppBuildPlugin() {
2626
// Ensure the SvelteKit adapter has finished writing to ../public
2727
setTimeout(() => {
2828
try {
29-
const indexPath = resolve('../public/index.html');
30-
const gzipPath = resolve('../public/index.html.gz');
29+
const indexPath = resolve('../public_llamacpp/index_llamacpp.html');
30+
const gzipPath = resolve('../public_llamacpp/index_llamacpp.html.gz');
3131

3232
if (!existsSync(indexPath)) {
3333
return;
@@ -65,7 +65,7 @@ function llamaCppBuildPlugin() {
6565
}
6666

6767
writeFileSync(gzipPath, compressed);
68-
console.log('✓ Created index.html.gz');
68+
console.log('✓ Created index_llamacpp.html.gz');
6969
} catch (error) {
7070
console.error('Failed to create gzip file:', error);
7171
}

0 commit comments

Comments
 (0)