Skip to content

Commit 3de65f8

Browse files
committed
cont
1 parent 65528f6 commit 3de65f8

File tree

4 files changed

+24
-9
lines changed

4 files changed

+24
-9
lines changed

common/arg.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2600,6 +2600,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
26002600
params.models_dir = value;
26012601
}
26022602
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_DIR"));
2603+
add_opt(common_arg(
2604+
{"--models-preset"}, "PATH",
2605+
"path to INI file containing model presets for the router server (default: disabled)",
2606+
[](common_params & params, const std::string & value) {
2607+
params.models_preset = value;
2608+
}
2609+
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_PRESET"));
26032610
add_opt(common_arg(
26042611
{"--models-max"}, "N",
26052612
string_format("for router server, maximum number of models to load simultaneously (default: %d, 0 = unlimited)", params.models_max),

common/common.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -482,9 +482,10 @@ struct common_params {
482482
bool endpoint_metrics = false;
483483

484484
// router server configs
485-
std::string models_dir = ""; // directory containing models for the router server
486-
int models_max = 4; // maximum number of models to load simultaneously
487-
bool models_autoload = true; // automatically load models when requested via the router server
485+
std::string models_dir = ""; // directory containing models for the router server
486+
std::string models_preset = ""; // path to INI file containing model presets for the router server (empty = disabled)
487+
int models_max = 4; // maximum number of models to load simultaneously
488+
bool models_autoload = true; // automatically load models when requested via the router server
488489

489490
bool log_json = false;
490491

common/preset.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,12 @@ static std::map<std::string, std::map<std::string, std::string>> parse_ini_from_
5454
std::map<std::string, std::map<std::string, std::string>> parsed;
5555

5656
if (!std::filesystem::exists(path)) {
57-
return parsed; // return empty if file does not exist (expected behavior)
57+
throw std::runtime_error("preset file does not exist: " + path);
5858
}
5959

6060
std::ifstream file(path);
6161
if (!file.good()) {
62-
throw std::runtime_error("failed to open server config file: " + path);
62+
throw std::runtime_error("failed to open server preset file: " + path);
6363
}
6464

6565
std::string contents((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());

tools/server/server-models.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,10 +138,9 @@ static std::vector<local_model> list_local_models(const std::string & dir) {
138138
//
139139

140140

141-
server_presets::server_presets(int argc, char ** argv, common_params & base_params, const std::string & models_dir)
141+
server_presets::server_presets(int argc, char ** argv, common_params & base_params, const std::string & presets_path)
142142
: ctx_params(common_params_parser_init(base_params, LLAMA_EXAMPLE_SERVER)) {
143-
if (!models_dir.empty()) {
144-
auto presets_path = models_dir + DIRECTORY_SEPARATOR + "presets.ini";
143+
if (!presets_path.empty()) {
145144
presets = common_presets_load(presets_path, ctx_params);
146145
SRV_INF("Loaded %zu presets from %s\n", presets.size(), presets_path.c_str());
147146
}
@@ -167,6 +166,14 @@ server_presets::server_presets(int argc, char ** argv, common_params & base_para
167166
control_args[env] = opt;
168167
}
169168
}
169+
170+
// remove any router-controlled args from base_args
171+
for (const auto & cargs : control_args) {
172+
auto it = base_args.find(cargs.second);
173+
if (it != base_args.end()) {
174+
base_args.erase(it);
175+
}
176+
}
170177
}
171178

172179
common_preset server_presets::get_preset(const std::string & name) {
@@ -209,7 +216,7 @@ server_models::server_models(
209216
const common_params & params,
210217
int argc,
211218
char ** argv,
212-
char ** envp) : base_params(params), presets(argc, argv, base_params, params.models_dir) {
219+
char ** envp) : base_params(params), presets(argc, argv, base_params, params.models_preset) {
213220
for (int i = 0; i < argc; i++) {
214221
base_args.push_back(std::string(argv[i]));
215222
}

0 commit comments

Comments
 (0)