* llama-server: recursive GGUF loading

  Replace flat directory scan with recursive traversal using std::filesystem::recursive_directory_iterator.
  Support for nested vendor/model layouts (e.g. vendor/model/*.gguf).
  Model name now reflects the relative path within --models-dir instead of just the filename.
  Aggregate files by parent directory via std::map before constructing local_model

* server : router config POC (INI-based per-model settings)

* server: address review feedback from @aldehir and @ngxson

  PEG parser usage improvements:
  - Simplify parser instantiation (remove arena indirection)
  - Optimize grammar usage (ws instead of zero_or_more, remove optional wrapping)
  - Fix last line without newline bug (+ operator instead of <<)
  - Remove redundant end position check

  Feature scope:
  - Remove auto-reload feature (will be separate PR per @ngxson)
  - Keep config.ini auto-creation and template generation
  - Preserve per-model customization logic

  Co-authored-by: aldehir <aldehir@users.noreply.github.com>
  Co-authored-by: ngxson <ngxson@users.noreply.github.com>

* server: adopt aldehir's line-oriented PEG parser

  Complete rewrite of INI parser grammar and visitor:
  - Use p.chars(), p.negate(), p.any() instead of p.until()
  - Support end-of-line comments (key=value # comment)
  - Handle EOF without trailing newline correctly
  - Strict identifier validation ([a-zA-Z_][a-zA-Z0-9_.-]*)
  - Simplified visitor (no pending state, no trim needed)
  - Grammar handles whitespace natively via eol rule

  Business validation preserved:
  - Reject section names starting with LLAMA_ARG_*
  - Accept only keys starting with LLAMA_ARG_*
  - Require explicit section before key-value pairs

  Co-authored-by: aldehir <aldehir@users.noreply.github.com>

* server: fix CLI/env duplication in child processes

  Children now receive minimal CLI args (executable, model, port, alias) instead of inheriting all router args.
  Global settings pass through LLAMA_ARG_* environment variables only, eliminating duplicate config warnings.

  Fixes: Router args like -ngl, -fa were passed both via CLI and env, causing 'will be overwritten' warnings on every child spawn

* add common/preset.cpp

* fix compile

* cont

* allow custom-path models

* add falsey check

* server: fix router model discovery and child process spawning

  - Sanitize model names: replace / and \ with _ for display
  - Recursive directory scan with relative path storage
  - Convert relative paths to absolute when spawning children
  - Filter router control args from child processes
  - Refresh args after port assignment for correct port value
  - Fallback preset lookup for compatibility
  - Fix missing argv[0]: store server binary path before base_args parsing

* Revert "server: fix router model discovery and child process spawning"

  This reverts commit e3832b42eeea7fcb108995966c7584479f745857.

* clarify about "no-" prefix

* correct render_args() to include binary path

* also remove arg LLAMA_ARG_MODELS_PRESET for child

* add co-author for ini parser code

  Co-authored-by: aldehir <hello@alde.dev>

* also set LLAMA_ARG_HOST

* add CHILD_ADDR

* Remove dead code

---------

Co-authored-by: aldehir <aldehir@users.noreply.github.com>
Co-authored-by: ngxson <ngxson@users.noreply.github.com>
Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
Co-authored-by: aldehir <hello@alde.dev>
116 lines
4.5 KiB
C++
116 lines
4.5 KiB
C++
#pragma once
|
|
|
|
#include "common.h"
|
|
|
|
#include <set>
|
|
#include <map>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <cstring>
|
|
|
|
//
|
|
// CLI argument parsing
|
|
//
|
|
|
|
struct common_arg {
|
|
std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
|
|
std::set<enum llama_example> excludes = {};
|
|
std::vector<const char *> args;
|
|
const char * value_hint = nullptr; // help text or example for arg value
|
|
const char * value_hint_2 = nullptr; // for second arg value
|
|
const char * env = nullptr;
|
|
std::string help;
|
|
bool is_sparam = false; // is current arg a sampling param?
|
|
void (*handler_void) (common_params & params) = nullptr;
|
|
void (*handler_string) (common_params & params, const std::string &) = nullptr;
|
|
void (*handler_str_str)(common_params & params, const std::string &, const std::string &) = nullptr;
|
|
void (*handler_int) (common_params & params, int) = nullptr;
|
|
|
|
common_arg() = default;
|
|
|
|
common_arg(
|
|
const std::initializer_list<const char *> & args,
|
|
const char * value_hint,
|
|
const std::string & help,
|
|
void (*handler)(common_params & params, const std::string &)
|
|
) : args(args), value_hint(value_hint), help(help), handler_string(handler) {}
|
|
|
|
common_arg(
|
|
const std::initializer_list<const char *> & args,
|
|
const char * value_hint,
|
|
const std::string & help,
|
|
void (*handler)(common_params & params, int)
|
|
) : args(args), value_hint(value_hint), help(help), handler_int(handler) {}
|
|
|
|
common_arg(
|
|
const std::initializer_list<const char *> & args,
|
|
const std::string & help,
|
|
void (*handler)(common_params & params)
|
|
) : args(args), help(help), handler_void(handler) {}
|
|
|
|
// support 2 values for arg
|
|
common_arg(
|
|
const std::initializer_list<const char *> & args,
|
|
const char * value_hint,
|
|
const char * value_hint_2,
|
|
const std::string & help,
|
|
void (*handler)(common_params & params, const std::string &, const std::string &)
|
|
) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {}
|
|
|
|
common_arg & set_examples(std::initializer_list<enum llama_example> examples);
|
|
common_arg & set_excludes(std::initializer_list<enum llama_example> excludes);
|
|
common_arg & set_env(const char * env);
|
|
common_arg & set_sparam();
|
|
bool in_example(enum llama_example ex);
|
|
bool is_exclude(enum llama_example ex);
|
|
bool get_value_from_env(std::string & output) const;
|
|
bool has_value_from_env() const;
|
|
std::string to_string() const;
|
|
|
|
// for using as key in std::map
|
|
bool operator<(const common_arg& other) const {
|
|
if (args.empty() || other.args.empty()) {
|
|
return false;
|
|
}
|
|
return strcmp(args[0], other.args[0]) < 0;
|
|
}
|
|
bool operator==(const common_arg& other) const {
|
|
if (args.empty() || other.args.empty()) {
|
|
return false;
|
|
}
|
|
return strcmp(args[0], other.args[0]) == 0;
|
|
}
|
|
};
|
|
|
|
// String classification helpers for argument values (declarations only; the
// definitions live in another translation unit).
// NOTE(review): judging by the names these recognise "true"-like, "false"-like
// and "auto"-like spellings respectively - confirm the accepted spellings
// against the definitions.
namespace common_arg_utils {

    bool is_truthy(const std::string & value);

    bool is_falsey(const std::string & value);

    bool is_autoy(const std::string & value);

} // namespace common_arg_utils
|
|
|
|
struct common_params_context {
|
|
enum llama_example ex = LLAMA_EXAMPLE_COMMON;
|
|
common_params & params;
|
|
std::vector<common_arg> options;
|
|
void(*print_usage)(int, char **) = nullptr;
|
|
common_params_context(common_params & params) : params(params) {}
|
|
};
|
|
|
|
// parse input arguments from CLI
|
|
// if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
|
|
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
|
|
|
|
// parse input arguments from CLI into a map
|
|
// TODO: support repeated args in the future
|
|
bool common_params_parse(int argc, char ** argv, llama_example ex, std::map<common_arg, std::string> & out_map);
|
|
|
|
// initialize argument parser context - used by test-arg-parser and preset
|
|
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
|
|
|
|
// Options for fetching remote content (passed to common_remote_get_content).
// Plain aggregate: every numeric field defaults to 0, which means "no limit".
struct common_remote_params {
    std::vector<std::string> headers;
    long timeout  = 0; // CURLOPT_TIMEOUT, in seconds ; 0 means no timeout
    long max_size = 0; // max size of the response ; unlimited if 0 ; max is 2GB
};
|
|
// get remote file content, returns <http_code, raw_response_body>
|
|
std::pair<long, std::vector<char>> common_remote_get_content(const std::string & url, const common_remote_params & params);
|