server : use different seeds for child completions (#18700)
* server : use different seeds for child completions
* cont : handle default seed
* cont : note
This commit is contained in:
parent
8ece3836b4
commit
f5f8812f7c
4 changed files with 12 additions and 8 deletions
|
|
@@ -4,7 +4,6 @@
|
|||
#include "server-task.h"
|
||||
#include "server-queue.h"
|
||||
|
||||
#include "arg.h"
|
||||
#include "common.h"
|
||||
#include "llama.h"
|
||||
#include "log.h"
|
||||
|
|
@@ -16,7 +15,6 @@
|
|||
#include <cstddef>
|
||||
#include <cinttypes>
|
||||
#include <memory>
|
||||
#include <unordered_set>
|
||||
#include <filesystem>
|
||||
|
||||
// fix problem with std::min and std::max
|
||||
|
|
@@ -2927,9 +2925,14 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
|
|||
if (task.params.n_cmpl > 1) {
|
||||
task.n_children = task.params.n_cmpl - 1;
|
||||
for (size_t j = 0; j < task.n_children; j++) {
|
||||
server_task child = task.create_child(
|
||||
task.id,
|
||||
rd.get_new_id());
|
||||
server_task child = task.create_child(task.id, rd.get_new_id());
|
||||
|
||||
// use different sampling seed for each child
|
||||
// note: https://github.com/ggml-org/llama.cpp/pull/18700#discussion_r2675115723
|
||||
if (child.params.sampling.seed != LLAMA_DEFAULT_SEED) {
|
||||
child.params.sampling.seed += j + 1;
|
||||
}
|
||||
|
||||
tasks.push_back(std::move(child));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue