common : add common_speculative_is_compat() (#19270)
* llama : add llama_memory_can_rm_suffix()
* Revert "llama : add llama_memory_can_rm_suffix()"
  This reverts commit d30e59b62a15ef4266a6503e3f4eba770aec001b.
* spec : check if the target context is compatible for spec decoding
This commit is contained in:
parent
06bf3796f4
commit
dfde5993ea
3 changed files with 46 additions and 1 deletion
|
|
@@ -740,6 +740,11 @@ private:
|
|||
|
||||
slots.clear();
|
||||
|
||||
const bool can_spec = common_speculative_is_compat(ctx);
|
||||
if (!can_spec) {
|
||||
SRV_WRN("%s", "speculative decoding not supported by this context\n");
|
||||
}
|
||||
|
||||
// initialize slots
|
||||
for (int i = 0; i < params_base.n_parallel; i++) {
|
||||
server_slot slot;
|
||||
|
|
@@ -752,7 +757,7 @@ private:
|
|||
slot.prompt.tokens.has_mtmd = mctx != nullptr;
|
||||
|
||||
// try speculative decoding
|
||||
{
|
||||
if (can_spec) {
|
||||
slot.spec = common_speculative_init(params_base.speculative, slot.ctx);
|
||||
if (slot.spec) {
|
||||
if (mctx) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue