model : add Jina Embeddings v5 Nano (partial EuroBERT) support (#19826)

* WIP: Add EuroBERT support with autoformatting changes This commit includes: - EuroBERT model implementation for GGUF conversion - C++ backend support for EuroBERT architecture - Unintended autoformatting changes to Python files Saving before reverting formatting-only changes. * feat: add back eos assert when not last token pooling * feat: removed duplicated code and cleanup * feat: removed not working architectures and unnecessary check * fix: typo * fix: dynamic pooling config * feat: added an example model for eurobert * feat: proper llama-vocab implementation for jina-v5 * fix: removed unnecessary comments
2026-02-26 12:14:09 +01:00 · 2026-02-26 12:14:09 +01:00 · 66287bdaac
commit 66287bdaac
parent 1ca3d1de15
12 changed files with 214 additions and 4 deletions
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@ -1148,6 +1148,9 @@ class TextModel(ModelBase):
        if chkhsh == "27949a2493fc4a9f53f5b9b029c82689cfbe5d3a1929bb25e043089e28466de6":
            # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-de
            res = "jina-v2-de"
+        if chkhsh == "a023e9fdc5a11f034d3ef515b92350e56fb2af1f66c6b6811a4444ea9bf8763d":
+            # ref: https://huggingface.co/jinaai/jina-embeddings-v5-text-nano
+            res = "jina-v5-nano"
        if chkhsh == "c136ed14d01c2745d4f60a9596ae66800e2b61fa45643e72436041855ad4089d":
            # ref: https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct
            res = "smaug-bpe"
@ -6125,6 +6128,32 @@ class NeoBert(BertModel):
        yield from super().modify_tensors(data_torch, name, bid)


+@ModelBase.register("EuroBertModel", "JinaEmbeddingsV5Model")
+class EuroBertModel(TextModel):
+    model_arch = gguf.MODEL_ARCH.EUROBERT
+
+    def set_vocab(self):
+        self.gguf_writer.add_add_bos_token(False)
+        self._set_vocab_gpt2()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+
+        # EuroBert is bidirectional (encoder)
+        self.gguf_writer.add_causal_attention(False)
+
+        self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
+
+        self._try_set_pooling_type()
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # Strip "model." prefix from tensor names
+        if name.startswith("model."):
+            name = name[6:]
+
+        yield from super().modify_tensors(data_torch, name, bid)
+
+
@ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification")
 class XLMRobertaModel(BertModel):
    model_arch = gguf.MODEL_ARCH.BERT