Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions conversion/gemma.py
Original file line number Diff line number Diff line change
Expand Up @@ -789,6 +789,16 @@ def set_gguf_parameters(self):
class Gemma4AssistantModel(Gemma4Model):
model_arch = gguf.MODEL_ARCH.GEMMA4_ASSISTANT

@classmethod
def filter_tensors(cls, item: tuple[str, Callable[[], Tensor]]) -> tuple[str, Callable[[], Tensor]] | None:
name, gen = item

if "masked_embedding" in name:
logger.debug(f"Skipping get tensor {name!r} in safetensors so that convert can end normally.")
return None

return super().filter_tensors(item)

def set_gguf_parameters(self):
super().set_gguf_parameters()
self.gguf_writer.add_embedding_length_out(self.hparams["backbone_hidden_size"])
Expand Down
6 changes: 6 additions & 0 deletions gguf-py/gguf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,8 @@ class VISION_PROJECTOR_TYPE(IntEnum):
class MODEL_TENSOR(IntEnum):
TOKEN_EMBD = auto()
TOKEN_EMBD_NORM = auto()
MASKED_EMBD_CENTROIDS= auto()
MASKED_EMBD_ORDERING = auto()
TOKEN_TYPES = auto()
POS_EMBD = auto()
OUTPUT = auto()
Expand Down Expand Up @@ -1087,6 +1089,8 @@ class MODEL_TENSOR(IntEnum):
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm",
MODEL_TENSOR.TOKEN_TYPES: "token_types",
MODEL_TENSOR.MASKED_EMBD_CENTROIDS: "masked_embd_centroids",
MODEL_TENSOR.MASKED_EMBD_ORDERING: "masked_embd_ordering",
MODEL_TENSOR.POS_EMBD: "position_embd",
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
MODEL_TENSOR.OUTPUT: "output",
Expand Down Expand Up @@ -2586,6 +2590,8 @@ class MODEL_TENSOR(IntEnum):
MODEL_ARCH.GEMMA4_ASSISTANT: [
MODEL_TENSOR.ROPE_FREQS,
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.MASKED_EMBD_CENTROIDS,
MODEL_TENSOR.MASKED_EMBD_ORDERING,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.NEXTN_PROJ_PRE,
MODEL_TENSOR.NEXTN_PROJ_POST,
Expand Down
8 changes: 8 additions & 0 deletions gguf-py/gguf/tensor_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ class TensorNameMap:
"model.embed", # talkie
),

# Masked embeddings
MODEL_TENSOR.MASKED_EMBD_CENTROIDS: (
"masked_embedding.centroids", # gemma-4 E2B/E4B assistants
),
MODEL_TENSOR.MASKED_EMBD_ORDERING: (
"masked_embedding.token_ordering", # gemma-4 E2B/E4B assistants
),

# Token type embeddings
MODEL_TENSOR.TOKEN_TYPES: (
"embeddings.token_type_embeddings", # bert nomic-bert
Expand Down
4 changes: 4 additions & 0 deletions src/llama-arch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,8 @@ static const std::map<llm_tensor, const char *> LLM_TENSOR_NAMES = {
{ LLM_TENSOR_INDEXER_PROJ, "blk.%d.indexer.proj" },
{ LLM_TENSOR_INDEXER_ATTN_K, "blk.%d.indexer.attn_k" },
{ LLM_TENSOR_INDEXER_ATTN_Q_B, "blk.%d.indexer.attn_q_b" },
{ LLM_TENSOR_MASKED_EMBD_CENTROIDS, "masked_embd_centroids" },
{ LLM_TENSOR_MASKED_EMBD_ORDERING, "masked_embd_ordering" },
};

// declare information about the model weight tensors:
Expand Down Expand Up @@ -783,6 +785,8 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
// latent projections feed ggml_mul_mat, the buft probe must use MUL_MAT to keep them on GPU
{LLM_TENSOR_FFN_LATENT_DOWN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
{LLM_TENSOR_FFN_LATENT_UP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
{LLM_TENSOR_MASKED_EMBD_CENTROIDS, {LLM_TENSOR_LAYER_INPUT, GGML_OP_NONE}},
{LLM_TENSOR_MASKED_EMBD_ORDERING, {LLM_TENSOR_LAYER_INPUT, GGML_OP_NONE}},
};

LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}
Expand Down
3 changes: 3 additions & 0 deletions src/llama-arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -566,8 +566,11 @@ enum llm_tensor {
LLM_TENSOR_NEXTN_HNORM,
LLM_TENSOR_NEXTN_SHARED_HEAD_HEAD,
LLM_TENSOR_NEXTN_SHARED_HEAD_NORM,
LLM_TENSOR_MASKED_EMBD_CENTROIDS,
LLM_TENSOR_MASKED_EMBD_ORDERING,
};


enum llm_tensor_layer {
LLM_TENSOR_LAYER_INPUT,
LLM_TENSOR_LAYER_REPEATING,
Expand Down
3 changes: 3 additions & 0 deletions src/models/gemma4-assistant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ void llama_model_gemma4_assistant::load_arch_tensors(llama_model_loader &) {

output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), { n_embd }, 0);

create_tensor(tn(LLM_TENSOR_MASKED_EMBD_CENTROIDS, "weight"), {}, TENSOR_NOT_REQUIRED);
create_tensor(tn(LLM_TENSOR_MASKED_EMBD_ORDERING), {}, TENSOR_NOT_REQUIRED);

const int64_t n_embd_backbone = hparams.n_embd_inp();
nextn_proj_post = create_tensor(tn(LLM_TENSOR_NEXTN_PROJ_POST, "weight"), { n_embd, n_embd_backbone }, 0);

Expand Down
Loading