diff --git a/src/llama-context.cpp b/src/llama-context.cpp index 168dbabd7667..3559f8fc8206 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -22,6 +22,10 @@ // llama_context // +class llama_exception : public std::runtime_error {/* Exception type for context-construction failures that llama_init_from_model logs with LLAMA_LOG_WARN rather than LLAMA_LOG_ERROR. */ + using std::runtime_error::runtime_error;/* Inherit the std::runtime_error constructors so the type can be thrown with a message string. */ +}; + static llm_graph_type ctx_type_to_graph_type(llama_context_type ctx_type) { switch (ctx_type) { case LLAMA_CONTEXT_TYPE_DEFAULT: return LLM_GRAPH_TYPE_DEFAULT; @@ -93,8 +97,7 @@ llama_context::llama_context( // TODO: more generic if (model.arch == LLM_ARCH_GEMMA4_ASSISTANT) { if (params.ctx_other == nullptr) { - // TODO: change from runtime_error to llama_exception to avoid printing error message - throw std::runtime_error("Gemma4Assistant requires ctx_other to be set (this warning is normal during memory fitting)"); + throw llama_exception("Gemma4Assistant requires ctx_other to be set (this warning is normal during memory fitting)");/* Thrown as llama_exception, not std::runtime_error: the message itself says this is normal during memory fitting, and llama_init_from_model reports this type at warning level. */ } cparams.ctx_other = params.ctx_other; @@ -103,7 +106,7 @@ llama_context::llama_context( if (model.arch == LLM_ARCH_EAGLE3) { if (model.tok_embd == nullptr || model.output == nullptr) { if (params.ctx_other == nullptr) { - throw std::runtime_error("EAGLE3 requires ctx_other to be set (this warning is normal during memory fitting)"); + throw llama_exception("EAGLE3 requires ctx_other to be set (this warning is normal during memory fitting)");/* Same treatment as the Gemma4Assistant case: a missing ctx_other is reported through llama_exception. */ } cparams.ctx_other = params.ctx_other; } @@ -3560,6 +3563,8 @@ llama_context * llama_init_from_model( try { auto * ctx = new llama_context(*model, params); return ctx; + } catch (const llama_exception & err) {/* Must stay ahead of the std::exception handler: llama_exception derives from std::runtime_error, so the generic handler would otherwise match first and log an error. */ + LLAMA_LOG_WARN("%s: failed to initialize the context: %s\n", __func__, err.what()); } catch (const std::exception & err) { LLAMA_LOG_ERROR("%s: failed to initialize the context: %s\n", __func__, err.what()); }