Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3122,14 +3122,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
"token budget for thinking: -1 for unrestricted, 0 for immediate end, N>0 for token budget (default: -1)",
[](common_params & params, int value) {
if (value < -1) { throw std::invalid_argument("invalid value"); }
- params.reasoning_budget = value;
+ params.sampling.reasoning_budget_tokens = value;
}
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_THINK_BUDGET"));
add_opt(common_arg(
{"--reasoning-budget-message"}, "MESSAGE",
"message injected before the end-of-thinking tag when reasoning budget is exhausted (default: none)",
[](common_params & params, const std::string & value) {
- params.reasoning_budget_message = value;
+ params.sampling.reasoning_budget_message = value;
}
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI}).set_env("LLAMA_ARG_THINK_BUDGET_MESSAGE"));
add_opt(common_arg(
Expand Down
3 changes: 1 addition & 2 deletions common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ struct common_params_sampling {
std::vector<llama_token> reasoning_budget_start; // start tag token sequence
std::vector<llama_token> reasoning_budget_end; // end tag token sequence
std::vector<llama_token> reasoning_budget_forced; // forced sequence (message + end tag)
+ std::string reasoning_budget_message; // message injected before end tag when budget exhausted

bool backend_sampling = false;

Expand Down
Expand Up @@ -581,8 +582,6 @@ struct common_params {
bool force_pure_content_parser = false;
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
int enable_reasoning = -1; // -1 = auto, 0 = disable, 1 = enable
- int reasoning_budget = -1;
- std::string reasoning_budget_message; // message injected before end tag when budget exhausted
bool prefill_assistant = true; // if true, any trailing assistant message will be prefilled into the response
int sleep_idle_seconds = -1; // if >0, server will sleep after this many seconds of idle time

Expand Down
4 changes: 2 additions & 2 deletions tools/cli/cli.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ struct cli_context {
// defaults.return_progress = true; // TODO: show progress

verbose_prompt = params.verbose_prompt;
- reasoning_budget = params.reasoning_budget;
- reasoning_budget_message = params.reasoning_budget_message;
+ reasoning_budget = params.sampling.reasoning_budget_tokens;
+ reasoning_budget_message = params.sampling.reasoning_budget_message;
}

std::string generate_completion(result_timings & out_timings) {
Expand Down
4 changes: 2 additions & 2 deletions tools/server/server-context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1044,8 +1044,8 @@ struct server_context_impl {
/* allow_image */ mctx ? mtmd_support_vision(mctx) : false,
/* allow_audio */ mctx ? mtmd_support_audio (mctx) : false,
/* enable_thinking */ enable_thinking,
- /* reasoning_budget */ params_base.reasoning_budget,
- /* reasoning_budget_msg */ params_base.reasoning_budget_message,
+ /* reasoning_budget */ params_base.sampling.reasoning_budget_tokens,
+ /* reasoning_budget_msg */ params_base.sampling.reasoning_budget_message,
/* media_path */ params_base.media_path,
/* force_pure_content */ params_base.force_pure_content_parser
};
Expand Down
Loading