diff --git a/go/api/v1alpha2/agent_types.go b/go/api/v1alpha2/agent_types.go index 5dc8e4958..330c19a3d 100644 --- a/go/api/v1alpha2/agent_types.go +++ b/go/api/v1alpha2/agent_types.go @@ -118,6 +118,11 @@ type DeclarativeAgentSpec struct { // +optional // due to a bug in adk (https://github.com/google/adk-python/issues/3921), this field is ignored for now. ExecuteCodeBlocks *bool `json:"executeCodeBlocks,omitempty"` + + // Context configures context management for this agent. + // This includes event compaction (compression) and context caching. + // +optional + Context *ContextConfig `json:"context,omitempty"` } type DeclarativeDeploymentSpec struct { @@ -127,6 +132,77 @@ type DeclarativeDeploymentSpec struct { SharedDeploymentSpec `json:",inline"` } +// ContextConfig configures context management for an agent. +// Context management includes event compaction (compression/summarization) and context caching. +type ContextConfig struct { + // Compaction configures event history compaction. + // When enabled, older events in the conversation are compacted (compressed/summarized) + // to reduce context size while preserving key information. + // +optional + Compaction *ContextCompressionConfig `json:"compaction,omitempty"` + // Cache configures context caching. + // When enabled, prefix context is cached at the provider level to reduce + // redundant processing of repeated context. + // +optional + Cache *ContextCacheConfig `json:"cache,omitempty"` +} + +// ContextCompressionConfig configures event history compaction/compression. +// +kubebuilder:validation:XValidation:rule="has(self.compactionInterval) && has(self.overlapSize)",message="compactionInterval and overlapSize are required" +type ContextCompressionConfig struct { + // CompactionInterval specifies how often (in number of events) to trigger compaction. + // +kubebuilder:validation:Minimum=1 + CompactionInterval int `json:"compactionInterval"` + // OverlapSize specifies the number of events to keep from the previous compaction window + // for continuity. + // +kubebuilder:validation:Minimum=0 + OverlapSize int `json:"overlapSize"` + // Summarizer configures an LLM-based summarizer for event compaction. + // If not specified, compacted events are simply truncated without summarization. + // +optional + Summarizer *ContextSummarizerConfig `json:"summarizer,omitempty"` + // TokenThreshold is the minimum token count before compaction is triggered. + // +optional + TokenThreshold *int `json:"tokenThreshold,omitempty"` + // EventRetentionSize is the number of most recent events to always retain. + // +optional + EventRetentionSize *int `json:"eventRetentionSize,omitempty"` +} + +// ContextSummarizerConfig configures the LLM-based event summarizer. +type ContextSummarizerConfig struct { + // ModelConfig is the name of a ModelConfig resource to use for summarization. + // Must be in the same namespace as the Agent. + // If not specified, uses the agent's own model. + // +optional + ModelConfig string `json:"modelConfig,omitempty"` + // PromptTemplate is a custom prompt template for the summarizer. + // +optional + PromptTemplate string `json:"promptTemplate,omitempty"` +} + +// ContextCacheConfig configures prefix context caching at the LLM provider level. +type ContextCacheConfig struct { + // CacheIntervals specifies how often (in number of events) to update the cache. + // Default: 10 + // +optional + // +kubebuilder:default=10 + // +kubebuilder:validation:Minimum=1 + CacheIntervals *int `json:"cacheIntervals,omitempty"` + // TTLSeconds specifies the time-to-live for cached context in seconds. + // Default: 1800 (30 minutes) + // +optional + // +kubebuilder:default=1800 + // +kubebuilder:validation:Minimum=0 + TTLSeconds *int `json:"ttlSeconds,omitempty"` + // MinTokens is the minimum number of tokens before caching is activated. + // Default: 0 + // +optional + // +kubebuilder:default=0 + // +kubebuilder:validation:Minimum=0 + MinTokens *int `json:"minTokens,omitempty"` +} + type BYOAgentSpec struct { // Trust relationship to the agent. // +optional diff --git a/go/api/v1alpha2/zz_generated.deepcopy.go b/go/api/v1alpha2/zz_generated.deepcopy.go index 100fdbbf1..437f18469 100644 --- a/go/api/v1alpha2/zz_generated.deepcopy.go +++ b/go/api/v1alpha2/zz_generated.deepcopy.go @@ -356,6 +356,106 @@ func (in *ByoDeploymentSpec) DeepCopy() *ByoDeploymentSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ContextCacheConfig) DeepCopyInto(out *ContextCacheConfig) { + *out = *in + if in.CacheIntervals != nil { + in, out := &in.CacheIntervals, &out.CacheIntervals + *out = new(int) + **out = **in + } + if in.TTLSeconds != nil { + in, out := &in.TTLSeconds, &out.TTLSeconds + *out = new(int) + **out = **in + } + if in.MinTokens != nil { + in, out := &in.MinTokens, &out.MinTokens + *out = new(int) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ContextCacheConfig. +func (in *ContextCacheConfig) DeepCopy() *ContextCacheConfig { + if in == nil { + return nil + } + out := new(ContextCacheConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ContextCompressionConfig) DeepCopyInto(out *ContextCompressionConfig) { + *out = *in + if in.Summarizer != nil { + in, out := &in.Summarizer, &out.Summarizer + *out = new(ContextSummarizerConfig) + **out = **in + } + if in.TokenThreshold != nil { + in, out := &in.TokenThreshold, &out.TokenThreshold + *out = new(int) + **out = **in + } + if in.EventRetentionSize != nil { + in, out := &in.EventRetentionSize, &out.EventRetentionSize + *out = new(int) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ContextCompressionConfig. +func (in *ContextCompressionConfig) DeepCopy() *ContextCompressionConfig { + if in == nil { + return nil + } + out := new(ContextCompressionConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ContextConfig) DeepCopyInto(out *ContextConfig) { + *out = *in + if in.Compaction != nil { + in, out := &in.Compaction, &out.Compaction + *out = new(ContextCompressionConfig) + (*in).DeepCopyInto(*out) + } + if in.Cache != nil { + in, out := &in.Cache, &out.Cache + *out = new(ContextCacheConfig) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ContextConfig. +func (in *ContextConfig) DeepCopy() *ContextConfig { + if in == nil { + return nil + } + out := new(ContextConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ContextSummarizerConfig) DeepCopyInto(out *ContextSummarizerConfig) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ContextSummarizerConfig. +func (in *ContextSummarizerConfig) DeepCopy() *ContextSummarizerConfig { + if in == nil { + return nil + } + out := new(ContextSummarizerConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *DeclarativeAgentSpec) DeepCopyInto(out *DeclarativeAgentSpec) { *out = *in @@ -390,6 +490,11 @@ func (in *DeclarativeAgentSpec) DeepCopyInto(out *DeclarativeAgentSpec) { *out = new(bool) **out = **in } + if in.Context != nil { + in, out := &in.Context, &out.Context + *out = new(ContextConfig) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeclarativeAgentSpec. diff --git a/go/config/crd/bases/kagent.dev_agents.yaml b/go/config/crd/bases/kagent.dev_agents.yaml index 418225433..b2a76f19c 100644 --- a/go/config/crd/bases/kagent.dev_agents.yaml +++ b/go/config/crd/bases/kagent.dev_agents.yaml @@ -6196,6 +6196,88 @@ spec: minItems: 1 type: array type: object + context: + description: |- + Context configures context management for this agent. + This includes event compaction (compression) and context caching. + properties: + cache: + description: |- + Cache configures context caching. + When enabled, prefix context is cached at the provider level to reduce + redundant processing of repeated context. + properties: + cacheIntervals: + default: 10 + description: |- + CacheIntervals specifies how often (in number of events) to update the cache. + Default: 10 + minimum: 1 + type: integer + minTokens: + default: 0 + description: |- + MinTokens is the minimum number of tokens before caching is activated. + Default: 0 + minimum: 0 + type: integer + ttlSeconds: + default: 1800 + description: |- + TTLSeconds specifies the time-to-live for cached context in seconds. + Default: 1800 (30 minutes) + minimum: 0 + type: integer + type: object + compaction: + description: |- + Compaction configures event history compaction. + When enabled, older events in the conversation are compacted (compressed/summarized) + to reduce context size while preserving key information. + properties: + compactionInterval: + description: CompactionInterval specifies how often (in + number of events) to trigger compaction. + minimum: 1 + type: integer + eventRetentionSize: + description: EventRetentionSize is the number of most + recent events to always retain. + type: integer + overlapSize: + description: |- + OverlapSize specifies the number of events to keep from the previous compaction window + for continuity. + minimum: 0 + type: integer + summarizer: + description: |- + Summarizer configures an LLM-based summarizer for event compaction. + If not specified, compacted events are simply truncated without summarization. + properties: + modelConfig: + description: |- + ModelConfig is the name of a ModelConfig resource to use for summarization. + Must be in the same namespace as the Agent. + If not specified, uses the agent's own model. + type: string + promptTemplate: + description: PromptTemplate is a custom prompt template + for the summarizer. + type: string + type: object + tokenThreshold: + description: TokenThreshold is the minimum token count + before compaction is triggered. + type: integer + required: + - compactionInterval + - overlapSize + type: object + x-kubernetes-validations: + - message: compactionInterval and overlapSize are required + rule: has(self.compactionInterval) && has(self.overlapSize) + type: object deployment: properties: affinity: diff --git a/go/internal/controller/translator/agent/adk_api_translator.go b/go/internal/controller/translator/agent/adk_api_translator.go index dbc6fd7dd..ab1f72fa8 100644 --- a/go/internal/controller/translator/agent/adk_api_translator.go +++ b/go/internal/controller/translator/agent/adk_api_translator.go @@ -575,6 +575,51 @@ func (a *adkApiTranslator) translateInlineAgent(ctx context.Context, agent *v1al Stream: agent.Spec.Declarative.Stream, } + // Translate context management configuration + if agent.Spec.Declarative.Context != nil { + contextCfg := &adk.AgentContextConfig{} + + if agent.Spec.Declarative.Context.Compaction != nil { + comp := agent.Spec.Declarative.Context.Compaction + compCfg := &adk.AgentCompressionConfig{ + CompactionInterval: comp.CompactionInterval, + OverlapSize: comp.OverlapSize, + TokenThreshold: comp.TokenThreshold, + EventRetentionSize: comp.EventRetentionSize, + } + + if comp.Summarizer != nil { + // Resolve summarizer model - use agent's model if not specified + summarizerModelConfigName := agent.Spec.Declarative.ModelConfig + if comp.Summarizer.ModelConfig != "" { + summarizerModelConfigName = comp.Summarizer.ModelConfig + } + summarizerModelCfg := &v1alpha2.ModelConfig{} + err := a.kube.Get(ctx, types.NamespacedName{ + Namespace: agent.Namespace, + Name: summarizerModelConfigName, + }, summarizerModelCfg) + if err != nil { + return nil, nil, nil, fmt.Errorf("failed to resolve summarizer model config %q: %w", summarizerModelConfigName, err) + } + compCfg.SummarizerModelName = summarizerModelCfg.Spec.Model + compCfg.PromptTemplate = comp.Summarizer.PromptTemplate + } + + contextCfg.Compaction = compCfg + } + + if agent.Spec.Declarative.Context.Cache != nil { + contextCfg.Cache = &adk.AgentCacheConfig{ + CacheIntervals: agent.Spec.Declarative.Context.Cache.CacheIntervals, + TTLSeconds: agent.Spec.Declarative.Context.Cache.TTLSeconds, + MinTokens: agent.Spec.Declarative.Context.Cache.MinTokens, + } + } + + cfg.ContextConfig = contextCfg + } + for _, tool := range agent.Spec.Declarative.Tools { headers, err := tool.ResolveHeaders(ctx, a.kube, agent.Namespace) if err != nil { diff --git a/go/internal/controller/translator/agent/adk_api_translator_test.go b/go/internal/controller/translator/agent/adk_api_translator_test.go index 7fa2fb345..106b064bd 100644 --- a/go/internal/controller/translator/agent/adk_api_translator_test.go +++ b/go/internal/controller/translator/agent/adk_api_translator_test.go @@ -17,6 +17,7 @@ import ( "k8s.io/apimachinery/pkg/types" schemev1 "k8s.io/client-go/kubernetes/scheme" "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) @@ -913,3 +914,189 @@ func Test_AdkApiTranslator_RecursionDepthTracking(t *testing.T) { require.NoError(t, err, "diamond pattern should pass — D is not a cycle, just shared") }) } + +func Test_AdkApiTranslator_ContextConfig(t *testing.T) { + scheme := schemev1.Scheme + require.NoError(t, v1alpha2.AddToScheme(scheme)) + + modelConfig := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-model", + Namespace: "default", + }, + Spec: v1alpha2.ModelConfigSpec{ + Model: "gpt-4", + Provider: v1alpha2.ModelProviderOpenAI, + }, + } + + summarizerModelConfig := &v1alpha2.ModelConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "summarizer-model", + Namespace: "default", + }, + Spec: v1alpha2.ModelConfigSpec{ + Model: "gpt-4o-mini", + Provider: v1alpha2.ModelProviderOpenAI, + }, + } + + makeAgent := func(context *v1alpha2.ContextConfig) *v1alpha2.Agent { + return &v1alpha2.Agent{ + ObjectMeta: metav1.ObjectMeta{Name: "test-agent", Namespace: "default"}, + Spec: v1alpha2.AgentSpec{ + Type: v1alpha2.AgentType_Declarative, + Description: "Test agent", + Declarative: &v1alpha2.DeclarativeAgentSpec{ + SystemMessage: "You are a test agent", + ModelConfig: "test-model", + Context: context, + }, + }, + } + } + + tests := []struct { + name string + agent *v1alpha2.Agent + extraObjects []client.Object + wantErr bool + errContains string + assertConfig func(t *testing.T, cfg *adk.AgentConfig) + }{ + { + name: "no context config", + agent: makeAgent(nil), + assertConfig: func(t *testing.T, cfg *adk.AgentConfig) { + assert.Nil(t, cfg.ContextConfig) + }, + }, + { + name: "compaction only", + agent: makeAgent(&v1alpha2.ContextConfig{ + Compaction: &v1alpha2.ContextCompressionConfig{ + CompactionInterval: 5, + OverlapSize: 2, + }, + }), + assertConfig: func(t *testing.T, cfg *adk.AgentConfig) { + require.NotNil(t, cfg.ContextConfig) + require.NotNil(t, cfg.ContextConfig.Compaction) + assert.Equal(t, 5, cfg.ContextConfig.Compaction.CompactionInterval) + assert.Equal(t, 2, cfg.ContextConfig.Compaction.OverlapSize) + assert.Empty(t, cfg.ContextConfig.Compaction.SummarizerModelName) + assert.Nil(t, cfg.ContextConfig.Cache) + }, + }, + { + name: "cache only", + agent: makeAgent(&v1alpha2.ContextConfig{ + Cache: &v1alpha2.ContextCacheConfig{ + CacheIntervals: ptr.To(20), + TTLSeconds: ptr.To(3600), + MinTokens: ptr.To(100), + }, + }), + assertConfig: func(t *testing.T, cfg *adk.AgentConfig) { + require.NotNil(t, cfg.ContextConfig) + assert.Nil(t, cfg.ContextConfig.Compaction) + require.NotNil(t, cfg.ContextConfig.Cache) + assert.Equal(t, ptr.To(20), cfg.ContextConfig.Cache.CacheIntervals) + assert.Equal(t, ptr.To(3600), cfg.ContextConfig.Cache.TTLSeconds) + assert.Equal(t, ptr.To(100), cfg.ContextConfig.Cache.MinTokens) + }, + }, + { + name: "both compaction and cache", + agent: makeAgent(&v1alpha2.ContextConfig{ + Compaction: &v1alpha2.ContextCompressionConfig{ + CompactionInterval: 10, + OverlapSize: 3, + TokenThreshold: ptr.To(1000), + EventRetentionSize: ptr.To(5), + }, + Cache: &v1alpha2.ContextCacheConfig{ + CacheIntervals: ptr.To(15), + }, + }), + assertConfig: func(t *testing.T, cfg *adk.AgentConfig) { + require.NotNil(t, cfg.ContextConfig) + require.NotNil(t, cfg.ContextConfig.Compaction) + assert.Equal(t, 10, cfg.ContextConfig.Compaction.CompactionInterval) + assert.Equal(t, 3, cfg.ContextConfig.Compaction.OverlapSize) + assert.Equal(t, ptr.To(1000), cfg.ContextConfig.Compaction.TokenThreshold) + assert.Equal(t, ptr.To(5), cfg.ContextConfig.Compaction.EventRetentionSize) + require.NotNil(t, cfg.ContextConfig.Cache) + assert.Equal(t, ptr.To(15), cfg.ContextConfig.Cache.CacheIntervals) + }, + }, + { + name: "compaction with summarizer using agent model", + agent: makeAgent(&v1alpha2.ContextConfig{ + Compaction: &v1alpha2.ContextCompressionConfig{ + CompactionInterval: 5, + OverlapSize: 2, + Summarizer: &v1alpha2.ContextSummarizerConfig{ + PromptTemplate: "Summarize: {{events}}", + }, + }, + }), + assertConfig: func(t *testing.T, cfg *adk.AgentConfig) { + require.NotNil(t, cfg.ContextConfig) + require.NotNil(t, cfg.ContextConfig.Compaction) + assert.Equal(t, "gpt-4", cfg.ContextConfig.Compaction.SummarizerModelName) + assert.Equal(t, "Summarize: {{events}}", cfg.ContextConfig.Compaction.PromptTemplate) + }, + }, + { + name: "compaction with summarizer using separate model", + agent: makeAgent(&v1alpha2.ContextConfig{ + Compaction: &v1alpha2.ContextCompressionConfig{ + CompactionInterval: 5, + OverlapSize: 2, + Summarizer: &v1alpha2.ContextSummarizerConfig{ + ModelConfig: "summarizer-model", + }, + }, + }), + extraObjects: []client.Object{summarizerModelConfig.DeepCopy()}, + assertConfig: func(t *testing.T, cfg *adk.AgentConfig) { + require.NotNil(t, cfg.ContextConfig) + require.NotNil(t, cfg.ContextConfig.Compaction) + assert.Equal(t, "gpt-4o-mini", cfg.ContextConfig.Compaction.SummarizerModelName) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + objects := []client.Object{modelConfig.DeepCopy()} + for _, obj := range tt.extraObjects { + objects = append(objects, obj) + } + kubeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(objects...). + Build() + + defaultModel := types.NamespacedName{Namespace: "default", Name: "test-model"} + trans := translator.NewAdkApiTranslator(kubeClient, defaultModel, nil, "") + outputs, err := trans.TranslateAgent(context.Background(), tt.agent) + + if tt.wantErr { + require.Error(t, err) + if tt.errContains != "" { + assert.Contains(t, err.Error(), tt.errContains) + } + return + } + + require.NoError(t, err) + require.NotNil(t, outputs) + require.NotNil(t, outputs.Config) + if tt.assertConfig != nil { + tt.assertConfig(t, outputs.Config) + } + }) + } +} diff --git a/go/pkg/adk/types.go b/go/pkg/adk/types.go index 8479a00e6..773dffc35 100644 --- a/go/pkg/adk/types.go +++ b/go/pkg/adk/types.go @@ -287,16 +287,42 @@ type AgentConfig struct { RemoteAgents []RemoteAgentConfig `json:"remote_agents"` ExecuteCode bool `json:"execute_code,omitempty"` Stream bool `json:"stream"` + // Context management configuration + ContextConfig *AgentContextConfig `json:"context_config,omitempty"` +} + +// AgentContextConfig is the context management configuration that flows through config.json to the Python runtime. +type AgentContextConfig struct { + Compaction *AgentCompressionConfig `json:"compaction,omitempty"` + Cache *AgentCacheConfig `json:"cache,omitempty"` +} + +// AgentCompressionConfig maps to Python's ContextCompressionSettings. +type AgentCompressionConfig struct { + CompactionInterval int `json:"compaction_interval"` + OverlapSize int `json:"overlap_size"` + SummarizerModelName string `json:"summarizer_model_name,omitempty"` + PromptTemplate string `json:"prompt_template,omitempty"` + TokenThreshold *int `json:"token_threshold,omitempty"` + EventRetentionSize *int `json:"event_retention_size,omitempty"` +} + +// AgentCacheConfig maps to Python's ContextCacheSettings. +type AgentCacheConfig struct { + CacheIntervals *int `json:"cache_intervals,omitempty"` + TTLSeconds *int `json:"ttl_seconds,omitempty"` + MinTokens *int `json:"min_tokens,omitempty"` } func (a *AgentConfig) UnmarshalJSON(data []byte) error { var tmp struct { - Model json.RawMessage `json:"model"` - Description string `json:"description"` - Instruction string `json:"instruction"` - HttpTools []HttpMcpServerConfig `json:"http_tools"` - SseTools []SseMcpServerConfig `json:"sse_tools"` - RemoteAgents []RemoteAgentConfig `json:"remote_agents"` + Model json.RawMessage `json:"model"` + Description string `json:"description"` + Instruction string `json:"instruction"` + HttpTools []HttpMcpServerConfig `json:"http_tools"` + SseTools []SseMcpServerConfig `json:"sse_tools"` + RemoteAgents []RemoteAgentConfig `json:"remote_agents"` + ContextConfig *AgentContextConfig `json:"context_config,omitempty"` } if err := json.Unmarshal(data, &tmp); err != nil { return err @@ -311,6 +337,7 @@ func (a *AgentConfig) UnmarshalJSON(data []byte) error { a.HttpTools = tmp.HttpTools a.SseTools = tmp.SseTools a.RemoteAgents = tmp.RemoteAgents + a.ContextConfig = tmp.ContextConfig return nil } diff --git a/helm/kagent-crds/templates/kagent.dev_agents.yaml b/helm/kagent-crds/templates/kagent.dev_agents.yaml index 418225433..b2a76f19c 100644 --- a/helm/kagent-crds/templates/kagent.dev_agents.yaml +++ b/helm/kagent-crds/templates/kagent.dev_agents.yaml @@ -6196,6 +6196,88 @@ spec: minItems: 1 type: array type: object + context: + description: |- + Context configures context management for this agent. + This includes event compaction (compression) and context caching. + properties: + cache: + description: |- + Cache configures context caching. + When enabled, prefix context is cached at the provider level to reduce + redundant processing of repeated context. + properties: + cacheIntervals: + default: 10 + description: |- + CacheIntervals specifies how often (in number of events) to update the cache. + Default: 10 + minimum: 1 + type: integer + minTokens: + default: 0 + description: |- + MinTokens is the minimum number of tokens before caching is activated. + Default: 0 + minimum: 0 + type: integer + ttlSeconds: + default: 1800 + description: |- + TTLSeconds specifies the time-to-live for cached context in seconds. + Default: 1800 (30 minutes) + minimum: 0 + type: integer + type: object + compaction: + description: |- + Compaction configures event history compaction. + When enabled, older events in the conversation are compacted (compressed/summarized) + to reduce context size while preserving key information. + properties: + compactionInterval: + description: CompactionInterval specifies how often (in + number of events) to trigger compaction. + minimum: 1 + type: integer + eventRetentionSize: + description: EventRetentionSize is the number of most + recent events to always retain. + type: integer + overlapSize: + description: |- + OverlapSize specifies the number of events to keep from the previous compaction window + for continuity. + minimum: 0 + type: integer + summarizer: + description: |- + Summarizer configures an LLM-based summarizer for event compaction. + If not specified, compacted events are simply truncated without summarization. + properties: + modelConfig: + description: |- + ModelConfig is the name of a ModelConfig resource to use for summarization. + Must be in the same namespace as the Agent. + If not specified, uses the agent's own model. + type: string + promptTemplate: + description: PromptTemplate is a custom prompt template + for the summarizer. + type: string + type: object + tokenThreshold: + description: TokenThreshold is the minimum token count + before compaction is triggered. + type: integer + required: + - compactionInterval + - overlapSize + type: object + x-kubernetes-validations: + - message: compactionInterval and overlapSize are required + rule: has(self.compactionInterval) && has(self.overlapSize) + type: object deployment: properties: affinity: diff --git a/python/packages/kagent-adk/src/kagent/adk/_a2a.py b/python/packages/kagent-adk/src/kagent/adk/_a2a.py index d32d9d83b..1becf78e3 100644 --- a/python/packages/kagent-adk/src/kagent/adk/_a2a.py +++ b/python/packages/kagent-adk/src/kagent/adk/_a2a.py @@ -14,7 +14,9 @@ from fastapi.responses import PlainTextResponse from google.adk.agents import BaseAgent from google.adk.apps import App +from google.adk.apps.app import EventsCompactionConfig from google.adk.artifacts import InMemoryArtifactService +from google.adk.agents.context_cache_config import ContextCacheConfig as AdkContextCacheConfig from google.adk.plugins import BasePlugin from google.adk.runners import Runner from google.adk.sessions import InMemorySessionService @@ -60,6 +62,8 @@ def __init__( lifespan: Optional[Callable[[Any], Any]] = None, plugins: List[BasePlugin] = None, stream: bool = False, + events_compaction_config: Optional[EventsCompactionConfig] = None, + context_cache_config: Optional[AdkContextCacheConfig] = None, ): """Initialize the KAgent application. @@ -79,6 +83,8 @@ def __init__( self._lifespan = lifespan self.plugins = plugins if plugins is not None else [] self.stream = stream + self.events_compaction_config = events_compaction_config + self.context_cache_config = context_cache_config def build(self, local=False) -> FastAPI: session_service = InMemorySessionService() @@ -95,7 +101,13 @@ def build(self, local=False) -> FastAPI: def create_runner() -> Runner: root_agent = self.root_agent_factory() - adk_app = App(name=self.app_name, root_agent=root_agent, plugins=self.plugins) + adk_app = App( + name=self.app_name, + root_agent=root_agent, + plugins=self.plugins, + events_compaction_config=self.events_compaction_config, + context_cache_config=self.context_cache_config, + ) return Runner( app=adk_app, diff --git a/python/packages/kagent-adk/src/kagent/adk/cli.py b/python/packages/kagent-adk/src/kagent/adk/cli.py index 4116efd6e..bcaaaf9ce 100644 --- a/python/packages/kagent-adk/src/kagent/adk/cli.py +++ b/python/packages/kagent-adk/src/kagent/adk/cli.py @@ -15,6 +15,7 @@ from kagent.core import KAgentConfig, configure_logging, configure_tracing from . import AgentConfig, KAgentApp +from .types import build_adk_context_configs from .skill_fetcher import fetch_skill from .tools import add_skills_tool_to_agent @@ -24,6 +25,22 @@ app = typer.Typer() +def _build_context_kwargs(agent_config: AgentConfig) -> dict: + """Build context config kwargs for KAgentApp from agent config.""" + if agent_config.context_config is None: + return {} + events_compaction_config, context_cache_config = build_adk_context_configs( + agent_config.context_config, + agent_model_name=getattr(agent_config.model, "model", None), + ) + kwargs = {} + if events_compaction_config is not None: + kwargs["events_compaction_config"] = events_compaction_config + if context_cache_config is not None: + kwargs["context_cache_config"] = context_cache_config + return kwargs + + kagent_url_override = os.getenv("KAGENT_URL") sts_well_known_uri = os.getenv("STS_WELL_KNOWN_URI") propagate_token = os.getenv("KAGENT_PROPAGATE_TOKEN") @@ -72,6 +89,7 @@ def root_agent_factory() -> BaseAgent: return root_agent + context_kwargs = _build_context_kwargs(agent_config) kagent_app = KAgentApp( root_agent_factory, agent_card, @@ -79,6 +97,7 @@ def root_agent_factory() -> BaseAgent: app_cfg.app_name, plugins=plugins, stream=agent_config.stream if agent_config.stream is not None else False, + **context_kwargs, ) server = kagent_app.build() @@ -181,6 +200,7 @@ def root_agent_factory() -> BaseAgent: except Exception: logger.exception(f"Failed to load agent module '{name}' for lifespan") + context_kwargs = _build_context_kwargs(agent_config) if agent_config else {} kagent_app = KAgentApp( root_agent_factory, agent_card, @@ -189,6 +209,7 @@ def root_agent_factory() -> BaseAgent: lifespan=lifespan, plugins=plugins, stream=agent_config.stream if agent_config and agent_config.stream is not None else False, + **context_kwargs, ) if local: @@ -219,7 +240,8 @@ def root_agent_factory() -> BaseAgent: maybe_add_skills(root_agent) return root_agent - app = KAgentApp(root_agent_factory, agent_card, app_cfg.url, app_cfg.app_name, plugins=plugins) + context_kwargs = _build_context_kwargs(agent_config) + app = KAgentApp(root_agent_factory, agent_card, app_cfg.url, app_cfg.app_name, plugins=plugins, **context_kwargs) await app.test(task) diff --git a/python/packages/kagent-adk/src/kagent/adk/types.py b/python/packages/kagent-adk/src/kagent/adk/types.py index d5f6f4f0d..b71156d86 100644 --- a/python/packages/kagent-adk/src/kagent/adk/types.py +++ b/python/packages/kagent-adk/src/kagent/adk/types.py @@ -215,6 +215,32 @@ class Bedrock(BaseLLM): type: Literal["bedrock"] +class ContextCompressionSettings(BaseModel): + """Settings for event history compaction/compression.""" + + compaction_interval: int + overlap_size: int + summarizer_model_name: str | None = None + prompt_template: str | None = None + token_threshold: int | None = None + event_retention_size: int | None = None + + +class ContextCacheSettings(BaseModel): + """Settings for prefix context caching.""" + + cache_intervals: int | None = None + ttl_seconds: int | None = None + min_tokens: int | None = None + + +class ContextConfig(BaseModel): + """Context management configuration containing compaction and cache settings.""" + + compaction: ContextCompressionSettings | None = None + cache: ContextCacheSettings | None = None + + class AgentConfig(BaseModel): model: Union[OpenAI, Anthropic, GeminiVertexAI, GeminiAnthropic, Ollama, AzureOpenAI, Gemini, Bedrock] = Field( discriminator="type" @@ -227,6 +253,7 @@ class AgentConfig(BaseModel): execute_code: bool | None = None # This stream option refers to LLM response streaming, not A2A streaming stream: bool | None = None + context_config: ContextConfig | None = None def to_agent(self, name: str, sts_integration: Optional[ADKTokenPropagationPlugin] = None) -> Agent: if name is None or not str(name).strip(): @@ -395,3 +422,66 @@ async def rewrite_url_to_proxy(request: httpx.Request) -> None: tools=tools, code_executor=code_executor, ) + + +def build_adk_context_configs( + context_config: ContextConfig, + agent_model_name: str | None = None, +) -> tuple: + """Build Google ADK context config objects from kagent settings. + + Args: + context_config: The kagent context configuration + agent_model_name: The agent's model name, used as fallback for summarizer + + Returns: + Tuple of (events_compaction_config, context_cache_config), either may be None + """ + from google.adk.apps.app import EventsCompactionConfig + from google.adk.agents.context_cache_config import ( + ContextCacheConfig as AdkContextCacheConfig, + ) + + events_compaction_config = None + context_cache_config = None + + if context_config.compaction is not None: + comp = context_config.compaction + summarizer = None + + if comp.summarizer_model_name or comp.prompt_template: + from google.adk.apps.llm_event_summarizer import LlmEventSummarizer + + model_name = comp.summarizer_model_name or agent_model_name + if model_name: + summarizer_llm = LiteLlm(model=model_name) + summarizer = LlmEventSummarizer( + llm=summarizer_llm, + prompt_template=comp.prompt_template, + ) + + compaction_kwargs: dict = { + "compaction_interval": comp.compaction_interval, + "overlap_size": comp.overlap_size, + "summarizer": summarizer, + } + # Forward-compatible: pass token_threshold/event_retention_size + # when the ADK version supports them. + if comp.token_threshold is not None and hasattr(EventsCompactionConfig, "token_threshold"): + compaction_kwargs["token_threshold"] = comp.token_threshold + if comp.event_retention_size is not None and hasattr(EventsCompactionConfig, "event_retention_size"): + compaction_kwargs["event_retention_size"] = comp.event_retention_size + events_compaction_config = EventsCompactionConfig(**compaction_kwargs) + + if context_config.cache is not None: + cache = context_config.cache + kwargs = {} + if cache.cache_intervals is not None: + kwargs["cache_intervals"] = cache.cache_intervals + if cache.ttl_seconds is not None: + kwargs["ttl_seconds"] = cache.ttl_seconds + if cache.min_tokens is not None: + kwargs["min_tokens"] = cache.min_tokens + context_cache_config = AdkContextCacheConfig(**kwargs) + + return events_compaction_config, context_cache_config diff --git a/python/packages/kagent-adk/tests/unittests/test_context_config.py b/python/packages/kagent-adk/tests/unittests/test_context_config.py new file mode 100644 index 000000000..c74dc165b --- /dev/null +++ b/python/packages/kagent-adk/tests/unittests/test_context_config.py @@ -0,0 +1,265 @@ +"""Tests for context management configuration types and builder.""" + +import json + +import pytest +from pydantic import ValidationError + +from kagent.adk.types import ( + AgentConfig, + ContextCacheSettings, + ContextCompressionSettings, + ContextConfig, + build_adk_context_configs, +) + + +def _make_agent_config_json(**context_kwargs) -> str: + """Helper to create AgentConfig JSON with optional context config.""" + config = { + "model": {"type": "openai", "model": "gpt-4"}, + "description": "test agent", + "instruction": "test instruction", + } + if context_kwargs: + config["context_config"] = context_kwargs + return json.dumps(config) + + +class TestContextConfigParsing: + """Tests for parsing context config from JSON.""" + + def test_no_context_config(self): + """AgentConfig without context_config should have None.""" + config = AgentConfig.model_validate_json(_make_agent_config_json()) + assert config.context_config is None + + def test_empty_context_config(self): + """AgentConfig with empty context_config should have empty ContextConfig.""" + json_str = _make_agent_config_json() + data = json.loads(json_str) + data["context_config"] = {} + config = AgentConfig.model_validate(data) + assert config.context_config is not None + assert config.context_config.compaction is None + assert config.context_config.cache is None + + def test_compaction_only(self): + """Parse compaction config without cache.""" + data = json.loads(_make_agent_config_json()) + data["context_config"] = {"compaction": {"compaction_interval": 5, "overlap_size": 2}} + config = AgentConfig.model_validate(data) + assert config.context_config is not None + assert config.context_config.compaction is not None + assert config.context_config.compaction.compaction_interval == 5 + assert config.context_config.compaction.overlap_size == 2 + assert config.context_config.cache is None + + def test_cache_only(self): + """Parse cache config without compaction.""" + data = json.loads(_make_agent_config_json()) + data["context_config"] = {"cache": {"cache_intervals": 20, "ttl_seconds": 3600, "min_tokens": 100}} + config = AgentConfig.model_validate(data) + assert config.context_config is not None + assert config.context_config.compaction is None + assert config.context_config.cache is not None + assert config.context_config.cache.cache_intervals == 20 + assert config.context_config.cache.ttl_seconds == 3600 + assert config.context_config.cache.min_tokens == 100 + + def test_both_compaction_and_cache(self): + """Parse both compaction and cache.""" + data = json.loads(_make_agent_config_json()) + data["context_config"] = { + "compaction": { + "compaction_interval": 10, + "overlap_size": 3, + "token_threshold": 1000, + "event_retention_size": 5, + }, + "cache": {"cache_intervals": 15}, + } + config = AgentConfig.model_validate(data) + assert config.context_config.compaction.compaction_interval == 10 + assert config.context_config.compaction.overlap_size == 3 + assert config.context_config.compaction.token_threshold == 1000 + assert config.context_config.compaction.event_retention_size == 5 + assert config.context_config.cache.cache_intervals == 15 + + def test_compaction_with_summarizer(self): + """Parse compaction with summarizer fields.""" + data = json.loads(_make_agent_config_json()) + data["context_config"] = { + "compaction": { + "compaction_interval": 5, + "overlap_size": 2, + "summarizer_model_name": "gpt-4o-mini", + "prompt_template": "Summarize these events: {{events}}", + } + } + config = AgentConfig.model_validate(data) + comp = config.context_config.compaction + assert comp.summarizer_model_name == "gpt-4o-mini" + assert comp.prompt_template == "Summarize these events: {{events}}" + + def test_compaction_missing_required_fields(self): + """Compaction missing required fields should fail validation.""" + with pytest.raises(ValidationError): + ContextCompressionSettings(compaction_interval=5) # missing overlap_size + + with pytest.raises(ValidationError): + ContextCompressionSettings(overlap_size=2) # missing compaction_interval + + def test_cache_with_defaults(self): + """Cache with empty/partial config should accept all None.""" + cache = ContextCacheSettings() + assert cache.cache_intervals is None + assert cache.ttl_seconds is None + assert cache.min_tokens is None + + def test_null_vs_absent_context_config(self): + """Both null and absent context_config should result in None.""" + # Absent + data = json.loads(_make_agent_config_json()) + config1 = AgentConfig.model_validate(data) + assert config1.context_config is None + + # Explicit null + data["context_config"] = None + config2 = AgentConfig.model_validate(data) + assert config2.context_config is None + + +class TestBuildAdkContextConfigs: + """Tests for build_adk_context_configs function.""" + + def test_compaction_basic(self): + """Build EventsCompactionConfig from basic compaction settings.""" + ctx_config = ContextConfig(compaction=ContextCompressionSettings(compaction_interval=5, overlap_size=2)) + events_cfg, cache_cfg = build_adk_context_configs(ctx_config) + assert events_cfg is not None + assert events_cfg.compaction_interval == 5 + assert events_cfg.overlap_size == 2 + assert events_cfg.summarizer is None + assert cache_cfg is None + + def test_cache_basic(self): + """Build ContextCacheConfig from basic cache settings.""" + ctx_config = ContextConfig(cache=ContextCacheSettings(cache_intervals=20, ttl_seconds=3600, min_tokens=100)) + events_cfg, cache_cfg = build_adk_context_configs(ctx_config) + assert events_cfg is None + assert cache_cfg is not None + assert cache_cfg.cache_intervals == 20 + assert cache_cfg.ttl_seconds == 3600 + assert cache_cfg.min_tokens == 100 + + def test_cache_defaults(self): + """Build ContextCacheConfig with empty settings uses ADK defaults.""" + ctx_config = ContextConfig(cache=ContextCacheSettings()) + events_cfg, cache_cfg = build_adk_context_configs(ctx_config) + assert events_cfg is None + assert cache_cfg is not None + # ADK defaults: cache_intervals=10, ttl_seconds=1800, min_tokens=0 + assert cache_cfg.cache_intervals == 10 + assert cache_cfg.ttl_seconds == 1800 + assert cache_cfg.min_tokens == 0 + + def test_compaction_with_summarizer(self): + """Build compaction config with LLM summarizer.""" + ctx_config = ContextConfig( + compaction=ContextCompressionSettings( + compaction_interval=5, + overlap_size=2, + summarizer_model_name="gpt-4o-mini", + prompt_template="Summarize: {{events}}", + ) + ) + events_cfg, cache_cfg = build_adk_context_configs(ctx_config, agent_model_name="gpt-4") + assert events_cfg is not None + assert events_cfg.summarizer is not None + assert cache_cfg is None + + def test_compaction_summarizer_falls_back_to_agent_model(self): + """Summarizer without explicit model uses agent's model.""" + ctx_config = ContextConfig( + compaction=ContextCompressionSettings( + compaction_interval=5, + overlap_size=2, + prompt_template="Summarize these events", + ) + ) + events_cfg, _ = build_adk_context_configs(ctx_config, agent_model_name="gpt-4") + assert events_cfg is not None + assert events_cfg.summarizer is not None + + def test_both_configs(self): + """Build both compaction and cache configs.""" + ctx_config = ContextConfig( + compaction=ContextCompressionSettings( + compaction_interval=10, + overlap_size=3, + token_threshold=1000, + event_retention_size=5, + ), + cache=ContextCacheSettings(cache_intervals=15), + ) + events_cfg, cache_cfg = build_adk_context_configs(ctx_config) + assert events_cfg is not None + assert events_cfg.compaction_interval == 10 + assert events_cfg.overlap_size == 3 + assert cache_cfg is not None + assert cache_cfg.cache_intervals == 15 + + +class TestJsonRoundTrip: + """Test that Go-serialized config.json can be parsed by Python.""" + + def test_full_config_round_trip(self): + """Parse a config.json matching Go's output format.""" + go_style_json = { + "model": {"type": "openai", "model": "gpt-4"}, + "description": "My agent", + "instruction": "Help the user", + "http_tools": None, + "sse_tools": None, + "remote_agents": None, + "execute_code": False, + "stream": False, + "context_config": { + "compaction": { + "compaction_interval": 10, + "overlap_size": 3, + "summarizer_model_name": "gpt-4o-mini", + "prompt_template": "Summarize: {{events}}", + "token_threshold": 500, + "event_retention_size": 5, + }, + "cache": { + "cache_intervals": 20, + "ttl_seconds": 3600, + "min_tokens": 100, + }, + }, + } + config = AgentConfig.model_validate(go_style_json) + assert config.context_config is not None + assert config.context_config.compaction.compaction_interval == 10 + assert config.context_config.compaction.summarizer_model_name == "gpt-4o-mini" + assert config.context_config.cache.cache_intervals == 20 + assert config.context_config.cache.ttl_seconds == 3600 + + def test_config_without_context_round_trip(self): + """Existing config without context_config still parses correctly.""" + go_style_json = { + "model": {"type": "openai", "model": "gpt-4"}, + "description": "My agent", + "instruction": "Help the user", + "http_tools": [], + "sse_tools": [], + "remote_agents": [], + "execute_code": False, + "stream": True, + } + config = AgentConfig.model_validate(go_style_json) + assert config.context_config is None + assert config.stream is True