diff --git a/comp/logs/agent/config/config.go b/comp/logs/agent/config/config.go
index 44d9c98a4303..69e932c3f49b 100644
--- a/comp/logs/agent/config/config.go
+++ b/comp/logs/agent/config/config.go
@@ -124,7 +124,7 @@ func BuildEndpointsWithConfig(coreConfig pkgconfigmodel.Reader, logsConfig *Logs
 	if logsDDURL, defined := logsConfig.logsDDURL(); defined {
 		haveHTTPProxy = strings.HasPrefix(logsDDURL, "http://") || strings.HasPrefix(logsDDURL, "https://")
 	}
-	if logsConfig.isForceHTTPUse() || haveHTTPProxy || logsConfig.obsPipelineWorkerEnabled() || (bool(httpConnectivity) && !(logsConfig.isForceTCPUse() || logsConfig.isSocks5ProxySet() || logsConfig.hasAdditionalEndpoints())) {
+	if logsConfig.isGRPCUse() || logsConfig.isForceHTTPUse() || haveHTTPProxy || logsConfig.obsPipelineWorkerEnabled() || (bool(httpConnectivity) && !(logsConfig.isForceTCPUse() || logsConfig.isSocks5ProxySet() || logsConfig.hasAdditionalEndpoints())) {
 		return BuildHTTPEndpointsWithConfig(coreConfig, logsConfig, endpointPrefix, intakeTrackType, intakeProtocol, intakeOrigin)
 	}
 	log.Warnf("You are currently sending Logs to Datadog through TCP (either because %s or %s is set or the HTTP connectivity test has failed) "+
@@ -373,7 +373,7 @@ func buildHTTPEndpoints(coreConfig pkgconfigmodel.Reader, logsConfig *LogsConfig
 	batchMaxContentSize := logsConfig.batchMaxContentSize()
 	inputChanSize := logsConfig.inputChanSize()
 
-	return NewEndpointsWithBatchSettings(main, additionals, false, true, batchWait, batchMaxConcurrentSend, batchMaxSize, batchMaxContentSize, inputChanSize), nil
+	return NewEndpointsWithBatchSettings(main, additionals, false, true, logsConfig.isGRPCUse(), batchWait, batchMaxConcurrentSend, batchMaxSize, batchMaxContentSize, inputChanSize), nil
 }
 
 type defaultParseAddressFunc func(string) (host string, port int, err error)
@@ -447,6 +447,11 @@ func TaggerWarmupDuration(coreConfig pkgconfigmodel.Reader) time.Duration {
 	return defaultLogsConfigKeys(coreConfig).taggerWarmupDuration()
 }
 
+// StreamLifetime returns how long a gRPC stream is kept before rotation, as resolved by the logs config keys' streamLifetime.
+func StreamLifetime(coreConfig pkgconfigmodel.Reader) time.Duration {
+	return defaultLogsConfigKeys(coreConfig).streamLifetime()
+}
+
 // AggregationTimeout is used when performing aggregation operations
 func AggregationTimeout(coreConfig pkgconfigmodel.Reader) time.Duration {
 	return defaultLogsConfigKeys(coreConfig).aggregationTimeout()
diff --git a/comp/logs/agent/config/config_keys.go b/comp/logs/agent/config/config_keys.go
index bf6f9313c28c..110243d41068 100644
--- a/comp/logs/agent/config/config_keys.go
+++ b/comp/logs/agent/config/config_keys.go
@@ -101,6 +101,10 @@ func (l *LogsConfigKeys) isForceHTTPUse() bool {
 		l.getConfig().GetBool(l.getConfigKey("force_use_http"))
 }
 
+func (l *LogsConfigKeys) isGRPCUse() bool {
+	return l.getConfig().GetBool(l.getConfigKey("use_grpc"))
+}
+
 func (l *LogsConfigKeys) logsNoSSL() bool {
 	return l.getConfig().GetBool(l.getConfigKey("logs_no_ssl"))
 }
@@ -292,6 +296,16 @@ func (l *LogsConfigKeys) senderRecoveryReset() bool {
 	return l.getConfig().GetBool(l.getConfigKey("sender_recovery_reset"))
 }
 
+func (l *LogsConfigKeys) streamLifetime() time.Duration { // stream lifetime, configured as a number of seconds
+	key := l.getConfigKey("stream_lifetime")
+	seconds := l.getConfig().GetInt(key)
+	if seconds <= 0 { // zero or negative settings are rejected in favour of the package default
+		log.Warnf("Invalid %s: %v should be > 0, fallback on %v", key, seconds, pkgconfigsetup.DefaultLogsStreamLifetime)
+		seconds = pkgconfigsetup.DefaultLogsStreamLifetime
+	}
+	return time.Duration(seconds) * time.Second
+}
+
 // AggregationTimeout is used when performing aggregation operations
 func (l *LogsConfigKeys) aggregationTimeout() time.Duration {
 	return l.getConfig().GetDuration(l.getConfigKey("aggregation_timeout")) * time.Millisecond
diff --git a/comp/logs/agent/config/config_test.go b/comp/logs/agent/config/config_test.go
index ee9049f48d9a..1de098e78668 100644
--- a/comp/logs/agent/config/config_test.go
+++ b/comp/logs/agent/config/config_test.go
@@ -287,7 +287,7 @@ func (suite *ConfigTestSuite) TestMultipleHttpEndpointsEnvVar() {
 		isReliable:             true,
 	}
 
-	expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize)
+	expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, false, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize)
 	endpoints, err := BuildHTTPEndpoints(suite.config, "test-track", "test-proto", "test-source")
 
 	suite.Nil(err)
@@ -414,7 +414,7 @@ func (suite *ConfigTestSuite) TestMultipleHttpEndpointsInConfig() {
 		isReliable:             true,
 	}
 
-	expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize)
+	expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, false, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize)
 	endpoints, err := BuildHTTPEndpoints(suite.config, "test-track", "test-proto", "test-source")
 
 	suite.Nil(err)
@@ -504,7 +504,7 @@ func (suite *ConfigTestSuite) TestMultipleHttpEndpointsInConfig2() {
 		isReliable:             true,
 	}
 
-	expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize)
+	expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, false, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize)
 	endpoints, err := BuildHTTPEndpoints(suite.config, "test-track", "test-proto", "test-source")
 
 	suite.Nil(err)
diff --git a/comp/logs/agent/config/endpoints.go b/comp/logs/agent/config/endpoints.go
index 6771f20d6d25..238c222c38ff 100644
--- a/comp/logs/agent/config/endpoints.go
+++ b/comp/logs/agent/config/endpoints.go
@@ -343,6 +343,7 @@ type Endpoints struct {
 	Endpoints              []Endpoint
 	UseProto               bool
 	UseHTTP                bool
+	UseGRPC                bool
 	BatchWait              time.Duration
 	BatchMaxConcurrentSend int
 	BatchMaxSize           int
@@ -369,6 +370,23 @@ func NewEndpoints(main Endpoint, additionalEndpoints []Endpoint, useProto bool,
 		additionalEndpoints,
 		useProto,
 		useHTTP,
+		false, // useGRPC defaults to false for backward compatibility
+		pkgconfigsetup.DefaultBatchWait,
+		pkgconfigsetup.DefaultBatchMaxConcurrentSend,
+		pkgconfigsetup.DefaultBatchMaxSize,
+		pkgconfigsetup.DefaultBatchMaxContentSize,
+		pkgconfigsetup.DefaultInputChanSize,
+	)
+}
+
+// NewEndpointsWithGRPC returns a new endpoints composite with gRPC support
+func NewEndpointsWithGRPC(main Endpoint, additionalEndpoints []Endpoint, useProto bool, useHTTP bool, useGRPC bool) *Endpoints {
+	return NewEndpointsWithBatchSettings(
+		main,
+		additionalEndpoints,
+		useProto,
+		useHTTP,
+		useGRPC,
 		pkgconfigsetup.DefaultBatchWait,
 		pkgconfigsetup.DefaultBatchMaxConcurrentSend,
 		pkgconfigsetup.DefaultBatchMaxSize,
@@ -378,12 +396,13 @@ func NewEndpoints(main Endpoint, additionalEndpoints []Endpoint, useProto bool,
 }
 
 // NewEndpointsWithBatchSettings returns a new endpoints composite with non-default batching settings specified
-func NewEndpointsWithBatchSettings(main Endpoint, additionalEndpoints []Endpoint, useProto bool, useHTTP bool, batchWait time.Duration, batchMaxConcurrentSend int, batchMaxSize int, batchMaxContentSize int, inputChanSize int) *Endpoints {
+func NewEndpointsWithBatchSettings(main Endpoint, additionalEndpoints []Endpoint, useProto bool, useHTTP bool, useGRPC bool, batchWait time.Duration, batchMaxConcurrentSend int, batchMaxSize int, batchMaxContentSize int, inputChanSize int) *Endpoints {
 	return &Endpoints{
 		Main:                   main,
 		Endpoints:              append([]Endpoint{main}, additionalEndpoints...),
 		UseProto:               useProto,
 		UseHTTP:                useHTTP,
+		UseGRPC:                useGRPC,
 		BatchWait:              batchWait,
 		BatchMaxConcurrentSend: batchMaxConcurrentSend,
 		BatchMaxSize:           batchMaxSize,
diff --git a/comp/logs/agent/config/endpoints_test.go b/comp/logs/agent/config/endpoints_test.go
index 831e7b52113c..cfdba0590321 100644
--- a/comp/logs/agent/config/endpoints_test.go
+++ b/comp/logs/agent/config/endpoints_test.go
@@ -135,6 +135,24 @@ func (suite *EndpointsTestSuite) TestBuildEndpointsShouldSucceedWithValidHTTPCon
 	suite.Equal("agent-http-intake.logs.datadoghq.com.", endpoint.Host)
 }
 
+// TestBuildEndpointsShouldSucceedWithValidGRPCConfig checks that enabling
+// logs_config.use_grpc yields gRPC-flagged endpoints even when the HTTP
+// connectivity check reported a failure.
+func (suite *EndpointsTestSuite) TestBuildEndpointsShouldSucceedWithValidGRPCConfig() {
+	suite.config.SetWithoutSource("logs_config.use_grpc", true)
+
+	endpoints, err := BuildEndpoints(suite.config, HTTPConnectivityFailure, "test-track", "test-proto", "test-source")
+	suite.Nil(err)
+	suite.True(endpoints.UseGRPC)
+	// NOTE(review): buildHTTPEndpoints appears to pass useHTTP=true alongside useGRPC; confirm UseHTTP is really false here.
+	suite.False(endpoints.UseHTTP)
+	suite.Equal(5*time.Second, endpoints.BatchWait) // expected value first, as testify's Equal(expected, actual) requires
+
+	endpoint := endpoints.Main
+	suite.True(endpoint.UseSSL())
+	suite.Equal("agent-http-intake.logs.datadoghq.com.", endpoint.Host)
+}
+
 func (suite *EndpointsTestSuite) TestBuildEndpointsShouldSucceedWithValidHTTPConfigAndCompression() {
 	var endpoints *Endpoints
 	var endpoint Endpoint
@@ -259,6 +277,7 @@ func (suite *EndpointsTestSuite) TestBuildEndpointsShouldTakeIntoAccountHTTPConn
 		suite.config.SetWithoutSource("logs_config.force_use_tcp", "false")
 		suite.config.SetWithoutSource("logs_config.use_http", "false")
 		suite.config.SetWithoutSource("logs_config.force_use_http", "false")
+		suite.config.SetWithoutSource("logs_config.use_grpc", "false")
 		suite.config.SetWithoutSource("logs_config.socks5_proxy_address", "")
 		suite.config.SetWithoutSource("logs_config.additional_endpoints", []map[string]interface{}{})
 	}
@@ -329,6 +348,19 @@ func (suite *EndpointsTestSuite) TestBuildEndpointsShouldTakeIntoAccountHTTPConn
 		suite.config.SetWithoutSource("logs_config.socks5_proxy_address", "")
 	})
 
+	suite.Run("When use_grpc is true always create gRPC endpoints", func() {
+		defer resetHTTPConfigValuesToFalse()
+		suite.config.SetWithoutSource("logs_config.use_grpc", "true")
+		endpoints, err := BuildEndpoints(suite.config, HTTPConnectivitySuccess, "test-track", "test-proto", "test-source")
+		suite.Nil(err)
+		suite.True(endpoints.UseGRPC)
+		suite.False(endpoints.UseHTTP)
+		endpoints, err = BuildEndpoints(suite.config, HTTPConnectivityFailure, "test-track", "test-proto", "test-source")
+		suite.Nil(err)
+		suite.True(endpoints.UseGRPC)
+		suite.False(endpoints.UseHTTP)
+	})
+
 	suite.Run("When additional_endpoints is not empty always create TCP endpoints", func() {
 		defer resetHTTPConfigValuesToFalse()
 		suite.config.SetWithoutSource("logs_config.additional_endpoints", []map[string]interface{}{
diff --git a/pkg/config/setup/config.go b/pkg/config/setup/config.go
index bd96231222bd..835c4ee90a75 100644
--- a/pkg/config/setup/config.go
+++ b/pkg/config/setup/config.go
@@ -118,6 +118,9 @@ const (
 	// DefaultLogsSenderBackoffRecoveryInterval is the default logs sender backoff recovery interval
 	DefaultLogsSenderBackoffRecoveryInterval = 2
 
+	// DefaultLogsStreamLifetime is the default gRPC stream lifetime in seconds (15 minutes)
+	DefaultLogsStreamLifetime = 900
+
 	// maxExternalMetricsProviderChunkSize ensures batch queries are limited in size.
 	maxExternalMetricsProviderChunkSize = 35
 
@@ -2728,6 +2731,7 @@ func bindEnvAndSetLogsConfigKeys(config pkgconfigmodel.Setup, prefix string) {
 	config.BindEnvAndSetDefault(prefix+"sender_backoff_max", DefaultLogsSenderBackoffMax)
 	config.BindEnvAndSetDefault(prefix+"sender_recovery_interval", DefaultForwarderRecoveryInterval)
 	config.BindEnvAndSetDefault(prefix+"sender_recovery_reset", false)
+	config.BindEnvAndSetDefault(prefix+"stream_lifetime", DefaultLogsStreamLifetime)
 	config.BindEnvAndSetDefault(prefix+"use_v2_api", true)
 	config.SetKnown(prefix + "dev_mode_no_ssl") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv'
 }
diff --git a/pkg/logs/message/message.go b/pkg/logs/message/message.go
index 4f91ef10ffc4..ec40a34361b6 100644
--- a/pkg/logs/message/message.go
+++ b/pkg/logs/message/message.go
@@ -12,6 +12,7 @@ import (
 	"time"
 
 	"github.com/DataDog/datadog-agent/pkg/logs/sources"
+	"github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb"
 	"github.com/DataDog/datadog-agent/pkg/util/log"
 )
 
@@ -38,6 +39,8 @@ type Payload struct {
 	Encoding string
 	// The size of the unencoded payload
 	UnencodedSize int
+	// Extra information for Stateful gRPC streaming (batch-level state changes)
+	StatefulExtra any
 }
 
 // NewPayload creates a new payload with the given message metadata, encoded content, encoding type and unencoded size
@@ -70,6 +73,13 @@ type Message struct {
 	MessageMetadata
 }
 
+// StatefulMessage represents a log message for gRPC stateful streaming.
+// It contains a Datum (from stateful_encoding.proto) and a pointer to the associated MessageMetadata.
+type StatefulMessage struct {
+	Datum    *statefulpb.Datum
+	Metadata *MessageMetadata
+}
+
 // MessageMetadata contains metadata information about a log message
 //
 //nolint:revive // exported: ignore package name struct conflict
@@ -125,7 +135,7 @@ type MessageContent struct { //nolint:revive
 	content []byte
 	// structured content
 	structuredContent StructuredContent
-	State             MessageContentState
+	State             MessageContentState
 }
 
 // MessageContentState is used to represent the MessageContent state.
diff --git a/pkg/logs/patterns/automaton/rules.go b/pkg/logs/patterns/automaton/rules.go
new file mode 100644
index 000000000000..5d21f7bc81b2
--- /dev/null
+++ b/pkg/logs/patterns/automaton/rules.go
@@ -0,0 +1,485 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+// Package automaton provides terminal rules for token classification.
+package automaton
+
+import (
+	"fmt"
+	"regexp"
+	"sort"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
+)
+
+// Priority constants for rule evaluation order
+//
+// Rules are sorted by priority (highest first) and evaluated sequentially until the first match.
+// Priority is based on the specificity of the pattern. The more specific the pattern, the higher the priority.
+// Higher priority = evaluated first = more specific classification.
+const (
+	PriorityHigh   = 3 // Very specific patterns like IPv4, IPv6, Email
+	PriorityMedium = 2 // Structured patterns like URI, Dates, HTTPStatus
+	PriorityLow    = 1 // Generic fallback patterns like Numeric
+)
+
+// TerminalRule is one classification rule: a compiled Pattern that assigns TokenType to any value it matches.
+type TerminalRule struct {
+	Name        string
+	Pattern     *regexp.Regexp
+	TokenType   token.TokenType
+	Priority    int // Use PriorityHigh/Medium/Low constants - higher values evaluated first
+	Category    string
+	Description string
+	Examples    []string
+}
+
+// RuleCategory groups the rules that share one Category name; RuleManager creates it on first use and deletes it once empty.
+type RuleCategory struct {
+	Name        string
+	Description string
+	Rules       []*TerminalRule
+}
+
+// RuleManager holds the terminal rules ordered from highest to lowest priority, plus an index of them by category name.
+type RuleManager struct {
+	rules      []*TerminalRule
+	categories map[string]*RuleCategory
+}
+
+// NewRuleManager returns a RuleManager with an empty, non-nil rule list and category index.
+func NewRuleManager() *RuleManager {
+	rm := new(RuleManager)
+	rm.rules = []*TerminalRule{}
+	rm.categories = map[string]*RuleCategory{}
+	return rm
+}
+
+// AddRule compiles pattern, checks that every example matches it, and registers the
+// resulting rule in priority order and under its category. It returns an error if a
+// rule with the same name already exists, if the pattern does not compile (the regexp
+// error is wrapped, so errors.Is/As still reach it), or if an example does not match.
+func (rm *RuleManager) AddRule(name, pattern, category, description string, tokenType token.TokenType, priority int, examples []string) error {
+	if rm.GetRule(name) != nil {
+		return fmt.Errorf("rule '%s' already exists", name)
+	}
+
+	regex, err := regexp.Compile(pattern)
+	if err != nil {
+		// %w (rather than %v) keeps the underlying regexp error in the chain for callers.
+		return fmt.Errorf("invalid regex pattern '%s': %w", pattern, err)
+	}
+
+	// Examples double as self-checks: a rule whose own examples fail is rejected.
+	for _, example := range examples {
+		if !regex.MatchString(example) {
+			return fmt.Errorf("example '%s' does not match pattern '%s'", example, pattern)
+		}
+	}
+
+	rule := &TerminalRule{
+		Name:        name,
+		Pattern:     regex,
+		TokenType:   tokenType,
+		Priority:    priority,
+		Category:    category,
+		Description: description,
+		Examples:    examples,
+	}
+	rm.insertRuleByPriority(rule)
+	rm.addToCategory(rule)
+
+	return nil
+}
+
+// RemoveRule deletes the rule called name from both the ordered rule list and
+// its category, and reports whether such a rule was registered.
+func (rm *RuleManager) RemoveRule(name string) bool {
+	for idx, candidate := range rm.rules {
+		if candidate.Name != name {
+			continue
+		}
+		// candidate was captured before the slice is rewritten, so it is still the removed rule.
+		rm.rules = append(rm.rules[:idx], rm.rules[idx+1:]...)
+		rm.removeFromCategory(candidate)
+		return true
+	}
+	return false
+}
+
+// ApplyRules classifies value with the first rule, in stored (priority) order,
+// whose pattern matches it; token.TokenWord is the fallback when none does.
+func (rm *RuleManager) ApplyRules(value string) token.TokenType {
+	for idx := range rm.rules {
+		if candidate := rm.rules[idx]; candidate.Pattern.MatchString(value) {
+			return candidate.TokenType
+		}
+	}
+	return token.TokenWord
+}
+
+// LoadPredefinedRules registers every rule from GetPredefinedRules through AddRule,
+// so each one is re-validated (duplicate name, pattern, examples) before being stored.
+// It stops at the first rule that fails and returns that error wrapped with the rule's
+// name (via %w, so the cause stays inspectable); rules added earlier remain registered.
+func (rm *RuleManager) LoadPredefinedRules() error {
+	for _, rule := range GetPredefinedRules() {
+		err := rm.AddRule(
+			rule.Name,
+			rule.Pattern.String(),
+			rule.Category,
+			rule.Description,
+			rule.TokenType,
+			rule.Priority,
+			rule.Examples,
+		)
+		if err != nil {
+			return fmt.Errorf("failed to load rule '%s': %w", rule.Name, err)
+		}
+	}
+	return nil
+}
+
+// ================================================
+// Helper methods
+// ================================================
+
+// insertRuleByPriority places rule before the first stored rule with a strictly
+// lower priority, so rules of equal priority keep their insertion order.
+func (rm *RuleManager) insertRuleByPriority(rule *TerminalRule) {
+	pos := len(rm.rules)
+	for idx, current := range rm.rules {
+		if rule.Priority > current.Priority {
+			pos = idx
+			break
+		}
+	}
+
+	// Grow by one slot, shift the tail one step right, then drop rule into the gap.
+	rm.rules = append(rm.rules, nil)
+	copy(rm.rules[pos+1:], rm.rules[pos:])
+	rm.rules[pos] = rule
+}
+
+// addToCategory appends rule to the category named rule.Category, creating that
+// category (with a generated description) the first time the name is seen.
+func (rm *RuleManager) addToCategory(rule *TerminalRule) {
+	cat := rm.categories[rule.Category]
+	if cat == nil {
+		desc := fmt.Sprintf("Rules for %s tokens", rule.Category)
+		cat = &RuleCategory{Name: rule.Category, Description: desc, Rules: make([]*TerminalRule, 0)}
+		rm.categories[rule.Category] = cat
+	}
+	cat.Rules = append(cat.Rules, rule)
+}
+
+// removeFromCategory detaches rule from its category's rule list and deletes
+// the category entry once no rules remain in it.
+func (rm *RuleManager) removeFromCategory(rule *TerminalRule) {
+	if cat, found := rm.categories[rule.Category]; found {
+		for idx := range cat.Rules {
+			if cat.Rules[idx].Name == rule.Name {
+				cat.Rules = append(cat.Rules[:idx], cat.Rules[idx+1:]...)
+				break
+			}
+		}
+		if len(cat.Rules) == 0 {
+			delete(rm.categories, rule.Category)
+		}
+	}
+}
+
+// GetRule returns the registered rule whose Name equals name, or nil if there is none.
+func (rm *RuleManager) GetRule(name string) *TerminalRule {
+	for idx := range rm.rules {
+		if found := rm.rules[idx]; found.Name == name {
+			return found
+		}
+	}
+	return nil
+}
+
+// ListRules returns the rules in their stored order (highest priority first).
+// The returned slice is a fresh copy, so callers may reorder or truncate it;
+// the *TerminalRule values it points at are still shared with the manager.
+func (rm *RuleManager) ListRules() []*TerminalRule {
+	// Appending onto an empty, exactly-sized slice yields an independent copy.
+	return append(make([]*TerminalRule, 0, len(rm.rules)), rm.rules...)
+}
+
+// GetRulesByCategory returns a copy of the rule list registered under category,
+// or an empty non-nil slice when the category is unknown.
+func (rm *RuleManager) GetRulesByCategory(category string) []*TerminalRule {
+	cat, known := rm.categories[category]
+	if !known {
+		return []*TerminalRule{}
+	}
+	return append(make([]*TerminalRule, 0, len(cat.Rules)), cat.Rules...)
+}
+
+// GetCategories returns the names of all currently registered categories in ascending order.
+func (rm *RuleManager) GetCategories() []string {
+	names := make([]string, 0, len(rm.categories))
+	for key := range rm.categories {
+		names = append(names, key)
+	}
+	sort.Strings(names) // map iteration order is random, so sort for a stable result
+	return names
+}
+
+// GetRuleStats summarises the registered rules: the total count, the number of
+// categories, and how many rules fall under each category and each token type.
+func (rm *RuleManager) GetRuleStats() RuleStats {
+	perCategory := make(map[string]int)
+	perTokenType := make(map[token.TokenType]int)
+	for _, rule := range rm.rules {
+		perCategory[rule.Category]++
+		perTokenType[rule.TokenType]++
+	}
+	return RuleStats{
+		TotalRules:  len(rm.rules),
+		Categories:  len(rm.categories),
+		ByCategory:  perCategory,
+		ByTokenType: perTokenType,
+	}
+}
+
+// RuleStats is the summary returned by GetRuleStats: rule and category totals plus per-category and per-token-type rule counts.
+type RuleStats struct {
+	TotalRules  int
+	Categories  int
+	ByCategory  map[string]int
+	ByTokenType map[token.TokenType]int
+}
+
+// GetPredefinedRules returns the standard set of terminal rules; the regular
+// expressions are compiled anew on every call. Every pattern is anchored at the
+// start, but the date/time patterns other than SimpleDate have no end anchor and
+// therefore also match values that merely begin with the format.
+func GetPredefinedRules() []*TerminalRule {
+	return []*TerminalRule{
+		// =============================================================================
+		// DATE & TIME PATTERNS (Priority: High to Medium)
+		// Based on multiline aggregation patterns for comprehensive coverage
+		// =============================================================================
+
+		// High Priority - Modern Standards with Timezone Support
+		{
+			Name:        "RFC3339DateTime",
+			Pattern:     regexp.MustCompile(`^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(\.\d+)?(Z|[\+\-]\d{2}:?\d{2})?`),
+			TokenType:   token.TokenDate,
+			Priority:    PriorityHigh,
+			Category:    "time",
+			Description: "Matches RFC3339 datetime format with timezone",
+			Examples:    []string{"2024-01-15T10:30:45Z", "2024-01-15T10:30:45.123Z", "2024-01-15T10:30:45+02:00"},
+		},
+		{
+			Name:        "RFC3339NanoDateTime",
+			Pattern:     regexp.MustCompile(`^(\d+)-(\d+)-(\d+)([A-Za-z_]+)(\d+):(\d+):(\d+)\.(\d+)([A-Za-z_]+)(\d+):(\d+)`),
+			TokenType:   token.TokenDate,
+			Priority:    PriorityHigh,
+			Category:    "time",
+			Description: "Supplementary pattern from multiline handler for edge-case RFC3339 formats",
+			Examples:    []string{"2024-12-25T14:30:00.123456789Z07:00"},
+		},
+		{
+			Name:        "StandardTimestamp",
+			Pattern:     regexp.MustCompile(`^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})(,\d+)?`),
+			TokenType:   token.TokenDate,
+			Priority:    PriorityHigh,
+			Category:    "time",
+			Description: "Matches standard timestamp format with optional milliseconds",
+			Examples:    []string{"2024-01-15 10:30:45", "2024-01-15 10:30:45,123"},
+		},
+
+		// Medium Priority - Legacy RFC Standards
+		{
+			Name:        "RFC1123DateTime",
+			Pattern:     regexp.MustCompile(`^([A-Za-z_]+), (\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+):(\d+) ([A-Za-z_]+)`),
+			TokenType:   token.TokenDate,
+			Priority:    PriorityMedium,
+			Category:    "time",
+			Description: "Matches RFC1123 datetime format",
+			Examples:    []string{"Mon, 02 Jan 2006 15:04:05 MST", "Wed, 25 Dec 2024 14:30:00 UTC"},
+		},
+		{
+			Name:        "RFC1123ZDateTime",
+			Pattern:     regexp.MustCompile(`^([A-Za-z_]+), (\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+):(\d+) (-\d+)`),
+			TokenType:   token.TokenDate,
+			Priority:    PriorityMedium,
+			Category:    "time",
+			Description: "Supplementary pattern from multiline handler for RFC1123Z edge cases",
+			Examples:    []string{"Mon, 02 Jan 2006 15:04:05 -0700", "Wed, 25 Dec 2024 14:30:00 -0800"},
+		},
+		{
+			Name:        "RFC850DateTime",
+			Pattern:     regexp.MustCompile(`^([A-Za-z_]+), (\d+)-([A-Za-z_]+)-(\d+) (\d+):(\d+):(\d+) ([A-Za-z_]+)`),
+			TokenType:   token.TokenDate,
+			Priority:    PriorityMedium,
+			Category:    "time",
+			Description: "Matches RFC850 datetime format",
+			Examples:    []string{"Monday, 02-Jan-06 15:04:05 MST", "Wednesday, 25-Dec-24 14:30:00 UTC"},
+		},
+		{
+			Name:        "RFC822DateTime",
+			Pattern:     regexp.MustCompile(`^(\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+) ([A-Za-z_]+)`),
+			TokenType:   token.TokenDate,
+			Priority:    PriorityMedium,
+			Category:    "time",
+			Description: "Matches RFC822 datetime format",
+			Examples:    []string{"02 Jan 06 15:04 MST", "25 Dec 24 14:30 UTC"},
+		},
+		{
+			Name:        "RFC822ZDateTime",
+			Pattern:     regexp.MustCompile(`^(\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+) (-\d+)`),
+			TokenType:   token.TokenDate,
+			Priority:    PriorityMedium,
+			Category:    "time",
+			Description: "Supplementary pattern from multiline handler for RFC822Z edge cases",
+			Examples:    []string{"02 Jan 06 15:04 -0700", "25 Dec 24 14:30 -0800"},
+		},
+
+		// Medium Priority - Unix/System Formats
+		{
+			Name:        "ANSICDateTime",
+			Pattern:     regexp.MustCompile(`^([A-Za-z_]+) ([A-Za-z_]+) +(\d+) (\d+):(\d+):(\d+) (\d+)`),
+			TokenType:   token.TokenDate,
+			Priority:    PriorityMedium,
+			Category:    "time",
+			Description: "Matches ANSIC datetime format",
+			Examples:    []string{"Mon Jan 2 15:04:05 2006", "Wed Dec 25 14:30:00 2024"},
+		},
+		{
+			Name:        "UnixDateTime",
+			Pattern:     regexp.MustCompile(`^([A-Za-z_]+) ([A-Za-z_]+) +(\d+) (\d+):(\d+):(\d+)( [A-Za-z_]+ (\d+))?`),
+			TokenType:   token.TokenDate,
+			Priority:    PriorityMedium,
+			Category:    "time",
+			Description: "Matches Unix datetime format with optional timezone",
+			Examples:    []string{"Mon Jan 2 15:04:05 2006", "Mon Jan 2 15:04:05 MST 2006"},
+		},
+		{
+			Name:        "RubyDateTime",
+			Pattern:     regexp.MustCompile(`^([A-Za-z_]+) ([A-Za-z_]+) (\d+) (\d+):(\d+):(\d+) ([\-\+]\d+) (\d+)`),
+			TokenType:   token.TokenDate,
+			Priority:    PriorityMedium,
+			Category:    "time",
+			Description: "Matches Ruby datetime format with timezone offset",
+			Examples:    []string{"Mon Jan 02 15:04:05 -0700 2006", "Wed Dec 25 14:30:00 +0200 2024"},
+		},
+
+		// Medium Priority - Application-Specific Formats
+		{
+			Name:        "JavaSimpleFormatter",
+			Pattern:     regexp.MustCompile(`^([A-Za-z_]+) (\d+), (\d{4}) (\d+):(\d+):(\d+) (AM|PM)`),
+			TokenType:   token.TokenDate,
+			Priority:    PriorityMedium,
+			Category:    "time",
+			Description: "Matches Java SimpleFormatter date format",
+			Examples:    []string{"January 15, 2024 2:30:45 PM", "December 31, 2023 11:59:59 AM"},
+		},
+		{
+			Name:        "SlashDateTime",
+			Pattern:     regexp.MustCompile(`^(\d{4})/(\d{2})/(\d{2}) (\d{2}):(\d{2}):(\d{2})`),
+			TokenType:   token.TokenDate,
+			Priority:    PriorityMedium,
+			Category:    "time",
+			Description: "Matches slash-separated datetime format",
+			Examples:    []string{"2024/01/15 10:30:45", "2024/12/31 23:59:59"},
+		},
+		{
+			Name:        "SimpleDate",
+			Pattern:     regexp.MustCompile(`^(\d{4})-(1[012]|0?[1-9])-([12][0-9]|3[01]|0?[1-9])$`),
+			TokenType:   token.TokenDate,
+			Priority:    PriorityMedium,
+			Category:    "time",
+			Description: "Matches YYYY-MM-DD date format with validation",
+			Examples:    []string{"2024-01-15", "2024-12-31", "2024-02-29"},
+		},
+
+		// =============================================================================
+		// NETWORK PATTERNS (Priority: High)
+		// =============================================================================
+
+		{
+			Name:        "IPv4Address",
+			Pattern:     regexp.MustCompile(`^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`),
+			TokenType:   token.TokenIPv4,
+			Priority:    PriorityHigh,
+			Category:    "network",
+			Description: "Matches IPv4 addresses in dotted decimal notation",
+			Examples:    []string{"192.168.1.1", "10.0.0.1", "255.255.255.255", "0.0.0.0"},
+		},
+		{
+			Name:        "IPv6Address",
+			Pattern:     regexp.MustCompile(`^([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}$`),
+			TokenType:   token.TokenIPv6,
+			Priority:    PriorityHigh,
+			Category:    "network",
+			Description: "Matches basic IPv6 addresses",
+			Examples:    []string{"2001:0db8:85a3:0000:0000:8a2e:0370:7334", "fe80:0000:0000:0000:0000:0000:0000:0001"},
+		},
+		{
+			Name:        "EmailAddress",
+			Pattern:     regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`),
+			TokenType:   token.TokenEmail,
+			Priority:    PriorityHigh,
+			Category:    "network",
+			Description: "Matches email addresses",
+			Examples:    []string{"user@example.com", "test.email+tag@domain.org", "admin@company.co.uk"},
+		},
+		{
+			Name:        "URI",
+			Pattern:     regexp.MustCompile(`^https?://[^\s]+$`),
+			TokenType:   token.TokenURI,
+			Priority:    PriorityMedium,
+			Category:    "network",
+			Description: "Matches HTTP and HTTPS URIs",
+			Examples:    []string{"http://example.com", "https://api.domain.com/v1/users", "https://cdn.example.org/assets/style.css"},
+		},
+
+		// =============================================================================
+		// HTTP PATTERNS (Priority: Medium)
+		// =============================================================================
+
+		{
+			Name:        "HTTPStatus",
+			Pattern:     regexp.MustCompile(`^[1-5][0-9][0-9]$`),
+			TokenType:   token.TokenHTTPStatus,
+			Priority:    PriorityMedium,
+			Category:    "http",
+			Description: "Matches HTTP status codes",
+			Examples:    []string{"200", "404", "500", "301", "403"},
+		},
+
+		// =============================================================================
+		// FILESYSTEM PATTERNS (Priority: Medium)
+		// =============================================================================
+
+		{
+			Name:        "AbsolutePath",
+			Pattern:     regexp.MustCompile(`^/[^\s]+$`),
+			TokenType:   token.TokenAbsolutePath,
+			Priority:    PriorityMedium,
+			Category:    "filesystem",
+			Description: "Matches absolute file/URL paths",
+			Examples:    []string{"/api/users", "/var/log/app.log", "/home/user/documents"},
+		},
+
+		// =============================================================================
+		// NUMERIC PATTERNS (Priority: Low - Fallback)
+		// =============================================================================
+
+		{
+			Name:        "Numeric",
+			Pattern:     regexp.MustCompile(`^\d+$`),
+			TokenType:   token.TokenNumeric,
+			Priority:    PriorityLow,
+			Category:    "numeric",
+			Description: "Matches pure numeric values",
+			Examples:    []string{"123", "0", "999999", "42"},
+		},
+	}
+}
diff --git a/pkg/logs/patterns/automaton/rules_test.go b/pkg/logs/patterns/automaton/rules_test.go
new file mode 100644
index 000000000000..78f70c5abf90
--- /dev/null
+++ b/pkg/logs/patterns/automaton/rules_test.go
@@ -0,0 +1,287 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+package automaton
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
+)
+
+// TestNewRuleManager verifies that a freshly constructed rule manager has
+// initialized rule and category containers and starts without any rules.
+func TestNewRuleManager(t *testing.T) {
+	manager := NewRuleManager()
+
+	assert.NotNil(t, manager.rules, "Expected rules slice to be initialized")
+	assert.NotNil(t, manager.categories, "Expected categories map to be initialized")
+	assert.Len(t, manager.rules, 0, "Expected empty rules slice")
+}
+
+// TestRuleManager_AddRule verifies that a valid rule is stored with the name,
+// token type, priority and category it was registered with.
+func TestRuleManager_AddRule(t *testing.T) {
+	rm := NewRuleManager()
+
+	err := rm.AddRule(
+		"TestIPv4",
+		`^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`,
+		"network",
+		"Test IPv4 pattern",
+		token.TokenIPv4,
+		100,
+		[]string{"192.168.1.1", "10.0.0.1"},
+	)
+
+	assert.NoError(t, err, "Failed to add rule")
+
+	// Stop here when the rule was not stored: indexing an empty slice below
+	// would panic and hide the real assertion failure.
+	if !assert.Equal(t, 1, len(rm.rules), "Expected 1 rule") {
+		return
+	}
+
+	rule := rm.rules[0]
+	assert.Equal(t, "TestIPv4", rule.Name, "Expected rule name 'TestIPv4'")
+	assert.Equal(t, token.TokenIPv4, rule.TokenType, "Expected token type TokenIPv4")
+	assert.Equal(t, 100, rule.Priority, "Expected priority 100")
+	assert.Equal(t, "network", rule.Category, "Expected category 'network'")
+}
+
+// TestRuleManager_AddRule_InvalidPattern tests the addition of a new rule with an invalid regex pattern
+func TestRuleManager_AddRule_InvalidPattern(t *testing.T) {
+	rm := NewRuleManager()
+
+	err := rm.AddRule(
+		"BadRule",
+		`[invalid(regex`,
+		"test",
+		"Invalid regex",
+		token.TokenWord,
+		50,
+		[]string{},
+	)
+
+	assert.Error(t, err, "Expected error for invalid regex pattern")
+}
+
+// TestRuleManager_AddRule_InvalidExample verifies that a rule is rejected when
+// one of its examples is not matched by its own pattern.
+func TestRuleManager_AddRule_InvalidExample(t *testing.T) {
+	// "abc" doesn't match ^\d+$
+	examples := []string{"123", "abc"}
+
+	manager := NewRuleManager()
+	addErr := manager.AddRule("TestRule", `^\d+$`, "test", "Numeric pattern", token.TokenNumeric, 50, examples)
+
+	assert.Error(t, addErr, "Expected error for example that doesn't match pattern")
+}
+
+// TestRuleManager_AddRule_Duplicate verifies that a second rule with an
+// already-registered name is rejected with an "already exists" error.
+func TestRuleManager_AddRule_Duplicate(t *testing.T) {
+	rm := NewRuleManager()
+
+	// Add first rule
+	err := rm.AddRule("TestRule", `^\d+$`, "test", "Numeric", token.TokenNumeric, 50, []string{"123"})
+	assert.NoError(t, err, "Failed to add first rule")
+
+	// Try to add duplicate rule
+	err = rm.AddRule("TestRule", `^[a-z]+$`, "test", "Alpha", token.TokenWord, 50, []string{"abc"})
+
+	// Only inspect the message when an error was actually returned; calling
+	// Error() on a nil error would panic instead of reporting a test failure.
+	if assert.Error(t, err, "Expected error when adding duplicate rule name") {
+		assert.Contains(t, err.Error(), "already exists", "Expected 'already exists' error")
+	}
+}
+
+// TestRuleManager_RemoveRule verifies that removing an existing rule reports
+// true and drops it, while removing an unknown rule reports false.
+func TestRuleManager_RemoveRule(t *testing.T) {
+	rm := NewRuleManager()
+
+	// Add a rule first; a setup failure must be reported rather than ignored.
+	err := rm.AddRule("TestRule", `^\d+$`, "test", "Test", token.TokenNumeric, 50, []string{"123"})
+	assert.NoError(t, err, "Failed to add rule")
+
+	assert.Equal(t, 1, len(rm.rules), "Expected 1 rule before removal")
+
+	// Remove the rule
+	removed := rm.RemoveRule("TestRule")
+	assert.True(t, removed, "Expected RemoveRule to return true")
+	assert.Equal(t, 0, len(rm.rules), "Expected 0 rules after removal")
+
+	// Try to remove non-existent rule
+	removed = rm.RemoveRule("NonExistent")
+	assert.False(t, removed, "Expected RemoveRule to return false for non-existent rule")
+}
+
+// TestRuleManager_GetRule verifies that a registered rule can be looked up by
+// name and that an unknown name yields nil.
+func TestRuleManager_GetRule(t *testing.T) {
+	rm := NewRuleManager()
+
+	// A setup failure must be reported rather than ignored.
+	err := rm.AddRule("TestRule", `^\d+$`, "test", "Test", token.TokenNumeric, 50, []string{"123"})
+	assert.NoError(t, err, "Failed to add rule")
+
+	rule := rm.GetRule("TestRule")
+	if assert.NotNil(t, rule, "Expected to find rule 'TestRule'") {
+		assert.Equal(t, "TestRule", rule.Name, "Expected rule name 'TestRule'")
+	}
+
+	notFound := rm.GetRule("NonExistent")
+	assert.Nil(t, notFound, "Expected nil for non-existent rule")
+}
+
+// TestRuleManager_PriorityOrdering verifies that ListRules returns rules sorted
+// by priority, highest first, regardless of insertion order.
+func TestRuleManager_PriorityOrdering(t *testing.T) {
+	rm := NewRuleManager()
+
+	// Add rules in different priority order; setup failures must be reported.
+	assert.NoError(t, rm.AddRule("Low", `low`, "test", "Low priority", token.TokenWord, 10, []string{"low"}))
+	assert.NoError(t, rm.AddRule("High", `high`, "test", "High priority", token.TokenWord, 100, []string{"high"}))
+	assert.NoError(t, rm.AddRule("Medium", `medium`, "test", "Medium priority", token.TokenWord, 50, []string{"medium"}))
+
+	rules := rm.ListRules()
+
+	// Stop on a count mismatch: indexing the expectations below with an
+	// unexpected number of rules would panic instead of failing cleanly.
+	if !assert.Equal(t, 3, len(rules), "Expected 3 rules") {
+		return
+	}
+
+	// Should be ordered by priority (highest first)
+	expectedOrder := []string{"High", "Medium", "Low"}
+	expectedPriorities := []int{100, 50, 10}
+
+	for i, rule := range rules {
+		assert.Equal(t, expectedOrder[i], rule.Name, "Rule %d name mismatch", i)
+		assert.Equal(t, expectedPriorities[i], rule.Priority, "Rule %d priority mismatch", i)
+	}
+}
+
+// TestRuleManager_ApplyRules verifies that ApplyRules returns the token type of
+// the matching rule and falls back to TokenWord when no rule matches.
+func TestRuleManager_ApplyRules(t *testing.T) {
+	rm := NewRuleManager()
+
+	// Add rules with different priorities; setup failures must be reported.
+	assert.NoError(t, rm.AddRule("IPv4", `^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`,
+		"network", "IPv4", token.TokenIPv4, 100, []string{"192.168.1.1"}))
+	assert.NoError(t, rm.AddRule("Numeric", `^\d+$`, "numeric", "Numbers", token.TokenNumeric, 30, []string{"123"}))
+
+	tests := []struct {
+		input    string
+		expected token.TokenType
+	}{
+		{"192.168.1.1", token.TokenIPv4}, // Higher priority rule should match
+		{"123", token.TokenNumeric},
+		{"999.999.999.999", token.TokenWord}, // Invalid IPv4, no rule matches - generic word
+		{"abc", token.TokenWord},             // No rule matches - generic word
+	}
+
+	for _, test := range tests {
+		result := rm.ApplyRules(test.input)
+		assert.Equal(t, test.expected, result, "ApplyRules('%s') mismatch", test.input)
+	}
+}
+
+// TestRuleManager_GetRulesByCategory verifies that rules are grouped by the
+// category they were registered under and that unknown categories are empty.
+func TestRuleManager_GetRulesByCategory(t *testing.T) {
+	rm := NewRuleManager()
+
+	// Setup failures must be reported rather than ignored.
+	assert.NoError(t, rm.AddRule("IPv4", `ipv4`, "network", "IPv4", token.TokenIPv4, 100, []string{"ipv4"}))
+	assert.NoError(t, rm.AddRule("Email", `email`, "network", "Email", token.TokenEmail, 90, []string{"email"}))
+	assert.NoError(t, rm.AddRule("Numeric", `num`, "numeric", "Number", token.TokenNumeric, 50, []string{"num"}))
+
+	networkRules := rm.GetRulesByCategory("network")
+	assert.Equal(t, 2, len(networkRules), "Expected 2 network rules")
+
+	numericRules := rm.GetRulesByCategory("numeric")
+	assert.Equal(t, 1, len(numericRules), "Expected 1 numeric rule")
+
+	emptyRules := rm.GetRulesByCategory("nonexistent")
+	assert.Equal(t, 0, len(emptyRules), "Expected 0 rules for nonexistent category")
+}
+
+// TestRuleManager_GetCategories verifies that GetCategories returns each
+// distinct category once, in sorted order.
+func TestRuleManager_GetCategories(t *testing.T) {
+	rm := NewRuleManager()
+
+	// Setup failures must be reported rather than ignored.
+	assert.NoError(t, rm.AddRule("Rule1", `r1`, "network", "Rule 1", token.TokenWord, 50, []string{"r1"}))
+	assert.NoError(t, rm.AddRule("Rule2", `r2`, "time", "Rule 2", token.TokenWord, 50, []string{"r2"}))
+	assert.NoError(t, rm.AddRule("Rule3", `r3`, "network", "Rule 3", token.TokenWord, 50, []string{"r3"}))
+
+	categories := rm.GetCategories()
+	assert.Equal(t, 2, len(categories), "Expected 2 categories")
+
+	// Categories should be sorted
+	expectedCategories := []string{"network", "time"}
+	for i, expected := range expectedCategories {
+		if assert.Less(t, i, len(categories), "Category %d should exist", i) {
+			assert.Equal(t, expected, categories[i], "Expected category %d to be '%s'", i, expected)
+		}
+	}
+}
+
+// TestRuleManager_GetRuleStats verifies the totals and the per-category and
+// per-token-type counters reported by GetRuleStats.
+func TestRuleManager_GetRuleStats(t *testing.T) {
+	rm := NewRuleManager()
+
+	// Setup failures must be reported rather than ignored.
+	assert.NoError(t, rm.AddRule("IPv4", `ipv4`, "network", "IPv4", token.TokenIPv4, 100, []string{"ipv4"}))
+	assert.NoError(t, rm.AddRule("Email", `email`, "network", "Email", token.TokenEmail, 90, []string{"email"}))
+	assert.NoError(t, rm.AddRule("Numeric", `num`, "numeric", "Number", token.TokenNumeric, 50, []string{"num"}))
+
+	stats := rm.GetRuleStats()
+
+	assert.Equal(t, 3, stats.TotalRules, "Expected TotalRules=3")
+	assert.Equal(t, 2, stats.Categories, "Expected Categories=2")
+	assert.Equal(t, 2, stats.ByCategory["network"], "Expected 2 network rules")
+	assert.Equal(t, 1, stats.ByCategory["numeric"], "Expected 1 numeric rule")
+	assert.Equal(t, 1, stats.ByTokenType[token.TokenIPv4], "Expected 1 IPv4 token rule")
+}
+
+// TestGetPredefinedRules checks every predefined rule for a pattern, a name, a
+// category and examples that match the rule's own pattern, and confirms that a
+// handful of well-known rules are present.
+func TestGetPredefinedRules(t *testing.T) {
+	predefined := GetPredefinedRules()
+
+	assert.NotEqual(t, 0, len(predefined), "Expected predefined rules to be non-empty")
+
+	// Names seen while walking the predefined rules.
+	seen := make(map[string]struct{}, len(predefined))
+
+	for i := range predefined {
+		r := predefined[i]
+		seen[r.Name] = struct{}{}
+
+		// Structural checks.
+		assert.NotNil(t, r.Pattern, "Rule '%s' has nil pattern", r.Name)
+		assert.NotEqual(t, "", r.Name, "Found rule with empty name")
+		assert.NotEqual(t, "", r.Category, "Rule '%s' has empty category", r.Name)
+		assert.NotEqual(t, 0, len(r.Examples), "Rule '%s' has no examples", r.Name)
+
+		// Every documented example must be matched by the rule's pattern.
+		for _, sample := range r.Examples {
+			matched := r.Pattern.MatchString(sample)
+			assert.True(t, matched,
+				"Rule '%s': example '%s' doesn't match pattern", r.Name, sample)
+		}
+	}
+
+	for _, name := range []string{"IPv4Address", "EmailAddress", "URI", "HTTPStatus", "Numeric"} {
+		_, present := seen[name]
+		assert.True(t, present, "Expected predefined rule '%s' not found", name)
+	}
+}
+
+// TestRuleManager_LoadPredefinedRules loads the predefined rules into a fresh
+// manager and checks that key rules are retrievable and classify sample values.
+func TestRuleManager_LoadPredefinedRules(t *testing.T) {
+	manager := NewRuleManager()
+
+	assert.NoError(t, manager.LoadPredefinedRules(), "Failed to load predefined rules")
+	assert.NotEqual(t, 0, len(manager.ListRules()), "Expected predefined rules to be loaded")
+
+	// Key rules must be retrievable by name.
+	assert.NotNil(t, manager.GetRule("IPv4Address"), "Expected IPv4Address rule to be loaded")
+	assert.NotNil(t, manager.GetRule("EmailAddress"), "Expected EmailAddress rule to be loaded")
+
+	// The loaded rules must classify representative values.
+	ipType := manager.ApplyRules("192.168.1.1")
+	assert.Equal(t, token.TokenIPv4, ipType, "Expected IPv4 token for '192.168.1.1'")
+
+	emailType := manager.ApplyRules("test@example.com")
+	assert.Equal(t, token.TokenEmail, emailType, "Expected Email token for 'test@example.com'")
+}
diff --git a/pkg/logs/patterns/automaton/tokenizer.go b/pkg/logs/patterns/automaton/tokenizer.go
new file mode 100644
index 000000000000..ab9f0b514965
--- /dev/null
+++ b/pkg/logs/patterns/automaton/tokenizer.go
@@ -0,0 +1,312 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+// Package automaton provides log message tokenization using finite state automaton
+// and pattern matching for semantic token classification.
+package automaton
+
+import (
+	"fmt"
+	"unicode"
+	"unicode/utf8"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
+	"github.com/DataDog/datadog-agent/pkg/util/log"
+)
+
+// TokenizerState represents the current state of the FSA
+type TokenizerState int
+
+// Tokenizer states. The state records what kind of token is currently being
+// accumulated in the tokenizer's buffer.
+const (
+	StateStart      TokenizerState = iota // StateStart is the initial state; the next character selects the token kind
+	StateWord                             // StateWord is letters, digits, and common separators ('_', '-', '.', '@', '/') for structured tokens
+	StateNumeric                          // StateNumeric is digits plus '.', '-', '/' and ':' so dates, times and IPs stay in one token
+	StateWhitespace                       // StateWhitespace is spaces, tabs, newlines
+	StateSpecial                          // StateSpecial is operators, punctuation, symbols
+)
+
+const (
+	// These values are initial guesses; they could be tuned by testing against more log samples.
+	// tokenizerBufferCapacity is the initial capacity for the rune buffer.
+	tokenizerBufferCapacity = 128
+
+	// tokenizerTokensCapacity is the initial capacity for the tokens slice.
+	tokenizerTokensCapacity = 24
+)
+
+// Tokenizer implements a finite state automaton for log tokenization
+type Tokenizer struct {
+	input  string         // text being tokenized
+	pos    int            // index into input of the next character to process
+	length int            // len(input), captured at construction
+	state  TokenizerState // current automaton state
+	buffer []rune         // characters of the token currently being accumulated
+	tokens []token.Token  // tokens emitted so far
+}
+
+// NewTokenizer returns a Tokenizer positioned at the start of input, in the
+// initial state, with pre-sized (empty) rune and token buffers.
+func NewTokenizer(input string) *Tokenizer {
+	tokenizer := new(Tokenizer)
+	tokenizer.input = input
+	tokenizer.length = len(input)
+	tokenizer.pos = 0
+	tokenizer.state = StateStart
+	tokenizer.buffer = make([]rune, 0, tokenizerBufferCapacity)
+	tokenizer.tokens = make([]token.Token, 0, tokenizerTokensCapacity)
+	return tokenizer
+}
+
+// Tokenize runs the automaton over the whole input, flushes the token still in
+// the buffer, classifies the generic tokens and returns them as a TokenList.
+func (t *Tokenizer) Tokenize() *token.TokenList {
+	// Step until the input is consumed or a step reports it cannot continue.
+	for t.pos < t.length && t.processNextToken() {
+	}
+
+	t.handleLastToken()
+	t.classifyTokens()
+
+	return token.NewTokenListWithTokens(t.tokens)
+}
+
+// classifyTokens upgrades generic tokens to more specific types.
+// The FSA only emits generic tokens (TokenWord, TokenNumeric); this pass runs
+// pattern matching over the eligible ones to recognize structured values:
+//   - "192.168.1.1" → TokenNumeric upgraded to TokenIPv4
+//   - "user@example.com" → TokenWord upgraded to TokenEmail
+//   - "GET" → TokenWord upgraded to TokenHTTPMethod
+//
+// Tokens that are not eligible, fail classification, or classify as a plain
+// word are left untouched.
+func (t *Tokenizer) classifyTokens() {
+	for idx := range t.tokens {
+		current := t.tokens[idx]
+
+		if !t.shouldClassify(&current) {
+			// Not a generic, variable token: nothing to upgrade.
+			continue
+		}
+
+		// Try to identify a specific structured type (IP, Email, Date, HTTP, etc.).
+		upgraded, err := t.classifyToken(current.Value)
+		switch {
+		case err != nil:
+			log.Warnf("Failed to classify token '%s': %v. Falling back to word token type", current.Value, err)
+		case upgraded == token.TokenWord:
+			// No specific type recognized; keep the token as it is.
+		default:
+			// Upgrade the stored token to the more specific type.
+			t.tokens[idx].Type = upgraded
+			t.tokens[idx].Wildcard = getWildcardPotential(upgraded)
+		}
+	}
+}
+
+// shouldClassify reports whether tok may be upgraded by pattern-based
+// classification: it must be a generic Word or Numeric token and must not be
+// marked NotWildcard (which excludes whitespace and punctuation).
+func (t *Tokenizer) shouldClassify(tok *token.Token) bool {
+	switch tok.Type {
+	case token.TokenWord, token.TokenNumeric:
+		return tok.Wildcard != token.NotWildcard
+	default:
+		return false
+	}
+}
+
+// processNextToken advances the automaton by one token
+func (t *Tokenizer) processNextToken() bool {
+	if t.pos >= t.length {
+		return false
+	}
+
+	char := rune(t.input[t.pos])
+
+	switch t.state {
+	case StateStart:
+		return t.handleStartState(char)
+	case StateWord:
+		return t.handleWordState(char)
+	case StateNumeric:
+		return t.handleNumericState(char)
+	case StateWhitespace:
+		return t.handleWhitespaceState(char)
+	case StateSpecial:
+		return t.handleSpecialState(char)
+	default:
+		return t.handleStartState(char) // Fallback
+	}
+}
+
+// handleStartState picks the state for a new token from its first character,
+// buffers that character and consumes it. It always returns true.
+func (t *Tokenizer) handleStartState(char rune) bool {
+	// Anything that is not whitespace, a digit, a letter or '/' is special.
+	next := StateSpecial
+	switch {
+	case unicode.IsSpace(char):
+		next = StateWhitespace
+	case unicode.IsDigit(char):
+		next = StateNumeric
+	case unicode.IsLetter(char), char == '/':
+		next = StateWord
+	}
+	t.setState(next)
+
+	t.addToBuffer(char)
+	t.pos++
+	return true
+}
+
+// handleWordState processes word tokens
+func (t *Tokenizer) handleWordState(char rune) bool {
+	if unicode.IsLetter(char) || unicode.IsDigit(char) || char == '_' || char == '-' ||
+		char == '.' || char == '@' || char == '/' ||
+		(char == ':' && t.isURLScheme()) {
+		t.addToBuffer(char)
+		t.pos++
+		return true
+	}
+
+	t.createWordToken()
+	t.setState(StateStart)
+	return true
+}
+
+// handleNumericState extends the current numeric token while the character is
+// a digit or one of '.', '-', '/', ':' so that dates (2024-01-15), times
+// (10:30:45) and IPs (192.168.1.1) stay in a single token. Any other character
+// ends the token without being consumed. It always returns true.
+func (t *Tokenizer) handleNumericState(char rune) bool {
+	isSeparator := char == '.' || char == '-' || char == '/' || char == ':'
+
+	if unicode.IsDigit(char) || isSeparator {
+		t.addToBuffer(char)
+		t.pos++
+		return true
+	}
+
+	t.createNumericToken()
+	t.setState(StateStart)
+	return true
+}
+
+// handleWhitespaceState keeps consuming whitespace characters; the first
+// non-whitespace character ends the run, emitting a whitespace token and
+// returning to the start state without consuming it. It always returns true.
+func (t *Tokenizer) handleWhitespaceState(char rune) bool {
+	if !unicode.IsSpace(char) {
+		t.createWhitespaceToken()
+		t.setState(StateStart)
+		return true
+	}
+
+	t.addToBuffer(char)
+	t.pos++
+	return true
+}
+
+// handleSpecialState emits the single special character that handleStartState
+// already placed in the buffer and resets the automaton. The incoming
+// character is ignored and not consumed. It always returns true.
+func (t *Tokenizer) handleSpecialState(_ rune) bool {
+	t.createSpecialToken()
+	t.setState(StateStart)
+
+	return true
+}
+
+// classifyToken looks up the token type for value through the global trie.
+// An empty value is rejected with an error and TokenUnknown.
+func (t *Tokenizer) classifyToken(value string) (token.TokenType, error) {
+	if value != "" {
+		return globalTrie.Match(value), nil
+	}
+
+	return token.TokenUnknown, fmt.Errorf("cannot classify empty string token value")
+}
+
+// getWildcardPotential returns the wildcard status for a token type:
+// NotWildcard for whitespace and PotentialWildcard for every other type.
+// Note: IsWildcard is only set during pattern merging, never during tokenization
+func getWildcardPotential(tokenType token.TokenType) token.WildcardStatus {
+	switch tokenType {
+	case token.TokenWhitespace:
+		// Whitespace is the only type that can never become a wildcard.
+		return token.NotWildcard
+	default:
+		return token.PotentialWildcard
+	}
+}
+
+// ================================================
+// Helper functions
+// ================================================
+
+// isURLScheme reports whether the buffered characters are exactly "http" or
+// "https".
+func (t *Tokenizer) isURLScheme() bool {
+	switch string(t.buffer) {
+	case "http", "https":
+		return true
+	}
+	return false
+}
+
+// State management helpers
+
+// setState switches the automaton to newState.
+func (t *Tokenizer) setState(newState TokenizerState) {
+	t.state = newState
+}
+
+// addToBuffer appends char to the token currently being accumulated.
+func (t *Tokenizer) addToBuffer(char rune) {
+	t.buffer = append(t.buffer, char)
+}
+
+// clearBuffer empties the accumulation buffer while retaining its storage.
+func (t *Tokenizer) clearBuffer() {
+	t.buffer = t.buffer[:0] // Keep capacity, reset length
+}
+
+// bufferToString returns the accumulated characters as a string.
+func (t *Tokenizer) bufferToString() string {
+	return string(t.buffer)
+}
+
+// handleLastToken flushes whatever is still buffered when the input ends,
+// emitting a token whose kind follows the current state. Word is the fallback
+// for any state other than numeric, whitespace or special.
+func (t *Tokenizer) handleLastToken() {
+	if len(t.buffer) == 0 {
+		return
+	}
+
+	emit := t.createWordToken
+	switch t.state {
+	case StateNumeric:
+		emit = t.createNumericToken
+	case StateWhitespace:
+		emit = t.createWhitespaceToken
+	case StateSpecial:
+		emit = t.createSpecialToken
+	}
+	emit()
+}
+
+// ================================================
+// Token creation methods
+// ================================================
+
+// createWordToken emits the buffered characters as a generic Word token that
+// may become a wildcard, then clears the buffer. Classification into a more
+// specific type happens later in classifyTokens().
+func (t *Tokenizer) createWordToken() {
+	word := token.NewToken(token.TokenWord, t.bufferToString(), token.PotentialWildcard)
+	t.tokens = append(t.tokens, word)
+	t.clearBuffer()
+}
+
+// createNumericToken emits the buffered characters as a Numeric token that may
+// become a wildcard, then clears the buffer. Classification into a more
+// specific type happens later.
+func (t *Tokenizer) createNumericToken() {
+	number := token.NewToken(token.TokenNumeric, t.bufferToString(), token.PotentialWildcard)
+	t.tokens = append(t.tokens, number)
+	t.clearBuffer()
+}
+
+func (t *Tokenizer) createWhitespaceToken() {
+	// Normalize all whitespace (tabs, spaces, newlines, multiple spaces) to single space
+	value := " "
+	// Whitespace never becomes wildcard
+	tok := token.NewToken(token.TokenWhitespace, value, token.NotWildcard)
+	t.tokens = append(t.tokens, tok)
+	t.clearBuffer()
+}
+
+// createSpecialToken emits the buffered special character as a Word token
+// marked NotWildcard, then clears the buffer. Punctuation and symbols such as
+// ":", "[" or "@" are structural markers: they must stay consistent and only
+// merge when identical.
+func (t *Tokenizer) createSpecialToken() {
+	symbol := token.NewToken(token.TokenWord, t.bufferToString(), token.NotWildcard)
+	t.tokens = append(t.tokens, symbol)
+	t.clearBuffer()
+}
diff --git a/pkg/logs/patterns/automaton/tokenizer_test.go b/pkg/logs/patterns/automaton/tokenizer_test.go
new file mode 100644
index 000000000000..5fdf716836bb
--- /dev/null
+++ b/pkg/logs/patterns/automaton/tokenizer_test.go
@@ -0,0 +1,460 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+package automaton
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
+)
+
+// TestTokenizer_SimpleTokenization tokenizes a minimal HTTP-style line and
+// checks both the number of tokens and the type assigned to each of them.
+func TestTokenizer_SimpleTokenization(t *testing.T) {
+	tokenList := NewTokenizer("GET /api 200").Tokenize()
+
+	assert.NotEqual(t, 0, tokenList.Length(), "Expected tokens, got empty list")
+
+	// Expected tokens, in order: GET, whitespace, /api, whitespace, 200
+	wantTypes := []token.TokenType{
+		token.TokenHTTPMethod,   // GET
+		token.TokenWhitespace,   // space
+		token.TokenAbsolutePath, // /api
+		token.TokenWhitespace,   // space
+		token.TokenHTTPStatus,   // 200
+	}
+
+	assert.Equal(t, 5, tokenList.Length(), "Expected 5 tokens")
+
+	for idx, want := range wantTypes {
+		if !assert.Less(t, idx, tokenList.Length(), "Token %d should exist", idx) {
+			continue
+		}
+		got := tokenList.Tokens[idx]
+		assert.Equal(t, want, got.Type,
+			"Token %d (value: '%s') should be type %v", idx, got.Value, want)
+	}
+}
+
+// TestTokenizer_StateTransitions steps the automaton manually and checks the
+// states it passes through. Simple inputs must start with the expected state
+// sequence; the IPv4 input only has to visit the start and numeric states.
+func TestTokenizer_StateTransitions(t *testing.T) {
+	cases := []struct {
+		input          string
+		expectedStates []TokenizerState
+		description    string
+	}{
+		{"GET", []TokenizerState{StateStart, StateWord}, "Simple word"},
+		{"123", []TokenizerState{StateStart, StateNumeric}, "Simple numeric"},
+		{" ", []TokenizerState{StateStart, StateWhitespace}, "Single whitespace"},
+		{"/path", []TokenizerState{StateStart, StateWord}, "Path starts as word character"},
+		{"192.168.1.1", []TokenizerState{StateStart, StateNumeric}, "IPv4 stays in numeric state initially"},
+	}
+
+	for _, tc := range cases {
+		tk := NewTokenizer(tc.input)
+
+		// Record the state before the first step and after every successful step.
+		var observed []TokenizerState
+		observed = append(observed, tk.state)
+		for tk.pos < tk.length && tk.processNextToken() {
+			observed = append(observed, tk.state)
+		}
+
+		if tc.input == "192.168.1.1" {
+			// For IPv4 with simplified FSA, only check that both states were visited.
+			sawStart, sawNumeric := false, false
+			for _, st := range observed {
+				if st == StateStart {
+					sawStart = true
+				} else if st == StateNumeric {
+					sawNumeric = true
+				}
+			}
+
+			assert.True(t, sawStart, "IPv4 test: expected to see StateStart")
+			assert.True(t, sawNumeric, "IPv4 test: expected to see StateNumeric")
+			continue
+		}
+
+		// For simple cases, the observed states must begin with the expected sequence.
+		assert.GreaterOrEqual(t, len(observed), len(tc.expectedStates),
+			"Input '%s' (%s): expected at least %d states", tc.input, tc.description, len(tc.expectedStates))
+
+		for idx, want := range tc.expectedStates {
+			if !assert.Less(t, idx, len(observed), "State %d should exist for input '%s'", idx, tc.input) {
+				continue
+			}
+			assert.Equal(t, want, observed[idx],
+				"Input '%s' (%s): expected state %d to be %v", tc.input, tc.description, idx, want)
+		}
+	}
+}
+
+// TestTokenTypePreservation tests that TokenNumeric stays TokenNumeric when no pattern matches
+// This is critical: classification should upgrade OR preserve, never downgrade
+func TestTokenTypePreservation(t *testing.T) {
+	// Test that generic number stays TokenNumeric (not downgraded to TokenWord)
+	tokenList := TokenizeString("User 12345 logged in")
+
+	// Find the numeric token
+	var numericToken *token.Token
+	for i := range tokenList.Tokens {
+		if tokenList.Tokens[i].Value == "12345" {
+			numericToken = &tokenList.Tokens[i]
+			break
+		}
+	}
+
+	assert.NotNil(t, numericToken, "Expected to find numeric token '12345'")
+
+	// Should stay TokenNumeric, not become TokenWord
+	if numericToken != nil {
+		assert.Equal(t, token.TokenNumeric, numericToken.Type,
+			"Token '12345' should stay TokenNumeric")
+	}
+
+	// Test that numeric upgrades when pattern matches
+	tokenList = TokenizeString("User 192.168.1.1 logged in")
+
+	// Find the IP token
+	var ipToken *token.Token
+	for i := range tokenList.Tokens {
+		if tokenList.Tokens[i].Value == "192.168.1.1" {
+			ipToken = &tokenList.Tokens[i]
+			break
+		}
+	}
+
+	assert.NotNil(t, ipToken, "Expected to find IP token '192.168.1.1'")
+
+	// Should be upgraded to TokenIPv4
+	if ipToken != nil {
+		assert.Equal(t, token.TokenIPv4, ipToken.Type,
+			"Token '192.168.1.1' should be TokenIPv4")
+	}
+}
+
+// TestWildcardStatus checks the wildcard status assigned during tokenization:
+// NotWildcard for whitespace and punctuation, PotentialWildcard for words,
+// numbers and classified tokens.
+func TestWildcardStatus(t *testing.T) {
+	cases := []struct {
+		input            string
+		tokenValue       string
+		expectedWildcard token.WildcardStatus
+		description      string
+	}{
+		{" ", " ", token.NotWildcard, "Whitespace should be NotWildcard"},
+		{":", ":", token.NotWildcard, "Punctuation should be NotWildcard"},
+		{"hello", "hello", token.PotentialWildcard, "Generic word should be PotentialWildcard"},
+		{"12345", "12345", token.PotentialWildcard, "Generic number should be PotentialWildcard"},
+		{"INFO User logged in", "INFO", token.PotentialWildcard, "Severity level should be PotentialWildcard"},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.description, func(t *testing.T) {
+			list := TokenizeString(tc.input)
+
+			// Locate the first token carrying the value under test.
+			var found *token.Token
+			for idx := 0; idx < len(list.Tokens) && found == nil; idx++ {
+				if list.Tokens[idx].Value == tc.tokenValue {
+					found = &list.Tokens[idx]
+				}
+			}
+
+			assert.NotNil(t, found, "Expected to find token '%s'", tc.tokenValue)
+			if found == nil {
+				return
+			}
+
+			assert.Equal(t, tc.expectedWildcard, found.Wildcard, tc.description)
+		})
+	}
+}
+
+// TestArchitectureCompliance exercises the complete data flow, from
+// TokenizeString through classification to signature generation.
+func TestArchitectureCompliance(t *testing.T) {
+	// Call graph under test:
+	// automaton.TokenizeString → NewTokenizer → Tokenizer.Tokenize → processNextToken → classifyToken → globalTrie.Match
+
+	// Step 1: automaton.TokenizeString (main entry point)
+	tokenList := TokenizeString("GET /api/users 200")
+	assert.NotNil(t, tokenList, "TokenizeString returned nil")
+
+	// Step 2: collect the classified tokens (the last token of each type wins).
+	var httpMethod, httpStatus, path *token.Token
+	for idx := range tokenList.Tokens {
+		current := &tokenList.Tokens[idx]
+		switch current.Type {
+		case token.TokenHTTPMethod:
+			httpMethod = current
+		case token.TokenHTTPStatus:
+			httpStatus = current
+		case token.TokenAbsolutePath:
+			path = current
+		}
+	}
+
+	if assert.NotNil(t, httpMethod, "HTTP method token not found - trie classification failed") {
+		assert.Equal(t, "GET", httpMethod.Value, "Expected HTTP method 'GET'")
+	}
+	if assert.NotNil(t, httpStatus, "HTTP status token not found - trie classification failed") {
+		assert.Equal(t, "200", httpStatus.Value, "Expected HTTP status '200'")
+	}
+	if assert.NotNil(t, path, "Path token not found - state machine failed") {
+		assert.Equal(t, "/api/users", path.Value, "Expected path '/api/users'")
+	}
+
+	// Step 3: signature generation must succeed and reflect the token types.
+	const wantPosition = "HTTPMethod|Whitespace|AbsolutePath|Whitespace|HTTPStatus"
+
+	signature := token.NewSignature(tokenList)
+	assert.False(t, signature.IsEmpty(), "Signature generation failed")
+	assert.Equal(t, wantPosition, signature.Position, "Signature position mismatch")
+}
+
+// TestComplexLogScenarios tests complex log scenarios
+func TestComplexLogScenarios(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected []token.TokenType
+	}{
+		{
+			name:  "HTTP Request",
+			input: "GET /api/users 200",
+			expected: []token.TokenType{
+				token.TokenHTTPMethod, token.TokenWhitespace,
+				token.TokenAbsolutePath, token.TokenWhitespace,
+				token.TokenHTTPStatus,
+			},
+		},
+		{
+			name:  "Error Message",
+			input: "ERROR Database connection failed",
+			expected: []token.TokenType{
+				token.TokenSeverityLevel, token.TokenWhitespace,
+				token.TokenWord, token.TokenWhitespace,
+				token.TokenWord, token.TokenWhitespace,
+				token.TokenWord,
+			},
+		},
+		{
+			name:  "User Login",
+			input: "INFO User 12345 logged in",
+			expected: []token.TokenType{
+				token.TokenSeverityLevel, token.TokenWhitespace,
+				token.TokenWord, token.TokenWhitespace,
+				token.TokenNumeric, token.TokenWhitespace,
+				token.TokenWord, token.TokenWhitespace,
+				token.TokenWord,
+			},
+		},
+		{
+			name:  "Complex with Email and IP",
+			input: "user@domain.com from 192.168.1.1",
+			expected: []token.TokenType{
+				token.TokenEmail, token.TokenWhitespace,
+				token.TokenWord, token.TokenWhitespace,
+				token.TokenIPv4,
+			},
+		},
+		{
+			name:  "URL with Scheme",
+			input: "Visit https://example.com/docs",
+			expected: []token.TokenType{
+				token.TokenWord, token.TokenWhitespace,
+				token.TokenURI,
+			},
+		},
+		{
+			name:  "Date in Context",
+			input: "Event on 2024-01-15",
+			expected: []token.TokenType{
+				token.TokenWord, token.TokenWhitespace,
+				token.TokenWord, token.TokenWhitespace,
+				token.TokenDate,
+			},
+		},
+		{
+			name:  "False Positive - Single @ is not Email",
+			input: "Price @ $10 each",
+			expected: []token.TokenType{
+				token.TokenWord,       // Price
+				token.TokenWhitespace, // space
+				token.TokenWord,       // @
+				token.TokenWhitespace, // space
+				token.TokenWord,       // $
+				token.TokenNumeric,    // 10
+				token.TokenWhitespace, // space
+				token.TokenWord,       // each
+			},
+		},
+		{
+			name:  "False Positive - Division operator is not Path",
+			input: "Calculate 10 / 2 = 5",
+			expected: []token.TokenType{
+				token.TokenWord,       // Calculate
+				token.TokenWhitespace, // space
+				token.TokenNumeric,    // 10
+				token.TokenWhitespace, // space
+				token.TokenWord,       // /
+				token.TokenWhitespace, // space
+				token.TokenNumeric,    // 2
+				token.TokenWhitespace, // space
+				token.TokenWord,       // =
+				token.TokenWhitespace, // space
+				token.TokenNumeric,    // 5
+			},
+		},
+		{
+			name:  "False Positive - Phone number is not Date",
+			input: "Phone: 123-456-7890",
+			expected: []token.TokenType{
+				token.TokenWord,       // Phone
+				token.TokenWord,       // :
+				token.TokenWhitespace, // space
+				token.TokenNumeric,    // 123-456-7890 stays numeric, not date
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			tokenList := TokenizeString(test.input)
+
+			assert.Equal(t, len(test.expected), tokenList.Length(),
+				"Expected %d tokens, got: %v", len(test.expected), tokenTypesToString(tokenList.Tokens))
+
+			for i, expected := range test.expected {
+				if assert.Less(t, i, tokenList.Length(), "Token %d should exist", i) {
+					assert.Equal(t, expected, tokenList.Tokens[i].Type,
+						"Token %d (value: '%s') should be type %v", i, tokenList.Tokens[i].Value, expected)
+				}
+			}
+		})
+	}
+}
+
+// TestWhitespaceNormalization checks that any run of whitespace (spaces, tabs,
+// newlines, or a mix) yields a whitespace token whose value is a single space
+// and whose wildcard status is NotWildcard.
+func TestWhitespaceNormalization(t *testing.T) {
+	cases := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{name: "Single space", input: "Error: message", expected: " "},
+		{name: "Tab character", input: "Error:\tmessage", expected: " "},
+		{name: "Multiple spaces", input: "Error:  message", expected: " "},
+		{name: "Multiple tabs", input: "Error:\t\tmessage", expected: " "},
+		{name: "Mixed tabs and spaces", input: "Error: \t message", expected: " "},
+		{name: "Newline", input: "Error:\nmessage", expected: " "},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			list := TokenizeString(tc.input)
+
+			// Pick the first whitespace token of the line.
+			var ws *token.Token
+			for idx := range list.Tokens {
+				if list.Tokens[idx].Type != token.TokenWhitespace {
+					continue
+				}
+				ws = &list.Tokens[idx]
+				break
+			}
+
+			assert.NotNil(t, ws, "Expected to find whitespace token")
+			if ws == nil {
+				return
+			}
+
+			assert.Equal(t, tc.expected, ws.Value,
+				"Whitespace should be normalized to single space")
+			assert.Equal(t, token.NotWildcard, ws.Wildcard,
+				"Whitespace should be NotWildcard")
+		})
+	}
+}
+
+// TestWhitespaceNormalization_Signature checks that whitespace normalization
+// allows logs that differ only in whitespace to produce the same signature.
+func TestWhitespaceNormalization_Signature(t *testing.T) {
+	// These logs differ only in whitespace - they should tokenize identically
+	single := TokenizeString("Error: connection failed")  // single space
+	tabbed := TokenizeString("Error:\tconnection failed") // tab
+	double := TokenizeString("Error:  connection failed") // double space
+
+	// All should have same token count
+	assert.Equal(t, single.Length(), tabbed.Length(), "Token counts should match")
+	assert.Equal(t, single.Length(), double.Length(), "Token counts should match")
+
+	// Wherever the first log has a whitespace token, all three logs must hold a
+	// single space at that position.
+	for idx := 0; idx < single.Length(); idx++ {
+		if single.Tokens[idx].Type != token.TokenWhitespace {
+			continue
+		}
+		assert.Equal(t, " ", single.Tokens[idx].Value, "Whitespace in log1 should be normalized")
+		assert.Equal(t, " ", tabbed.Tokens[idx].Value, "Whitespace in log2 should be normalized")
+		assert.Equal(t, " ", double.Tokens[idx].Value, "Whitespace in log3 should be normalized")
+	}
+
+	// Signatures should be identical
+	reference := token.NewSignature(single)
+	tabbedSig := token.NewSignature(tabbed)
+	doubleSig := token.NewSignature(double)
+
+	assert.True(t, reference.Equals(tabbedSig), "Signatures should be equal after normalization")
+	assert.True(t, reference.Equals(doubleSig), "Signatures should be equal after normalization")
+}
+
+// ===============================
+// Helper functions
+// ===============================
+// tokenTypesToString renders each token with its String method, preserving
+// order, for use in assertion failure messages.
+func tokenTypesToString(tokens []token.Token) []string {
+	rendered := make([]string, 0, len(tokens))
+	for idx := range tokens {
+		rendered = append(rendered, tokens[idx].String())
+	}
+	return rendered
+}
diff --git a/pkg/logs/patterns/automaton/trie.go b/pkg/logs/patterns/automaton/trie.go
new file mode 100644
index 000000000000..540b73a588f3
--- /dev/null
+++ b/pkg/logs/patterns/automaton/trie.go
@@ -0,0 +1,194 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+// Package automaton provides log message tokenization using finite state automaton
+// and trie-based pattern matching for token classification.
+package automaton
+
+import (
+	"strings"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
+)
+
+// TrieNode represents a node in the classification trie, reached from its parent by one rune
+type TrieNode struct {
+	children   map[rune]*TrieNode // next nodes, indexed by the following rune of a pattern
+	tokenType  token.TokenType    // type set by AddExactPattern; only read when isTerminal is true
+	isTerminal bool               // true when a pattern added via AddExactPattern ends at this node
+}
+
+// Trie implements a rune-keyed prefix tree for token classification by exact match
+type Trie struct {
+	root *TrieNode // entry node of every lookup; corresponds to the empty prefix
+}
+
+// globalRuleManager manages the terminal rules that Match falls back to when no exact pattern matches
+var globalRuleManager *RuleManager
+
+// globalTrie is the singleton trie instance, populated by init
+var globalTrie *Trie
+
+// init initializes the global trie and rule manager once, at package load time
+// todo: componentize this eventually
+func init() {
+	globalTrie = NewTrie()
+	globalRuleManager = NewRuleManager()
+	globalRuleManager.LoadPredefinedRules() // terminal rules consulted by Match after an exact-match miss
+	globalTrie.buildPredefinedPatterns()    // exact patterns: HTTP methods, severity levels, whitespace
+}
+
+// NewTrie returns an empty trie whose root node already has an initialized children map
+func NewTrie() *Trie {
+	root := &TrieNode{
+		children: make(map[rune]*TrieNode),
+	}
+
+	return &Trie{root: root}
+}
+
+// Match classifies value: empty input is TokenUnknown, an exact trie hit wins, otherwise the terminal rules decide
+func (trie *Trie) Match(value string) token.TokenType {
+	if value == "" {
+		return token.TokenUnknown
+	}
+
+	exact := trie.exactMatch(value)
+	if exact == token.TokenUnknown {
+		return trie.applyTerminalRules(value)
+	}
+	return exact
+}
+
+// exactMatch walks the trie one rune at a time and returns the stored type
+// only when the whole value ends on a terminal node; otherwise TokenUnknown
+func (trie *Trie) exactMatch(value string) token.TokenType {
+	cur := trie.root
+	for _, r := range value {
+		next, ok := cur.children[r]
+		if !ok {
+			return token.TokenUnknown
+		}
+		cur = next
+	}
+
+	// A full walk only counts if a pattern ends exactly here (not a mere prefix)
+	if !cur.isTerminal {
+		return token.TokenUnknown
+	}
+	return cur.tokenType
+}
+
+// applyTerminalRules delegates classification to globalRuleManager.ApplyRules, which is shared by every Trie instance
+func (trie *Trie) applyTerminalRules(value string) token.TokenType {
+	return globalRuleManager.ApplyRules(value)
+}
+
+// AddExactPattern registers pattern so that an exact match on it yields tokenType.
+// Re-adding an existing pattern overwrites its type; an empty pattern marks the root.
+func (trie *Trie) AddExactPattern(pattern string, tokenType token.TokenType) {
+	cur := trie.root
+	for _, r := range pattern {
+		next, ok := cur.children[r]
+		if !ok {
+			next = &TrieNode{children: make(map[rune]*TrieNode)}
+			cur.children[r] = next
+		}
+		cur = next
+	}
+
+	cur.tokenType = tokenType
+	cur.isTerminal = true
+}
+
+// buildPredefinedPatterns populates the trie with exact-match patterns
+// for fast classification of known strings (HTTP methods, severity levels, whitespace).
+// Terminal rules are handled by globalRuleManager via LoadPredefinedRules().
+func (trie *Trie) buildPredefinedPatterns() {
+	// HTTP methods - exact string matching (uppercase only)
+	httpMethods := []string{"GET", "POST", "PUT", "DELETE", "HEAD", "OPTIONS", "PATCH", "TRACE", "CONNECT"}
+	for _, method := range httpMethods {
+		trie.AddExactPattern(method, token.TokenHTTPMethod)
+	}
+
+	// Severity levels, upper- and lowercase. NOTE: "TRACE" is also in httpMethods; AddExactPattern overwrites, so it ends up as TokenSeverityLevel
+	severityLevels := []string{"TRACE", "DEBUG", "INFO", "WARN", "WARNING", "ERROR", "FATAL", "PANIC", "EMERGENCY", "ALERT", "CRITICAL", "NOTICE"}
+	for _, level := range severityLevels {
+		trie.AddExactPattern(level, token.TokenSeverityLevel)
+		trie.AddExactPattern(strings.ToLower(level), token.TokenSeverityLevel)
+	}
+
+	// Whitespace - three single characters plus the two-character "\r\n" sequence (a lone "\r" is not registered)
+	trie.AddExactPattern(" ", token.TokenWhitespace)
+	trie.AddExactPattern("\t", token.TokenWhitespace)
+	trie.AddExactPattern("\n", token.TokenWhitespace)
+	trie.AddExactPattern("\r\n", token.TokenWhitespace)
+}
+
+// TokenizeString is the main entry point: non-empty input goes through a fresh Tokenizer, empty input returns token.NewTokenList()
+func TokenizeString(input string) *token.TokenList {
+	if input != "" {
+		tk := NewTokenizer(input)
+		return tk.Tokenize()
+	}
+
+	return token.NewTokenList()
+}
+
+// Statistics
+
+// TrieStats is the set of trie and rule-manager sizes computed by GetStats
+type TrieStats struct {
+	ExactPatterns int // number of terminal nodes in the trie
+	TerminalRules int // len(globalRuleManager.rules), or 0 when the manager is nil
+	TrieNodes     int // total node count, root included
+	MaxDepth      int // distance of the deepest node from the root (root is depth 0)
+}
+
+// GetStats returns trie statistics for testing purposes:
+// node count, maximum depth and number of exact patterns of this trie,
+// plus the number of rules currently held by globalRuleManager.
+func (trie *Trie) GetStats() TrieStats {
+	var stats TrieStats
+
+	// Structural figures come from two recursive walks starting at the root
+	stats.TrieNodes, stats.MaxDepth = trie.countNodes(trie.root, 0)
+	stats.ExactPatterns = trie.countExactPatterns(trie.root)
+
+	// Terminal rules are managed by globalRuleManager, not the trie itself
+	if globalRuleManager != nil {
+		stats.TerminalRules = len(globalRuleManager.rules)
+	}
+
+	return stats
+}
+
+// countNodes returns the node count of the subtree at node (itself included) and the deepest depth reached in it
+func (trie *Trie) countNodes(node *TrieNode, depth int) (int, int) {
+	total, deepest := 1, depth
+	for _, next := range node.children {
+		n, d := trie.countNodes(next, depth+1)
+		total += n
+		if d <= deepest {
+			continue
+		}
+		deepest = d
+	}
+
+	return total, deepest
+}
+
+// countExactPatterns returns how many terminal nodes exist in the subtree rooted at node
+func (trie *Trie) countExactPatterns(node *TrieNode) int {
+	var terminals int
+	for _, next := range node.children {
+		terminals += trie.countExactPatterns(next)
+	}
+
+	if node.isTerminal {
+		terminals++
+	}
+	return terminals
+}
diff --git a/pkg/logs/patterns/automaton/trie_test.go b/pkg/logs/patterns/automaton/trie_test.go
new file mode 100644
index 000000000000..8ae73f8d5d4c
--- /dev/null
+++ b/pkg/logs/patterns/automaton/trie_test.go
@@ -0,0 +1,210 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+package automaton
+
+import (
+	"testing"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
+)
+
+// TestGlobalTrie_ExactMatch checks Match on values registered as exact patterns, plus one unregistered word.
+func TestGlobalTrie_ExactMatch(t *testing.T) {
+	cases := []struct {
+		input    string
+		expected token.TokenType
+	}{
+		{"GET", token.TokenHTTPMethod},
+		{"POST", token.TokenHTTPMethod},
+		{"ERROR", token.TokenSeverityLevel},
+		{"INFO", token.TokenSeverityLevel},
+		{"debug", token.TokenSeverityLevel}, // lowercase
+		{" ", token.TokenWhitespace},
+		{"\t", token.TokenWhitespace},
+		{"unknown", token.TokenWord}, // no rule matches - generic word
+	}
+
+	for _, tc := range cases {
+		if got := globalTrie.Match(tc.input); got != tc.expected {
+			t.Errorf("globalTrie.Match('%s'): expected %v, got %v",
+				tc.input, tc.expected, got)
+		}
+	}
+}
+
+// TestGlobalTrie_TerminalRules checks Match on values that are expected to be classified by the terminal-rule fallback.
+func TestGlobalTrie_TerminalRules(t *testing.T) {
+	cases := []struct {
+		input    string
+		expected token.TokenType
+	}{
+		{"200", token.TokenHTTPStatus},
+		{"404", token.TokenHTTPStatus},
+		{"500", token.TokenHTTPStatus},
+		{"192.168.1.1", token.TokenIPv4},
+		{"10.0.0.1", token.TokenIPv4},
+		{"test@example.com", token.TokenEmail},
+		{"user@domain.org", token.TokenEmail},
+		{"/api/users", token.TokenAbsolutePath},
+		{"/var/log/app.log", token.TokenAbsolutePath},
+		{"2023-12-25", token.TokenDate},
+		{"2023-12-25T14:30:00", token.TokenDate},
+		{"1234", token.TokenNumeric}, // 4 digits won't match HTTP status
+		{"0", token.TokenNumeric},
+		{"https://example.com", token.TokenURI},
+		{"http://api.domain.com/path", token.TokenURI},
+	}
+
+	for _, tc := range cases {
+		if got := globalTrie.Match(tc.input); got != tc.expected {
+			t.Errorf("globalTrie.Match('%s'): expected %v, got %v",
+				tc.input, tc.expected, got)
+		}
+	}
+}
+
+// TestTrieStats checks that the global trie reports non-zero pattern, rule and node counts.
+func TestTrieStats(t *testing.T) {
+	got := globalTrie.GetStats()
+	if got.ExactPatterns == 0 {
+		t.Error("Expected some exact patterns in trie")
+	}
+	if got.TerminalRules == 0 {
+		t.Error("Expected some terminal rules")
+	}
+	if got.TrieNodes == 0 {
+		t.Error("Expected some trie nodes")
+	}
+
+	t.Logf("Trie Stats: %d exact patterns, %d terminal rules, %d nodes, max depth %d",
+		got.ExactPatterns, got.TerminalRules, got.TrieNodes, got.MaxDepth)
+}
+
+func TestTrie_AddExactPattern(t *testing.T) {
+	// Create a new trie for testing
+	testTrie := NewTrie()
+
+	// Register under a type other than TokenWord: TokenWord is also what the fallback is expected
+	// to give an unregistered word (see below), so it could not prove the exact-match path was taken
+	testTrie.AddExactPattern("CUSTOM", token.TokenHTTPMethod)
+
+	result := testTrie.Match("CUSTOM")
+	if result != token.TokenHTTPMethod {
+		t.Errorf("Expected TokenHTTPMethod for 'CUSTOM', got %v", result)
+	}
+
+	// Test that unknown patterns fall back to TokenWord (generic word)
+	result = testTrie.Match("unknown")
+	if result != token.TokenWord {
+		t.Errorf("Expected TokenWord for 'unknown', got %v", result)
+	}
+}
+
+// TestTrie_AddTerminalRule checks that a rule added to the global rule manager is honoured by globalTrie.Match.
+func TestTrie_AddTerminalRule(t *testing.T) {
+	// Test adding terminal rule to global rule manager
+	if err := globalRuleManager.AddRule(
+		"TestRule",
+		`^TEST\d+$`,
+		"test",
+		"Test rule for testing",
+		token.TokenNumeric,
+		PriorityHigh, // Higher priority than existing rules
+		[]string{"TEST123"},
+	); err != nil {
+		t.Fatalf("Failed to add terminal rule: %v", err)
+	}
+
+	// Test that it matches using global trie
+	got := globalTrie.Match("TEST123")
+	if got != token.TokenNumeric {
+		t.Errorf("Expected TokenNumeric for 'TEST123', got %v", got)
+	}
+
+	// Test that non-matching patterns don't match
+	got = globalTrie.Match("TESTXYZ")
+	if got == token.TokenNumeric {
+		t.Error("Should not match non-numeric pattern")
+	}
+
+	// Clean up - remove the test rule
+	globalRuleManager.RemoveRule("TestRule")
+}
+
+// TestTrie_InvalidTerminalRule checks that AddRule reports an error for a pattern that is not a valid regex.
+func TestTrie_InvalidTerminalRule(t *testing.T) {
+	// Try to add invalid regex to global rule manager
+	if err := globalRuleManager.AddRule(
+		"InvalidRule",
+		`[invalid(regex`,
+		"test",
+		"Invalid rule",
+		token.TokenWord,
+		PriorityMedium,
+		[]string{},
+	); err == nil {
+		t.Error("Expected error for invalid regex pattern")
+	}
+}
+
+func TestTrie_ExactMatchPriority(t *testing.T) {
+	testTrie := NewTrie()
+
+	// Add exact pattern
+	testTrie.AddExactPattern("TEST", token.TokenWord)
+
+	// Add terminal rule that would also match; it must really be registered or the test proves nothing
+	err := globalRuleManager.AddRule(
+		"ExactMatchTestRule",
+		`^TEST$`,
+		"test",
+		"Test rule for exact match priority",
+		token.TokenNumeric,
+		PriorityHigh,
+		[]string{"TEST"},
+	)
+	if err != nil {
+		t.Fatalf("Failed to add terminal rule: %v", err)
+	}
+	defer globalRuleManager.RemoveRule("ExactMatchTestRule") // Clean up the shared manager however the test exits
+
+	// Exact match should take priority
+	if result := testTrie.Match("TEST"); result != token.TokenWord {
+		t.Errorf("Exact match should take priority, expected TokenWord, got %v", result)
+	}
+}
+
+// TestTrie_EmptyInput checks that Match returns TokenUnknown for the empty string.
+func TestTrie_EmptyInput(t *testing.T) {
+	if got := globalTrie.Match(""); got != token.TokenUnknown {
+		t.Errorf("Empty input should return TokenUnknown, got %v", got)
+	}
+}
+
+func TestTrieNodeStructure(t *testing.T) {
+	fresh := NewTrie()
+	fresh.AddExactPattern("ABC", token.TokenWord)
+
+	// A three-rune pattern must yield at least four nodes and one terminal
+	got := fresh.GetStats()
+	if got.TrieNodes < 4 { // root + A + B + C
+		t.Errorf("Expected at least 4 trie nodes, got %d", got.TrieNodes)
+	}
+	if got.ExactPatterns < 1 {
+		t.Errorf("Expected at least 1 exact pattern, got %d", got.ExactPatterns)
+	}
+}
+
+func TestTrieDepthCalculation(t *testing.T) {
+	fresh := NewTrie()
+	for _, p := range []string{"A", "ABCDEFGHIJ"} { // the second pattern is 10 chars deep
+		fresh.AddExactPattern(p, token.TokenWord)
+	}
+
+	if depth := fresh.GetStats().MaxDepth; depth < 10 {
+		t.Errorf("Expected max depth >= 10, got %d", depth)
+	}
+}
diff --git a/pkg/logs/patterns/clustering/cluster.go b/pkg/logs/patterns/clustering/cluster.go
new file mode 100644
index 000000000000..5c00ddde9222
--- /dev/null
+++ b/pkg/logs/patterns/clustering/cluster.go
@@ -0,0 +1,163 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+// Package clustering provides clustering functionality for grouping similar TokenLists
+// and identifying wildcard positions for pattern extraction.
+package clustering
+
+import (
+	"strings"
+	"time"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering/merging"
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
+)
+
+// Cluster represents a cluster with a group of TokenLists that have identical signatures.
+// A cluster may contain multiple patterns when token lists with the same signature cannot be merged, because structural fidelity is valuable.
+// Examples:
+// "Status: OK"     → HTTP response format
+// "Status; OK"     → CSV-like format
+// "Status OK"      → Plain text format
+// These are different log formats, even if semantically similar → we need to keep them separate.
+type Cluster struct {
+	Signature token.Signature // Signature this cluster was created for
+	Patterns  []*Pattern      // Multiple patterns per cluster; nil until the first AddTokenListToPatterns call
+
+	// Timestamp tracking for the cluster itself
+	CreatedAt time.Time // When cluster was first created
+	UpdatedAt time.Time // When cluster was last modified (a pattern was created or merged into)
+}
+
+// NewCluster creates a new cluster for signature with no patterns yet.
+// tokenList is not used here; the first pattern is created by AddTokenListToPatterns.
+func NewCluster(signature token.Signature, tokenList *token.TokenList) *Cluster {
+	created := time.Now()
+
+	// Patterns is left nil: it will be generated when needed
+	c := &Cluster{Signature: signature}
+	c.CreatedAt, c.UpdatedAt = created, created
+	return c
+}
+
+// =============================================================================
+// Core Clustering Logic
+// =============================================================================
+
+// AddTokenListToPatterns adds a TokenList to the appropriate pattern in the cluster and returns that pattern.
+// If no matching pattern exists (or merging into the matching one fails), creates a new one.
+func (c *Cluster) AddTokenListToPatterns(tokenList *token.TokenList) *Pattern {
+	// First TokenList of this cluster: there is nothing to merge with yet
+	if len(c.Patterns) == 0 {
+		// No patterns yet, create first one
+		patternID := generatePatternID()
+		pattern := newPattern(tokenList, patternID)
+
+		c.Patterns = []*Pattern{pattern}
+		// The cluster changed (it now holds its first pattern): refresh its timestamp
+		c.UpdatedAt = time.Now()
+		return pattern
+	}
+
+	// Try to find a matching pattern
+	for _, p := range c.Patterns {
+		// Check if this TokenList can merge with this pattern's sample
+		if p.Sample != nil && merging.CanMergeTokenLists(tokenList, p.Sample) {
+			// CRITICAL: Also verify it can merge with the template
+			// If template has evolved differently, regeneratePattern will fail
+			// and we should create a new pattern instead
+			// Note: CanMergeTokenLists is not symmetric, so check both directions
+			if p.Template != nil {
+				templateCompatible1 := merging.CanMergeTokenLists(p.Template, tokenList)
+				templateCompatible2 := merging.CanMergeTokenLists(tokenList, p.Template)
+				templateCompatible := templateCompatible1 || templateCompatible2
+				if !templateCompatible {
+					// Log matches sample but not template - template has evolved incompatibly
+					// Skip this pattern and continue searching; if no other pattern fits,
+					// a new one is created after the loop
+					continue
+				}
+			}
+
+			// Incrementally merge the new token list into the pattern template
+			// regeneratePattern will update template if merge succeeds
+			if c.regeneratePattern(p, tokenList) {
+				// Merge succeeded (same PatternID is preserved). Count the log only now, so a
+				// failed merge never inflates the LogCount of a pattern the log did not join
+				p.LogCount++
+				p.UpdatedAt = time.Now()
+				c.UpdatedAt = time.Now()
+				return p // Return existing pattern with updated template
+			}
+			// regeneratePattern failed - template couldn't merge with tokenList
+			// This shouldn't happen if we checked above, but handle it gracefully
+			// Create a new pattern instead
+			break
+		}
+	}
+
+	// No matching pattern found, create a new one
+	patternID := generatePatternID()
+	pattern := newPattern(tokenList, patternID)
+	c.Patterns = append(c.Patterns, pattern)
+	c.UpdatedAt = time.Now()
+	return pattern
+}
+
+// regeneratePattern incrementally merges newTokenList into p.Template and rebuilds p.Positions.
+// Returns true if merge succeeded, false if there is no template or MergeTokenLists returned nil.
+func (c *Cluster) regeneratePattern(p *Pattern, newTokenList *token.TokenList) bool {
+	if p.Template == nil {
+		return false
+	}
+
+	// Incremental merge: merge new log with existing template
+	merged := merging.MergeTokenLists(p.Template, newTokenList)
+	if merged == nil {
+		// Merge failed - template and newTokenList are incompatible
+		return false
+	}
+
+	// Collect the index of every wildcard token of the merged template
+	positions := make([]int, 0, merged.Length())
+	for i := 0; i < merged.Length(); i++ {
+		if merged.Tokens[i].Wildcard != token.IsWildcard {
+			continue
+		}
+		positions = append(positions, i)
+
+		// Path wildcards take their value from the sample's token at the same index
+		if merged.Tokens[i].Type == token.TokenAbsolutePath && p.Sample != nil && i < p.Sample.Length() {
+			merged.Tokens[i].Value = getPathPattern(p.Sample.Tokens[i].Value)
+		}
+	}
+
+	p.Template = merged
+	p.Positions = positions
+
+	p.UpdatedAt = time.Now()
+	return true
+}
+
+// getPathPattern converts a path to a hierarchical wildcard pattern with one
+// "/*" per slash-separated segment, e.g. "/api/users" becomes "/*/*".
+// Leading and trailing slashes are ignored; a path made only of slashes (or an
+// empty path) yields "/". Empty interior segments still count: "/a//b" gives
+// "/*/*/*".
+func getPathPattern(path string) string {
+	// Remove leading/trailing slashes so they do not create empty segments
+	trimmed := strings.Trim(path, "/")
+	if trimmed == "" {
+		// Covers "/", "", "//", ...: nothing but separators
+		return "/"
+	}
+
+	// n separators inside the trimmed path delimit n+1 segments; counting them
+	// avoids allocating the slice strings.Split would build, and strings.Repeat
+	// replaces the quadratic += loop
+	segments := strings.Count(trimmed, "/") + 1
+
+	return strings.Repeat("/*", segments)
+}
diff --git a/pkg/logs/patterns/clustering/cluster_manager.go b/pkg/logs/patterns/clustering/cluster_manager.go
new file mode 100644
index 000000000000..61e4bda83254
--- /dev/null
+++ b/pkg/logs/patterns/clustering/cluster_manager.go
@@ -0,0 +1,121 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+// Package clustering provides clustering functionality for grouping similar TokenLists
+// and identifying wildcard positions for pattern extraction.
+package clustering
+
+import (
+	"crypto/rand"
+	"encoding/binary"
+	"sync"
+	"time"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering/merging"
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
+	"github.com/DataDog/datadog-agent/pkg/trace/log"
+)
+
+// PatternChangeType indicates what changed when adding a TokenList to the cluster manager
+type PatternChangeType int
+
+const (
+	// PatternNoChange means the TokenList joined an existing pattern whose wildcard count stayed the same (also returned, with a nil pattern, for nil/empty input)
+	PatternNoChange PatternChangeType = iota
+	// PatternNew means a brand new pattern was created (new signature, or the returned pattern is not the one Add predicted within the cluster)
+	PatternNew
+	// PatternUpdated means an existing pattern's wildcard count changed after the merge
+	PatternUpdated
+)
+
+// ClusterManager manages the clustering of TokenLists using hash-based bucketing.
+type ClusterManager struct {
+	mu          sync.RWMutex          // guards hashBuckets; Add and Clear take the write lock
+	hashBuckets map[uint64][]*Cluster // signature hash → clusters with that hash (told apart via Signature.Equals)
+}
+
+// NewClusterManager returns a ClusterManager with an empty, ready-to-use bucket map.
+func NewClusterManager() *ClusterManager {
+	cm := new(ClusterManager)
+	cm.hashBuckets = make(map[uint64][]*Cluster)
+	return cm
+}
+
+// Add processes a TokenList and adds it to the appropriate cluster.
+// Returns the pattern that was created/updated and a PatternChangeType indicating what changed; nil or empty input is logged and yields (nil, PatternNoChange).
+func (cm *ClusterManager) Add(tokenList *token.TokenList) (*Pattern, PatternChangeType) {
+	if tokenList == nil || tokenList.IsEmpty() {
+		log.Errorf("Cluster Manager failed to add log: %v for patterning. Token list is empty or nil.", tokenList) // pass the pointer, not tokenList.String(): fmt prints <nil> for a nil receiver instead of panicking
+		return nil, PatternNoChange
+	}
+
+	cm.mu.Lock()
+	defer cm.mu.Unlock()
+
+	// Create new signature and hash it
+	signature := token.NewSignature(tokenList)
+	hash := signature.Hash
+
+	// Get hash bucket
+	clusters := cm.hashBuckets[hash]
+
+	// Look for existing cluster with matching signature
+	for _, cluster := range clusters {
+		if !cluster.Signature.Equals(signature) {
+			continue
+		}
+
+		// Predict the target pattern (first sample-compatible one). NOTE(review): AddTokenListToPatterns can skip it on template mismatch and merge into a later existing pattern, which is then reported as PatternNew - confirm intended
+		var matchedPattern *Pattern
+		var oldWildcardCount int
+		for _, p := range cluster.Patterns {
+			if p.Sample != nil && merging.CanMergeTokenLists(tokenList, p.Sample) {
+				matchedPattern = p
+				oldWildcardCount = p.GetWildcardCount()
+				break
+			}
+		}
+
+		// Add the tokenList to the cluster (merges or creates new pattern)
+		pattern := cluster.AddTokenListToPatterns(tokenList)
+
+		// Check if a new pattern was created (no match found or merge failed)
+		if matchedPattern == nil || matchedPattern.PatternID != pattern.PatternID {
+			return pattern, PatternNew
+		}
+
+		// Check if wildcard count changed (pattern evolved)
+		if pattern.GetWildcardCount() != oldWildcardCount {
+			return pattern, PatternUpdated
+		}
+
+		return pattern, PatternNoChange
+	}
+
+	// If no matching cluster was found, create a new cluster and pattern.
+	newCluster := NewCluster(signature, tokenList)
+	// Add the token list to create the first pattern
+	pattern := newCluster.AddTokenListToPatterns(tokenList)
+	cm.hashBuckets[hash] = append(clusters, newCluster)
+
+	return pattern, PatternNew
+}
+
+// Clear removes all clusters by swapping in a fresh bucket map under the write lock.
+func (cm *ClusterManager) Clear() {
+	fresh := make(map[uint64][]*Cluster)
+	cm.mu.Lock()
+	cm.hashBuckets = fresh
+	cm.mu.Unlock()
+}
+
+// generatePatternID returns 8 bytes from crypto/rand read as a big-endian uint64,
+// or the current Unix time in nanoseconds if reading random bytes fails
+func generatePatternID() uint64 {
+	var raw [8]byte
+	if _, err := rand.Read(raw[:]); err != nil {
+		return uint64(time.Now().UnixNano())
+	}
+	return binary.BigEndian.Uint64(raw[:])
+}
diff --git a/pkg/logs/patterns/clustering/cluster_manager_test.go b/pkg/logs/patterns/clustering/cluster_manager_test.go
new file mode 100644
index 000000000000..39c7a31953c1
--- /dev/null
+++ b/pkg/logs/patterns/clustering/cluster_manager_test.go
@@ -0,0 +1,300 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+package clustering
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
+)
+
+// Test-only helper functions
+
+// getCluster looks up the bucket for signature.Hash under the read lock and
+// returns the first cluster whose signature Equals the given one, or nil.
+func getCluster(cm *ClusterManager, signature token.Signature) *Cluster {
+	hash := signature.Hash
+
+	cm.mu.RLock()
+	defer cm.mu.RUnlock()
+
+	// A missing bucket reads as a nil slice, so the loop simply does not run
+	bucket := cm.hashBuckets[hash]
+	for _, candidate := range bucket {
+		if !candidate.Signature.Equals(signature) {
+			continue
+		}
+		return candidate
+	}
+
+	// Bucket absent, or it holds no cluster with an equal signature
+	return nil
+}
+
+// getAllPatterns flattens the patterns of every cluster in every hash bucket
+// into one slice (nil when there are none); bucket order follows map iteration.
+func getAllPatterns(cm *ClusterManager) []*Pattern {
+	cm.mu.RLock()
+	defer cm.mu.RUnlock()
+
+	var collected []*Pattern
+	// Visit every bucket, then every cluster inside it
+	for _, bucket := range cm.hashBuckets {
+		for i := range bucket {
+			// Append this cluster's patterns as they are
+			collected = append(collected, bucket[i].Patterns...)
+		}
+	}
+
+	return collected
+}
+
+// TestClusterManager_NewClusterManager checks that the constructor returns a non-nil manager.
+func TestClusterManager_NewClusterManager(t *testing.T) {
+	manager := NewClusterManager()
+	assert.NotNil(t, manager, "ClusterManager should not be nil")
+}
+
+// TestClusterManager_Add_NewCluster checks that the first Add yields a pattern with one log, reported as PatternNew.
+func TestClusterManager_Add_NewCluster(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Create TokenList
+	tokenList := token.NewTokenListWithTokens([]token.Token{
+		{Value: "GET", Type: token.TokenHTTPMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api", Type: token.TokenAbsolutePath},
+	})
+
+	pattern, changeType := cm.Add(tokenList)
+
+	assert.NotNil(t, pattern, "Should return a pattern")
+	assert.Equal(t, 1, pattern.LogCount, "Pattern should have log count 1")
+	assert.Equal(t, PatternNew, changeType, "Expected PatternNew for first add")
+}
+
+// TestClusterManager_Add_ExistingCluster checks that a second TokenList with the same signature joins the first pattern.
+func TestClusterManager_Add_ExistingCluster(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Create two TokenLists with same signature:
+	// identical token types at every position, different values at two of them
+	first := token.NewTokenListWithTokens([]token.Token{
+		{Value: "GET", Type: token.TokenHTTPMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api", Type: token.TokenAbsolutePath},
+	})
+	second := token.NewTokenListWithTokens([]token.Token{
+		{Value: "POST", Type: token.TokenHTTPMethod}, // Different value, same type
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/users", Type: token.TokenAbsolutePath}, // Different value, same type
+	})
+
+	pattern1, changeType1 := cm.Add(first)
+	pattern2, changeType2 := cm.Add(second)
+
+	// Should be the same pattern (same cluster, merged together)
+	assert.Equal(t, pattern1.PatternID, pattern2.PatternID, "TokenLists with same signature should merge into same pattern")
+	assert.Equal(t, 2, pattern2.LogCount, "Pattern should have log count 2")
+	assert.Equal(t, PatternNew, changeType1, "Expected PatternNew for first add")
+
+	// With eager pattern generation, adding the second token list
+	// is expected to create wildcards, which Add reports as a pattern update
+	assert.Equal(t, PatternUpdated, changeType2, "Expected PatternUpdated for second add (creates wildcards)")
+}
+
+// TestClusterManager_Add_DifferentSignatures checks that TokenLists with different token types get different patterns.
+func TestClusterManager_Add_DifferentSignatures(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Create TokenLists with different signatures:
+	// same length, but the first and last token types differ
+	first := token.NewTokenListWithTokens([]token.Token{
+		{Value: "GET", Type: token.TokenHTTPMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api", Type: token.TokenAbsolutePath},
+	})
+	second := token.NewTokenListWithTokens([]token.Token{
+		{Value: "ERROR", Type: token.TokenSeverityLevel}, // Different type
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "failed", Type: token.TokenWord}, // Different type
+	})
+
+	pattern1, _ := cm.Add(first)
+	pattern2, _ := cm.Add(second)
+
+	// Should be different patterns (different clusters)
+	assert.NotEqual(t, pattern1.PatternID, pattern2.PatternID, "TokenLists with different signatures should create different patterns")
+}
+
+// TestClusterManager_GetCluster checks lookup by signature for both a stored and an unknown signature.
+func TestClusterManager_GetCluster(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Create and add TokenList
+	tokenList := token.NewTokenListWithTokens([]token.Token{
+		{Value: "GET", Type: token.TokenHTTPMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api", Type: token.TokenAbsolutePath},
+	})
+	signature := token.NewSignature(tokenList)
+
+	addedPattern, _ := cm.Add(tokenList)
+
+	// Retrieve cluster by signature
+	retrievedCluster := getCluster(cm, signature)
+
+	assert.NotNil(t, retrievedCluster, "Should retrieve cluster by signature")
+	assert.Equal(t, 1, len(retrievedCluster.Patterns), "Cluster should have 1 pattern")
+	assert.Equal(t, addedPattern.PatternID, retrievedCluster.Patterns[0].PatternID, "Pattern IDs should match")
+
+	// Try to get non-existent cluster: this signature was never added
+	differentTokenList := token.NewTokenListWithTokens([]token.Token{
+		{Value: "ERROR", Type: token.TokenSeverityLevel},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "failed", Type: token.TokenWord},
+	})
+	differentSignature := token.NewSignature(differentTokenList)
+
+	// Lookup of the unknown signature must come back empty
+	nonExistentCluster := getCluster(cm, differentSignature)
+	assert.Nil(t, nonExistentCluster, "Should return nil for non-existent cluster")
+}
+
+// TestClusterManager_Clear checks that Clear drops a previously added cluster.
+func TestClusterManager_Clear(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Add some data
+	tokenList := token.NewTokenListWithTokens([]token.Token{
+		{Value: "GET", Type: token.TokenHTTPMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api", Type: token.TokenAbsolutePath},
+	})
+	signature := token.NewSignature(tokenList)
+
+	cm.Add(tokenList)
+
+	// Verify data exists
+	assert.NotNil(t, getCluster(cm, signature), "Should have cluster before clear")
+
+	// Clear
+	cm.Clear()
+
+	// Verify data is gone
+	assert.Nil(t, getCluster(cm, signature), "Should have no cluster after clear")
+}
+
+func TestClusterManager_GetAllPatterns(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Initially empty
+	patterns := getAllPatterns(cm)
+	assert.Equal(t, 0, len(patterns), "Should have no patterns initially")
+
+	// Add token list 1 (first signature): creates pattern 1
+	tokens1 := []token.Token{
+		{Value: "GET", Type: token.TokenHTTPMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api", Type: token.TokenAbsolutePath},
+	}
+	pattern1, _ := cm.Add(token.NewTokenListWithTokens(tokens1))
+
+	// Add token list 2 (same signature as list 1): should merge into pattern 1, so pattern2 is pattern 1 again
+	tokens2 := []token.Token{
+		{Value: "POST", Type: token.TokenHTTPMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/users", Type: token.TokenAbsolutePath},
+	}
+	pattern2, _ := cm.Add(token.NewTokenListWithTokens(tokens2))
+
+	// Add token list 3 (different signature): creates a second, separate pattern
+	tokens3 := []token.Token{
+		{Value: "ERROR", Type: token.TokenSeverityLevel},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "failed", Type: token.TokenWord},
+	}
+	pattern3, _ := cm.Add(token.NewTokenListWithTokens(tokens3))
+
+	// Get all patterns
+	allPatterns := getAllPatterns(cm)
+
+	// Should have 2 patterns: pattern1 (which absorbed token list 2) and pattern3
+	assert.Equal(t, 2, len(allPatterns), "Should have 2 patterns total")
+
+	// Verify we have both pattern IDs (order is not checked: it follows map iteration)
+	patternIDs := make(map[uint64]bool)
+	for _, p := range allPatterns {
+		patternIDs[p.PatternID] = true
+	}
+	assert.True(t, patternIDs[pattern1.PatternID], "Should include pattern 1")
+	assert.True(t, patternIDs[pattern3.PatternID], "Should include pattern 3")
+	assert.Equal(t, pattern1.PatternID, pattern2.PatternID, "Pattern 1 and 2 should be the same (merged)")
+}
+
+func TestClusterManager_PatternChangeType(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Create token lists with same signature (HTTP method, space, path); only the method and path values differ
+	tokens1 := []token.Token{
+		{Value: "GET", Type: token.TokenHTTPMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api/users", Type: token.TokenAbsolutePath},
+	}
+	tokens2 := []token.Token{
+		{Value: "POST", Type: token.TokenHTTPMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api/orders", Type: token.TokenAbsolutePath},
+	}
+	tokens3 := []token.Token{
+		{Value: "PUT", Type: token.TokenHTTPMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api/items", Type: token.TokenAbsolutePath},
+	}
+	tokens4 := []token.Token{
+		{Value: "DELETE", Type: token.TokenHTTPMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api/products", Type: token.TokenAbsolutePath},
+	}
+
+	tokenList1 := token.NewTokenListWithTokens(tokens1)
+	tokenList2 := token.NewTokenListWithTokens(tokens2)
+	tokenList3 := token.NewTokenListWithTokens(tokens3)
+	tokenList4 := token.NewTokenListWithTokens(tokens4)
+
+	// First add - should create a new pattern
+	pattern1, changeType1 := cm.Add(tokenList1)
+	assert.Equal(t, PatternNew, changeType1, "Expected PatternNew for first add")
+	t.Logf("✅ Add #1: PatternNew (created pattern with PatternID=%d)", pattern1.PatternID)
+
+	// Second add - same signature, adding to existing pattern creates wildcards (pattern update)
+	pattern2, changeType2 := cm.Add(tokenList2)
+	assert.Equal(t, PatternUpdated, changeType2, "Expected PatternUpdated for second add (creates wildcards)")
+	assert.Equal(t, pattern1.PatternID, pattern2.PatternID, "Should return same pattern for same signature")
+	t.Logf("✅ Add #2: PatternUpdated (wildcards created, logCount=%d)", pattern2.LogCount)
+	t.Logf("   Pattern after 2 logs: '%s'", pattern2.GetPatternString())
+
+	// Third add - pattern exists and its wildcard count is expected to stay the same (method and path are already wildcards)
+	pattern3, changeType3 := cm.Add(tokenList3)
+	assert.Equal(t, PatternNoChange, changeType3, "Expected PatternNoChange for third add (wildcard count unchanged)")
+	assert.Equal(t, pattern1.PatternID, pattern3.PatternID, "Should return same pattern for same signature")
+	t.Logf("✅ Add #3: PatternNoChange (wildcard count unchanged, logCount=%d)", pattern3.LogCount)
+	t.Logf("   Pattern after 3 logs: '%s'", pattern3.GetPatternString())
+
+	// Fourth add - pattern exists, wildcard count still unchanged
+	pattern4, changeType4 := cm.Add(tokenList4)
+	assert.Equal(t, PatternNoChange, changeType4, "Expected PatternNoChange for fourth add (wildcard count unchanged)")
+	t.Logf("✅ Add #4: PatternNoChange (wildcard count unchanged, logCount=%d)", pattern4.LogCount)
+
+	// Final pattern (eagerly generated by Add)
+	t.Logf("   Final pattern after 4 logs: '%s'", pattern4.GetPatternString())
+
+	// Verify the pattern returned by the last add has counted all four logs
+	assert.Equal(t, 4, pattern4.LogCount, "Expected pattern log count 4")
+}
diff --git a/pkg/logs/patterns/clustering/cluster_test.go b/pkg/logs/patterns/clustering/cluster_test.go
new file mode 100644
index 000000000000..350ec8be5a0d
--- /dev/null
+++ b/pkg/logs/patterns/clustering/cluster_test.go
@@ -0,0 +1,456 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+package clustering
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering/merging"
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
+)
+
+func TestCluster_NewCluster(t *testing.T) {
+	// Build a three-token list (HTTP method, whitespace, path) and derive its signature.
+	tokens := []token.Token{
+		{Value: "GET", Type: token.TokenHTTPMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api", Type: token.TokenAbsolutePath},
+	}
+	tokenList := token.NewTokenListWithTokens(tokens)
+	signature := token.NewSignature(tokenList)
+
+	cluster := NewCluster(signature, tokenList)
+	// Construction alone is expected to keep the signature and register no pattern or log.
+	assert.Equal(t, 0, clusterSize(cluster), "Expected cluster size 0 initially")
+	assert.True(t, cluster.Signature.Equals(signature), "Cluster signature doesn't match expected signature")
+	assert.Empty(t, cluster.Patterns, "Patterns should be empty initially (computed lazily)")
+}
+
+func TestCluster_AddTokenListToPatterns(t *testing.T) {
+	// First log: the cluster is created with the signature derived from it.
+	tokens1 := []token.Token{
+		{Value: "GET", Type: token.TokenHTTPMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api", Type: token.TokenAbsolutePath},
+	}
+	tokenList1 := token.NewTokenListWithTokens(tokens1)
+	signature1 := token.NewSignature(tokenList1)
+
+	cluster := NewCluster(signature1, tokenList1)
+	cluster.AddTokenListToPatterns(tokenList1)
+
+	assert.Equal(t, 1, clusterSize(cluster), "Expected initial cluster size 1")
+
+	// Second log: same token types in the same order, different method and path values.
+	tokens2 := []token.Token{
+		{Value: "POST", Type: token.TokenHTTPMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/users", Type: token.TokenAbsolutePath},
+	}
+	tokenList2 := token.NewTokenListWithTokens(tokens2)
+
+	// Adding it must raise the total log count to 2, whichever pattern it lands in.
+	cluster.AddTokenListToPatterns(tokenList2)
+
+	assert.Equal(t, 2, clusterSize(cluster), "Expected cluster size 2 after adding")
+	assert.NotEmpty(t, cluster.Patterns, "Expected patterns to exist after adding TokenLists")
+}
+
+func TestCluster_SinglePattern_SingleLog(t *testing.T) {
+	// A cluster that has received exactly one log must hold one pattern without wildcards.
+	tokens := []token.Token{
+		{Value: "ERROR", Type: token.TokenSeverityLevel},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "failed", Type: token.TokenWord},
+	}
+	tokenList := token.NewTokenListWithTokens(tokens)
+	signature := token.NewSignature(tokenList)
+
+	cluster := NewCluster(signature, tokenList)
+	cluster.AddTokenListToPatterns(tokenList)
+
+	// Exactly one pattern is expected, so it is also the most common one.
+	assert.Equal(t, 1, len(cluster.Patterns), "Should have exactly one pattern")
+
+	mostCommon := getMostCommonPattern(cluster)
+	assert.NotNil(t, mostCommon, "Most common pattern should not be nil")
+
+	pattern := mostCommon.Template
+	assert.NotNil(t, pattern, "Pattern template should not be nil")
+	assert.False(t, hasWildcards(cluster), "Single log should not have wildcards")
+	assert.Equal(t, tokenList.Length(), pattern.Length(), "Pattern length should match original TokenList")
+	// The template must reproduce the original token values position by position.
+	for i, tok := range pattern.Tokens {
+		assert.Equal(t, tokenList.Tokens[i].Value, tok.Value,
+			"Pattern token %d value mismatch", i)
+	}
+}
+
+func TestCluster_MultiplePatterns_SpecialCharVariation(t *testing.T) {
+	// Covers a cluster that ends up holding more than one pattern.
+	// TokenLists with same signature but different special characters should create multiple patterns
+	// Note: Whitespace variations now merge (normalized to single space)
+	// NOTE(review): the signature below is hand-written and does not obviously describe the six tokens used here; confirm this is intended.
+	signature := token.Signature{
+		Position: "Error|Word|Whitespace|Word|Word|Word",
+		Length:   6,
+		Hash:     1234,
+	}
+
+	cluster := NewCluster(signature, nil)
+
+	// Create TokenLists with different special characters (cannot merge - structural difference)
+	tokens1 := []token.Token{
+		{Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard}, // Protected first word
+		{Value: ":", Type: token.TokenWord, Wildcard: token.NotWildcard},     // Colon
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "connection", Type: token.TokenWord, Wildcard: token.PotentialWildcard},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "failed", Type: token.TokenWord, Wildcard: token.PotentialWildcard},
+	}
+	tokens2 := []token.Token{
+		{Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard},
+		{Value: ";", Type: token.TokenWord, Wildcard: token.NotWildcard}, // Semicolon - DIFFERENT!
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "connection", Type: token.TokenWord, Wildcard: token.PotentialWildcard},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "timeout", Type: token.TokenWord, Wildcard: token.PotentialWildcard},
+	}
+	tokens3 := []token.Token{
+		{Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard},
+		{Value: ":", Type: token.TokenWord, Wildcard: token.NotWildcard}, // Colon - matches tokens1
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "database", Type: token.TokenWord, Wildcard: token.PotentialWildcard},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "error", Type: token.TokenWord, Wildcard: token.PotentialWildcard},
+	}
+
+	tokenList1 := token.NewTokenListWithTokens(tokens1)
+	tokenList2 := token.NewTokenListWithTokens(tokens2)
+	tokenList3 := token.NewTokenListWithTokens(tokens3)
+
+	cluster.AddTokenListToPatterns(tokenList1)
+	cluster.AddTokenListToPatterns(tokenList2) // Different special char - new pattern
+	cluster.AddTokenListToPatterns(tokenList3) // Same special char as tokens1 - same pattern
+
+	// Should have 2 patterns (one for colon, one for semicolon)
+	assert.Len(t, cluster.Patterns, 2, "Expected 2 patterns due to special character variation")
+
+	// Read both pattern sizes; the indexing below assumes the length assertion above held
+	pattern1Size := cluster.Patterns[0].size()
+	pattern2Size := cluster.Patterns[1].size()
+
+	// One pattern should have 2 token lists, the other should have 1 (order is not asserted)
+	validSizes := (pattern1Size == 2 && pattern2Size == 1) || (pattern1Size == 1 && pattern2Size == 2)
+	assert.True(t, validSizes, "Expected pattern sizes [2, 1], got [%d, %d]", pattern1Size, pattern2Size)
+
+	t.Logf("✅ Multi-pattern cluster created: %d patterns", len(cluster.Patterns))
+	t.Logf("   Pattern 1: %d token lists", cluster.Patterns[0].size())
+	t.Logf("   Pattern 2: %d token lists", cluster.Patterns[1].size())
+}
+
+func TestCluster_FindMatchingPattern(t *testing.T) {
+	signature := token.Signature{
+		Position: "Error|Word|Whitespace|Word",
+		Length:   4,
+		Hash:     5678,
+	}
+	// The signature is written by hand and the cluster is created without an initial token list.
+	cluster := NewCluster(signature, nil)
+
+	tokens1 := []token.Token{
+		{Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard},
+		{Value: ":", Type: token.TokenWord},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "failed", Type: token.TokenWord, Wildcard: token.PotentialWildcard},
+	}
+	tokens2 := []token.Token{
+		{Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard},
+		{Value: ":", Type: token.TokenWord},
+		{Value: "  ", Type: token.TokenWhitespace}, // Two spaces instead of one
+		{Value: "timeout", Type: token.TokenWord, Wildcard: token.PotentialWildcard},
+	}
+
+	tokenList1 := token.NewTokenListWithTokens(tokens1)
+	tokenList2 := token.NewTokenListWithTokens(tokens2)
+
+	pattern1 := cluster.AddTokenListToPatterns(tokenList1)
+	pattern2 := cluster.AddTokenListToPatterns(tokenList2)
+
+	// The two logs differ in whitespace and last word; they are expected to land in separate patterns
+	assert.NotEqual(t, pattern1, pattern2, "Should create different patterns for different whitespace")
+
+	// The findMatchingPattern test helper must map each token list back to the pattern it was added to
+	found1 := findMatchingPattern(cluster, tokenList1)
+	found2 := findMatchingPattern(cluster, tokenList2)
+
+	assert.Equal(t, pattern1, found1, "Should find the first pattern for tokenList1")
+	assert.Equal(t, pattern2, found2, "Should find the second pattern for tokenList2")
+}
+
+func TestCluster_GetMostCommonPattern(t *testing.T) {
+	signature := token.Signature{
+		Position: "Word|Whitespace|Word",
+		Length:   3,
+		Hash:     9999,
+	}
+
+	cluster := NewCluster(signature, nil)
+
+	// Feed the cluster logs that are expected to split into two patterns of unequal size
+	// Pattern 1: the same log three times (should be most common)
+	for i := 0; i < 3; i++ {
+		tokens := []token.Token{
+			{Value: "Service", Type: token.TokenWord, Wildcard: token.NotWildcard},
+			{Value: " ", Type: token.TokenWhitespace},
+			{Value: "started", Type: token.TokenWord, Wildcard: token.PotentialWildcard},
+		}
+		tokenList := token.NewTokenListWithTokens(tokens)
+		cluster.AddTokenListToPatterns(tokenList)
+	}
+
+	// Pattern 2: a single log (less common)
+	tokens2 := []token.Token{
+		{Value: "Service", Type: token.TokenWord, Wildcard: token.NotWildcard},
+		{Value: "  ", Type: token.TokenWhitespace}, // Two spaces instead of one
+		{Value: "stopped", Type: token.TokenWord, Wildcard: token.PotentialWildcard},
+	}
+	tokenList2 := token.NewTokenListWithTokens(tokens2)
+	cluster.AddTokenListToPatterns(tokenList2)
+	// The helper must pick the pattern whose LogCount is 3.
+	mostCommon := getMostCommonPattern(cluster)
+	assert.NotNil(t, mostCommon, "Most common pattern should not be nil")
+	assert.Equal(t, 3, mostCommon.LogCount, "Most common pattern should have 3 logs")
+}
+
+func TestCluster_GetAllPatterns(t *testing.T) {
+	signature := token.Signature{
+		Position: "Word|Whitespace|Numeric",
+		Length:   3,
+		Hash:     1111,
+	}
+
+	cluster := NewCluster(signature, nil)
+
+	// Three logs whose whitespace token is one, two and three spaces wide; each is expected to get its own pattern
+	tokens1 := []token.Token{
+		{Value: "Count", Type: token.TokenWord, Wildcard: token.NotWildcard},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "42", Type: token.TokenNumeric},
+	}
+	tokens2 := []token.Token{
+		{Value: "Count", Type: token.TokenWord, Wildcard: token.NotWildcard},
+		{Value: "  ", Type: token.TokenWhitespace}, // Two spaces
+		{Value: "100", Type: token.TokenNumeric},
+	}
+	tokens3 := []token.Token{
+		{Value: "Count", Type: token.TokenWord, Wildcard: token.NotWildcard},
+		{Value: "   ", Type: token.TokenWhitespace}, // Three spaces
+		{Value: "200", Type: token.TokenNumeric},
+	}
+
+	cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens1))
+	cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens2))
+	cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens3))
+	// The Patterns field is read directly; there is no accessor involved.
+	allPatterns := cluster.Patterns
+	assert.Len(t, allPatterns, 3, "Expected 3 patterns")
+}
+
+func TestCluster_ExtractWildcardValues_MultiPattern(t *testing.T) {
+	signature := token.Signature{
+		Position: "Error|Word|Whitespace|Word",
+		Length:   4,
+		Hash:     2222,
+	}
+
+	cluster := NewCluster(signature, nil)
+	// The two logs are identical except for their last word, which is flagged PotentialWildcard.
+	tokens1 := []token.Token{
+		{Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard},
+		{Value: ":", Type: token.TokenWord},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "connection", Type: token.TokenWord, Wildcard: token.PotentialWildcard},
+	}
+	tokens2 := []token.Token{
+		{Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard},
+		{Value: ":", Type: token.TokenWord},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "timeout", Type: token.TokenWord, Wildcard: token.PotentialWildcard},
+	}
+
+	tokenList1 := token.NewTokenListWithTokens(tokens1)
+	tokenList2 := token.NewTokenListWithTokens(tokens2)
+
+	cluster.AddTokenListToPatterns(tokenList1)
+	cluster.AddTokenListToPatterns(tokenList2)
+
+	// Both logs are expected to merge into the same pattern.
+	// Extract the wildcard values of tokenList2 through the extractWildcardValues test helper
+	values := extractWildcardValues(cluster, tokenList2)
+
+	// Exactly one wildcard value is expected: the last word of tokenList2
+	assert.Len(t, values, 1, "Expected 1 wildcard value")
+	if len(values) > 0 {
+		assert.Equal(t, "timeout", values[0], "Expected wildcard value 'timeout'")
+	}
+}
+
+func TestCluster_Size_MultiPattern(t *testing.T) {
+	signature := token.Signature{
+		Position: "Word|Whitespace|Word",
+		Length:   3,
+		Hash:     3333,
+	}
+
+	cluster := NewCluster(signature, nil)
+
+	// Add the same single-space log twice (pattern 1)
+	for i := 0; i < 2; i++ {
+		tokens := []token.Token{
+			{Value: "Test", Type: token.TokenWord, Wildcard: token.NotWildcard},
+			{Value: " ", Type: token.TokenWhitespace},
+			{Value: "passed", Type: token.TokenWord, Wildcard: token.PotentialWildcard},
+		}
+		cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens))
+	}
+
+	// Add the same double-space log three times (pattern 2)
+	for i := 0; i < 3; i++ {
+		tokens := []token.Token{
+			{Value: "Test", Type: token.TokenWord, Wildcard: token.NotWildcard},
+			{Value: "  ", Type: token.TokenWhitespace}, // Two spaces instead of one
+			{Value: "failed", Type: token.TokenWord, Wildcard: token.PotentialWildcard},
+		}
+		cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens))
+	}
+
+	// clusterSize sums LogCount over all patterns, so the total must be 5 (2 + 3)
+	assert.Equal(t, 5, clusterSize(cluster), "Expected cluster size 5 (2 + 3)")
+}
+
+func TestCluster_BackwardCompatibility(t *testing.T) {
+	// Exercises the test helpers getPatternString, hasWildcards and getWildcardPositions defined in this file.
+	signature := token.Signature{
+		Position: "Word|Whitespace|Word",
+		Length:   3,
+		Hash:     4444,
+	}
+
+	cluster := NewCluster(signature, nil)
+	// Two logs that differ only in their last word, which is flagged PotentialWildcard.
+	tokens1 := []token.Token{
+		{Value: "Service", Type: token.TokenWord, Wildcard: token.NotWildcard},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "started", Type: token.TokenWord, Wildcard: token.PotentialWildcard},
+	}
+	tokens2 := []token.Token{
+		{Value: "Service", Type: token.TokenWord, Wildcard: token.NotWildcard},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "stopped", Type: token.TokenWord, Wildcard: token.PotentialWildcard},
+	}
+
+	cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens1))
+	cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens2))
+
+	patternString := getPatternString(cluster)
+	assert.NotEmpty(t, patternString, "getPatternString should return a valid pattern string")
+
+	wildcards := hasWildcards(cluster)
+	assert.True(t, wildcards, "Should have wildcards")
+
+	wildcardPositions := getWildcardPositions(cluster)
+	assert.NotEmpty(t, wildcardPositions, "Should have wildcard positions")
+
+	t.Logf("✅ Backward compatibility: pattern='%s', wildcards=%v", patternString, wildcardPositions)
+}
+
+// =============================================================================
+// Test Helper Functions
+// =============================================================================
+
+// getMostCommonPattern picks the cluster's pattern with the largest LogCount.
+// Ties go to the earliest pattern; an empty cluster yields nil.
+func getMostCommonPattern(c *Cluster) *Pattern {
+	if len(c.Patterns) == 0 {
+		return nil
+	}
+	// Seed with the first pattern; only a strictly larger count replaces it.
+	best := c.Patterns[0]
+	highest := best.LogCount
+	for _, candidate := range c.Patterns[1:] {
+		if candidate.LogCount > highest {
+			best, highest = candidate, candidate.LogCount
+		}
+	}
+	return best
+}
+
+// getPatternString renders the most common pattern via GetPatternString.
+// It yields the empty string when the cluster holds no patterns.
+func getPatternString(c *Cluster) string {
+	if top := getMostCommonPattern(c); top != nil {
+		return top.GetPatternString()
+	}
+	return ""
+}
+
+// getWildcardPositions exposes the Positions slice of the most common pattern.
+// It yields nil when the cluster holds no patterns.
+func getWildcardPositions(c *Cluster) []int {
+	if top := getMostCommonPattern(c); top != nil {
+		return top.Positions
+	}
+	return nil
+}
+
+// hasWildcards reports whether at least one pattern in the cluster has a
+// non-empty Positions slice.
+func hasWildcards(c *Cluster) bool {
+	found := false
+	for i := 0; i < len(c.Patterns) && !found; i++ {
+		found = len(c.Patterns[i].Positions) != 0
+	}
+	return found
+}
+
+// extractWildcardValues looks up the pattern matching tokenList and asks it for the
+// wildcard values of that list; with no matching pattern it yields an empty, non-nil slice.
+func extractWildcardValues(c *Cluster, tokenList *token.TokenList) []string {
+	if matched := findMatchingPattern(c, tokenList); matched != nil {
+		return matched.GetWildcardValues(tokenList)
+	}
+	return []string{}
+}
+
+// findMatchingPattern returns the first pattern in the cluster whose Sample is
+// non-nil and is accepted by merging.CanMergeTokenLists together with tokenList.
+// It returns nil when no pattern qualifies, which includes an empty cluster.
+func findMatchingPattern(c *Cluster, tokenList *token.TokenList) *Pattern {
+	for _, candidate := range c.Patterns {
+		// A pattern without a stored sample has nothing to compare against.
+		if candidate.Sample == nil {
+			continue
+		}
+		// The incoming list is passed first, the pattern's sample second.
+		if !merging.CanMergeTokenLists(tokenList, candidate.Sample) {
+			continue
+		}
+		return candidate
+	}
+	return nil
+}
+
+// clusterSize returns the total number of logs across all patterns in the cluster.
+func clusterSize(c *Cluster) int {
+	total := 0
+	for _, p := range c.Patterns {
+		total += p.LogCount
+	}
+	return total
+}
diff --git a/pkg/logs/patterns/clustering/merging/merging.go b/pkg/logs/patterns/clustering/merging/merging.go
new file mode 100644
index 000000000000..a778cb0d8f03
--- /dev/null
+++ b/pkg/logs/patterns/clustering/merging/merging.go
@@ -0,0 +1,102 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+// Package merging provides intelligent mergeability logic for pattern generation.
+// It determines which TokenLists can be merged into unified patterns with wildcards,
+// and enforces protection rules to maintain semantic quality.
+package merging
+
+import (
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
+)
+
+// shouldProtectPosition reports whether the token at position is the first word
+// token of tl. That token is kept out of wildcarding so the pattern retains its
+// leading word, wherever it sits in the list (e.g., after timestamps/dates).
+//
+// tokenType is the type of the token at position; non-word types are never protected.
+func shouldProtectPosition(position int, tokenType token.TokenType, tl *token.TokenList) bool {
+	if tokenType != token.TokenWord {
+		return false
+	}
+
+	// Scan the tokens before position; the first earlier word token found
+	// settles the answer and ends the loop.
+	firstWord := true
+	for idx := 0; idx < position && firstWord; idx++ {
+		firstWord = tl.Tokens[idx].Type != token.TokenWord
+	}
+
+	return firstWord
+}
+
+// CanMergeTokenLists reports whether tl1 and tl2 can be folded into one pattern.
+// The lists must have the same length and are compared token by token: a Conflict
+// at any position rejects the merge, and so does a Wildcard result on the
+// protected first word token (protection is evaluated against tl1).
+func CanMergeTokenLists(tl1, tl2 *token.TokenList) bool {
+	if tl1.Length() != tl2.Length() {
+		return false
+	}
+
+	for i := 0; i < tl1.Length(); i++ {
+		left := &tl1.Tokens[i]
+		right := &tl2.Tokens[i]
+
+		switch left.Compare(right) {
+		case token.Conflict:
+			// Incompatible tokens can never share a pattern.
+			return false
+
+		case token.Identical:
+			// Same token on both sides: nothing to check.
+
+		case token.Wildcard:
+			// Differing but mergeable tokens would become a wildcard, which is
+			// not allowed on the first word token.
+			if shouldProtectPosition(i, left.Type, tl1) {
+				return false
+			}
+		}
+	}
+
+	return true
+}
+
+// MergeTokenLists builds a new TokenList from tl1 and tl2, position by position.
+// Identical tokens are copied from tl1; tokens whose comparison yields Wildcard
+// become wildcard tokens carrying tl1's type and value.
+// It returns nil when the lengths differ, when any position conflicts, or when
+// the protected first word token would have to be wildcarded.
+func MergeTokenLists(tl1, tl2 *token.TokenList) *token.TokenList {
+	if tl1.Length() != tl2.Length() {
+		return nil
+	}
+
+	merged := token.NewTokenList()
+	for i := 0; i < tl1.Length(); i++ {
+		left := &tl1.Tokens[i]
+		right := &tl2.Tokens[i]
+		outcome := left.Compare(right)
+
+		if outcome == token.Conflict {
+			return nil // one irreconcilable position aborts the whole merge
+		}
+		if outcome == token.Identical {
+			merged.Add(*left)
+			continue
+		}
+		if outcome == token.Wildcard {
+			// The first word token must never turn into a wildcard.
+			if shouldProtectPosition(i, left.Type, tl1) {
+				return nil
+			}
+			// tl1's value is kept as the representative of the wildcard.
+			merged.AddToken(left.Type, left.Value, token.IsWildcard)
+		}
+	}
+
+	return merged
+}
diff --git a/pkg/logs/patterns/clustering/merging/merging_test.go b/pkg/logs/patterns/clustering/merging/merging_test.go
new file mode 100644
index 000000000000..510fdc169c12
--- /dev/null
+++ b/pkg/logs/patterns/clustering/merging/merging_test.go
@@ -0,0 +1,324 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+package merging
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
+)
+
+func TestShouldProtectPosition(t *testing.T) {
+	tests := []struct {
+		name     string
+		tokens   []token.Token
+		position int
+		expected bool
+	}{
+		{
+			name: "First word at position 0 should be protected",
+			tokens: []token.Token{
+				token.NewToken(token.TokenWord, "ERROR", token.NotWildcard),
+				token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+				token.NewToken(token.TokenWord, "failed", token.NotWildcard),
+			},
+			position: 0,
+			expected: true,
+		},
+		{
+			name: "First numeric at position 0 should not be protected",
+			tokens: []token.Token{
+				token.NewToken(token.TokenNumeric, "2025", token.NotWildcard),
+				token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+				token.NewToken(token.TokenWord, "ERROR", token.NotWildcard),
+			},
+			position: 0,
+			expected: false,
+		},
+		{
+			name: "Second word should not be protected",
+			tokens: []token.Token{
+				token.NewToken(token.TokenWord, "ERROR", token.NotWildcard),
+				token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+				token.NewToken(token.TokenWord, "failed", token.NotWildcard),
+			},
+			position: 2,
+			expected: false,
+		},
+		{
+			name: "First word after timestamp should be protected",
+			tokens: []token.Token{
+				token.NewToken(token.TokenNumeric, "2025-11-16", token.NotWildcard),
+				token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+				token.NewToken(token.TokenNumeric, "07:03:09", token.NotWildcard),
+				token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+				token.NewToken(token.TokenWord, "ERROR", token.NotWildcard),
+			},
+			position: 4,
+			expected: true,
+		},
+		{
+			name: "Whitespace should not be protected",
+			tokens: []token.Token{
+				token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+				token.NewToken(token.TokenWord, "ERROR", token.NotWildcard),
+			},
+			position: 0,
+			expected: false,
+		},
+	}
+	// Each case evaluates one position of its own token list, passing that token's type.
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			tl := token.NewTokenListWithTokens(tt.tokens)
+			result := shouldProtectPosition(tt.position, tt.tokens[tt.position].Type, tl)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestCanMergeTokenLists_IdenticalLists(t *testing.T) {
+	tl1 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "hello", token.NotWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "world", token.NotWildcard),
+	})
+	// tl2 repeats tl1 token for token.
+	tl2 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "hello", token.NotWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "world", token.NotWildcard),
+	})
+
+	assert.True(t, CanMergeTokenLists(tl1, tl2))
+}
+
+func TestCanMergeTokenLists_PossiblyWildcardTokens(t *testing.T) {
+	tl1 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "logged", token.NotWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "user123", token.PotentialWildcard),
+	})
+	// Same shape as tl1; only the last word differs, and it is flagged PotentialWildcard on both sides.
+	tl2 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "logged", token.NotWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "admin456", token.PotentialWildcard),
+	})
+
+	assert.True(t, CanMergeTokenLists(tl1, tl2))
+}
+
+func TestCanMergeTokenLists_GenericWords(t *testing.T) {
+	// Generic words without the PotentialWildcard flag should not merge
+	tl1 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "bob", token.NotWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "likes", token.NotWildcard),
+	})
+	// NOTE(review): the differing token is also the first word, which is protected anyway; a later word would isolate the flag.
+	tl2 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "cat", token.NotWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "likes", token.NotWildcard),
+	})
+
+	assert.False(t, CanMergeTokenLists(tl1, tl2))
+}
+
+func TestCanMergeTokenLists_DifferentLengths(t *testing.T) {
+	tl1 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "hello", token.NotWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "world", token.NotWildcard),
+	})
+	// tl2 holds one token against tl1's three, so the length check must reject the pair.
+	tl2 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "hello", token.NotWildcard),
+	})
+
+	assert.False(t, CanMergeTokenLists(tl1, tl2))
+}
+
+func TestCanMergeTokenLists_FirstWordProtection(t *testing.T) {
+	// First word protection should prevent the merge even when the word is flagged PotentialWildcard
+	tl1 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "user123", token.PotentialWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "logged", token.NotWildcard),
+	})
+	// Only the first word differs from tl1.
+	tl2 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "admin456", token.PotentialWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "logged", token.NotWildcard),
+	})
+
+	assert.False(t, CanMergeTokenLists(tl1, tl2), "First word should be protected from wildcarding")
+}
+
+func TestMergeTokenLists_CreateWildcard(t *testing.T) {
+	tl1 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "logged", token.NotWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "user123", token.PotentialWildcard),
+	})
+	// Only the last word differs from tl1, and it is flagged PotentialWildcard on both sides.
+	tl2 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "logged", token.NotWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "admin456", token.PotentialWildcard),
+	})
+
+	merged := MergeTokenLists(tl1, tl2)
+	assert.NotNil(t, merged)
+	assert.Equal(t, 3, merged.Length())
+	assert.Equal(t, "logged", merged.Tokens[0].Value)
+	assert.Equal(t, token.NotWildcard, merged.Tokens[0].Wildcard)
+	assert.Equal(t, " ", merged.Tokens[1].Value)
+	// The wildcard token keeps tl1's value as a representative; its status is tracked by the Wildcard field
+	assert.Equal(t, token.IsWildcard, merged.Tokens[2].Wildcard)
+	assert.Equal(t, token.TokenWord, merged.Tokens[2].Type)
+}
+
+func TestMergeTokenLists_UnmergeableReturnsNil(t *testing.T) {
+	tl1 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "bob", token.NotWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "likes", token.NotWildcard),
+	})
+	// NOTE(review): the differing token is the first word, so first-word protection alone would also yield nil here.
+	tl2 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "cat", token.NotWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "likes", token.NotWildcard),
+	})
+
+	merged := MergeTokenLists(tl1, tl2)
+	assert.Nil(t, merged, "Unmergeable TokenLists should return nil")
+}
+
+func TestMergeTokenLists_ProtectionRulesEnforced(t *testing.T) {
+	// Attempt a merge where the first token is a word that differs between the two lists
+	tl1 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "Login", token.PotentialWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "successful", token.NotWildcard),
+	})
+
+	tl2 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "Logout", token.PotentialWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "successful", token.NotWildcard),
+	})
+
+	// The merge must yield nil because the first word is protected from wildcarding
+	merged := MergeTokenLists(tl1, tl2)
+	assert.Nil(t, merged, "Should not merge when first word differs (protected)")
+}
+
+func TestCanMergeTokenLists_TimestampPrefixedLogs(t *testing.T) {
+	// The first TokenWord token is protected even when it follows a timestamp and a severity level.
+	// Severity tokens have type TokenSeverityLevel, not TokenWord, so they are not the protected token.
+	tl1 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenNumeric, "2025-11-16", token.PotentialWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenNumeric, "07:03:09", token.PotentialWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenSeverityLevel, "ERROR", token.PotentialWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "Failed", token.NotWildcard),
+	})
+
+	tl2 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenNumeric, "2025-11-16", token.PotentialWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenNumeric, "07:03:11", token.PotentialWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenSeverityLevel, "WARN", token.PotentialWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "Memory", token.NotWildcard),
+	})
+
+	// Should NOT merge because first word (Failed vs Memory) differs and is protected
+	// Note: Severity levels (ERROR vs WARN) CAN wildcard - they're not the "first word"
+	assert.False(t, CanMergeTokenLists(tl1, tl2), "First word token (after severity) should be protected")
+}
+
+func TestMergeTokenLists_TimestampPrefixedLogsSameFirstWord(t *testing.T) {
+	// Logs that share the same first word can merge, even with different dates, times and severity levels
+	// Expected pattern shape: * * * Failed *
+	tl1 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenNumeric, "2025-11-15", token.PotentialWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenNumeric, "07:03:09", token.PotentialWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenSeverityLevel, "ERROR", token.PotentialWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "Failed", token.NotWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "user123", token.PotentialWildcard),
+	})
+
+	tl2 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenNumeric, "2025-11-16", token.PotentialWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenNumeric, "07:03:11", token.PotentialWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenSeverityLevel, "WARN", token.PotentialWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "Failed", token.NotWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenWord, "admin456", token.PotentialWildcard),
+	})
+
+	// Should merge: date, time, severity and last word become wildcards; "Failed" is protected but identical
+	merged := MergeTokenLists(tl1, tl2)
+	assert.NotNil(t, merged, "Should merge when first word matches")
+	assert.Equal(t, token.IsWildcard, merged.Tokens[0].Wildcard, "Date should be wildcarded")
+	assert.Equal(t, token.IsWildcard, merged.Tokens[2].Wildcard, "Time should be wildcarded")
+	assert.Equal(t, token.IsWildcard, merged.Tokens[4].Wildcard, "Severity level should be wildcarded")
+	assert.Equal(t, "Failed", merged.Tokens[6].Value, "Failed (first word) should be preserved")
+	assert.Equal(t, token.NotWildcard, merged.Tokens[6].Wildcard, "Failed should not be wildcarded (protected)")
+	assert.Equal(t, token.IsWildcard, merged.Tokens[8].Wildcard, "Last word should be wildcarded")
+}
+
+func TestMergeTokenLists_ProgressiveMerging(t *testing.T) {
+	// Merge three TokenLists step by step: first tl1 with tl2, then that result with tl3
+	tl1 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "Request", token.NotWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenNumeric, "123", token.PotentialWildcard),
+	})
+
+	tl2 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "Request", token.NotWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenNumeric, "456", token.PotentialWildcard),
+	})
+
+	tl3 := token.NewTokenListWithTokens([]token.Token{
+		token.NewToken(token.TokenWord, "Request", token.NotWildcard),
+		token.NewToken(token.TokenWhitespace, " ", token.NotWildcard),
+		token.NewToken(token.TokenNumeric, "789", token.PotentialWildcard),
+	})
+
+	// Merge first two
+	merged12 := MergeTokenLists(tl1, tl2)
+	assert.NotNil(t, merged12)
+	assert.Equal(t, token.IsWildcard, merged12.Tokens[2].Wildcard)
+
+	// Merge result with third
+	merged123 := MergeTokenLists(merged12, tl3)
+	assert.NotNil(t, merged123)
+	assert.Equal(t, 3, merged123.Length())
+	assert.Equal(t, "Request", merged123.Tokens[0].Value)
+	// The wildcard token keeps the value carried over from merged12; its status is tracked by the Wildcard field
+	assert.Equal(t, token.IsWildcard, merged123.Tokens[2].Wildcard)
+	assert.Equal(t, token.TokenNumeric, merged123.Tokens[2].Type)
+}
diff --git a/pkg/logs/patterns/clustering/pattern.go b/pkg/logs/patterns/clustering/pattern.go
new file mode 100644
index 000000000000..788582678d8c
--- /dev/null
+++ b/pkg/logs/patterns/clustering/pattern.go
@@ -0,0 +1,149 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+// Package clustering provides clustering functionality for grouping similar TokenLists
+// and identifying wildcard positions for pattern extraction.
+package clustering
+
+import (
+	"strings"
+	"time"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering/merging"
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
+)
+
+// Pattern represents a single pattern within a cluster.
+// A cluster with the same signature may contain multiple incompatible patterns
+// (e.g., different non-identical special characters that cannot merge).
+//
+// newPattern seeds a Pattern from the first log it sees: that token list becomes both
+// Template and Sample, Positions starts empty and LogCount starts at 1.
+type Pattern struct {
+	Template  *token.TokenList // The pattern template with wildcards (matches proto "template")
+	Positions []int            // Token indices that are wildcards (matches proto "pos_list")
+	PatternID uint64           // Unique pattern ID (matches proto "pattern_id")
+	Sample    *token.TokenList // First log sample (for multi-pattern matching)
+	LogCount  int              // Total number of logs that matched this pattern
+
+	// Timestamp tracking for stateful encoding
+	CreatedAt time.Time // When pattern was first created
+	UpdatedAt time.Time // When pattern was last modified
+}
+
+// newPattern builds the Pattern for the first log of a new group.
+// tokenList is stored, without copying, as both the initial Template and the Sample, so
+// the two fields point at the same TokenList until one of them is reassigned. The pattern
+// starts with no wildcard positions, a LogCount of 1 and identical CreatedAt/UpdatedAt.
+func newPattern(tokenList *token.TokenList, patternID uint64) *Pattern {
+	created := time.Now()
+	p := &Pattern{
+		PatternID: patternID,
+		Template:  tokenList,
+		Sample:    tokenList,
+		Positions: []int{}, // non-nil empty slice: no wildcards yet
+		LogCount:  1,
+		CreatedAt: created,
+		UpdatedAt: created,
+	}
+	return p
+}
+
+// size returns the number of logs in this pattern.
+// It is a plain read of LogCount and performs no counting of its own.
+func (p *Pattern) size() int {
+	return p.LogCount
+}
+
+// GetPatternString renders the template as a string.
+// Tokens marked token.IsWildcard contribute nothing (no placeholder is emitted). Every
+// other token value is passed through sanitizeForTemplate and appended with no separator
+// between tokens. It returns "" when the pattern has no template.
+func (p *Pattern) GetPatternString() string {
+	if p.Template == nil {
+		return ""
+	}
+
+	var sb strings.Builder
+	for _, tok := range p.Template.Tokens {
+		if tok.Wildcard == token.IsWildcard {
+			continue // wildcard tokens are left out of the template entirely
+		}
+		// Writing an empty sanitized value is a no-op, so no emptiness check is needed.
+		sb.WriteString(sanitizeForTemplate(tok.Value))
+	}
+	return sb.String()
+}
+
+// hasWildcards reports whether at least one wildcard position is recorded in Positions.
+func (p *Pattern) hasWildcards() bool {
+	return p.GetWildcardCount() > 0
+}
+
+// GetWildcardCount returns the number of wildcard positions in this pattern.
+// This matches the ParamCount that will be sent in PatternDefine.
+// The count is len(Positions); the template's tokens are not inspected.
+func (p *Pattern) GetWildcardCount() int {
+	return len(p.Positions)
+}
+
+// GetWildcardCharPositions returns, for every wildcard token of the template, the offset in
+// the GetPatternString output at which that wildcard's value belongs.
+// Wildcard tokens add nothing to the rendered template, so adjacent wildcards share an
+// offset. Offsets are accumulated with len() on the sanitized token values, i.e. they are
+// byte offsets. NOTE(review): confirm the consumer does not expect rune offsets, since
+// the two differ for non-ASCII templates. Returns nil when the pattern has no template.
+// Example: Template "User  logged" (wildcard omitted) returns [5] (inject after "User ")
+func (p *Pattern) GetWildcardCharPositions() []int {
+	if p.Template == nil {
+		return nil
+	}
+
+	var offsets []int
+	offset := 0
+	for _, tok := range p.Template.Tokens {
+		if tok.Wildcard == token.IsWildcard {
+			// Record the injection point; the wildcard itself takes no room in the template.
+			offsets = append(offsets, offset)
+			continue
+		}
+		// Only non-wildcard tokens are rendered, so only they advance the offset.
+		offset += len(sanitizeForTemplate(tok.Value))
+	}
+	return offsets
+}
+
+// GetWildcardValues returns the values tokenList holds at this pattern's wildcard positions.
+//
+// Return value:
+//   - an empty, non-nil slice when the pattern has no template or no wildcard positions;
+//   - nil when tokenList is nil or cannot be merged with the template in either direction;
+//   - otherwise a slice with exactly len(p.Positions) entries, where entry i is the value of
+//     the token at index p.Positions[i], or "" when that index is outside tokenList.
+func (p *Pattern) GetWildcardValues(tokenList *token.TokenList) []string {
+	if p.Template == nil || len(p.Positions) == 0 {
+		return []string{}
+	}
+	// A nil list cannot match any template; report it the same way as a mismatch.
+	if tokenList == nil {
+		return nil
+	}
+
+	// The compatibility check is attempted with both argument orders before giving up.
+	if !merging.CanMergeTokenLists(p.Template, tokenList) && !merging.CanMergeTokenLists(tokenList, p.Template) {
+		return nil
+	}
+
+	// make zero-fills the slice, so positions that are skipped below stay "".
+	wildcardValues := make([]string, len(p.Positions))
+	listLen := tokenList.Length()
+	for i, templatePos := range p.Positions {
+		// The lower bound protects against a negative index, which would otherwise panic.
+		if templatePos >= 0 && templatePos < listLen {
+			wildcardValues[i] = tokenList.Tokens[templatePos].Value
+		}
+	}
+	return wildcardValues
+}
+
+// sanitizeForTemplate returns s with every rune below ' ' or at/above U+FFFD removed, and
+// with DEL (0x7F) removed as well.
+// In practice this drops ASCII control characters, U+FFFD itself (which is also what
+// invalid UTF-8 bytes decode to) and every code point above it, emoji included. C1
+// control characters (U+0080-U+009F) fall inside the kept range and pass through.
+func sanitizeForTemplate(s string) string {
+	// len(s) counts bytes, an upper bound on the number of runes that can be kept.
+	kept := make([]rune, 0, len(s))
+	for _, r := range s {
+		if r < ' ' || r == 0x7F || r >= 0xFFFD {
+			continue
+		}
+		kept = append(kept, r)
+	}
+	return string(kept)
+}
diff --git a/pkg/logs/patterns/clustering/pattern_test.go b/pkg/logs/patterns/clustering/pattern_test.go
new file mode 100644
index 000000000000..474c72aa82b4
--- /dev/null
+++ b/pkg/logs/patterns/clustering/pattern_test.go
@@ -0,0 +1,432 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+package clustering
+
+import (
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
+)
+
+// Tests for Pattern and sanitizeForTemplate. The test-only helper getParamCount is defined next to TestGetParamCount.
+
+// TestNewPattern verifies the initial state newPattern gives a pattern built from one log.
+func TestNewPattern(t *testing.T) {
+	firstLog := token.NewTokenList()
+	firstLog.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard))
+	firstLog.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard))
+
+	wantID := uint64(12345)
+	got := newPattern(firstLog, wantID)
+
+	assert.NotNil(t, got)
+	assert.Equal(t, wantID, got.PatternID)
+	// The same token list is expected in both the template and the sample slot.
+	assert.Equal(t, firstLog, got.Template, "Template should be the initial token list")
+	assert.Equal(t, firstLog, got.Sample, "Sample should be the initial token list")
+	assert.Equal(t, 1, got.LogCount, "LogCount should be 1 for first log")
+	assert.Equal(t, 0, len(got.Positions), "No wildcards initially")
+	// Both timestamps must have been stamped at creation time.
+	assert.False(t, got.CreatedAt.IsZero(), "CreatedAt should be set")
+	assert.False(t, got.UpdatedAt.IsZero(), "UpdatedAt should be set")
+}
+
+// TestAddTokenList verifies that LogCount and UpdatedAt can be advanced directly.
+// addTokenList() is noted as having been inlined into Cluster.AddTokenListToPatterns(),
+// so the bookkeeping is reproduced by hand here instead of through a method call.
+func TestAddTokenList(t *testing.T) {
+	firstLog := token.NewTokenList()
+	firstLog.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard))
+	firstLog.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard))
+
+	p := newPattern(firstLog, 12345)
+	countBefore, updatedBefore := p.LogCount, p.UpdatedAt
+
+	// Mimic what happens when another log joins an existing pattern.
+	time.Sleep(1 * time.Millisecond) // make sure the new timestamp is strictly later
+	p.LogCount++
+	p.UpdatedAt = time.Now()
+
+	assert.Equal(t, countBefore+1, p.LogCount, "LogCount should increment")
+	assert.True(t, p.UpdatedAt.After(updatedBefore), "UpdatedAt should be updated")
+}
+
+// TestSize verifies that size() mirrors LogCount as the counter is bumped.
+func TestSize(t *testing.T) {
+	single := token.NewTokenList()
+	single.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard))
+	p := newPattern(single, 12345)
+
+	// A fresh pattern already counts its first log.
+	assert.Equal(t, 1, p.size())
+
+	// Each increment stands in for one more log being attributed to the pattern.
+	for _, want := range []int{2, 3} {
+		p.LogCount++
+		assert.Equal(t, want, p.size())
+	}
+}
+
+// TestGetPatternString_NoWildcards verifies that every token value is rendered when no
+// token is marked IsWildcard.
+func TestGetPatternString_NoWildcards(t *testing.T) {
+	tokens := token.NewTokenList()
+	tokens.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard))
+	tokens.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard))
+	// PotentialWildcard is not IsWildcard, so "started" still appears in the output.
+	tokens.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard))
+
+	rendered := newPattern(tokens, 12345).GetPatternString()
+	assert.Equal(t, "Service started", rendered)
+}
+
+// TestGetPatternString_WithWildcards verifies that an IsWildcard token is dropped from the
+// rendered template.
+func TestGetPatternString_WithWildcards(t *testing.T) {
+	tokens := token.NewTokenList()
+	tokens.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard))
+	tokens.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard))
+	tokens.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard))
+
+	p := newPattern(tokens, 12345)
+	p.Positions = []int{2}
+
+	// Only "Service" and the space survive; the wildcard leaves no placeholder behind.
+	assert.Equal(t, "Service ", p.GetPatternString())
+}
+
+func TestGetPatternString_NilTemplate(t *testing.T) {
+	pattern := &Pattern{
+		Template: nil,
+	}
+	result := pattern.GetPatternString()
+
+	assert.Equal(t, "", result)
+}
+
+// TestHasWildcards verifies that hasWildcards follows the contents of Positions.
+func TestHasWildcards(t *testing.T) {
+	single := token.NewTokenList()
+	single.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard))
+	p := newPattern(single, 12345)
+
+	// newPattern starts with an empty Positions slice.
+	assert.False(t, p.hasWildcards())
+
+	// Assigning indices is enough to flip the result.
+	p.Positions = []int{1, 3}
+	assert.True(t, p.hasWildcards())
+}
+
+// TestGetWildcardPositions verifies that Positions reads back exactly what was assigned.
+func TestGetWildcardPositions(t *testing.T) {
+	single := token.NewTokenList()
+	single.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard))
+	p := newPattern(single, 12345)
+
+	want := []int{1, 3, 5}
+	p.Positions = []int{1, 3, 5}
+	assert.Equal(t, want, p.Positions)
+}
+
+// getParamCount is a test-only helper returning how many wildcard positions p records.
+func getParamCount(p *Pattern) int {
+	return p.GetWildcardCount()
+}
+
+// TestGetParamCount verifies that getParamCount tracks the length of Positions.
+func TestGetParamCount(t *testing.T) {
+	single := token.NewTokenList()
+	single.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard))
+	p := newPattern(single, 12345)
+
+	// A freshly created pattern has no parameters.
+	assert.Equal(t, 0, getParamCount(p))
+
+	// Three recorded positions mean three parameters.
+	p.Positions = []int{1, 3, 5}
+	assert.Equal(t, 3, getParamCount(p))
+}
+
+// TestGetWildcardCharPositions verifies the injection offset of a single trailing wildcard.
+func TestGetWildcardCharPositions(t *testing.T) {
+	tmpl := token.NewTokenList()
+	tmpl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard))
+	tmpl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard))
+	tmpl.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard))
+
+	p := newPattern(tmpl, 12345)
+	p.Positions = []int{2}
+
+	// The rendered template is "Service " (8 chars), so the wildcard is injected at offset 8.
+	assert.Equal(t, []int{8}, p.GetWildcardCharPositions())
+}
+
+// TestGetWildcardCharPositions_MultipleWildcards verifies the offsets of two wildcards that
+// are separated by literal text.
+func TestGetWildcardCharPositions_MultipleWildcards(t *testing.T) {
+	word := func(value string, status token.WildcardStatus) token.Token {
+		return token.NewToken(token.TokenWord, value, status)
+	}
+
+	// "Error <code> in <module>": the two bracketed tokens are wildcards.
+	tmpl := token.NewTokenList()
+	for _, tok := range []token.Token{
+		word("Error", token.NotWildcard),
+		word(" ", token.NotWildcard),
+		word("code", token.IsWildcard),
+		word(" ", token.NotWildcard),
+		word("in", token.NotWildcard),
+		word(" ", token.NotWildcard),
+		word("module", token.IsWildcard),
+	} {
+		tmpl.Add(tok)
+	}
+
+	p := newPattern(tmpl, 12345)
+	p.Positions = []int{2, 6}
+
+	// With both wildcards omitted the template reads "Error  in " (10 chars):
+	// the first injection point follows "Error " (offset 6), the second is at the end (offset 10).
+	assert.Equal(t, []int{6, 10}, p.GetWildcardCharPositions())
+}
+
+func TestGetWildcardCharPositions_NilTemplate(t *testing.T) {
+	pattern := &Pattern{
+		Template: nil,
+	}
+
+	charPositions := pattern.GetWildcardCharPositions()
+	assert.Nil(t, charPositions)
+}
+
+// TestGetWildcardValues verifies that the value at the wildcard index is read from the
+// log that is passed in, not from the template.
+func TestGetWildcardValues(t *testing.T) {
+	// Log under test: "Service started".
+	firstLog := token.NewTokenList()
+	firstLog.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard))
+	firstLog.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard))
+	firstLog.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard))
+
+	// Template "Service *": same shape, third token flagged as the wildcard.
+	tmpl := token.NewTokenList()
+	tmpl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard))
+	tmpl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard))
+	tmpl.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard))
+
+	p := newPattern(firstLog, 12345)
+	p.Template = tmpl
+	p.Positions = []int{2}
+
+	assert.Equal(t, []string{"started"}, p.GetWildcardValues(firstLog))
+}
+
+// TestGetWildcardValues_NilTemplate verifies that no values come back once the template
+// has been cleared.
+func TestGetWildcardValues_NilTemplate(t *testing.T) {
+	firstLog := token.NewTokenList()
+	firstLog.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard))
+
+	p := newPattern(firstLog, 12345)
+	p.Template = nil
+
+	assert.Empty(t, p.GetWildcardValues(firstLog))
+}
+
+// TestGetWildcardValues_NilSample verifies that extraction does not depend on Sample.
+func TestGetWildcardValues_NilSample(t *testing.T) {
+	tmpl := token.NewTokenList()
+	tmpl.Add(token.NewToken(token.TokenWord, "Test", token.IsWildcard))
+
+	p := newPattern(tmpl, 12345)
+	p.Sample = nil
+	p.Positions = []int{0}
+
+	// With no sample available, the template itself is used as the input list.
+	assert.Equal(t, []string{"Test"}, p.GetWildcardValues(tmpl))
+}
+
+// TestExtractWildcardValues verifies extraction from a new log that matches the template.
+func TestExtractWildcardValues(t *testing.T) {
+	// Template "Service *".
+	tmpl := token.NewTokenList()
+	tmpl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard))
+	tmpl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard))
+	tmpl.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard))
+
+	p := newPattern(tmpl, 12345)
+	p.Template = tmpl
+	p.Positions = []int{2}
+
+	// New log "Service crashed": the wildcard slot now holds "crashed".
+	newLog := token.NewTokenList()
+	newLog.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard))
+	newLog.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard))
+	newLog.Add(token.NewToken(token.TokenWord, "crashed", token.PotentialWildcard))
+
+	assert.Equal(t, []string{"crashed"}, p.GetWildcardValues(newLog))
+}
+
+// TestExtractWildcardValues_MultipleWildcards verifies that values come back in the order
+// of Positions when the template has several wildcards.
+func TestExtractWildcardValues_MultipleWildcards(t *testing.T) {
+	// build assembles "<first> in <second> at <third>", giving the three variable words the
+	// supplied wildcard status and leaving the fixed tokens as NotWildcard.
+	build := func(status token.WildcardStatus, first, second, third string) *token.TokenList {
+		list := token.NewTokenList()
+		list.Add(token.NewToken(token.TokenWord, first, status))
+		list.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard))
+		list.Add(token.NewToken(token.TokenWord, "in", token.NotWildcard))
+		list.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard))
+		list.Add(token.NewToken(token.TokenWord, second, status))
+		list.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard))
+		list.Add(token.NewToken(token.TokenWord, "at", token.NotWildcard))
+		list.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard))
+		list.Add(token.NewToken(token.TokenWord, third, status))
+		return list
+	}
+
+	// Template "* in * at *".
+	tmpl := build(token.IsWildcard, "value1", "value2", "value3")
+	p := newPattern(tmpl, 12345)
+	p.Template = tmpl
+	p.Positions = []int{0, 4, 8}
+
+	// New log "Error in module at line".
+	newLog := build(token.PotentialWildcard, "Error", "module", "line")
+
+	assert.Equal(t, []string{"Error", "module", "line"}, p.GetWildcardValues(newLog))
+}
+
+// TestExtractWildcardValues_NilTemplate verifies that a nil template yields an empty,
+// non-nil slice even though Positions is populated.
+func TestExtractWildcardValues_NilTemplate(t *testing.T) {
+	p := &Pattern{Positions: []int{0}} // Template is left nil
+
+	newLog := token.NewTokenList()
+	newLog.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard))
+
+	assert.Equal(t, []string{}, p.GetWildcardValues(newLog))
+}
+
+// TestExtractWildcardValues_NoPositions verifies that a pattern without wildcard positions
+// yields an empty, non-nil slice.
+func TestExtractWildcardValues_NoPositions(t *testing.T) {
+	tmpl := token.NewTokenList()
+	tmpl.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard))
+
+	p := newPattern(tmpl, 12345)
+	p.Positions = []int{} // explicitly no wildcards
+
+	newLog := token.NewTokenList()
+	newLog.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard))
+
+	assert.Equal(t, []string{}, p.GetWildcardValues(newLog))
+}
+
+// TestExtractWildcardValues_PositionOutOfBounds verifies that a position beyond the end of
+// the log produces "" rather than shortening the result.
+func TestExtractWildcardValues_PositionOutOfBounds(t *testing.T) {
+	tmpl := token.NewTokenList()
+	tmpl.Add(token.NewToken(token.TokenWord, "Test", token.IsWildcard))
+
+	p := newPattern(tmpl, 12345)
+	p.Positions = []int{0, 5} // index 5 does not exist in a one-token log
+
+	newLog := token.NewTokenList()
+	newLog.Add(token.NewToken(token.TokenWord, "Value", token.PotentialWildcard))
+
+	// The result must keep one entry per position so it stays aligned with ParamCount;
+	// the unreachable position is represented by an empty string.
+	got := p.GetWildcardValues(newLog)
+	assert.Equal(t, []string{"Value", ""}, got, "Should maintain Positions length with empty strings for out-of-bounds")
+}
+
+func TestSanitizeForTemplate_PrintableChars(t *testing.T) {
+	input := "Hello World 123"
+	result := sanitizeForTemplate(input)
+	assert.Equal(t, "Hello World 123", result)
+}
+
+// TestSanitizeForTemplate_NonPrintableChars verifies that ASCII control bytes are stripped.
+func TestSanitizeForTemplate_NonPrintableChars(t *testing.T) {
+	// NUL, BEL and BS sit between the two words.
+	got := sanitizeForTemplate("Hello\x00\x07\x08World")
+	assert.Equal(t, "HelloWorld", got, "Non-printable characters should be removed")
+}
+
+// TestSanitizeForTemplate_DELCharacter verifies that DEL (0x7F) is stripped.
+func TestSanitizeForTemplate_DELCharacter(t *testing.T) {
+	got := sanitizeForTemplate("Hello\x7FWorld")
+	assert.Equal(t, "HelloWorld", got, "DEL character should be removed")
+}
+
+// TestSanitizeForTemplate_SpecialChars verifies that printable punctuation is preserved.
+func TestSanitizeForTemplate_SpecialChars(t *testing.T) {
+	got := sanitizeForTemplate("Service: Error! @user #tag")
+	assert.Equal(t, "Service: Error! @user #tag", got, "Special chars should be kept")
+}
+
+// TestSanitizeForTemplate_EmptyString verifies that empty input stays empty.
+func TestSanitizeForTemplate_EmptyString(t *testing.T) {
+	assert.Equal(t, "", sanitizeForTemplate(""))
+}
+
+// TestSanitizeForTemplate_UnicodeChars verifies which non-ASCII runes survive sanitizing.
+func TestSanitizeForTemplate_UnicodeChars(t *testing.T) {
+	// The CJK characters (世界) are below the 0xFFFD cut-off and are kept; the emoji (🌍)
+	// is above it and is removed, which leaves the trailing space in place.
+	got := sanitizeForTemplate("Hello 世界 🌍")
+	assert.Equal(t, "Hello 世界 ", got, "CJK chars preserved, emoji filtered")
+}
+
+// TestPattern_IntegrationScenario walks one pattern through creation, a simulated template
+// update with wildcards, and value extraction from a later log.
+func TestPattern_IntegrationScenario(t *testing.T) {
+	// errorLog assembles "ERROR: <w1> <w2> <w3>"; the three trailing words receive the given
+	// wildcard status while the prefix and the spaces are NotWildcard.
+	errorLog := func(status token.WildcardStatus, w1, w2, w3 string) *token.TokenList {
+		list := token.NewTokenList()
+		list.Add(token.NewToken(token.TokenWord, "ERROR", token.NotWildcard))
+		list.Add(token.NewToken(token.TokenWord, ":", token.NotWildcard))
+		for _, w := range []string{w1, w2, w3} {
+			list.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard))
+			list.Add(token.NewToken(token.TokenWord, w, status))
+		}
+		return list
+	}
+
+	// Stage 1: the first log creates the pattern and is rendered verbatim.
+	p := newPattern(errorLog(token.PotentialWildcard, "Database", "connection", "failed"), 9999)
+
+	assert.Equal(t, 1, p.LogCount)
+	assert.False(t, p.hasWildcards())
+	assert.Equal(t, "ERROR: Database connection failed", p.GetPatternString())
+
+	// Stage 2: simulate the update a second log would cause, with the three words wildcarded.
+	p.Template = errorLog(token.IsWildcard, "value", "value", "value")
+	p.Positions = []int{3, 5, 7}
+	p.LogCount++
+	p.UpdatedAt = time.Now()
+
+	assert.Equal(t, 2, p.LogCount)
+	assert.True(t, p.hasWildcards())
+	assert.Equal(t, 3, getParamCount(p))
+	// Only "ERROR", ":" and the three spaces are rendered once the wildcards are omitted.
+	assert.Equal(t, "ERROR:   ", p.GetPatternString())
+
+	// Stage 3: a further log supplies the concrete values for the wildcard slots.
+	laterLog := errorLog(token.PotentialWildcard, "Network", "timeout", "reached")
+	assert.Equal(t, []string{"Network", "timeout", "reached"}, p.GetWildcardValues(laterLog))
+}
diff --git a/pkg/logs/patterns/token/signature.go b/pkg/logs/patterns/token/signature.go
new file mode 100644
index 000000000000..7f8411d39b56
--- /dev/null
+++ b/pkg/logs/patterns/token/signature.go
@@ -0,0 +1,95 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+// Package token provides data structures and utilities for tokenizing log messages.
+package token
+
+import (
+	"fmt"
+	"hash/fnv"
+	"strings"
+)
+
+// Signature represents a structural signature of a TokenList.
+// NewSignature fills the fields; the zero value is what it returns for an empty list.
+type Signature struct {
+	Position string // "|"-joined token type names, prefixed by the first token's value when it is a TokenWord
+	Length   int    // number of tokens in the source TokenList
+	Hash     uint64 // 64-bit FNV-1a hash of Position (0 for an empty list)
+}
+
+// NewSignature derives the Signature of tl.
+// An empty TokenList maps to the zero Signature. Otherwise Position is the "|"-joined list
+// of token type names and, when the first token is a TokenWord, that token's value is
+// prepended (with no separator) so that lists differing only in their leading word, e.g.
+// "I love burger" vs "You love burger", do not end up in the same cluster.
+// Length is the token count and Hash is computeHash(Position).
+func NewSignature(tl *TokenList) Signature {
+	if tl.IsEmpty() {
+		return Signature{}
+	}
+
+	pos := positionSignature(tl)
+	if toks := tl.Tokens; len(toks) > 0 && toks[0].Type == TokenWord {
+		pos = toks[0].Value + pos
+	}
+
+	return Signature{
+		Position: pos,
+		Length:   len(tl.Tokens),
+		Hash:     computeHash(pos),
+	}
+}
+
+// Equals reports whether s and other have the same Position and the same Length.
+// Hash is not compared; NewSignature computes it from Position.
+func (s *Signature) Equals(other Signature) bool {
+	if s.Length != other.Length {
+		return false
+	}
+	return s.Position == other.Position
+}
+
+// computeHash returns the 64-bit FNV-1a hash of input.
+func computeHash(input string) uint64 {
+	hasher := fnv.New64a()
+	// hash.Hash documents that Write never returns an error, so the result is discarded.
+	hasher.Write([]byte(input))
+	sum := hasher.Sum64()
+	return sum
+}
+
+// String returns a string representation of the signature
+func (s *Signature) String() string {
+	return fmt.Sprintf("Sig{pos:%s, len:%d, hash:%x}",
+		s.Position, s.Length, s.Hash)
+}
+
+// IsEmpty returns true if the signature represents an empty TokenList.
+// Only Length is consulted; Position and Hash are not checked.
+func (s *Signature) IsEmpty() bool {
+	return s.Length == 0
+}
+
+// HasSameStructure reports whether s and other share the same Position and Length.
+// It performs the same comparison as Equals and is kept as a separate, descriptive name.
+func (s *Signature) HasSameStructure(other Signature) bool {
+	return s.Equals(other)
+}
+
+// GetHashBucket returns the hash bucket for efficient clustering.
+// The bucket is the Hash field itself; no reduction or modulo is applied here.
+func (s *Signature) GetHashBucket() uint64 {
+	return s.Hash
+}
+
+// positionSignature generates position-based signature
+func positionSignature(tl *TokenList) string {
+	if tl.IsEmpty() {
+		return ""
+	}
+
+	var positionParts []string
+	for _, token := range tl.Tokens {
+		positionParts = append(positionParts, token.Type.String())
+	}
+	return strings.Join(positionParts, "|")
+}
diff --git a/pkg/logs/patterns/token/signature_test.go b/pkg/logs/patterns/token/signature_test.go
new file mode 100644
index 000000000000..885062fa90c4
--- /dev/null
+++ b/pkg/logs/patterns/token/signature_test.go
@@ -0,0 +1,241 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+package token
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestNewSignature(t *testing.T) {
+	// Empty TokenList
+	emptyTL := NewTokenList()
+	emptySig := NewSignature(emptyTL)
+	if emptySig.Position != "" || emptySig.Length != 0 || emptySig.Hash != 0 {
+		t.Error("Empty TokenList should have empty signature")
+	}
+
+	// Non-empty TokenList
+	tokens := []Token{
+		{Type: TokenHTTPMethod, Value: "GET"},
+		{Type: TokenWhitespace, Value: " "},
+		{Type: TokenAbsolutePath, Value: "/api"},
+		{Type: TokenWhitespace, Value: " "},
+		{Type: TokenHTTPStatus, Value: "200"},
+	}
+	tl := NewTokenListWithTokens(tokens)
+	sig := NewSignature(tl)
+
+	expectedPosition := "HTTPMethod|Whitespace|AbsolutePath|Whitespace|HTTPStatus"
+	if sig.Position != expectedPosition {
+		t.Errorf("Expected position signature '%s', got '%s'", expectedPosition, sig.Position)
+	}
+
+	if sig.Length != 5 {
+		t.Errorf("Expected length 5, got %d", sig.Length)
+	}
+
+	if sig.Hash == 0 {
+		t.Error("Hash should not be 0 for non-empty TokenList")
+	}
+}
+
+// TestSignature_Equals verifies how the first word, later values and token types affect
+// signature equality.
+func TestSignature_Equals(t *testing.T) {
+	sigOf := func(tokens ...Token) Signature {
+		return NewSignature(NewTokenListWithTokens(tokens))
+	}
+
+	base := sigOf(
+		Token{Type: TokenWord, Value: "hello"},
+		Token{Type: TokenWhitespace, Value: " "},
+		Token{Type: TokenWord, Value: "world"},
+	)
+
+	// Case 1: same first word and token types; only a later value differs -> equal.
+	sameFirstWord := sigOf(
+		Token{Type: TokenWord, Value: "hello"},
+		Token{Type: TokenWhitespace, Value: " "},
+		Token{Type: TokenWord, Value: "universe"},
+	)
+	if !base.Equals(sameFirstWord) {
+		t.Error("TokenLists with same first word and structure should have equal signatures")
+	}
+
+	// Case 2: same token types but a different first word -> not equal.
+	otherFirstWord := sigOf(
+		Token{Type: TokenWord, Value: "goodbye"},
+		Token{Type: TokenWhitespace, Value: " "},
+		Token{Type: TokenWord, Value: "world"},
+	)
+	if base.Equals(otherFirstWord) {
+		t.Error("TokenLists with different first word should NOT have equal signatures")
+	}
+
+	// Case 3: different token types (and length) -> not equal.
+	otherShape := sigOf(
+		Token{Type: TokenWord, Value: "hello"},
+		Token{Type: TokenNumeric, Value: "123"},
+	)
+	if base.Equals(otherShape) {
+		t.Error("TokenLists with different structure should not have equal signatures")
+	}
+
+	// Case 4: a signature always equals itself.
+	if !base.Equals(base) {
+		t.Error("Signature should equal itself")
+	}
+}
+
+// TestSignature_String verifies that String() output is non-empty and labels every field.
+func TestSignature_String(t *testing.T) {
+	sig := NewSignature(NewTokenListWithTokens([]Token{
+		{Type: TokenWord, Value: "test"},
+	}))
+
+	rendered := sig.String()
+	if rendered == "" {
+		t.Error("Signature string should not be empty")
+	}
+
+	// Each field is introduced by its own label.
+	labels := []string{"pos:", "len:", "hash:"}
+	if !containsAll(rendered, labels) {
+		t.Errorf("Signature string should contain all components, got: %s", rendered)
+	}
+}
+
+// TestSignature_IsEmpty verifies IsEmpty for an empty list and for a one-token list.
+func TestSignature_IsEmpty(t *testing.T) {
+	// Signature built from a list without tokens.
+	blank := NewSignature(NewTokenList())
+	if !blank.IsEmpty() {
+		t.Error("Empty signature should return true for IsEmpty()")
+	}
+
+	// Signature built from a single word token.
+	oneWord := NewSignature(NewTokenListWithTokens([]Token{{Type: TokenWord, Value: "test"}}))
+	if oneWord.IsEmpty() {
+		t.Error("Non-empty signature should return false for IsEmpty()")
+	}
+}
+
+// TestSignature_HasSameStructure verifies that token values are ignored while token types
+// and length are not (none of the lists below starts with a value-sensitive word pair).
+func TestSignature_HasSameStructure(t *testing.T) {
+	sigOf := func(tokens ...Token) Signature {
+		return NewSignature(NewTokenListWithTokens(tokens))
+	}
+
+	// Two HTTP-style lists with identical types but different values.
+	getAPI := sigOf(
+		Token{Type: TokenHTTPMethod, Value: "GET"},
+		Token{Type: TokenWhitespace, Value: " "},
+		Token{Type: TokenAbsolutePath, Value: "/api"},
+	)
+	postUsers := sigOf(
+		Token{Type: TokenHTTPMethod, Value: "POST"},
+		Token{Type: TokenWhitespace, Value: " "},
+		Token{Type: TokenAbsolutePath, Value: "/users"},
+	)
+	if !getAPI.HasSameStructure(postUsers) {
+		t.Error("Signatures with same structure should return true")
+	}
+
+	// A list with other token types and a different length.
+	wordNumber := sigOf(
+		Token{Type: TokenWord, Value: "different"},
+		Token{Type: TokenNumeric, Value: "123"},
+	)
+	if getAPI.HasSameStructure(wordNumber) {
+		t.Error("Signatures with different structure should return false")
+	}
+}
+
+// TestSignature_GetHashBucket verifies that the bucket is the (non-zero) signature hash.
+func TestSignature_GetHashBucket(t *testing.T) {
+	sig := NewSignature(NewTokenListWithTokens([]Token{
+		{Type: TokenWord, Value: "test"},
+	}))
+
+	bucket := sig.GetHashBucket()
+	if bucket != sig.Hash {
+		t.Error("GetHashBucket should return the signature hash")
+	}
+	if bucket == 0 {
+		t.Error("Hash bucket should not be 0 for non-empty signature")
+	}
+}
+
+func TestComputeHash(t *testing.T) {
+	// Test that same input produces same hash
+	input1 := "test input"
+	input2 := "test input"
+	input3 := "different input"
+
+	hash1 := computeHash(input1)
+	hash2 := computeHash(input2)
+	hash3 := computeHash(input3)
+
+	if hash1 != hash2 {
+		t.Error("Same input should produce same hash")
+	}
+	if hash1 == hash3 {
+		t.Error("Different input should produce different hash (very likely)")
+	}
+	if hash1 == 0 {
+		t.Error("Hash should not be 0")
+	}
+}
+
+// TestSignature_ConsistentHashing verifies that two lists built from the same tokens get
+// the same hash and compare as equal.
+func TestSignature_ConsistentHashing(t *testing.T) {
+	shared := []Token{
+		{Type: TokenHTTPMethod, Value: "GET"},
+		{Type: TokenWhitespace, Value: " "},
+		{Type: TokenAbsolutePath, Value: "/api"},
+	}
+
+	// Both lists are created from the same token slice.
+	left := NewSignature(NewTokenListWithTokens(shared))
+	right := NewSignature(NewTokenListWithTokens(shared))
+
+	if left.Hash != right.Hash {
+		t.Error("Identical TokenLists should produce identical signature hashes")
+	}
+	if !left.Equals(right) {
+		t.Error("Identical TokenLists should produce equal signatures")
+	}
+}
+
+// containsAll reports whether str contains every entry of substrings.
+// It returns true when substrings is empty, and an empty entry matches any str.
+func containsAll(str string, substrings []string) bool {
+	for _, substr := range substrings {
+		if !strings.Contains(str, substr) {
+			return false
+		}
+	}
+	return true
+}
diff --git a/pkg/logs/patterns/token/token.go b/pkg/logs/patterns/token/token.go
new file mode 100644
index 000000000000..89b0dcdea904
--- /dev/null
+++ b/pkg/logs/patterns/token/token.go
@@ -0,0 +1,131 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+// Package token provides data structures and utilities for tokenizing log messages.
+package token
+
+import (
+	"fmt"
+)
+
+//go:generate stringer -type=TokenType -trimprefix=Token
+
+// TokenType.String() method is auto-generated by stringer
+// Run: go generate ./pkg/logs/patterns/token to regenerate the stringer file if you make changes to the TokenType enum
+
+// TokenType represents the type of a token; its String() method lives in the stringer-generated tokentype_string.go
+type TokenType int
+
+const (
+	// Basic token types (values are assigned sequentially by iota; re-run go generate after changing this list)
+	TokenUnknown    TokenType = iota // TokenUnknown is the unknown token type and the zero value
+	TokenWord                        // TokenWord is the word token type
+	TokenNumeric                     // TokenNumeric is the numeric token type
+	TokenWhitespace                  // TokenWhitespace is the whitespace token type
+
+	// Network-related tokens
+	TokenIPv4         // TokenIPv4 is the IPv4 token type
+	TokenIPv6         // TokenIPv6 is the IPv6 token type
+	TokenEmail        // TokenEmail is the email token type
+	TokenURI          // TokenURI is the URI token type
+	TokenAbsolutePath // TokenAbsolutePath is the absolute path token type
+
+	// HTTP-related tokens
+	TokenHTTPMethod // TokenHTTPMethod is the HTTP method token type
+	TokenHTTPStatus // TokenHTTPStatus is the HTTP status token type
+
+	// Log-related tokens
+	TokenSeverityLevel // TokenSeverityLevel is the severity level token type
+	TokenDate          // TokenDate is the date token type
+)
+
+// WildcardStatus describes a token's potential to become a wildcard; Token.Compare consults it when deciding whether two tokens merge
+type WildcardStatus int
+
+const (
+	// NotWildcard - This token cannot become a wildcard (zero value, so a Token built without a status is NotWildcard)
+	// Examples: whitespace or first word token
+	NotWildcard WildcardStatus = iota
+
+	// PotentialWildcard - This token can become a wildcard
+	// Examples: non-whitespace tokens that are not marked NotWildcard (e.g. not the first word token)
+	PotentialWildcard
+
+	// IsWildcard - This token is already a wildcard
+	IsWildcard
+)
+
+// MergeResult describes the result of comparing two tokens with Token.Compare
+type MergeResult int
+
+const (
+	// Conflict - Tokens cannot merge, abort pattern creation
+	// Examples: different types, differing whitespace, words with different values unless both are PotentialWildcard
+	Conflict MergeResult = iota
+
+	// Identical - Tokens are the same, keep as-is
+	// Examples: "Error" vs "Error", wildcard receiver vs any value of same type
+	Identical
+
+	// Wildcard - Tokens can merge into wildcard
+	// Examples: "connection" vs "replication", "user123" vs "admin456", "GET" vs "POST"
+	Wildcard
+)
+
+// Token represents a single token in a log message
+type Token struct {
+	Type     TokenType      // category of the token
+	Value    string         // string value of the token
+	Wildcard WildcardStatus // whether the token cannot, may, or already did become a wildcard
+}
+
+// NewToken returns a Token value (not a pointer) with the given type, value and wildcard status
+func NewToken(tokenType TokenType, value string, wildcard WildcardStatus) Token {
+	return Token{
+		Type:     tokenType,
+		Value:    value,
+		Wildcard: wildcard,
+	}
+}
+
+// String returns the token formatted as "<Type>(<Value>)", e.g. "Word(hello)"; the wildcard status is not part of the output
+func (t *Token) String() string {
+	return fmt.Sprintf("%s(%s)", t.Type, t.Value)
+}
+
+// Compare reports how t2 merges into t (Conflict, Identical or Wildcard); t2 must be non-nil and the result is not symmetric
+func (t *Token) Compare(t2 *Token) MergeResult {
+	// Different types cannot merge
+	if t.Type != t2.Type {
+		return Conflict
+	}
+
+	// Same type same value - check this first before type-specific logic
+	if t.Value == t2.Value {
+		return Identical
+	}
+
+	// t is already a wildcard - matches any value of same type (t2's wildcard status is not consulted here)
+	if t.Wildcard == IsWildcard {
+		return Identical
+	}
+
+	// Whitespace never wildcards (structural): differing whitespace values conflict
+	if t.Type == TokenWhitespace {
+		return Conflict
+	}
+
+	// Words only wildcard if both are PotentialWildcard; any other status combination conflicts
+	if t.Type == TokenWord {
+		if t.Wildcard == PotentialWildcard && t2.Wildcard == PotentialWildcard {
+			return Wildcard
+		}
+		return Conflict
+	}
+
+	// Structured types (HTTP, IP, Numeric, Date, etc.) wildcard if same type, regardless of wildcard status
+	// Same TokenDate type means same format structure (e.g., both RFC3339)
+	return Wildcard
+}
diff --git a/pkg/logs/patterns/token/token_test.go b/pkg/logs/patterns/token/token_test.go
new file mode 100644
index 000000000000..b9a81b68479e
--- /dev/null
+++ b/pkg/logs/patterns/token/token_test.go
@@ -0,0 +1,103 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+package token
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestNewToken(t *testing.T) {
+	token := NewToken(TokenWord, "test", PotentialWildcard)
+
+	assert.Equal(t, TokenWord, token.Type, "Expected TokenWord")
+	assert.Equal(t, "test", token.Value, "Expected 'test'")
+	assert.Equal(t, PotentialWildcard, token.Wildcard, "Expected PotentialWildcard")
+}
+
+func TestToken_Compare_DifferentTypes(t *testing.T) {
+	word := NewToken(TokenWord, "hello", PotentialWildcard)
+	number := NewToken(TokenNumeric, "123", PotentialWildcard)
+
+	result := word.Compare(&number)
+	assert.Equal(t, Conflict, result, "Different types should return Conflict")
+}
+
+func TestToken_Compare_SameValue(t *testing.T) {
+	token1 := NewToken(TokenWord, "hello", PotentialWildcard)
+	token2 := NewToken(TokenWord, "hello", PotentialWildcard)
+
+	result := token1.Compare(&token2)
+	assert.Equal(t, Identical, result, "Same values should return Identical")
+}
+
+func TestToken_Compare_WildcardMatches(t *testing.T) {
+	wildcard := NewToken(TokenWord, "anything", IsWildcard)
+	concrete := NewToken(TokenWord, "hello", PotentialWildcard)
+
+	result := wildcard.Compare(&concrete)
+	assert.Equal(t, Identical, result, "Wildcard should match any value of same type")
+}
+
+func TestToken_Compare_WhitespaceConflict(t *testing.T) {
+	space1 := NewToken(TokenWhitespace, " ", NotWildcard)
+	space2 := NewToken(TokenWhitespace, "  ", NotWildcard)
+
+	result := space1.Compare(&space2)
+	assert.Equal(t, Conflict, result, "Different whitespace should return Conflict")
+}
+
+func TestToken_Compare_WordsWithDifferentValues(t *testing.T) {
+	// Both PotentialWildcard - should merge to wildcard
+	word1 := NewToken(TokenWord, "hello", PotentialWildcard)
+	word2 := NewToken(TokenWord, "world", PotentialWildcard)
+
+	result := word1.Compare(&word2)
+	assert.Equal(t, Wildcard, result, "Different PotentialWildcard words should return Wildcard")
+
+	// One is NotWildcard - should conflict
+	word3 := NewToken(TokenWord, "INFO", NotWildcard)
+	word4 := NewToken(TokenWord, "ERROR", PotentialWildcard)
+
+	result2 := word3.Compare(&word4)
+	assert.Equal(t, Conflict, result2, "Words with NotWildcard should return Conflict")
+}
+
+func TestToken_Compare_StructuredTypes(t *testing.T) {
+	// Different IPs should merge to wildcard
+	ip1 := NewToken(TokenIPv4, "192.168.1.1", PotentialWildcard)
+	ip2 := NewToken(TokenIPv4, "10.0.0.1", PotentialWildcard)
+
+	result := ip1.Compare(&ip2)
+	assert.Equal(t, Wildcard, result, "Different structured types (same type) should return Wildcard")
+
+	// Different numbers should merge to wildcard
+	num1 := NewToken(TokenNumeric, "123", PotentialWildcard)
+	num2 := NewToken(TokenNumeric, "456", PotentialWildcard)
+
+	result2 := num1.Compare(&num2)
+	assert.Equal(t, Wildcard, result2, "Different numeric values should return Wildcard")
+
+	// Different dates should merge to wildcard
+	date1 := NewToken(TokenDate, "2023-01-01", PotentialWildcard)
+	date2 := NewToken(TokenDate, "2023-12-31", PotentialWildcard)
+
+	result3 := date1.Compare(&date2)
+	assert.Equal(t, Wildcard, result3, "Different dates should return Wildcard")
+}
+
+func TestToken_String(t *testing.T) {
+	// Regular token
+	token := Token{Type: TokenWord, Value: "hello"}
+	expected := "Word(hello)"
+	assert.Equal(t, expected, token.String(), "Token String() should format correctly")
+
+	// Wildcard token - still shows the value, not "*"
+	wildcardToken := Token{Type: TokenWord, Value: "test", Wildcard: IsWildcard}
+	expectedWildcard := "Word(test)"
+	assert.Equal(t, expectedWildcard, wildcardToken.String(), "Wildcard token String() should show value")
+}
diff --git a/pkg/logs/patterns/token/tokenlist.go b/pkg/logs/patterns/token/tokenlist.go
new file mode 100644
index 000000000000..8cdbfa915fe7
--- /dev/null
+++ b/pkg/logs/patterns/token/tokenlist.go
@@ -0,0 +1,59 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+// Package token provides data structures and utilities for tokenizing log messages.
+package token
+
+import (
+	"strings"
+)
+
+// TokenList represents a sequence of tokens
+type TokenList struct {
+	Tokens []Token // tokens in the order they were supplied or appended
+}
+
+// NewTokenList returns an empty TokenList whose Tokens slice is non-nil
+func NewTokenList() *TokenList {
+	return &TokenList{Tokens: []Token{}}
+}
+
+// NewTokenListWithTokens creates a new TokenList that stores the provided slice as-is (no copy), so it shares storage with the caller
+func NewTokenListWithTokens(tokens []Token) *TokenList {
+	return &TokenList{Tokens: tokens}
+}
+
+// Add appends zero or more tokens to the end of the list, preserving their order
+func (tl *TokenList) Add(tokens ...Token) {
+	tl.Tokens = append(tl.Tokens, tokens...)
+}
+
+// AddToken builds a token from the given type, value and wildcard status and appends it to the list
+func (tl *TokenList) AddToken(tokenType TokenType, value string, wildcard WildcardStatus) {
+	tl.Add(NewToken(tokenType, value, wildcard))
+}
+
+// Length returns the number of tokens currently in the list (0 for a nil or empty Tokens slice)
+func (tl *TokenList) Length() int {
+	return len(tl.Tokens)
+}
+
+// IsEmpty reports whether the list holds no tokens
+func (tl *TokenList) IsEmpty() bool {
+	return tl.Length() == 0
+}
+
+// String returns a string representation
+func (tl *TokenList) String() string {
+	if tl.IsEmpty() {
+		return "[]"
+	}
+
+	var parts []string
+	for _, token := range tl.Tokens {
+		parts = append(parts, token.String())
+	}
+	return "[" + strings.Join(parts, ", ") + "]"
+}
diff --git a/pkg/logs/patterns/token/tokenlist_test.go b/pkg/logs/patterns/token/tokenlist_test.go
new file mode 100644
index 000000000000..8a6b571f6f3d
--- /dev/null
+++ b/pkg/logs/patterns/token/tokenlist_test.go
@@ -0,0 +1,115 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+package token
+
+import (
+	"testing"
+)
+
+func TestTokenList_NewTokenList(t *testing.T) {
+	// Empty token list
+	tl := NewTokenList()
+	if tl == nil {
+		t.Fatal("NewTokenList should not return nil")
+	}
+	if !tl.IsEmpty() {
+		t.Error("New TokenList should be empty")
+	}
+	if tl.Length() != 0 {
+		t.Errorf("New TokenList should have length 0, got %d", tl.Length())
+	}
+
+	// Token list with initial tokens
+	tokens := []Token{
+		{Type: TokenWord, Value: "hello"},
+		{Type: TokenWhitespace, Value: " "},
+		{Type: TokenWord, Value: "world"},
+	}
+	tl2 := NewTokenListWithTokens(tokens)
+	if tl2.Length() != 3 {
+		t.Errorf("Expected length 3, got %d", tl2.Length())
+	}
+	if tl2.IsEmpty() {
+		t.Error("TokenList with tokens should not be empty")
+	}
+}
+
+func TestTokenList_Add(t *testing.T) {
+	tl := NewTokenList()
+
+	token1 := Token{Type: TokenWord, Value: "hello"}
+	tl.Add(token1)
+
+	if tl.Length() != 1 {
+		t.Errorf("Expected length 1, got %d", tl.Length())
+	}
+	if tl.IsEmpty() {
+		t.Error("TokenList should not be empty after adding token")
+	}
+	if tl.Tokens[0].Value != "hello" {
+		t.Errorf("Expected token value 'hello', got '%s'", tl.Tokens[0].Value)
+	}
+}
+
+func TestTokenList_String(t *testing.T) {
+	// Empty list
+	tl := NewTokenList()
+	if tl.String() != "[]" {
+		t.Errorf("Empty TokenList string should be '[]', got '%s'", tl.String())
+	}
+
+	// Non-empty list
+	tl.Add(Token{Type: TokenWord, Value: "hello"})
+	tl.Add(Token{Type: TokenWhitespace, Value: " "})
+	tl.Add(Token{Type: TokenWord, Value: "world"})
+
+	expected := "[Word(hello), Whitespace( ), Word(world)]"
+	if tl.String() != expected {
+		t.Errorf("Expected '%s', got '%s'", expected, tl.String())
+	}
+}
+
+func TestTokenList_PositionSignature(t *testing.T) {
+	// Empty token list
+	emptyTL := NewTokenList()
+	if positionSignature(emptyTL) != "" {
+		t.Error("Empty TokenList should have empty position signature")
+	}
+
+	// Non-empty token list
+	tokens := []Token{
+		{Type: TokenHTTPMethod, Value: "GET"},
+		{Type: TokenWhitespace, Value: " "},
+		{Type: TokenAbsolutePath, Value: "/api"},
+	}
+	tl := NewTokenListWithTokens(tokens)
+
+	expectedPosition := "HTTPMethod|Whitespace|AbsolutePath"
+	if positionSignature(tl) != expectedPosition {
+		t.Errorf("Expected position signature '%s', got '%s'", expectedPosition, positionSignature(tl))
+	}
+}
+
+func TestTokenList_Signature(t *testing.T) {
+	// Test that NewSignature(tl) populates Length, Hash and Position for a non-empty TokenList
+	tokens := []Token{
+		{Type: TokenHTTPMethod, Value: "GET"},
+		{Type: TokenWhitespace, Value: " "},
+		{Type: TokenAbsolutePath, Value: "/api"},
+	}
+	tl := NewTokenListWithTokens(tokens)
+	sig := NewSignature(tl)
+
+	if sig.Length != 3 { // one entry per token in the list
+		t.Errorf("Expected signature length 3, got %d", sig.Length)
+	}
+	if sig.Hash == 0 {
+		t.Error("Signature hash should not be 0")
+	}
+	if sig.Position == "" {
+		t.Error("Signature position should not be empty")
+	}
+}
diff --git a/pkg/logs/patterns/token/tokentype_string.go b/pkg/logs/patterns/token/tokentype_string.go
new file mode 100644
index 000000000000..91b756894d27
--- /dev/null
+++ b/pkg/logs/patterns/token/tokentype_string.go
@@ -0,0 +1,36 @@
+// Code generated by "stringer -type=TokenType -trimprefix=Token"; DO NOT EDIT.
+
+package token
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[TokenUnknown-0]
+	_ = x[TokenWord-1]
+	_ = x[TokenNumeric-2]
+	_ = x[TokenWhitespace-3]
+	_ = x[TokenIPv4-4]
+	_ = x[TokenIPv6-5]
+	_ = x[TokenEmail-6]
+	_ = x[TokenURI-7]
+	_ = x[TokenAbsolutePath-8]
+	_ = x[TokenHTTPMethod-9]
+	_ = x[TokenHTTPStatus-10]
+	_ = x[TokenSeverityLevel-11]
+	_ = x[TokenDate-12]
+}
+
+const _TokenType_name = "UnknownWordNumericWhitespaceIPv4IPv6EmailURIAbsolutePathHTTPMethodHTTPStatusSeverityLevelDate"
+
+var _TokenType_index = [...]uint8{0, 7, 11, 18, 28, 32, 36, 41, 44, 56, 66, 76, 89, 93}
+
+func (i TokenType) String() string {
+	idx := int(i) - 0
+	if i < 0 || idx >= len(_TokenType_index)-1 {
+		return "TokenType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _TokenType_name[_TokenType_index[idx]:_TokenType_index[idx+1]]
+}
diff --git a/pkg/logs/pipeline/pipeline.go b/pkg/logs/pipeline/pipeline.go
index 499209b0d313..9f36a896cc28 100644
--- a/pkg/logs/pipeline/pipeline.go
+++ b/pkg/logs/pipeline/pipeline.go
@@ -20,7 +20,9 @@ import (
 	"github.com/DataDog/datadog-agent/pkg/logs/metrics"
 	"github.com/DataDog/datadog-agent/pkg/logs/processor"
 	"github.com/DataDog/datadog-agent/pkg/logs/sender"
+	grpcsender "github.com/DataDog/datadog-agent/pkg/logs/sender/grpc"
 	compressioncommon "github.com/DataDog/datadog-agent/pkg/util/compression"
+	"github.com/DataDog/datadog-agent/pkg/util/log"
 )
 
 // Pipeline processes and sends messages to the backend
@@ -54,6 +56,10 @@ func NewPipeline(
 		} else {
 			encoder = processor.JSONServerlessInitEncoder
 		}
+	} else if endpoints.UseGRPC {
+		// Throwaway code to test with existing pipelines
+		// TODO change to real encoder once State component is ready
+		encoder = grpcsender.MockEncoder
 	} else if endpoints.UseHTTP {
 		encoder = processor.JSONEncoder
 	} else if endpoints.UseProto {
@@ -105,13 +111,20 @@ func getStrategy(
 	compressor logscompression.Component,
 	instanceID string,
 ) sender.Strategy {
-	if endpoints.UseHTTP || serverlessMeta.IsEnabled() {
+	if endpoints.UseGRPC || endpoints.UseHTTP || serverlessMeta.IsEnabled() {
 		var encoder compressioncommon.Compressor
 		encoder = compressor.NewCompressor(compressioncommon.NoneKind, 0)
 		if endpoints.Main.UseCompression {
 			encoder = compressor.NewCompressor(endpoints.Main.CompressionKind, endpoints.Main.CompressionLevel)
 		}
+		if endpoints.UseGRPC {
+			translator := grpcsender.NewMessageTranslator()
+			// TODO: Consider sharing cluster manager across pipelines for better pattern clustering:
+			// translator := grpcsender.NewMessageTranslator(getSharedClusterManager())
+			statefulInputChan := translator.Start(inputChan, pkgconfigsetup.Datadog().GetInt("logs_config.message_channel_size"))
 
+			return grpcsender.NewBatchStrategy(statefulInputChan, outputChan, flushChan, endpoints.BatchWait, endpoints.BatchMaxSize, endpoints.BatchMaxContentSize, "logs", encoder, pipelineMonitor, instanceID)
+		}
 		return sender.NewBatchStrategy(
 			inputChan,
 			outputChan,
@@ -125,5 +138,7 @@ func getStrategy(
 			pipelineMonitor,
 			instanceID)
 	}
+
+	log.Infof("Pipeline: Using StreamStrategy (default)")
 	return sender.NewStreamStrategy(inputChan, outputChan, compressor.NewCompressor(compressioncommon.NoneKind, 0))
 }
diff --git a/pkg/logs/pipeline/provider.go b/pkg/logs/pipeline/provider.go
index 9737f8a5c007..ad8c3002b800 100644
--- a/pkg/logs/pipeline/provider.go
+++ b/pkg/logs/pipeline/provider.go
@@ -22,6 +22,7 @@ import (
 	"github.com/DataDog/datadog-agent/pkg/logs/message"
 	"github.com/DataDog/datadog-agent/pkg/logs/metrics"
 	"github.com/DataDog/datadog-agent/pkg/logs/sender"
+	grpcsender "github.com/DataDog/datadog-agent/pkg/logs/sender/grpc"
 	httpsender "github.com/DataDog/datadog-agent/pkg/logs/sender/http"
 	tcpsender "github.com/DataDog/datadog-agent/pkg/logs/sender/tcp"
 	"github.com/DataDog/datadog-agent/pkg/logs/status/statusinterface"
@@ -88,7 +89,9 @@ func NewProvider(
 	var senderImpl sender.PipelineComponent
 	serverlessMeta := sender.NewServerlessMeta(serverless)
 
-	if endpoints.UseHTTP {
+	if endpoints.UseGRPC {
+		senderImpl = grpcsender.NewSender(numberOfPipelines, cfg, sink, endpoints, destinationsContext)
+	} else if endpoints.UseHTTP {
 		senderImpl = httpSender(numberOfPipelines, cfg, sink, endpoints, destinationsContext, serverlessMeta, legacyMode)
 	} else {
 		senderImpl = tcpSender(numberOfPipelines, cfg, sink, endpoints, destinationsContext, status, serverlessMeta, legacyMode)
diff --git a/pkg/logs/processor/processor.go b/pkg/logs/processor/processor.go
index 6bff04fd564a..a4583b491aae 100644
--- a/pkg/logs/processor/processor.go
+++ b/pkg/logs/processor/processor.go
@@ -18,6 +18,7 @@ import (
 	"github.com/DataDog/datadog-agent/pkg/logs/diagnostic"
 	"github.com/DataDog/datadog-agent/pkg/logs/message"
 	"github.com/DataDog/datadog-agent/pkg/logs/metrics"
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering"
 	"github.com/DataDog/datadog-agent/pkg/util/log"
 )
 
@@ -51,6 +52,9 @@ type Processor struct {
 	configChan                chan failoverConfig
 	failoverConfig            failoverConfig
 
+	// Pattern extraction components
+	clusterManager *clustering.ClusterManager
+
 	// Telemetry
 	pipelineMonitor metrics.PipelineMonitor
 	utilization     metrics.UtilizationMonitor
diff --git a/pkg/logs/sender/grpc/batch_strategy.go b/pkg/logs/sender/grpc/batch_strategy.go
new file mode 100644
index 000000000000..3bac15e1c61b
--- /dev/null
+++ b/pkg/logs/sender/grpc/batch_strategy.go
@@ -0,0 +1,260 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+//nolint:revive // TODO(AML) Fix revive linter
+package grpc
+
+import (
+	"time"
+
+	"github.com/benbjohnson/clock"
+	"google.golang.org/protobuf/proto"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/message"
+	"github.com/DataDog/datadog-agent/pkg/logs/metrics"
+	"github.com/DataDog/datadog-agent/pkg/logs/sender"
+	"github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb"
+	"github.com/DataDog/datadog-agent/pkg/telemetry"
+	"github.com/DataDog/datadog-agent/pkg/util/compression"
+	"github.com/DataDog/datadog-agent/pkg/util/log"
+)
+
+var (
+	tlmDroppedTooLarge = telemetry.NewCounter("logs_sender_grpc_batch_strategy", "dropped_too_large", []string{"pipeline"}, "Number of payloads dropped due to being too large")
+)
+
+// StatefulExtra holds state changes (non-Log datums) from a batch
+// Used by inflight tracker to maintain snapshot state for stream rotation
+type StatefulExtra struct {
+	StateChanges []*statefulpb.Datum
+}
+
+// isStateDatum reports whether datum carries a state change rather than a log:
+// a pattern or dictionary-entry define/delete operation
+func isStateDatum(datum *statefulpb.Datum) bool {
+	switch datum.Data.(type) {
+	case *statefulpb.Datum_PatternDefine, *statefulpb.Datum_PatternDelete:
+		return true
+	case *statefulpb.Datum_DictEntryDefine, *statefulpb.Datum_DictEntryDelete:
+		return true
+	}
+	return false
+}
+
+// batchStrategy contains batching logic for gRPC sender without serializer
+// It collects Datum objects from StatefulMessages and creates Payload with serialized DatumSequence
+// Note: Serverless logs are not supported in this PoC implementation
+type batchStrategy struct {
+	inputChan    chan *message.StatefulMessage // messages to batch; closed by Stop
+	outputChan   chan *message.Payload         // receives one payload per flushed batch
+	flushChan    chan struct{}                 // signals an on-demand flush
+	buffer       *sender.MessageBuffer         // metadata of the messages in the current batch
+	pipelineName string                        // used in drop logs and as the drop-counter tag
+	batchWait    time.Duration                 // interval of the periodic flush ticker
+	compression  compression.Compressor        // applied to the marshaled DatumSequence
+	stopChan     chan struct{}                 // closed when the goroutine has finished
+	clock        clock.Clock                   // source of the flush ticker
+
+	// For gRPC: store Datums separately since MessageBuffer only stores metadata; kept in step with buffer by addMessage/flushBuffer
+	grpcDatums []*statefulpb.Datum
+
+	// Telemetry
+	pipelineMonitor metrics.PipelineMonitor
+	utilization     metrics.UtilizationMonitor
+	instanceID      string
+}
+
+// NewBatchStrategy returns a new gRPC batch strategy that uses the real clock (clock.New()); nothing is consumed until Start is called
+func NewBatchStrategy(inputChan chan *message.StatefulMessage,
+	outputChan chan *message.Payload,
+	flushChan chan struct{},
+	batchWait time.Duration,
+	maxBatchSize int,
+	maxContentSize int,
+	pipelineName string,
+	compression compression.Compressor,
+	pipelineMonitor metrics.PipelineMonitor,
+	instanceID string,
+) sender.Strategy {
+	return newBatchStrategyWithClock(inputChan, outputChan, flushChan, batchWait, maxBatchSize, maxContentSize, pipelineName, clock.New(), compression, pipelineMonitor, instanceID)
+}
+
+func newBatchStrategyWithClock(inputChan chan *message.StatefulMessage,
+	outputChan chan *message.Payload,
+	flushChan chan struct{},
+	batchWait time.Duration,
+	maxBatchSize int,
+	maxContentSize int,
+	pipelineName string,
+	clock clock.Clock,
+	compression compression.Compressor,
+	pipelineMonitor metrics.PipelineMonitor,
+	instanceID string,
+) sender.Strategy {
+	// Same as NewBatchStrategy but with an injectable clock (presumably so tests can drive the flush ticker - confirm).
+	return &batchStrategy{
+		inputChan:       inputChan,
+		outputChan:      outputChan,
+		flushChan:       flushChan,
+		buffer:          sender.NewMessageBuffer(maxBatchSize, maxContentSize), // both batch limits are enforced by the buffer
+		batchWait:       batchWait,
+		compression:     compression,
+		stopChan:        make(chan struct{}),
+		pipelineName:    pipelineName,
+		clock:           clock,
+		grpcDatums:      make([]*statefulpb.Datum, 0),
+		pipelineMonitor: pipelineMonitor,
+		utilization:     pipelineMonitor.MakeUtilizationMonitor(metrics.StrategyTlmName, instanceID),
+		instanceID:      instanceID,
+	}
+}
+
+// Stop closes inputChan and blocks until the goroutine started by Start has done its final flush and exited (mostly copy/pasted from sender/batch_strategy.go)
+func (s *batchStrategy) Stop() {
+	close(s.inputChan)
+	<-s.stopChan
+}
+
+// Start launches the batching goroutine, which runs until inputChan is closed (mostly copy/pasted from sender/batch_strategy.go)
+func (s *batchStrategy) Start() {
+	go func() {
+		flushTicker := s.clock.Ticker(s.batchWait)
+		defer func() { // on exit: flush what is left, stop the ticker, then unblock Stop
+			s.flushBuffer(s.outputChan)
+			flushTicker.Stop()
+			close(s.stopChan)
+		}()
+		for {
+			select {
+			case m, isOpen := <-s.inputChan:
+				if !isOpen {
+					// inputChan has been closed, no more payloads are expected
+					return
+				}
+				s.processMessage(m, s.outputChan)
+			case <-flushTicker.C:
+				// flush the payloads at a regular interval so pending messages don't wait here for too long.
+				s.flushBuffer(s.outputChan)
+			case <-s.flushChan:
+				// flush payloads on demand, used for infrequently running serverless functions
+				s.flushBuffer(s.outputChan)
+			}
+		}
+	}()
+}
+
+func (s *batchStrategy) addMessage(m *message.StatefulMessage) bool {
+	// addMessage tries to append m to the current batch and reports whether it was accepted.
+	// No utilization tracking here; the proto marshaling is tracked in sendMessagesWithDatums()
+
+	// Defensive check - should never happen with proper message construction
+	if m.Datum == nil {
+		return false
+	}
+
+	// Try to add to buffer; the datum is recorded only when the buffer accepts the metadata, keeping both in step
+	if s.buffer.AddMessageWithSize(m.Metadata, m.Metadata.RawDataLen) {
+		s.grpcDatums = append(s.grpcDatums, m.Datum)
+		return true
+	}
+
+	// Buffer rejected the message (presumably full or message over the content limit); the caller decides what to do
+	return false
+}
+
+// processMessage adds m to the current batch, flushing first when it does not fit; a message without a Datum is dropped up front
+func (s *batchStrategy) processMessage(m *message.StatefulMessage, outputChan chan *message.Payload) {
+	if m.Datum == nil { // nothing to send: do not flush the batch or report the drop as too-large
+		log.Warnf("Dropped message in pipeline=%s reason=missing-datum", s.pipelineName)
+		return
+	}
+	if m.Metadata.Origin != nil {
+		m.Metadata.Origin.LogSource.LatencyStats.Add(m.Metadata.GetLatency())
+	}
+	added := s.addMessage(m)
+	if !added || s.buffer.IsFull() {
+		s.flushBuffer(outputChan)
+	}
+	if !added { // the buffer may simply have been full: retry once now that it is empty
+		added = s.addMessage(m)
+		if !added {
+			log.Warnf("Dropped message in pipeline=%s reason=too-large ContentLength=%d ContentSizeLimit=%d", s.pipelineName, m.Metadata.RawDataLen, s.buffer.ContentSizeLimit())
+			tlmDroppedTooLarge.Inc(s.pipelineName)
+		}
+	}
+}
+
+// flushBuffer sends all the messages that are stored in the buffer and forwards them
+// to the next stage of the pipeline. It is a no-op when the buffer is empty.
+func (s *batchStrategy) flushBuffer(outputChan chan *message.Payload) {
+	if s.buffer.IsEmpty() {
+		return
+	}
+
+	s.utilization.Start() // matched by the deferred utilization.Stop() in sendMessagesWithDatums
+
+	messagesMetadata := s.buffer.GetMessages()
+	s.buffer.Clear()
+
+	// Hand the collected Datums to the sender and start the next batch with a fresh slice
+	grpcDatums := s.grpcDatums
+	s.grpcDatums = make([]*statefulpb.Datum, 0)
+
+	s.sendMessagesWithDatums(messagesMetadata, grpcDatums, outputChan)
+}
+
+func (s *batchStrategy) sendMessagesWithDatums(messagesMetadata []*message.MessageMetadata, grpcDatums []*statefulpb.Datum, outputChan chan *message.Payload) {
+	defer s.utilization.Stop() // pairs with utilization.Start() in flushBuffer
+
+	unencodedSize := 0 // sum of the raw sizes of every message in the batch
+	for _, msgMeta := range messagesMetadata {
+		unencodedSize += msgMeta.RawDataLen
+	}
+
+	// Extract all state changes from this batch for snapshot management
+	var stateChanges []*statefulpb.Datum
+	for _, datum := range grpcDatums {
+		if isStateDatum(datum) {
+			stateChanges = append(stateChanges, datum)
+		}
+	}
+
+	// Create DatumSequence (state changes and logs together, in arrival order) and marshal to bytes
+	datumSeq := &statefulpb.DatumSequence{
+		Data: grpcDatums,
+	}
+
+	serialized, err := proto.Marshal(datumSeq)
+	if err != nil {
+		log.Errorf("Failed to marshal DatumSequence: %v", err)
+		return // the whole batch is dropped here; only the log line records it
+	}
+
+	// Compress the serialized protobuf data
+	compressed, err := s.compression.Compress(serialized)
+	if err != nil {
+		log.Errorf("Failed to compress DatumSequence: %v", err)
+		return // the whole batch is dropped here; only the log line records it
+	}
+
+	// Create payload with compressed data
+	p := &message.Payload{
+		MessageMetas:  messagesMetadata,
+		Encoded:       compressed,
+		Encoding:      s.compression.ContentEncoding(),
+		UnencodedSize: unencodedSize,
+	}
+
+	// Store batch-level state changes in payload; StatefulExtra stays unset when the batch has none
+	if len(stateChanges) > 0 {
+		p.StatefulExtra = &StatefulExtra{
+			StateChanges: stateChanges,
+		}
+	}
+
+	outputChan <- p // blocks until the next stage receives the payload when outputChan is unbuffered or full
+	s.pipelineMonitor.ReportComponentEgress(p, metrics.StrategyTlmName, s.instanceID)
+	s.pipelineMonitor.ReportComponentIngress(p, metrics.SenderTlmName, metrics.SenderTlmInstanceID)
+}
diff --git a/pkg/logs/sender/grpc/batch_strategy_test.go b/pkg/logs/sender/grpc/batch_strategy_test.go
new file mode 100644
index 000000000000..4a88d2deb216
--- /dev/null
+++ b/pkg/logs/sender/grpc/batch_strategy_test.go
@@ -0,0 +1,654 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+//go:build test
+
+package grpc
+
+import (
+	"testing"
+	"time"
+
+	"github.com/benbjohnson/clock"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"google.golang.org/protobuf/proto"
+
+	compressionfx "github.com/DataDog/datadog-agent/comp/serializer/logscompression/fx-mock"
+	"github.com/DataDog/datadog-agent/pkg/logs/message"
+	"github.com/DataDog/datadog-agent/pkg/logs/metrics"
+	"github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb"
+	"github.com/DataDog/datadog-agent/pkg/util/compression"
+)
+
+// createTestStatefulMessage builds a StatefulMessage whose Datum is a raw log
+// holding content, with RawDataLen set to the length of content.
+func createTestStatefulMessage(content string) *message.StatefulMessage {
+	base := message.NewMessage([]byte(content), nil, "", 0)
+	base.MessageMetadata.RawDataLen = len(content)
+
+	rawLog := &statefulpb.Log{
+		Timestamp: 12345,
+		Content:   &statefulpb.Log_Raw{Raw: content},
+	}
+
+	return &message.StatefulMessage{
+		Metadata: &base.MessageMetadata,
+		Datum:    &statefulpb.Datum{Data: &statefulpb.Datum_Logs{Logs: rawLog}},
+	}
+}
+
+// TestBatchStrategySendsPayloadWhenBufferIsFull verifies that a payload is
+// emitted once the number of buffered messages reaches the batch size limit.
+func TestBatchStrategySendsPayloadWhenBufferIsFull(t *testing.T) {
+	const maxBatchSize = 2
+
+	input := make(chan *message.StatefulMessage)
+	output := make(chan *message.Payload)
+	flushChan := make(chan struct{})
+	compressor := compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1)
+
+	s := NewBatchStrategy(
+		input, output, flushChan,
+		100*time.Millisecond, maxBatchSize, 1000,
+		"test", compressor, metrics.NewNoopPipelineMonitor(""), "test")
+	s.Start()
+
+	first := createTestStatefulMessage("a")
+	second := createTestStatefulMessage("b")
+	input <- first
+	input <- second
+
+	// The second message fills the batch, so a payload must come out without
+	// any manual flush.
+	payload := <-output
+	assert.Len(t, payload.MessageMetas, 2)
+	assert.Equal(t, first.Metadata, payload.MessageMetas[0])
+	assert.Equal(t, second.Metadata, payload.MessageMetas[1])
+	assert.Equal(t, "identity", payload.Encoding)
+	assert.Equal(t, 2, payload.UnencodedSize)
+
+	// The encoded bytes must decode into a DatumSequence holding both logs in order.
+	decoded := &statefulpb.DatumSequence{}
+	require.NoError(t, proto.Unmarshal(payload.Encoded, decoded))
+	assert.Len(t, decoded.Data, 2)
+	assert.Equal(t, "a", decoded.Data[0].GetLogs().GetRaw())
+	assert.Equal(t, "b", decoded.Data[1].GetLogs().GetRaw())
+
+	s.Stop()
+
+	_, isOpen := <-input
+	if isOpen {
+		assert.Fail(t, "input should be closed")
+	}
+}
+
+// TestBatchStrategySendsPayloadWhenBufferIsOutdated verifies that a buffered
+// message is flushed once the mock clock moves past the flush interval, and
+// that this keeps working over several consecutive rounds.
+func TestBatchStrategySendsPayloadWhenBufferIsOutdated(t *testing.T) {
+	const flushInterval = 100 * time.Millisecond
+
+	input := make(chan *message.StatefulMessage)
+	output := make(chan *message.Payload)
+	flushChan := make(chan struct{})
+	mockClock := clock.NewMock()
+	compressor := compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1)
+
+	// The batch size (100) is far above what this test sends, so only the
+	// timer can trigger a flush.
+	s := newBatchStrategyWithClock(
+		input, output, flushChan,
+		flushInterval, 100, 1000,
+		"test", mockClock, compressor, metrics.NewNoopPipelineMonitor(""), "test")
+	s.Start()
+
+	for i := 0; i < 3; i++ {
+		sent := createTestStatefulMessage("test")
+		input <- sent
+
+		// Jump two intervals ahead; the strategy should flush in that time.
+		mockClock.Add(2 * flushInterval)
+
+		payload := <-output
+		assert.EqualValues(t, sent.Metadata, payload.MessageMetas[0])
+
+		// The payload must decode into a DatumSequence with exactly one datum.
+		decoded := &statefulpb.DatumSequence{}
+		require.NoError(t, proto.Unmarshal(payload.Encoded, decoded))
+		assert.Len(t, decoded.Data, 1)
+	}
+
+	s.Stop()
+	_, isOpen := <-input
+	if isOpen {
+		assert.Fail(t, "input should be closed")
+	}
+}
+
+// TestBatchStrategySendsPayloadWhenClosingInput verifies that stopping the
+// strategy flushes the buffered message even though the mock clock never moves.
+func TestBatchStrategySendsPayloadWhenClosingInput(t *testing.T) {
+	input := make(chan *message.StatefulMessage)
+	output := make(chan *message.Payload)
+	flushChan := make(chan struct{})
+	mockClock := clock.NewMock()
+	compressor := compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1)
+
+	s := newBatchStrategyWithClock(
+		input, output, flushChan,
+		100*time.Millisecond, 2, 1000,
+		"test", mockClock, compressor, metrics.NewNoopPipelineMonitor(""), "test")
+	s.Start()
+
+	sent := createTestStatefulMessage("test")
+	input <- sent
+
+	// Stop in the background: this test still has to drain the output channel.
+	go s.Stop()
+
+	_, isOpen := <-input
+	if isOpen {
+		assert.Fail(t, "input should be closed")
+	}
+
+	// The clock is never advanced, so the payload can only come from the
+	// stop-time flush; if that flush does not happen this receive hangs.
+	payload := <-output
+	assert.Equal(t, sent.Metadata, payload.MessageMetas[0])
+}
+
+// TestBatchStrategyShouldNotBlockWhenStoppingGracefully verifies that Stop
+// closes the input and still delivers the buffered message on the output.
+func TestBatchStrategyShouldNotBlockWhenStoppingGracefully(t *testing.T) {
+	input := make(chan *message.StatefulMessage)
+	output := make(chan *message.Payload)
+	flushChan := make(chan struct{})
+	compressor := compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1)
+
+	s := NewBatchStrategy(
+		input, output, flushChan,
+		100*time.Millisecond, 2, 1000,
+		"test", compressor, metrics.NewNoopPipelineMonitor(""), "test")
+	s.Start()
+
+	sent := createTestStatefulMessage("test")
+	input <- sent
+
+	// Stop in the background: this test still has to drain the output channel.
+	go s.Stop()
+
+	_, isOpen := <-input
+	if isOpen {
+		assert.Fail(t, "input should be closed")
+	}
+
+	payload := <-output
+	assert.Equal(t, sent.Metadata, payload.MessageMetas[0])
+}
+
+// TestBatchStrategySynchronousFlush verifies that messages stay buffered until
+// Stop is called, and that Stop then emits them as exactly one payload.
+func TestBatchStrategySynchronousFlush(t *testing.T) {
+	input := make(chan *message.StatefulMessage)
+	output := make(chan *message.Payload)
+	flushChan := make(chan struct{})
+	compressor := compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1)
+
+	// A one-hour flush interval and a batch size of 100 keep the strategy from
+	// flushing on its own during the test.
+	strategy := NewBatchStrategy(
+		input, output, flushChan,
+		time.Hour, 100, 10000,
+		"test", compressor, metrics.NewNoopPipelineMonitor(""), "test")
+	strategy.Start()
+
+	// Every message sent here is expected to remain buffered.
+	contents := []string{"a", "b", "c"}
+	wantMetas := make([]*message.MessageMetadata, 0, len(contents))
+	for _, content := range contents {
+		m := createTestStatefulMessage(content)
+		input <- m
+		wantMetas = append(wantMetas, m.Metadata)
+	}
+
+	// Nothing may have been emitted so far.
+	select {
+	case <-output:
+		assert.Fail(t, "there should be nothing on the output channel yet")
+	default:
+	}
+
+	// Stop triggers the flush; run it in the background so the payload can be
+	// read below.
+	go strategy.Stop()
+
+	_, isOpen := <-input
+	if isOpen {
+		assert.Fail(t, "input should be closed")
+	}
+
+	payload := <-output
+	assert.ElementsMatch(t, wantMetas, payload.MessageMetas)
+
+	// One payload only: the channel must be empty again.
+	select {
+	case <-output:
+		assert.Fail(t, "the output channel should still be empty")
+	default:
+	}
+}
+
+// TestBatchStrategyFlushChannel verifies that a signal on the flush channel
+// emits all buffered messages as exactly one payload.
+func TestBatchStrategyFlushChannel(t *testing.T) {
+	input := make(chan *message.StatefulMessage)
+	output := make(chan *message.Payload)
+	flushChan := make(chan struct{})
+	compressor := compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1)
+
+	// A one-hour flush interval and a batch size of 100 keep the strategy from
+	// flushing on its own during the test.
+	strategy := NewBatchStrategy(
+		input, output, flushChan,
+		time.Hour, 100, 10000,
+		"test", compressor, metrics.NewNoopPipelineMonitor(""), "test")
+	strategy.Start()
+
+	// Every message sent here is expected to remain buffered.
+	contents := []string{"a", "b", "c"}
+	wantMetas := make([]*message.MessageMetadata, 0, len(contents))
+	for _, content := range contents {
+		m := createTestStatefulMessage(content)
+		input <- m
+		wantMetas = append(wantMetas, m.Metadata)
+	}
+
+	// Nothing may have been emitted so far.
+	select {
+	case <-output:
+		assert.Fail(t, "there should be nothing on the output channel yet")
+	default:
+	}
+
+	// Request a manual flush.
+	flushChan <- struct{}{}
+
+	payload := <-output
+	assert.ElementsMatch(t, wantMetas, payload.MessageMetas)
+
+	// One payload only: the channel must be empty again.
+	select {
+	case <-output:
+		assert.Fail(t, "the output channel should still be empty")
+	default:
+	}
+
+	// Shut the strategy down and wait until the input channel is closed.
+	go strategy.Stop()
+
+	_, isOpen := <-input
+	if isOpen {
+		assert.Fail(t, "input should be closed")
+	}
+}
+
+// TestBatchStrategyMessageTooLarge verifies that a message exceeding the
+// content size limit is left out of the flushed payload.
+func TestBatchStrategyMessageTooLarge(t *testing.T) {
+	const contentSizeLimit = 10 // deliberately tiny
+
+	input := make(chan *message.StatefulMessage)
+	output := make(chan *message.Payload, 10) // buffered to prevent deadlock
+	flushChan := make(chan struct{})
+	compressor := compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1)
+
+	strategy := NewBatchStrategy(
+		input, output, flushChan,
+		time.Hour, 100, contentSizeLimit,
+		"test", compressor, metrics.NewNoopPipelineMonitor(""), "test")
+	strategy.Start()
+
+	// This one fits within the limit.
+	fitting := createTestStatefulMessage("small")
+	input <- fitting
+
+	// This one does not and is expected to be dropped.
+	oversized := createTestStatefulMessage("this message is way too large for the content size limit")
+	input <- oversized
+
+	flushChan <- struct{}{}
+
+	// Only the fitting message may appear in the payload.
+	payload := <-output
+	assert.Len(t, payload.MessageMetas, 1)
+	assert.Equal(t, fitting.Metadata, payload.MessageMetas[0])
+
+	// No second payload may follow.
+	select {
+	case <-output:
+		assert.Fail(t, "should not receive more payloads")
+	default:
+	}
+
+	strategy.Stop()
+}
+
+// TestBatchStrategyInvalidDatum verifies that a message with a nil Datum is
+// left out of the flushed payload while a valid message still goes through.
+func TestBatchStrategyInvalidDatum(t *testing.T) {
+	input := make(chan *message.StatefulMessage)
+	output := make(chan *message.Payload, 10) // buffered to prevent deadlock
+	flushChan := make(chan struct{})
+	compressor := compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1)
+
+	strategy := NewBatchStrategy(
+		input, output, flushChan,
+		time.Hour, 100, 1000,
+		"test", compressor, metrics.NewNoopPipelineMonitor(""), "test")
+	strategy.Start()
+
+	// A message without a Datum. A Datum of the wrong type cannot be built
+	// here because the field is strongly typed.
+	base := message.NewMessage([]byte("test"), nil, "", 0)
+	input <- &message.StatefulMessage{
+		Metadata: &base.MessageMetadata,
+		Datum:    nil,
+	}
+
+	// A well-formed message.
+	valid := createTestStatefulMessage("valid")
+	input <- valid
+
+	flushChan <- struct{}{}
+
+	// Only the well-formed message may appear in the payload.
+	payload := <-output
+	assert.Len(t, payload.MessageMetas, 1)
+	assert.Equal(t, valid.Metadata, payload.MessageMetas[0])
+
+	strategy.Stop()
+}
+
+// TestBatchStrategyCompression verifies the encoding label and the encoded
+// bytes of a payload produced with the identity (no-op) compressor.
+func TestBatchStrategyCompression(t *testing.T) {
+	const (
+		messageCount = 5
+		content      = "test message"
+	)
+
+	input := make(chan *message.StatefulMessage)
+	output := make(chan *message.Payload, 10) // buffered to prevent deadlock
+	flushChan := make(chan struct{})
+
+	// Identity compression keeps this test simple; real compression behavior
+	// is covered by the compression package tests.
+	compressor := compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1)
+
+	strategy := NewBatchStrategy(
+		input, output, flushChan,
+		time.Hour, 100, 10000,
+		"test", compressor, metrics.NewNoopPipelineMonitor(""), "test")
+	strategy.Start()
+
+	for sent := 0; sent < messageCount; sent++ {
+		input <- createTestStatefulMessage(content)
+	}
+
+	flushChan <- struct{}{}
+
+	payload := <-output
+	assert.Len(t, payload.MessageMetas, messageCount)
+	assert.Equal(t, "identity", payload.Encoding)
+	assert.NotEmpty(t, payload.Encoded)
+
+	// Identity compression means the encoded bytes are the marshaled
+	// DatumSequence itself.
+	decoded := &statefulpb.DatumSequence{}
+	require.NoError(t, proto.Unmarshal(payload.Encoded, decoded))
+	assert.Len(t, decoded.Data, messageCount)
+	for i := range decoded.Data {
+		assert.Equal(t, content, decoded.Data[i].GetLogs().GetRaw())
+	}
+
+	strategy.Stop()
+}
+
+// TestBatchStrategyStatefulExtra checks that every flushed payload exposes, in
+// order, exactly the state-changing datums of its batch through StatefulExtra,
+// and that log datums are not reported as state changes.
+//
+// Three batches (14 messages in total) are sent; each one is flushed by
+// advancing the mock clock past the flush interval.
+func TestBatchStrategyStatefulExtra(t *testing.T) {
+	input := make(chan *message.StatefulMessage)
+	output := make(chan *message.Payload, 10) // Buffered to prevent blocking
+	flushChan := make(chan struct{})
+	timerInterval := 100 * time.Millisecond
+
+	clk := clock.NewMock()
+	strategy := newBatchStrategyWithClock(
+		input,
+		output,
+		flushChan,
+		timerInterval,
+		10, // maxBatchSize - large enough to not trigger size-based flush
+		1000,
+		"test",
+		clk,
+		compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1),
+		metrics.NewNoopPipelineMonitor(""),
+		"test")
+	strategy.Start()
+
+	// stateMsg wraps a state-change datum in a message with empty content.
+	stateMsg := func(datum *statefulpb.Datum) *message.StatefulMessage {
+		msg := message.NewMessage([]byte(""), nil, "", 0)
+		msg.MessageMetadata.RawDataLen = 0
+		return &message.StatefulMessage{Metadata: &msg.MessageMetadata, Datum: datum}
+	}
+	patternDefine := func(id uint64, template string) *message.StatefulMessage {
+		return stateMsg(&statefulpb.Datum{Data: &statefulpb.Datum_PatternDefine{
+			PatternDefine: &statefulpb.PatternDefine{PatternId: id, Template: template},
+		}})
+	}
+	patternDelete := func(id uint64) *message.StatefulMessage {
+		return stateMsg(&statefulpb.Datum{Data: &statefulpb.Datum_PatternDelete{
+			PatternDelete: &statefulpb.PatternDelete{PatternId: id},
+		}})
+	}
+	dictDefine := func(id uint64, value string) *message.StatefulMessage {
+		return stateMsg(&statefulpb.Datum{Data: &statefulpb.Datum_DictEntryDefine{
+			DictEntryDefine: &statefulpb.DictEntryDefine{Id: id, Value: value},
+		}})
+	}
+	dictDelete := func(id uint64) *message.StatefulMessage {
+		return stateMsg(&statefulpb.Datum{Data: &statefulpb.Datum_DictEntryDelete{
+			DictEntryDelete: &statefulpb.DictEntryDelete{Id: id},
+		}})
+	}
+
+	// flushBatch advances the clock to trigger the timer-based flush, receives
+	// the resulting payload, checks its message and state-change counts and
+	// returns the state changes for detailed assertions.
+	flushBatch := func(batch, wantMessages, wantChanges int) []*statefulpb.Datum {
+		t.Helper()
+		clk.Add(2 * timerInterval)
+
+		payload := <-output
+		require.Equal(t, wantMessages, len(payload.MessageMetas), "Batch %d should have %d messages", batch, wantMessages)
+		require.NotNil(t, payload.StatefulExtra, "Batch %d should have StatefulExtra", batch)
+		extra, ok := payload.StatefulExtra.(*StatefulExtra)
+		require.True(t, ok, "StatefulExtra should be of type *StatefulExtra")
+		require.Equal(t, wantChanges, len(extra.StateChanges), "Batch %d should have %d state changes", batch, wantChanges)
+		return extra.StateChanges
+	}
+
+	// The assertions below go through the generated getters all the way down,
+	// so a datum of an unexpected kind fails the comparison instead of
+	// panicking on a nil pointer.
+	assertPatternDefine := func(datum *statefulpb.Datum, id uint64, template string) {
+		t.Helper()
+		assert.Equal(t, id, datum.GetPatternDefine().GetPatternId())
+		assert.Equal(t, template, datum.GetPatternDefine().GetTemplate())
+	}
+	assertDictDefine := func(datum *statefulpb.Datum, id uint64, value string) {
+		t.Helper()
+		assert.Equal(t, id, datum.GetDictEntryDefine().GetId())
+		assert.Equal(t, value, datum.GetDictEntryDefine().GetValue())
+	}
+
+	// Batch 1 (5 entries): add p1, add d1, log, add p2, add d2
+	input <- patternDefine(1, "pattern1")
+	input <- dictDefine(1, "value1")
+	input <- createTestStatefulMessage("log with p1/d1")
+	input <- patternDefine(2, "pattern2")
+	input <- dictDefine(2, "value2")
+
+	changes := flushBatch(1, 5, 4)
+	assertPatternDefine(changes[0], 1, "pattern1")
+	assertDictDefine(changes[1], 1, "value1")
+	assertPatternDefine(changes[2], 2, "pattern2")
+	assertDictDefine(changes[3], 2, "value2")
+
+	// Batch 2 (6 entries): log, del p1, del d1, add p3, add d3, log
+	input <- createTestStatefulMessage("log with p2/d2")
+	input <- patternDelete(1)
+	input <- dictDelete(1)
+	input <- patternDefine(3, "pattern3")
+	input <- dictDefine(3, "value3")
+	input <- createTestStatefulMessage("log with p3/d3")
+
+	changes = flushBatch(2, 6, 4)
+	assert.Equal(t, uint64(1), changes[0].GetPatternDelete().GetPatternId())
+	assert.Equal(t, uint64(1), changes[1].GetDictEntryDelete().GetId())
+	assertPatternDefine(changes[2], 3, "pattern3")
+	assertDictDefine(changes[3], 3, "value3")
+
+	// Batch 3 (3 entries): add p4, add d4, log
+	input <- patternDefine(4, "pattern4")
+	input <- dictDefine(4, "value4")
+	input <- createTestStatefulMessage("log with p4/d4")
+
+	changes = flushBatch(3, 3, 2)
+	assertPatternDefine(changes[0], 4, "pattern4")
+	assertDictDefine(changes[1], 4, "value4")
+
+	strategy.Stop()
+}
diff --git a/pkg/logs/sender/grpc/inflight.go b/pkg/logs/sender/grpc/inflight.go
new file mode 100644
index 000000000000..07437f5d99b6
--- /dev/null
+++ b/pkg/logs/sender/grpc/inflight.go
@@ -0,0 +1,236 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+package grpc
+
+import (
+	"google.golang.org/protobuf/proto"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/message"
+	"github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb"
+)
+
+// inflightTracker is a bounded FIFO queue, stored as a ring buffer, that tracks
+// payloads in two regions:
+// 1. Sent but awaiting acknowledgment (head to sentTail)
+// 2. Buffered but not yet sent to the network (sentTail to tail)
+//
+// Queue Layout:
+// [--sent awaiting ack--][--buffered not sent--]
+// ^                      ^                      ^
+// head                 sentTail                 tail
+//
+// BatchID tracking:
+// - Sent payloads have sequential batchIDs: [headBatchID, headBatchID+1, ..., headBatchID+sentSize-1]
+// - Only two values are stored: headBatchID (oldest sent) and batchIDCounter
+//   (next to be assigned); the nextBatchID method exposes the latter
+//
+// Snapshot State:
+// - Maintains accumulated state changes for stream bootstrapping
+// - Represents the state "before" the first payload in the queue
+// - Updated when payloads are acknowledged (popped)
+type inflightTracker struct {
+	items          []*message.Payload
+	head           int            // Index of the oldest sent item (awaiting ack)
+	sentTail       int            // Index of the first buffered item that's not yet sent
+	tail           int            // Index of the next available slot for new buffered items
+	cap            int            // Maximum total capacity of the tracker
+	headBatchID    uint32         // BatchID of the oldest sent payload (at head)
+	batchIDCounter uint32         // Next batchID to be assigned when markSent is called
+	snapshot       *snapshotState // Accumulated state for new streams
+}
+
+// newInflightTracker returns an empty tracker able to hold up to capacity
+// payloads. The backing slice has capacity+1 slots: one slot always stays
+// unused ("waste one slot" ring buffer pattern).
+func newInflightTracker(capacity int) *inflightTracker {
+	tracker := new(inflightTracker)
+	tracker.cap = capacity
+	tracker.items = make([]*message.Payload, capacity+1)
+	tracker.snapshot = newSnapshotState()
+	return tracker
+}
+
+// hasSpace reports whether at least one more payload can be appended.
+func (t *inflightTracker) hasSpace() bool {
+	used := t.totalCount()
+	return used < t.cap
+}
+
+// append stores payload at the end of the buffered (not yet sent) region.
+// It reports whether the payload was stored; false means the tracker is full.
+func (t *inflightTracker) append(payload *message.Payload) bool {
+	if t.hasSpace() {
+		slot := t.tail
+		t.items[slot] = payload
+		// Advance the tail, wrapping around at the end of the ring.
+		t.tail = (slot + 1) % len(t.items)
+		return true
+	}
+	return false
+}
+
+// pop removes the oldest sent payload (the one at head) once it has been
+// acknowledged and returns it. It returns nil when no sent payload is waiting.
+// State changes carried by the payload are folded into the snapshot state.
+func (t *inflightTracker) pop() *message.Payload {
+	if !t.hasUnacked() {
+		return nil
+	}
+
+	acked := t.items[t.head]
+	t.items[t.head] = nil // drop the reference so the payload can be collected
+	t.head = (t.head + 1) % len(t.items)
+
+	// The type assertion is false for an unset StatefulExtra, so no separate
+	// nil check is needed before applying the state changes.
+	if extra, ok := acked.StatefulExtra.(*StatefulExtra); ok {
+		t.snapshot.apply(extra)
+	}
+
+	// headBatchID only moves forward while another sent payload remains;
+	// otherwise markSent sets it again for the next sent payload.
+	if t.hasUnacked() {
+		t.headBatchID++
+	}
+
+	return acked
+}
+
+// hasUnacked reports whether any sent payload is still waiting for its ack,
+// i.e. whether the sent region [head, sentTail) is non-empty.
+func (t *inflightTracker) hasUnacked() bool {
+	sentRegionEmpty := t.head == t.sentTail
+	return !sentRegionEmpty
+}
+
+// hasUnSent reports whether any buffered payload has not been sent yet,
+// i.e. whether the buffered region [sentTail, tail) is non-empty.
+func (t *inflightTracker) hasUnSent() bool {
+	bufferedRegionEmpty := t.sentTail == t.tail
+	return !bufferedRegionEmpty
+}
+
+// getHeadBatchID returns the batchID expected for the oldest sent payload.
+// The value is only meaningful while hasUnacked() is true, so callers must
+// check that first.
+func (t *inflightTracker) getHeadBatchID() uint32 {
+	oldest := t.headBatchID
+	return oldest
+}
+
+// nextBatchID peeks at the batchID the next call to markSent will assign.
+// It does not modify the tracker.
+func (t *inflightTracker) nextBatchID() uint32 {
+	upcoming := t.batchIDCounter
+	return upcoming
+}
+
+// markSent moves the oldest buffered payload into the sent region and consumes
+// one batchID for it. It reports whether a payload was moved; false means
+// nothing was buffered.
+func (t *inflightTracker) markSent() bool {
+	if !t.hasUnSent() {
+		return false
+	}
+
+	assigned := t.batchIDCounter
+	// With an empty sent region this payload becomes the new head, so its
+	// batchID is the head batchID.
+	if !t.hasUnacked() {
+		t.headBatchID = assigned
+	}
+
+	t.batchIDCounter = assigned + 1
+	t.sentTail = (t.sentTail + 1) % len(t.items)
+	return true
+}
+
+// nextToSend peeks at the oldest buffered payload without removing it.
+// It returns nil when nothing is buffered.
+func (t *inflightTracker) nextToSend() *message.Payload {
+	if t.hasUnSent() {
+		return t.items[t.sentTail]
+	}
+	return nil
+}
+
+// sentCount returns how many sent payloads are still waiting for an ack.
+func (t *inflightTracker) sentCount() int {
+	ring := len(t.items)
+	// Adding ring before the modulo keeps the result non-negative when
+	// sentTail has wrapped around and sits before head.
+	return (t.sentTail - t.head + ring) % ring
+}
+
+// totalCount returns how many payloads are tracked, sent and buffered together.
+func (t *inflightTracker) totalCount() int {
+	ring := len(t.items)
+	// Adding ring before the modulo keeps the result non-negative when tail
+	// has wrapped around and sits before head.
+	return (t.tail - t.head + ring) % ring
+}
+
+// resetOnRotation marks every un-acked payload as un-sent again and restarts
+// batchID numbering for the new stream.
+func (t *inflightTracker) resetOnRotation() {
+	// batchID 0 is reserved for the snapshot state, so payload numbering
+	// starts at 1.
+	const firstBatchID uint32 = 1
+
+	// Collapsing the sent region onto head turns all of [head, tail) back
+	// into buffered payloads.
+	t.sentTail = t.head
+
+	t.headBatchID, t.batchIDCounter = firstBatchID, firstBatchID
+}
+
+// getSnapshot returns the serialized snapshot state used to bootstrap a
+// stream: a marshaled DatumSequence, or nil when the snapshot is empty.
+func (t *inflightTracker) getSnapshot() []byte {
+	state := t.snapshot
+	return state.serialize()
+}
+
+// snapshotState maintains the accumulated state changes for stream bootstrapping
+// It represents the state "before" the first payload in the inflight queue
+//
+// Both maps hold the latest definition seen for each ID: apply adds or
+// overwrites an entry on a define datum and removes it on a delete datum.
+type snapshotState struct {
+	// dictMap holds the live dictionary entries, keyed by DictEntryDefine.Id.
+	dictMap    map[uint64]*statefulpb.DictEntryDefine
+	// patternMap holds the live patterns, keyed by PatternDefine.PatternId.
+	patternMap map[uint64]*statefulpb.PatternDefine
+}
+
+// newSnapshotState returns a snapshot state with both maps allocated and empty.
+func newSnapshotState() *snapshotState {
+	state := &snapshotState{}
+	state.dictMap = map[uint64]*statefulpb.DictEntryDefine{}
+	state.patternMap = map[uint64]*statefulpb.PatternDefine{}
+	return state
+}
+
+// apply folds the state changes of one payload into the snapshot, in order.
+// Define datums add or overwrite the entry for their ID, delete datums remove
+// it, and any other kind of datum is ignored. A nil extra is a no-op.
+func (s *snapshotState) apply(extra *StatefulExtra) {
+	if extra == nil {
+		return
+	}
+
+	for i := range extra.StateChanges {
+		switch change := extra.StateChanges[i].Data.(type) {
+		case *statefulpb.Datum_DictEntryDefine:
+			entry := change.DictEntryDefine
+			s.dictMap[entry.Id] = entry
+		case *statefulpb.Datum_DictEntryDelete:
+			delete(s.dictMap, change.DictEntryDelete.Id)
+		case *statefulpb.Datum_PatternDefine:
+			pattern := change.PatternDefine
+			s.patternMap[pattern.PatternId] = pattern
+		case *statefulpb.Datum_PatternDelete:
+			delete(s.patternMap, change.PatternDelete.PatternId)
+		}
+	}
+}
+
+// serialize encodes the current snapshot as a marshaled DatumSequence, used to
+// send the snapshot when a new stream is created.
+//
+// The sequence holds one PatternDefine datum per tracked pattern followed by
+// one DictEntryDefine datum per tracked dictionary entry. Order inside each
+// group follows Go map iteration and is therefore not stable between calls.
+//
+// It returns nil when the snapshot is empty or when marshaling fails.
+func (s *snapshotState) serialize() []byte {
+	total := len(s.patternMap) + len(s.dictMap)
+	if total == 0 {
+		return nil
+	}
+
+	datums := make([]*statefulpb.Datum, 0, total)
+	for _, pattern := range s.patternMap {
+		datums = append(datums, &statefulpb.Datum{
+			Data: &statefulpb.Datum_PatternDefine{PatternDefine: pattern},
+		})
+	}
+	for _, entry := range s.dictMap {
+		datums = append(datums, &statefulpb.Datum{
+			Data: &statefulpb.Datum_DictEntryDefine{DictEntryDefine: entry},
+		})
+	}
+
+	serialized, err := proto.Marshal(&statefulpb.DatumSequence{Data: datums})
+	if err != nil {
+		// Never hand back the bytes that came with an error: they are not
+		// guaranteed to form a complete DatumSequence.
+		// NOTE(review): callers cannot tell this failure from an empty
+		// snapshot and nothing is logged here; consider surfacing the error.
+		return nil
+	}
+	return serialized
+}
diff --git a/pkg/logs/sender/grpc/inflight_test.go b/pkg/logs/sender/grpc/inflight_test.go
new file mode 100644
index 000000000000..653db10b44c7
--- /dev/null
+++ b/pkg/logs/sender/grpc/inflight_test.go
@@ -0,0 +1,482 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+package grpc
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/message"
+)
+
+// createTestPayload builds a payload for tests: content becomes the Encoded
+// bytes and every other field keeps its zero value.
+func createTestPayload(content string) *message.Payload {
+	encoded := []byte(content)
+	return &message.Payload{Encoded: encoded}
+}
+
+// TestNewInflightTracker checks the initial state of a freshly created tracker.
+func TestNewInflightTracker(t *testing.T) {
+	tr := newInflightTracker(10)
+	assert.NotNil(t, tr)
+	assert.Equal(t, 10, tr.cap)
+	assert.Equal(t, 0, tr.head)
+	assert.Equal(t, 0, tr.sentTail)
+	assert.Equal(t, 0, tr.tail)
+	assert.Equal(t, uint32(0), tr.headBatchID)
+	assert.Equal(t, uint32(0), tr.batchIDCounter)
+	assert.True(t, tr.hasSpace())
+	assert.False(t, tr.hasUnacked())
+	assert.False(t, tr.hasUnSent())
+}
+
+// TestInflightTrackerAppend checks counts and flags after successive appends.
+func TestInflightTrackerAppend(t *testing.T) {
+	tr := newInflightTracker(10)
+	// First payload: buffered, nothing sent yet
+	first := createTestPayload("test1")
+	assert.True(t, tr.append(first))
+	assert.Equal(t, 1, tr.totalCount())
+	assert.True(t, tr.hasUnSent())
+	assert.False(t, tr.hasUnacked())
+
+	// Second payload: there is still room afterwards
+	second := createTestPayload("test2")
+	assert.True(t, tr.append(second))
+	assert.Equal(t, 2, tr.totalCount())
+	assert.True(t, tr.hasSpace())
+
+	// Third payload
+	third := createTestPayload("test3")
+	assert.True(t, tr.append(third))
+	assert.Equal(t, 3, tr.totalCount())
+}
+
+// TestInflightTrackerAppendWhenFull fills an empty tracker to capacity and checks that a further append is refused.
+func TestInflightTrackerAppendWhenFull(t *testing.T) {
+	tr := newInflightTracker(3)
+
+	// Three appends reach the capacity of 3; space remains until the last one
+	assert.True(t, tr.append(createTestPayload("test1")))
+	assert.Equal(t, 1, tr.totalCount())
+	assert.True(t, tr.hasSpace())
+
+	assert.True(t, tr.append(createTestPayload("test2")))
+	assert.Equal(t, 2, tr.totalCount())
+	assert.True(t, tr.hasSpace())
+
+	assert.True(t, tr.append(createTestPayload("test3")))
+	assert.Equal(t, 3, tr.totalCount())
+	assert.False(t, tr.hasSpace())
+
+	// A fourth append is rejected and leaves the count untouched
+	assert.False(t, tr.append(createTestPayload("test4")))
+	assert.Equal(t, 3, tr.totalCount())
+}
+
+// TestInflightTrackerMarkSent checks counters, flags and batch IDs as buffered payloads are marked sent one by one.
+func TestInflightTrackerMarkSent(t *testing.T) {
+	tr := newInflightTracker(5)
+	// Buffer two payloads
+	first := createTestPayload("test1")
+	second := createTestPayload("test2")
+	tr.append(first)
+	tr.append(second)
+
+	assert.Equal(t, 0, tr.sentCount())
+	assert.True(t, tr.hasUnSent())
+	assert.False(t, tr.hasUnacked())
+
+	// After the first markSent one payload is sent and one is still buffered
+	assert.True(t, tr.markSent())
+	assert.Equal(t, 1, tr.sentCount())
+	assert.Equal(t, uint32(0), tr.getHeadBatchID())
+	assert.Equal(t, uint32(1), tr.nextBatchID())
+	assert.True(t, tr.hasUnacked())
+	assert.True(t, tr.hasUnSent())
+
+	// After the second markSent nothing is left to send
+	assert.True(t, tr.markSent())
+	assert.Equal(t, 2, tr.sentCount())
+	assert.Equal(t, uint32(0), tr.getHeadBatchID())
+	assert.Equal(t, uint32(2), tr.nextBatchID())
+	assert.True(t, tr.hasUnacked())
+	assert.False(t, tr.hasUnSent())
+
+	// markSent with no buffered payload reports false
+	assert.False(t, tr.markSent())
+}
+
+// TestInflightTrackerPop checks that pop returns sent payloads in order and advances the head batch ID.
+func TestInflightTrackerPop(t *testing.T) {
+	tr := newInflightTracker(5)
+	// Buffer two payloads and mark both as sent
+	first := createTestPayload("test1")
+	second := createTestPayload("test2")
+	tr.append(first)
+	tr.append(second)
+	tr.markSent()
+	tr.markSent()
+
+	assert.Equal(t, 2, tr.sentCount())
+	assert.Equal(t, uint32(0), tr.getHeadBatchID())
+
+	// First pop hands back the first payload
+	gotFirst := tr.pop()
+	assert.Equal(t, first, gotFirst)
+	assert.Equal(t, 1, tr.sentCount())
+	assert.Equal(t, uint32(1), tr.getHeadBatchID())
+	assert.True(t, tr.hasUnacked())
+
+	// Second pop hands back the second payload
+	gotSecond := tr.pop()
+	assert.Equal(t, second, gotSecond)
+	assert.Equal(t, 0, tr.sentCount())
+	assert.False(t, tr.hasUnacked())
+
+	// Popping with nothing sent yields nil
+	gotNone := tr.pop()
+	assert.Nil(t, gotNone)
+}
+
+// TestInflightTrackerNextToSend checks that nextToSend peeks at the oldest buffered payload, or nil when there is none.
+func TestInflightTrackerNextToSend(t *testing.T) {
+	tr := newInflightTracker(5)
+	// An empty tracker has nothing to send
+	assert.Nil(t, tr.nextToSend())
+
+	// Buffer two payloads
+	first := createTestPayload("test1")
+	second := createTestPayload("test2")
+	tr.append(first)
+	tr.append(second)
+
+	// The first buffered payload is next
+	candidate := tr.nextToSend()
+	assert.Equal(t, first, candidate)
+
+	// Once the first is marked sent...
+	tr.markSent()
+
+	// ...the second becomes next
+	candidate = tr.nextToSend()
+	assert.Equal(t, second, candidate)
+
+	// Once the second is marked sent...
+	tr.markSent()
+
+	// ...nothing is left to send
+	candidate = tr.nextToSend()
+	assert.Nil(t, candidate)
+}
+
+// TestInflightTrackerBatchIDSequence checks that markSent advances the next batch ID and pop advances the head batch ID.
+func TestInflightTrackerBatchIDSequence(t *testing.T) {
+	tr := newInflightTracker(5)
+
+	// Buffer three payloads
+	for n := 0; n < 3; n++ {
+		tr.append(createTestPayload("test"))
+	}
+
+	// Nothing sent yet, so the next batch ID is still 0
+	assert.Equal(t, uint32(0), tr.nextBatchID())
+
+	// First send
+	tr.markSent()
+	assert.Equal(t, uint32(0), tr.getHeadBatchID())
+	assert.Equal(t, uint32(1), tr.nextBatchID())
+
+	// Second send
+	tr.markSent()
+	assert.Equal(t, uint32(0), tr.getHeadBatchID())
+	assert.Equal(t, uint32(2), tr.nextBatchID())
+
+	// Third send
+	tr.markSent()
+	assert.Equal(t, uint32(0), tr.getHeadBatchID())
+	assert.Equal(t, uint32(3), tr.nextBatchID())
+
+	// First pop moves the head batch ID to 1
+	tr.pop()
+	assert.Equal(t, uint32(1), tr.getHeadBatchID())
+
+	// Second pop moves the head batch ID to 2
+	tr.pop()
+	assert.Equal(t, uint32(2), tr.getHeadBatchID())
+}
+
+// TestInflightTrackerResetOnRotation checks that un-acked sent payloads become buffered again and batch IDs restart at 1.
+func TestInflightTrackerResetOnRotation(t *testing.T) {
+	tr := newInflightTracker(5)
+
+	// Buffer three payloads, marking each as sent right away
+	for n := 0; n < 3; n++ {
+		tr.append(createTestPayload("test"))
+		tr.markSent()
+	}
+
+	// Acknowledge one of them
+	tr.pop()
+
+	// Before the reset: 2 sent and awaiting ack, 0 buffered
+	assert.Equal(t, 2, tr.sentCount())
+	assert.Equal(t, 0, tr.totalCount()-tr.sentCount())
+	assert.Equal(t, uint32(1), tr.getHeadBatchID())
+	assert.Equal(t, uint32(3), tr.nextBatchID())
+
+	// Rotate the stream
+	tr.resetOnRotation()
+
+	// After the reset: 0 sent, the 2 un-acked payloads count as buffered
+	assert.Equal(t, 0, tr.sentCount())
+	assert.Equal(t, 2, tr.totalCount())
+	assert.True(t, tr.hasUnSent())
+	assert.False(t, tr.hasUnacked())
+
+	// Head and next batch IDs both restart at 1
+	assert.Equal(t, uint32(1), tr.headBatchID)
+	assert.Equal(t, uint32(1), tr.nextBatchID())
+}
+
+// TestInflightTrackerWrapAround exercises ring-buffer wrap-around without filling the tracker to capacity.
+func TestInflightTrackerWrapAround(t *testing.T) {
+	tr := newInflightTracker(6)
+
+	// Two payloads used only to move the head pointer forward
+	warmup1 := createTestPayload("test1")
+	warmup2 := createTestPayload("test2")
+
+	// Append, send and ack each one in turn
+	tr.append(warmup1)
+	tr.markSent()
+	tr.pop()
+
+	tr.append(warmup2)
+	tr.markSent()
+	tr.pop()
+
+	// Three more payloads appended after the pointers have advanced
+	third := createTestPayload("test3")
+	fourth := createTestPayload("test4")
+	fifth := createTestPayload("test5")
+
+	assert.True(t, tr.append(third))
+	assert.True(t, tr.append(fourth))
+	assert.True(t, tr.append(fifth))
+
+	assert.Equal(t, 3, tr.totalCount())
+	assert.True(t, tr.hasSpace())
+
+	// Send all three, then ack them in order
+	tr.markSent()
+	tr.markSent()
+	tr.markSent()
+
+	gotThird := tr.pop()
+	gotFourth := tr.pop()
+	gotFifth := tr.pop()
+
+	assert.Equal(t, third, gotThird)
+	assert.Equal(t, fourth, gotFourth)
+	assert.Equal(t, fifth, gotFifth)
+	assert.Equal(t, 0, tr.totalCount())
+}
+
+// TestInflightTrackerSentCount checks that sentCount rises with markSent and falls with pop.
+func TestInflightTrackerSentCount(t *testing.T) {
+	tr := newInflightTracker(5)
+	// Nothing sent on a fresh tracker
+	assert.Equal(t, 0, tr.sentCount())
+
+	// Appending alone does not count as sending
+	tr.append(createTestPayload("test1"))
+	tr.append(createTestPayload("test2"))
+	tr.append(createTestPayload("test3"))
+
+	assert.Equal(t, 0, tr.sentCount())
+
+	// Each markSent adds one
+	tr.markSent()
+	assert.Equal(t, 1, tr.sentCount())
+
+	tr.markSent()
+	assert.Equal(t, 2, tr.sentCount())
+
+	// A pop removes one
+	tr.pop()
+	assert.Equal(t, 1, tr.sentCount())
+
+	// Sending the last buffered payload brings it back to two
+	tr.markSent()
+	assert.Equal(t, 2, tr.sentCount())
+}
+
+// TestInflightTrackerTotalCount checks that totalCount grows on append, ignores markSent and shrinks on pop.
+func TestInflightTrackerTotalCount(t *testing.T) {
+	tr := newInflightTracker(5)
+	// A fresh tracker holds nothing
+	assert.Equal(t, 0, tr.totalCount())
+
+	// Each append adds one
+	tr.append(createTestPayload("test1"))
+	assert.Equal(t, 1, tr.totalCount())
+
+	tr.append(createTestPayload("test2"))
+	assert.Equal(t, 2, tr.totalCount())
+
+	// Marking payloads as sent leaves the total alone
+	tr.markSent()
+	tr.markSent()
+	assert.Equal(t, 2, tr.totalCount())
+
+	// Each pop removes one
+	tr.pop()
+	assert.Equal(t, 1, tr.totalCount())
+
+	tr.pop()
+	assert.Equal(t, 0, tr.totalCount())
+}
+
+// TestInflightTrackerHasSpace checks that hasSpace stays true while the tracker is below capacity.
+func TestInflightTrackerHasSpace(t *testing.T) {
+	tr := newInflightTracker(10)
+	// Empty tracker
+	assert.True(t, tr.hasSpace())
+
+	// Half full (5 of 10)
+	for n := 0; n < 5; n++ {
+		tr.append(createTestPayload("test"))
+	}
+	assert.True(t, tr.hasSpace())
+
+	// Still true after one payload has been sent and acked
+	tr.markSent()
+	tr.pop()
+	assert.True(t, tr.hasSpace())
+}
+
+// TestInflightTrackerMixedOperations walks through a realistic interleaving of append, markSent and pop.
+func TestInflightTrackerMixedOperations(t *testing.T) {
+	tr := newInflightTracker(5)
+
+	// Buffer three payloads
+	msg1 := createTestPayload("msg1")
+	msg2 := createTestPayload("msg2")
+	msg3 := createTestPayload("msg3")
+
+	tr.append(msg1)
+	tr.append(msg2)
+	tr.append(msg3)
+
+	assert.Equal(t, 3, tr.totalCount())
+	assert.Equal(t, 0, tr.sentCount())
+
+	// Send the first two
+	tr.markSent()
+	tr.markSent()
+
+	assert.Equal(t, 3, tr.totalCount())
+	assert.Equal(t, 2, tr.sentCount())
+	assert.True(t, tr.hasUnacked())
+	assert.True(t, tr.hasUnSent())
+
+	// Ack for the first one arrives
+	acked := tr.pop()
+	assert.Equal(t, msg1, acked)
+	assert.Equal(t, 2, tr.totalCount())
+	assert.Equal(t, 1, tr.sentCount())
+
+	// Two more payloads are buffered
+	msg4 := createTestPayload("msg4")
+	msg5 := createTestPayload("msg5")
+	tr.append(msg4)
+	tr.append(msg5)
+
+	assert.Equal(t, 4, tr.totalCount())
+	assert.Equal(t, 1, tr.sentCount())
+
+	// Send everything still buffered
+	tr.markSent() // msg3
+	tr.markSent() // msg4
+	tr.markSent() // msg5
+
+	assert.Equal(t, 4, tr.totalCount())
+	assert.Equal(t, 4, tr.sentCount())
+	assert.False(t, tr.hasUnSent())
+
+	// The remaining acks come back in send order
+	assert.Equal(t, msg2, tr.pop())
+	assert.Equal(t, msg3, tr.pop())
+	assert.Equal(t, msg4, tr.pop())
+	assert.Equal(t, msg5, tr.pop())
+
+	assert.Equal(t, 0, tr.totalCount())
+	assert.False(t, tr.hasUnacked())
+}
+
+// TestInflightTrackerResetOnRotationWithBuffered checks a reset when both sent and buffered payloads are present.
+func TestInflightTrackerResetOnRotationWithBuffered(t *testing.T) {
+	tr := newInflightTracker(5)
+	// Four payloads; some will be sent, some will stay buffered
+	tr.append(createTestPayload("msg1"))
+	tr.append(createTestPayload("msg2"))
+	tr.append(createTestPayload("msg3"))
+	tr.append(createTestPayload("msg4"))
+
+	// The first two go out
+	tr.markSent()
+	tr.markSent()
+
+	// The first one is acked
+	tr.pop()
+
+	// Now: 1 sent, 2 buffered, 3 in total
+	assert.Equal(t, 1, tr.sentCount())
+	assert.Equal(t, 3, tr.totalCount())
+
+	// Rotate the stream
+	tr.resetOnRotation()
+
+	// Every remaining payload counts as buffered again
+	assert.Equal(t, 0, tr.sentCount())
+	assert.Equal(t, 3, tr.totalCount())
+	assert.True(t, tr.hasUnSent())
+	assert.False(t, tr.hasUnacked())
+
+	// The next batch ID restarts at 1
+	assert.Equal(t, uint32(1), tr.nextBatchID())
+}
+
+// TestInflightTrackerBatchIDAfterRotation checks the batch IDs handed out after a rotation reset.
+func TestInflightTrackerBatchIDAfterRotation(t *testing.T) {
+	tr := newInflightTracker(5)
+	// Buffer and send two payloads
+	tr.append(createTestPayload("msg1"))
+	tr.append(createTestPayload("msg2"))
+	tr.markSent()
+	tr.markSent()
+
+	assert.Equal(t, uint32(0), tr.getHeadBatchID())
+	assert.Equal(t, uint32(2), tr.nextBatchID())
+
+	// Rotate the stream
+	tr.resetOnRotation()
+
+	// The next batch ID restarts at 1 (0 is reserved for snapshot)
+	assert.Equal(t, uint32(1), tr.nextBatchID())
+
+	// Re-sending hands out IDs counting up from 1
+	tr.markSent()
+	assert.Equal(t, uint32(1), tr.getHeadBatchID())
+	assert.Equal(t, uint32(2), tr.nextBatchID())
+
+	tr.markSent()
+	assert.Equal(t, uint32(1), tr.getHeadBatchID())
+	assert.Equal(t, uint32(3), tr.nextBatchID())
+}
diff --git a/pkg/logs/sender/grpc/mock_encoder.go b/pkg/logs/sender/grpc/mock_encoder.go
new file mode 100644
index 000000000000..00e97fa58490
--- /dev/null
+++ b/pkg/logs/sender/grpc/mock_encoder.go
@@ -0,0 +1,23 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+package grpc
+
+import (
+	"github.com/DataDog/datadog-agent/pkg/logs/message"
+	"github.com/DataDog/datadog-agent/pkg/logs/processor"
+)
+
+// MockEncoder is a processor.Encoder whose Encode does nothing. It is
+// temporary scaffolding for gRPC stateful streaming until the real State
+// component is ready; encoding happens in StartMessageTranslator instead.
+var MockEncoder processor.Encoder = new(mockEncoder)
+
+type mockEncoder struct{}
+
+// Encode satisfies processor.Encoder; it ignores both arguments and returns nil
+func (*mockEncoder) Encode(_ *message.Message, _ string) error {
+	return nil
+}
diff --git a/pkg/logs/sender/grpc/mock_state.go b/pkg/logs/sender/grpc/mock_state.go
new file mode 100644
index 000000000000..c5b88e0938de
--- /dev/null
+++ b/pkg/logs/sender/grpc/mock_state.go
@@ -0,0 +1,264 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+package grpc
+
+import (
+	"strings"
+	"time"
+	"unicode/utf8"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/message"
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/automaton"
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering"
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
+	"github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb"
+)
+
+const nanoToMillis = 1000000 // divisor that turns a nanosecond count into milliseconds
+
+// MessageTranslator turns message.Message values into message.StatefulMessage
+// values, using its cluster manager to assign each log to a pattern.
+type MessageTranslator struct {
+	clusterManager *clustering.ClusterManager
+}
+
+// NewMessageTranslator creates a MessageTranslator backed by its own, newly
+// created ClusterManager. The constructor takes no arguments, so callers
+// cannot inject an existing manager.
+func NewMessageTranslator() *MessageTranslator {
+	return &MessageTranslator{
+		clusterManager: clustering.NewClusterManager(),
+	}
+
+	// TODO: accept a shared ClusterManager so patterns can be shared across
+	// pipelines, falling back to a fresh one when nil is passed:
+	// if clusterManager == nil {
+	// 	clusterManager = clustering.NewClusterManager()
+	// }
+	// return &MessageTranslator{clusterManager: clusterManager}
+}
+
+// Start launches a goroutine that feeds every message read from inputChan
+// through processMessage and returns the channel the results are written to.
+// The returned channel is buffered with bufferSize and is closed once
+// inputChan has been closed and drained. Per message, processMessage:
+//   - tokenizes the content and asks the ClusterManager for its pattern,
+//   - emits PatternDefine (preceded by PatternDelete on updates) when needed,
+//   - emits a StructuredLog carrying the wildcard values.
+func (mt *MessageTranslator) Start(inputChan chan *message.Message, bufferSize int) chan *message.StatefulMessage {
+	out := make(chan *message.StatefulMessage, bufferSize)
+	translate := func() {
+		defer close(out)
+		for in := range inputChan {
+			mt.processMessage(in, out)
+		}
+	}
+	go translate()
+	return out
+}
+
+// StartMessageTranslator is a convenience function that creates a new
+// MessageTranslator and starts it; it returns the StatefulMessage output channel.
+func StartMessageTranslator(inputChan chan *message.Message, bufferSize int) chan *message.StatefulMessage {
+	// NewMessageTranslator builds a fresh cluster manager on every call, so patterns are not shared between callers
+	translator := NewMessageTranslator()
+	return translator.Start(inputChan, bufferSize)
+}
+
+// processMessage translates one message. Empty content is dropped silently;
+// otherwise the content is tokenized, matched to a pattern by the cluster
+// manager, and any pattern datums plus one StructuredLog go to outputChan.
+func (mt *MessageTranslator) processMessage(msg *message.Message, outputChan chan *message.StatefulMessage) {
+	var (
+		defineSent       bool
+		defineParamCount uint32
+	)
+
+	timestamp := getMessageTimestamp(msg)
+
+	raw := msg.GetContent()
+	if len(raw) == 0 {
+		return
+	}
+
+	// Tokenize, then let the cluster manager pick or create the pattern
+	tokens := tokenizeMessage(string(raw))
+	pattern, changeType := mt.clusterManager.Add(tokens)
+
+	// Wildcard values are extracted before any pattern datum is emitted
+	wildcards := pattern.GetWildcardValues(tokens)
+
+	// Emit PatternDefine/PatternDelete when the pattern is new or changed
+	mt.handlePatternChange(pattern, changeType, msg, outputChan, &defineSent, &defineParamCount)
+
+	// Emit the StructuredLog referencing the pattern ID and wildcard values
+	mt.sendStructuredLog(outputChan, msg, pattern, wildcards, timestamp, defineSent, defineParamCount)
+}
+
+// getMessageTimestamp returns msg.ServerlessExtra.Timestamp when it is set
+// (non-zero) and the current UTC time otherwise.
+func getMessageTimestamp(msg *message.Message) time.Time {
+	if extra := msg.ServerlessExtra.Timestamp; !extra.IsZero() {
+		return extra
+	}
+	return time.Now().UTC()
+}
+
+// tokenizeMessage runs a new automaton tokenizer over contentStr and
+// returns the resulting token list.
+func tokenizeMessage(contentStr string) *token.TokenList {
+	return automaton.NewTokenizer(contentStr).Tokenize()
+}
+
+// handlePatternChange emits the pattern datums implied by changeType:
+// PatternNew sends a PatternDefine, PatternUpdated sends a PatternDelete
+// followed by a PatternDefine, and any other change type sends nothing.
+// patternDefineSent and patternDefineParamCount are filled in by
+// sendPatternDefine whenever a definition is emitted.
+func (mt *MessageTranslator) handlePatternChange(pattern *clustering.Pattern, changeType clustering.PatternChangeType, msg *message.Message, outputChan chan *message.StatefulMessage, patternDefineSent *bool, patternDefineParamCount *uint32) {
+	isNew := changeType == clustering.PatternNew
+	isUpdated := changeType == clustering.PatternUpdated
+	if !isNew && !isUpdated {
+		return
+	}
+	if isUpdated {
+		// The old definition is retracted before the changed one is sent
+		mt.sendPatternDelete(pattern.PatternID, msg, outputChan)
+	}
+	mt.sendPatternDefine(pattern, msg, outputChan, patternDefineSent, patternDefineParamCount)
+}
+
+// sendPatternDefine builds the PatternDefine datum for pattern, writes it to
+// outputChan with msg's metadata, and records through the two out-pointers
+// that a definition was sent and how many parameters it declares.
+func (mt *MessageTranslator) sendPatternDefine(pattern *clustering.Pattern, msg *message.Message, outputChan chan *message.StatefulMessage, patternDefineSent *bool, patternDefineParamCount *uint32) {
+	datum := buildPatternDefine(pattern)
+	if define := datum.GetPatternDefine(); define != nil {
+		*patternDefineParamCount = define.ParamCount
+	}
+	outgoing := &message.StatefulMessage{Datum: datum, Metadata: &msg.MessageMetadata}
+	outputChan <- outgoing
+	*patternDefineSent = true
+}
+
+// sendPatternDelete writes a PatternDelete datum for patternID to outputChan, attaching msg's metadata
+func (mt *MessageTranslator) sendPatternDelete(patternID uint64, msg *message.Message, outputChan chan *message.StatefulMessage) {
+	outgoing := &message.StatefulMessage{
+		Datum:    buildPatternDelete(patternID),
+		Metadata: &msg.MessageMetadata,
+	}
+	outputChan <- outgoing
+}
+
+// sendRawLog writes contentStr to outputChan as a raw log datum stamped with ts, attaching msg's metadata
+func (mt *MessageTranslator) sendRawLog(outputChan chan *message.StatefulMessage, msg *message.Message, contentStr string, ts time.Time) {
+	rawDatum := buildRawLog(contentStr, ts)
+	wrapped := new(message.StatefulMessage)
+	wrapped.Datum = rawDatum
+	wrapped.Metadata = &msg.MessageMetadata
+	outputChan <- wrapped
+}
+
+// sendStructuredLog writes to outputChan a StructuredLog datum that refers to
+// pattern by ID and carries wildcardValues, timestamped with ts and tagged
+// with msg's metadata. patternDefineSent and patternDefineParamCount are
+// accepted but not used by the current implementation.
+func (mt *MessageTranslator) sendStructuredLog(outputChan chan *message.StatefulMessage, msg *message.Message, pattern *clustering.Pattern, wildcardValues []string, ts time.Time, patternDefineSent bool, patternDefineParamCount uint32) {
+	datum := buildStructuredLog(pattern.PatternID, wildcardValues, ts)
+	outputChan <- &message.StatefulMessage{Datum: datum, Metadata: &msg.MessageMetadata}
+}
+
+// buildPatternDefine wraps pattern in a PatternDefine datum: its ID, its
+// template string, its wildcard count, and the character position of each
+// wildcard converted to uint32.
+func buildPatternDefine(pattern *clustering.Pattern) *statefulpb.Datum {
+	charPositions := pattern.GetWildcardCharPositions()
+	positions := make([]uint32, 0, len(charPositions))
+	for _, p := range charPositions {
+		positions = append(positions, uint32(p))
+	}
+
+	define := &statefulpb.PatternDefine{
+		PatternId:  pattern.PatternID,
+		Template:   pattern.GetPatternString(),
+		ParamCount: uint32(pattern.GetWildcardCount()),
+		PosList:    positions,
+	}
+
+	return &statefulpb.Datum{Data: &statefulpb.Datum_PatternDefine{PatternDefine: define}}
+}
+
+// buildPatternDelete returns a Datum wrapping a PatternDelete message. The
+// only information the message carries is patternID, the ID of the pattern
+// being deleted.
+func buildPatternDelete(patternID uint64) *statefulpb.Datum {
+	deletion := &statefulpb.PatternDelete{PatternId: patternID}
+
+	return &statefulpb.Datum{
+		Data: &statefulpb.Datum_PatternDelete{PatternDelete: deletion},
+	}
+}
+
+// buildStructuredLog returns a Datum holding a structured Log entry. The
+// entry refers to patternID, carries every wildcard value as a string
+// DynamicValue in the order given, and is stamped with ts converted to
+// Unix milliseconds.
+func buildStructuredLog(patternID uint64, wildcardValues []string, ts time.Time) *statefulpb.Datum {
+	// Every wildcard value is wrapped as a string DynamicValue
+	values := make([]*statefulpb.DynamicValue, 0, len(wildcardValues))
+	for _, raw := range wildcardValues {
+		asString := &statefulpb.DynamicValue_StringValue{StringValue: raw}
+		values = append(values, &statefulpb.DynamicValue{Value: asString})
+	}
+
+	// The structured payload: which pattern, filled with which values
+	structured := &statefulpb.StructuredLog{
+		PatternId:     patternID,
+		DynamicValues: values,
+	}
+
+	// The log entry adds the timestamp in milliseconds
+	entry := &statefulpb.Log{
+		Timestamp: uint64(ts.UnixNano() / nanoToMillis),
+		Content:   &statefulpb.Log_Structured{Structured: structured},
+	}
+
+	return &statefulpb.Datum{Data: &statefulpb.Datum_Logs{Logs: entry}}
+}
+
+// buildRawLog returns a Datum holding a raw Log: content is carried verbatim
+// with no pattern reference, stamped with ts in Unix milliseconds.
+func buildRawLog(content string, ts time.Time) *statefulpb.Datum {
+	millis := uint64(ts.UnixNano() / nanoToMillis)
+
+	entry := &statefulpb.Log{
+		Timestamp: millis,
+		Content:   &statefulpb.Log_Raw{Raw: content},
+	}
+	return &statefulpb.Datum{
+		Data: &statefulpb.Datum_Logs{Logs: entry},
+	}
+}
+
+// toValidUtf8 converts data to a string that is valid UTF-8. Valid input is
+// returned unchanged; otherwise every byte that does not start a valid
+// encoding is replaced by one U+FFFD replacement character.
+func toValidUtf8(data []byte) string {
+	text := string(data)
+	if utf8.ValidString(text) {
+		return text
+	}
+
+	var cleaned strings.Builder
+	cleaned.Grow(len(text))
+	// Ranging over a string decodes rune by rune and yields utf8.RuneError
+	// (U+FFFD) with a width of one byte for each invalid byte, which is
+	// exactly the substitution wanted here.
+	for _, r := range text {
+		cleaned.WriteRune(r)
+	}
+	return cleaned.String()
+}
diff --git a/pkg/logs/sender/grpc/sender.go b/pkg/logs/sender/grpc/sender.go
new file mode 100644
index 000000000000..e4dd93c4a1b1
--- /dev/null
+++ b/pkg/logs/sender/grpc/sender.go
@@ -0,0 +1,265 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+// Package grpc implements gRPC-based log sender
+package grpc
+
+import (
+	"context"
+	"crypto/tls"
+	"fmt"
+	"time"
+
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials"
+	"google.golang.org/grpc/credentials/insecure"
+	"google.golang.org/grpc/keepalive"
+
+	"github.com/DataDog/datadog-agent/comp/logs/agent/config"
+	pkgconfigmodel "github.com/DataDog/datadog-agent/pkg/config/model"
+	"github.com/DataDog/datadog-agent/pkg/logs/client"
+	"github.com/DataDog/datadog-agent/pkg/logs/message"
+	"github.com/DataDog/datadog-agent/pkg/logs/metrics"
+	"github.com/DataDog/datadog-agent/pkg/logs/sender"
+	"github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb"
+	"github.com/DataDog/datadog-agent/pkg/util/log"
+	"github.com/DataDog/datadog-agent/pkg/version"
+
+	"go.uber.org/atomic"
+)
+
+const (
+	// inputChanBufferSize is the capacity of each worker's input channel - may become configurable
+	inputChanBufferSize = 100
+)
+
+// headerCredentials implements credentials.PerRPCCredentials; it derives the per-RPC headers from endpoint
+type headerCredentials struct {
+	endpoint config.Endpoint
+}
+
+// GetRequestMetadata returns the headers attached to each RPC: always
+// "dd-api-key", plus "dd-protocol" when the endpoint names a protocol, plus
+// "dd-evp-origin" and "dd-evp-origin-version" when it names an origin.
+// It never returns an error.
+func (h *headerCredentials) GetRequestMetadata(_ context.Context, _ ...string) (map[string]string, error) {
+	md := make(map[string]string)
+	md["dd-api-key"] = h.endpoint.GetAPIKey()
+
+	if protocol := h.endpoint.Protocol; protocol != "" {
+		md["dd-protocol"] = string(protocol)
+	}
+
+	if origin := h.endpoint.Origin; origin != "" {
+		md["dd-evp-origin"] = string(origin)
+		md["dd-evp-origin-version"] = version.AgentVersion
+	}
+
+	return md, nil
+}
+
+// RequireTransportSecurity always reports false. NOTE(review): the headers (API key included) are then presumably also sent on non-TLS connections - confirm this is intended.
+func (h *headerCredentials) RequireTransportSecurity() bool {
+	return false // We handle TLS separately via WithTransportCredentials
+}
+
+// Sender implements the PipelineComponent interface for gRPC log transmission.
+// It owns a set of streamWorker instances, each fed by its own queue, and
+// hands out those queues round-robin, similar to the Sender/Worker architecture.
+type Sender struct {
+	// Configuration
+	endpoint            config.Endpoint
+	destinationsContext *client.DestinationsContext
+	cfg                 pkgconfigmodel.Reader
+	numberOfWorkers     int
+
+	// Pipeline integration
+	pipelineMonitor metrics.PipelineMonitor
+
+	// Stream management: workers[i] reads from queues[i]; idx is the round-robin counter used by In()
+	workers []*streamWorker
+	queues  []chan *message.Payload
+	idx     *atomic.Uint32
+
+	// Auditor integration
+	sink sender.Sink
+
+	// gRPC connection management (shared across all streams)
+	conn   *grpc.ClientConn
+	client statefulpb.StatefulLogsServiceClient
+}
+
+// NewSender creates a new gRPC sender that implements PipelineComponent
+// numberOfPipelines determines how many streamWorker to create (same as number of pipelines)
+func NewSender(
+	numberOfPipelines int,
+	cfg pkgconfigmodel.Reader,
+	sink sender.Sink,
+	endpoints *config.Endpoints,
+	destinationsCtx *client.DestinationsContext,
+) *Sender {
+
+	// For now, use the first reliable endpoint
+	// TODO: Support multiple endpoints with failover
+	var endpoint config.Endpoint
+	if len(endpoints.GetReliableEndpoints()) > 0 {
+		endpoint = endpoints.GetReliableEndpoints()[0]
+	} else {
+		log.Error("No reliable gRPC endpoints configured")
+		return nil
+	}
+
+	// For the moment, we use the number of pipelines as the number of workers
+	numberOfWorkers := numberOfPipelines
+
+	// Get stream lifetime from config
+	streamLifetime := config.StreamLifetime(cfg)
+
+	sender := &Sender{
+		endpoint:            endpoint,
+		destinationsContext: destinationsCtx,
+		cfg:                 cfg,
+		numberOfWorkers:     numberOfWorkers,
+		pipelineMonitor:     metrics.NewTelemetryPipelineMonitor(),
+		workers:             make([]*streamWorker, 0, numberOfWorkers),
+		queues:              make([]chan *message.Payload, numberOfWorkers),
+		idx:                 &atomic.Uint32{},
+		sink:                sink,
+	}
+
+	// Note: outputChan will be set in each streamWorker's start() method when sink.Channel() is available
+
+	// Create gRPC connection (shared by all streams inside streamWorkers)
+	if err := sender.createConnection(); err != nil {
+		log.Errorf("Failed to create gRPC connection: %v", err)
+		return nil
+	}
+
+	// Create multiple streamWorker instances (like Sender creates Workers)
+	for i := 0; i < numberOfWorkers; i++ {
+		workerID := fmt.Sprintf("worker-%d", i)
+
+		// Create input queue for this worker (like Sender creates queues)
+		sender.queues[i] = make(chan *message.Payload, inputChanBufferSize)
+
+		// Create streamWorker instance
+		worker := newStreamWorker(
+			workerID,
+			sender.queues[i],
+			destinationsCtx,
+			sender.conn,
+			sender.client,
+			sender.sink,
+			endpoint,
+			streamLifetime,
+		)
+
+		sender.workers = append(sender.workers, worker)
+	}
+
+	log.Infof("Created gRPC sender with %d streams for endpoint %s:%d",
+		numberOfWorkers, endpoint.Host, endpoint.Port)
+	return sender
+}
+
+// createConnection builds the gRPC client connection shared by all stream
+// workers and stores it, together with the service client, on the Sender.
+// It returns a wrapped error when grpc.NewClient rejects the configuration;
+// in that case the Sender is left unchanged.
+func (s *Sender) createConnection() error {
+	log.Infof("Creating gRPC connection to %s:%d", s.endpoint.Host, s.endpoint.Port)
+
+	// Transport security: TLS with the endpoint host as server name, or plaintext
+	var transport credentials.TransportCredentials
+	if s.endpoint.UseSSL() {
+		transport = credentials.NewTLS(&tls.Config{ServerName: s.endpoint.Host})
+	} else {
+		transport = insecure.NewCredentials()
+	}
+
+	// Keepalive settings; PermitWithoutStream allows pings while no stream
+	// is open
+	pings := keepalive.ClientParameters{
+		Time:                30 * time.Second,
+		Timeout:             5 * time.Second,
+		PermitWithoutStream: true,
+	}
+
+	// Per-RPC credentials add the API key, protocol and origin headers
+	perRPC := &headerCredentials{endpoint: s.endpoint}
+
+	opts := []grpc.DialOption{
+		grpc.WithTransportCredentials(transport),
+		grpc.WithKeepaliveParams(pings),
+		grpc.WithUserAgent(fmt.Sprintf("datadog-agent/%s", version.AgentVersion)),
+		grpc.WithPerRPCCredentials(perRPC),
+		// Round-robin load balancing, to utilize all available LB IPs
+		grpc.WithDefaultServiceConfig(
+			`{"loadBalancingPolicy":"round_robin"}`,
+		),
+	}
+
+	// NewClient establishes the connection lazily and does not block
+	address := fmt.Sprintf("%s:%d", s.endpoint.Host, s.endpoint.Port)
+	conn, err := grpc.NewClient(address, opts...)
+	if err != nil {
+		return fmt.Errorf("failed to create gRPC connection: %w", err)
+	}
+
+	// Publish the connection and its service client on the Sender
+	s.conn = conn
+	s.client = statefulpb.NewStatefulLogsServiceClient(conn)
+
+	log.Infof("Successfully created gRPC connection to %s", address)
+	return nil
+}
+
+// PipelineComponent interface implementation
+
+// In returns one of the worker queues, advancing a shared atomic counter so
+// that successive calls rotate through the queues (same as Sender.In()).
+func (s *Sender) In() chan *message.Payload {
+	return s.queues[s.idx.Inc()%uint32(len(s.queues))]
+}
+
+// PipelineMonitor returns the pipeline monitor stored on the Sender (set once in NewSender)
+func (s *Sender) PipelineMonitor() metrics.PipelineMonitor {
+	return s.pipelineMonitor
+}
+
+// Start calls start on every streamWorker, in creation order, logging
+// before the first and after the last (same pattern as Sender.Start()).
+func (s *Sender) Start() {
+	workerCount := len(s.workers)
+	log.Infof("Starting gRPC sender with %d workers", workerCount)
+	for i := 0; i < workerCount; i++ {
+		s.workers[i].start()
+	}
+	log.Info("All streamWorkers started")
+}
+
+// Stop shuts the sender down in three steps: stop every streamWorker, close
+// every input queue, then close the shared gRPC connection if one exists.
+func (s *Sender) Stop() {
+	log.Info("Stopping gRPC sender")
+
+	// Workers first (same pattern as Sender.Stop())
+	for i := range s.workers {
+		s.workers[i].stop()
+	}
+
+	// Then the queues feeding them
+	for i := range s.queues {
+		close(s.queues[i])
+	}
+
+	// Finally the connection shared by all workers; a close error is only logged
+	if conn := s.conn; conn != nil {
+		if closeErr := conn.Close(); closeErr != nil {
+			log.Warnf("Error closing gRPC connection: %v", closeErr)
+		}
+	}
+	log.Info("gRPC sender stopped")
+}
diff --git a/pkg/logs/sender/grpc/stream_worker.go b/pkg/logs/sender/grpc/stream_worker.go
new file mode 100644
index 000000000000..f2cee3ad3592
--- /dev/null
+++ b/pkg/logs/sender/grpc/stream_worker.go
@@ -0,0 +1,712 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+package grpc
+
+import (
+	"context"
+	"errors"
+	"io"
+	"time"
+
+	"github.com/benbjohnson/clock"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/connectivity"
+	"google.golang.org/grpc/status"
+
+	"github.com/DataDog/datadog-agent/comp/logs/agent/config"
+	"github.com/DataDog/datadog-agent/pkg/logs/client"
+	"github.com/DataDog/datadog-agent/pkg/logs/message"
+	"github.com/DataDog/datadog-agent/pkg/logs/sender"
+	"github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb"
+	"github.com/DataDog/datadog-agent/pkg/util/backoff"
+	"github.com/DataDog/datadog-agent/pkg/util/log"
+)
+
+// TODO For PoC Stage 1
+// - implement snapshot state transmission
+// - better handle unrecoverable errors - auth/perm, protocol, stream-level gRPC status
+// - telemetry (send/recv, failure, rotations)
+
+// TODO for PoC Stage 2
+// - implement a more graceful shutdown; with the current version we could lose some acks
+// - currently, s.currentStream.stream.Send(batch) can still block, particularly
+//   when there are many buffered payloads to re-send after a stream rotation
+//   or when we are flow controlled. This blocks the supervisor loop
+//   and potentially backpressures the input channel
+// - implement proper "stream/ordered" backpressure
+
+// TODO for production
+// - implement stream negotiation (state size, etc), able to downgrade to HTTP transport
+// - Testing plan
+
+const (
+	// Tuning values for the stream worker; hard-coded for now, may become configurable
+	batchAckChanBuffer = 10               // buffer size of streamWorker.batchAckCh
+	maxInflight        = 10000            // capacity passed to the default inflight tracker
+	connectionTimeout  = 10 * time.Second // upper bound on ensureConnectionReady's wait
+	drainTimeout       = 5 * time.Second  // how long the draining state waits for outstanding acks
+)
+
+// streamState enumerates the lifecycle states the stream worker's supervisor moves through
+//
+//go:generate stringer -type=streamState
+type streamState int
+
+const (
+	// disconnected: no stream; set at construction and while backing off after a failed stream creation
+	disconnected streamState = iota
+	// connecting: an asyncCreateNewStream call is in flight and its result is awaited on streamReadyCh
+	connecting
+	// active: normal operation, currentStream is valid and payloads are sent on it
+	active
+	// draining: stream lifetime expired with unacked payloads; waits for acks or drainTimeout before rotating
+	draining
+)
+
+// streamInfo bundles one gRPC stream with the context it was created under and that context's cancel func
+type streamInfo struct {
+	stream statefulpb.StatefulLogsService_LogsStreamClient // the LogsStream client used for Send/Recv/CloseSend
+	ctx    context.Context                                 // per-stream context passed to LogsStream
+	cancel context.CancelFunc                              // cancels ctx; invoked by closeStream
+}
+
+// streamCreationResult is what asyncCreateNewStream delivers on streamReadyCh: a stream on success, an error otherwise
+type streamCreationResult struct {
+	info *streamInfo // the new stream; nil when creation failed
+	err  error       // the creation error; nil on success
+}
+
+// batchAck pairs a server BatchStatus with the stream it arrived on, so the supervisor can discard stale acks
+type batchAck struct {
+	stream *streamInfo             // stream whose receiverLoop read the status
+	status *statefulpb.BatchStatus // acknowledgment message as received
+}
+
+// streamWorker manages one bidirectional gRPC stream at a time and rotates it on failure or lifetime expiry.
+// Threading: one supervisor/sender goroutine (supervisorLoop) plus one receiver goroutine (receiverLoop) per stream.
+type streamWorker struct {
+	// Configuration
+	workerID            string
+	destinationsContext *client.DestinationsContext
+
+	// Pipeline integration: payloads arrive on inputChan; acked payloads are forwarded on outputChan
+	inputChan  chan *message.Payload
+	outputChan chan *message.Payload // Auditor channel; nil until start() fetches it from sink
+	sink       sender.Sink           // For getting auditor channel
+
+	// gRPC connection management (shared with other streams)
+	conn   *grpc.ClientConn
+	client statefulpb.StatefulLogsServiceClient
+
+	// Stream management (currentStream is nil while no stream is installed)
+	currentStream  *streamInfo
+	streamState    streamState
+	recvFailureCh  chan *streamInfo          // Signal receiver failure with stream identity
+	batchAckCh     chan *batchAck            // Signal batch acknowledgments with stream identity
+	streamReadyCh  chan streamCreationResult // Signal when async stream creation completes
+	streamLifetime time.Duration
+	streamTimer    *clock.Timer // Timer for stream lifetime, trigger soft rotation
+	drainTimer     *clock.Timer // In case of unacked payloads, drain/wait before soft rotation
+	backoffTimer   *clock.Timer // In case of stream creation failure, backoff before retrying
+
+	// Inflight tracking - tracks sent (awaiting ack) and buffered (not sent) payloads
+	inflight *inflightTracker
+
+	// Retry backoff: nbErrors is updated through backoffPolicy after each stream creation result
+	backoffPolicy backoff.Policy
+	nbErrors      int
+
+	// Control: stopChan is closed by stop(); done is closed when supervisorLoop returns
+	stopChan chan struct{}
+	done     chan struct{}
+	clock    clock.Clock
+}
+
+// newStreamWorker creates a gRPC stream worker that uses the real clock and a default-capacity inflight tracker
+func newStreamWorker(
+	workerID string,
+	inputChan chan *message.Payload,
+	destinationsCtx *client.DestinationsContext,
+	conn *grpc.ClientConn,
+	client statefulpb.StatefulLogsServiceClient,
+	sink sender.Sink,
+	endpoint config.Endpoint,
+	streamLifetime time.Duration,
+) *streamWorker {
+	wallClock := clock.New() // tests call newStreamWorkerWithClock directly with their own clock
+	return newStreamWorkerWithClock(workerID, inputChan, destinationsCtx, conn, client, sink, endpoint, streamLifetime, wallClock, nil)
+}
+
+// newStreamWorkerWithClock creates a stream worker around an injectable clock and an optional caller-supplied inflight tracker
+func newStreamWorkerWithClock(
+	workerID string,
+	inputChan chan *message.Payload,
+	destinationsCtx *client.DestinationsContext,
+	conn *grpc.ClientConn,
+	client statefulpb.StatefulLogsServiceClient,
+	sink sender.Sink,
+	endpoint config.Endpoint,
+	streamLifetime time.Duration,
+	clock clock.Clock,
+	inflightTracker *inflightTracker,
+) *streamWorker {
+	// Retry policy for stream creation, built from the endpoint's backoff settings.
+	policy := backoff.NewExpBackoffPolicy(
+		endpoint.BackoffFactor, endpoint.BackoffBase, endpoint.BackoffMax,
+		endpoint.RecoveryInterval, endpoint.RecoveryReset,
+	)
+	// A nil tracker selects the default capacity; tests may pass their own.
+	tracker := inflightTracker
+	if tracker == nil {
+		tracker = newInflightTracker(maxInflight)
+	}
+	return &streamWorker{
+		// Identity and pipeline wiring; outputChan stays nil until start().
+		workerID:            workerID,
+		destinationsContext: destinationsCtx,
+		inputChan:           inputChan,
+		outputChan:          nil,
+		sink:                sink,
+		conn:                conn,
+		client:              client,
+
+		// Stream lifecycle: starts disconnected, with all three timers unarmed.
+		streamState:    disconnected,
+		streamLifetime: streamLifetime,
+		streamTimer:    createStoppedTimer(clock, 0),
+		backoffTimer:   createStoppedTimer(clock, 0),
+		drainTimer:     createStoppedTimer(clock, 0),
+		clock:          clock,
+
+		// Supervisor signalling (only the ack channel is buffered), tracking and retry state.
+		recvFailureCh: make(chan *streamInfo),
+		batchAckCh:    make(chan *batchAck, batchAckChanBuffer),
+		streamReadyCh: make(chan streamCreationResult),
+		stopChan:      make(chan struct{}),
+		done:          make(chan struct{}),
+		inflight:      tracker,
+		backoffPolicy: policy,
+		nbErrors:      0,
+	}
+}
+
+// start fetches the auditor channel from the sink, launches the supervisor
+// goroutine and kicks off the first asynchronous stream creation.
+func (s *streamWorker) start() {
+	id := s.workerID
+	log.Infof("Starting gRPC stream worker %s", id)
+
+	// outputChan is assigned before the supervisor goroutine exists; its ack handler reads the field.
+	s.outputChan = s.sink.Channel()
+	go s.supervisorLoop()
+	s.asyncCreateNewStream()
+	log.Infof("Worker %s: Started", id)
+}
+
+// stop signals shutdown by closing stopChan and blocks until supervisorLoop has exited; a second call would panic on the double close
+func (s *streamWorker) stop() {
+	log.Infof("Stopping gRPC stream worker %s", s.workerID)
+	close(s.stopChan) // observed by the supervisor and by every goroutine selecting on stopChan
+	<-s.done          // closed by supervisorLoop's deferred close
+	log.Infof("Worker %s: Stopped", s.workerID)
+}
+
+// supervisorLoop is the master goroutine: it sends payloads, drives the stream state machine, and closes done when it returns
+func (s *streamWorker) supervisorLoop() {
+	defer close(s.done)
+
+	// supervisor loop starts without a stream, but asyncCreateNewStream is called
+	// right after in streamWorker's start(), so we are in connecting state right away
+	s.streamState = connecting
+
+	for {
+		// Conditional inputChan - only enabled when inflight tracker has space.
+		// A receive from a nil channel never proceeds, which backpressures upstream when at capacity
+		var inputChan <-chan *message.Payload
+		if s.inflight.hasSpace() {
+			inputChan = s.inputChan // Enable reading
+		} else {
+			inputChan = nil // Disable reading
+		}
+
+		select {
+		case payload := <-inputChan:
+			// Fires in any state (gated only by inflight capacity), payload is always
+			// added to the inflight tracker; sendPayloads is a no-op unless we are active.
+			// NOTE(review): a receive from a closed inputChan (nil payload) is not checked here
+			s.inflight.append(payload)
+			s.sendPayloads()
+
+		case ack := <-s.batchAckCh:
+			// Fires in any state; acks from a stream other than currentStream are ignored by the handler
+			s.handleBatchAck(ack)
+
+		case failedStream := <-s.recvFailureCh:
+			// Fires in active/draining/connecting states; the handler only acts in active/draining
+			s.handleRecvFailure(failedStream)
+
+		case result := <-s.streamReadyCh:
+			// Fires only in connecting state
+			s.handleStreamReady(result)
+
+		case <-s.streamTimer.C:
+			// Fires only in active state (except rare timing race, it's in connecting)
+			s.handleStreamTimeout()
+
+		case <-s.drainTimer.C:
+			// Fires in draining state or (rarely) in connecting/active state
+			// If in non-draining state, it means acks arrived at the same time
+			// as the drain timer expiration, so the handler skips the signal
+			s.handleDrainTimeout()
+
+		case <-s.backoffTimer.C:
+			// Fires only in disconnected state
+			s.handleBackoffTimeout()
+
+		case <-s.stopChan:
+			// Fires in any state; stopChan is closed by stop()
+			s.handleShutdown()
+			return
+		}
+	}
+}
+
+// sendPayloads pushes every not-yet-sent payload held by the inflight tracker
+// onto the current stream, oldest first. It does nothing unless the worker is
+// active. The same routine serves newly arrived payloads and the replay of
+// buffered payloads after a stream rotation.
+func (s *streamWorker) sendPayloads() {
+	if s.streamState != active {
+		return
+	}
+
+	// One payload per iteration; the loop ends when nextToSend returns nil,
+	// i.e. when no buffered payload is left.
+	for next := s.inflight.nextToSend(); next != nil; next = s.inflight.nextToSend() {
+		// The batch ID is drawn from the tracker, which is also where
+		// handleBatchAck looks up the ID it expects the server to ack.
+		id := s.inflight.nextBatchID()
+
+		// TODO Send call can block, by TCP/HTTP2 flow controls
+		sendErr := s.currentStream.stream.Send(createBatch(next.Encoded, id))
+		if sendErr != nil {
+			// Not marked as sent, so the payload stays buffered and is
+			// retried once a new stream is active.
+			log.Warnf("Worker %s: Send failed, initiating stream rotation: %v", s.workerID, sendErr)
+			s.beginStreamRotation()
+			return
+		}
+
+		// Sent: from here on the payload is waiting for its ack.
+		s.inflight.markSent()
+	}
+}
+
+// sendSnapshot transmits the inflight tracker's snapshot as batch 0 of the
+// current stream. It returns true when there is no snapshot to send or the
+// send succeeded. On a send error it starts a stream rotation and returns
+// false, so the caller must not keep using the stream.
+func (s *streamWorker) sendSnapshot() bool {
+	snapshot := s.inflight.getSnapshot()
+	if snapshot == nil {
+		// A nil snapshot means there is no state to transmit.
+		return true
+	}
+
+	// Batch ID 0 is reserved for the snapshot.
+	err := s.currentStream.stream.Send(createBatch(snapshot, 0))
+	if err == nil {
+		log.Infof("Worker %s: Sent snapshot (%d bytes)", s.workerID, len(snapshot))
+		return true
+	}
+
+	// The send failed: rotate the stream and tell the caller to stop.
+	log.Warnf("Worker %s: Failed to send snapshot: %v, initiating stream rotation", s.workerID, err)
+	s.beginStreamRotation()
+	return false
+}
+
+// handleBatchAck processes one BatchStatus from the server: it validates the ack against the
+// head of the inflight tracker, pops the acknowledged payload and forwards it to the auditor.
+func (s *streamWorker) handleBatchAck(ack *batchAck) {
+	// Ignore stale acks from old streams
+	if ack.stream != s.currentStream {
+		return
+	}
+	receivedBatchID := uint32(ack.status.BatchId)
+
+	// Handle snapshot/state ack (batch 0) - no payload to pop
+	if receivedBatchID == 0 {
+		return
+	}
+
+	// The two errors below should never happen if Intake is implemented
+	// correctly, but we are being defensive.
+
+	// Verify we have "sent payloads" awaiting ack
+	if !s.inflight.hasUnacked() {
+		log.Errorf("Worker %s: Received ack for batch %d but no sent payloads in inflight tracker, "+
+			"irrecoverable error - initiating stream rotation", s.workerID, receivedBatchID)
+		s.beginStreamRotation()
+		return
+	}
+
+	// Verify batchID matches expected sequence (acks must arrive in send order)
+	expectedBatchID := s.inflight.getHeadBatchID()
+	if receivedBatchID != expectedBatchID {
+		log.Errorf("Worker %s: BatchID mismatch! Expected %d, received %d. "+
+			"Out-of-order or duplicate ack, irrecoverable error - initiating stream rotation",
+			s.workerID, expectedBatchID, receivedBatchID)
+		s.beginStreamRotation()
+		return
+	}
+
+	// Pop the acknowledged payload and hand it to the auditor without blocking the supervisor
+	payload := s.inflight.pop()
+	if s.outputChan != nil {
+		select {
+		case s.outputChan <- payload:
+			// Successfully sent to auditor
+		default: // channel full: the payload is already popped, so the auditor never sees this ack
+			log.Warnf("Worker %s: Auditor channel full, dropping ack for batch %d", s.workerID, receivedBatchID)
+		}
+	}
+
+	// If in Draining state and all acks received, transition to Connecting
+	if s.streamState == draining && !s.inflight.hasUnacked() {
+		log.Infof("Worker %s: All acks received in draining state, proceeding with rotation", s.workerID)
+		s.drainTimer.Stop()
+		s.beginStreamRotation()
+	}
+}
+
+// handleRecvFailure reacts to a receiver-failure signal by rotating the stream, provided
+// the signal is for the current stream and the worker is active or draining.
+func (s *streamWorker) handleRecvFailure(failedStream *streamInfo) {
+	stale := failedStream != s.currentStream
+	if stale || (s.streamState != active && s.streamState != draining) {
+		return
+	}
+	log.Infof("Worker %s: Receiver reported failure (state: %v), initiating stream rotation", s.workerID, s.streamState)
+	s.beginStreamRotation()
+}
+
+// handleStreamReady consumes the result of an async stream creation: an error leads to backoff, a stream is activated
+func (s *streamWorker) handleStreamReady(result streamCreationResult) {
+	if s.streamState != connecting {
+		s.closeStream(result.info) // stray result: release the stream (nil-safe) instead of leaking its context
+		return
+	}
+	if result.err != nil {
+		s.nbErrors = s.backoffPolicy.IncError(s.nbErrors)
+		s.handleStreamCreationFailure(result.err)
+		return
+	}
+	s.nbErrors = s.backoffPolicy.DecError(s.nbErrors)
+	s.finishStreamRotation(result.info)
+}
+
+// handleStreamTimeout runs when the stream lifetime timer fires. In the active
+// state it rotates at once if nothing awaits an ack; otherwise it enters
+// draining and arms the drain timer. In any other state it does nothing.
+func (s *streamWorker) handleStreamTimeout() {
+	if s.streamState != active {
+		return
+	}
+	if !s.inflight.hasUnacked() {
+		log.Infof("Worker %s: Stream lifetime expired with no unacked payloads, rotating immediately", s.workerID)
+		s.beginStreamRotation()
+		return
+	}
+	log.Infof("Worker %s: Stream lifetime expired with %d unacked payloads, entering Draining state",
+		s.workerID, s.inflight.sentCount())
+	s.streamState = draining
+	s.drainTimer.Reset(drainTimeout)
+}
+
+// handleDrainTimeout runs when the drain timer fires. In the draining state
+// it stops waiting for outstanding acks and rotates the stream; in any other
+// state the signal is ignored.
+func (s *streamWorker) handleDrainTimeout() {
+	if s.streamState == draining {
+		log.Warnf("Worker %s: Drain timer expired in draining state, proceeding with rotation (may lose some acks)",
+			s.workerID)
+		s.beginStreamRotation()
+	}
+}
+
+// handleBackoffTimeout runs when the backoff timer fires. In the disconnected
+// state it moves to connecting and starts another asynchronous stream
+// creation; in any other state the signal is ignored.
+func (s *streamWorker) handleBackoffTimeout() {
+	if s.streamState == disconnected {
+		log.Infof("Worker %s: Backoff timer expired, retrying stream creation (error count: %d)", s.workerID, s.nbErrors)
+		s.streamState = connecting
+		s.asyncCreateNewStream()
+	}
+}
+
+// handleShutdown is the supervisor's final step: it disarms the three timers and closes the current stream, if any
+func (s *streamWorker) handleShutdown() {
+	log.Infof("Worker %s: Shutting down", s.workerID)
+	for _, timer := range []*clock.Timer{s.streamTimer, s.backoffTimer, s.drainTimer} {
+		timer.Stop()
+	}
+	s.closeStream(s.currentStream)
+}
+
+// beginStreamRotation abandons the current stream and requests a replacement:
+// it closes the stream, forgets it, disarms every timer, moves the worker to
+// connecting and launches an asynchronous stream creation.
+func (s *streamWorker) beginStreamRotation() {
+	log.Infof("Worker %s: Beginning stream rotation (state: %v → connecting)", s.workerID, s.streamState)
+	old := s.currentStream
+	s.currentStream = nil
+	s.closeStream(old)
+	for _, timer := range []*clock.Timer{s.streamTimer, s.drainTimer, s.backoffTimer} {
+		timer.Stop()
+	}
+	s.streamState = connecting
+	s.asyncCreateNewStream()
+}
+
+// finishStreamRotation installs a freshly created stream (connecting → active).
+// It starts that stream's receiver goroutine, arms the lifetime timer, turns
+// every unacked payload back into a buffered one, and then transmits the
+// snapshot (batch 0) followed by whatever payloads are waiting to be sent.
+func (s *streamWorker) finishStreamRotation(streamInfo *streamInfo) {
+	log.Infof("Worker %s: Finishing stream rotation (state: connecting → active)", s.workerID)
+
+	s.streamState = active
+	s.currentStream = streamInfo
+
+	// Each stream gets its own receiver goroutine.
+	go s.receiverLoop(streamInfo)
+
+	// The lifetime timer is what later triggers the soft rotation.
+	s.streamTimer.Reset(s.streamLifetime)
+
+	// Payloads sent on the previous stream but never acked must be resent,
+	// so the tracker is reset before anything goes out on the new stream.
+	s.inflight.resetOnRotation()
+
+	log.Infof("Worker %s: Stream rotation complete, now active", s.workerID)
+
+	// The snapshot goes first. If sending it fails, sendSnapshot has already
+	// started another rotation, so nothing more is sent on this stream.
+	// Otherwise the buffered payloads follow (batch 1, 2, ...).
+	if s.sendSnapshot() && s.inflight.hasUnSent() {
+		s.sendPayloads()
+	}
+}
+
+// handleStreamCreationFailure schedules the next stream-creation attempt after
+// a failure. The delay comes from the backoff policy and the current error
+// count; the worker stays disconnected until the backoff timer fires.
+func (s *streamWorker) handleStreamCreationFailure(err error) {
+	delay := s.backoffPolicy.GetBackoffDuration(s.nbErrors)
+	log.Warnf("Worker %s: Stream creation failed: %v. Backing off for %v (error count: %d)",
+		s.workerID, err, delay, s.nbErrors)
+
+	if delay > 0 {
+		s.streamState = disconnected
+		s.backoffTimer.Reset(delay)
+		return
+	}
+
+	// Defensive path: a non-positive delay is not expected, so retry right away.
+	log.Infof("Worker %s: Zero backoff duration, retrying immediately", s.workerID)
+	s.streamState = connecting
+	s.asyncCreateNewStream()
+}
+
+// asyncCreateNewStream creates a new gRPC stream asynchronously
+// Signals completion (success or failure) via streamReadyCh
+func (s *streamWorker) asyncCreateNewStream() {
+	go func() {
+		log.Infof("Worker %s: Starting async stream creation", s.workerID)
+
+		var result streamCreationResult
+
+		// Ensure the connection is ready, can block up to connectionTimeout
+		err := s.ensureConnectionReady()
+		if err != nil {
+			log.Errorf("Worker %s: Async stream creation failed (connection failure) %v", s.workerID, err)
+			result = streamCreationResult{info: nil, err: err}
+		} else {
+			// Create per-stream context derived from destinations context
+			streamCtx, streamCancel := context.WithCancel(s.destinationsContext.Context())
+
+			// Create the stream, shouldn't block at this point.
+			stream, err := s.client.LogsStream(streamCtx)
+
+			if err != nil {
+				streamCancel()
+				log.Errorf("Worker %s: Async stream creation failed (post-connection): %v", s.workerID, err)
+				result = streamCreationResult{info: nil, err: err}
+			} else {
+				log.Infof("Worker %s: Async stream creation succeeded", s.workerID)
+				result = streamCreationResult{
+					info: &streamInfo{
+						stream: stream,
+						ctx:    streamCtx,
+						cancel: streamCancel,
+					},
+					err: nil,
+				}
+			}
+		}
+
+		// Signal result to supervisor (blocks until received or stopped)
+		select {
+		case s.streamReadyCh <- result:
+		case <-s.stopChan:
+			// Worker stopped before supervisor could receive result
+			// We own cleanup since supervisor never got the stream
+			if result.info != nil {
+				s.closeStream(result.info)
+			}
+		}
+	}()
+}
+
+// ensureConnectionReady waits for the shared gRPC connection to report Ready.
+// It returns nil once it is Ready (or when conn is nil, as in tests with mock
+// clients), an error if the connection is shut down, and the context error if
+// connectionTimeout elapses or the destinations context is cancelled first.
+func (s *streamWorker) ensureConnectionReady() error {
+	if s.conn == nil {
+		return nil
+	}
+
+	waitCtx, cancel := context.WithTimeout(s.destinationsContext.Context(), connectionTimeout)
+	defer cancel()
+
+	// Nudge dialing if idle; doesn't block
+	s.conn.Connect()
+
+	// Re-read the state after every change until it is Ready.
+	for state := s.conn.GetState(); state != connectivity.Ready; state = s.conn.GetState() {
+		if state == connectivity.Shutdown {
+			return errors.New("gRPC conn is shutdown")
+		}
+		// false means waitCtx ended (timeout or cancellation) before a change.
+		if !s.conn.WaitForStateChange(waitCtx, state) {
+			return waitCtx.Err()
+		}
+	}
+	return nil
+}
+
+// closeStream calls CloseSend on the given stream, then cancels its context; nil is ignored and a CloseSend error is only debug-logged
+func (s *streamWorker) closeStream(streamInfo *streamInfo) {
+	if target := streamInfo; target != nil {
+		if closeErr := target.stream.CloseSend(); closeErr != nil {
+			log.Debugf("Worker %s: Error closing stream send: %v", s.workerID, closeErr)
+		}
+		target.cancel()
+	}
+}
+
+// receiverLoop runs in the receiver goroutine to process server responses for a specific stream
+// The receiver is stateless - it only forwards acks/errors to the supervisor
+// It exits on the first Recv error: silently if the stream context is done, otherwise after signaling the supervisor
+func (s *streamWorker) receiverLoop(streamInfo *streamInfo) {
+	stream := streamInfo.stream
+	for {
+		msg, err := stream.Recv()
+		if err == nil {
+			// Normal message (batch acknowledgment) - forward to supervisor
+			s.signalBatchAck(streamInfo, msg)
+			continue
+		}
+
+		// Clean inbound close (server OK in trailers): policy = signal receiver failure
+		if errors.Is(err, io.EOF) {
+			log.Warnf("Worker %s: Stream closed by server", s.workerID)
+			s.signalRecvFailure(streamInfo)
+			return
+		}
+
+		// Local cancel/deadline (supervisor rotated, worker shutdown): just exit, nothing to signal
+		ctxErr := streamInfo.ctx.Err()
+		if errors.Is(ctxErr, context.Canceled) || errors.Is(ctxErr, context.DeadlineExceeded) {
+			log.Infof("Worker %s: Stream context cancelled, receiver exiting", s.workerID)
+			return
+		}
+
+		// Stream-level gRPC status (non-OK): RPC is over → classify by status code
+		if st, ok := status.FromError(err); ok {
+			log.Warnf("Worker %s: recv: gRPC error (code %v): %v", s.workerID, st.Code(), err)
+
+			switch st.Code() {
+			case codes.Unauthenticated, codes.PermissionDenied:
+				// Meant to be terminal until fixed; handleIrrecoverableError currently still signals a receiver failure
+				s.handleIrrecoverableError("auth/perm: "+st.Message(), streamInfo)
+				return
+			case codes.InvalidArgument, codes.FailedPrecondition, codes.OutOfRange, codes.Unimplemented:
+				// Meant to be a terminal protocol/semantic issue; currently handled the same way as above
+				s.handleIrrecoverableError("protocol: "+st.Message(), streamInfo)
+				return
+			default:
+				// All other non-OK statuses: signal receiver failure
+				s.signalRecvFailure(streamInfo)
+				return
+			}
+		}
+
+		// Transport error without status (RST/GOAWAY/TLS, socket close): signal receiver failure
+		log.Warnf("Worker %s: Transport error: %v", s.workerID, err)
+		s.signalRecvFailure(streamInfo)
+		return
+	}
+}
+
+// signalRecvFailure tells the supervisor that streamInfo's receiver failed so it can rotate the stream; gives up once stopChan is closed
+func (s *streamWorker) signalRecvFailure(streamInfo *streamInfo) {
+	// This signaling is blocking by design (recvFailureCh is unbuffered); it's okay to block
+	// the receiver, since every caller in receiverLoop returns right after signaling.
+	select {
+	case s.recvFailureCh <- streamInfo:
+	case <-s.stopChan:
+	}
+}
+
+// signalBatchAck hands a received BatchStatus, tagged with its stream, to the supervisor; once stopChan is closed it may return without delivering
+func (s *streamWorker) signalBatchAck(streamInfo *streamInfo, msg *statefulpb.BatchStatus) {
+	ack := &batchAck{stream: streamInfo, status: msg}
+	select {
+	case s.batchAckCh <- ack:
+	case <-s.stopChan:
+	}
+}
+
+// handleIrrecoverableError handles errors that should not be retried and that,
+// ideally, should block ingestion until the error is resolved. The reason string is currently unused.
+func (s *streamWorker) handleIrrecoverableError(_ string, streamInfo *streamInfo) {
+	// Currently this is treated as a stream error: it triggers a stream rotation
+	// and a retry of the same payload, which loops on. This IS NOT the desired behavior.
+	// TODO: Implement proper handling of irrecoverable errors, by blocking the ingestion
+	s.signalRecvFailure(streamInfo)
+}
+
+// createBatch wraps serialized data and its batch ID in a new StatefulBatch message
+func createBatch(data []byte, batchID uint32) *statefulpb.StatefulBatch {
+	batch := new(statefulpb.StatefulBatch)
+	batch.BatchId = batchID
+	batch.Data = data
+	return batch
+}
+
+// createStoppedTimer returns a timer from clk that is stopped and whose channel has been drained, ready for a later Reset
+func createStoppedTimer(clk clock.Clock, d time.Duration) *clock.Timer {
+	timer := clk.Timer(d)
+	if stopped := timer.Stop(); !stopped {
+		<-timer.C // already fired: consume the tick so C is empty
+	}
+	return timer
+}
diff --git a/pkg/logs/sender/grpc/stream_worker_test.go b/pkg/logs/sender/grpc/stream_worker_test.go
new file mode 100644
index 000000000000..59c5114646e2
--- /dev/null
+++ b/pkg/logs/sender/grpc/stream_worker_test.go
@@ -0,0 +1,1053 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+//go:build test
+
+package grpc
+
+import (
+	"context"
+	"errors"
+	"io"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/benbjohnson/clock"
+	"github.com/stretchr/testify/require"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
+	"google.golang.org/protobuf/proto"
+
+	"github.com/DataDog/datadog-agent/comp/logs/agent/config"
+	"github.com/DataDog/datadog-agent/pkg/logs/client"
+	"github.com/DataDog/datadog-agent/pkg/logs/message"
+	"github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb"
+)
+
+const (
+	testTimeout      = 100 * time.Millisecond // upper bound for require.Eventually and select waits
+	testTickInterval = 10 * time.Millisecond  // polling interval passed to require.Eventually
+	testShortWait    = 50 * time.Millisecond
+)
+
+// mockSink implements sender.Sink for testing; Channel() always returns the same buffered channel
+type mockSink struct {
+	outputChan chan *message.Payload
+}
+
+// newMockSink returns a mockSink whose output channel has room for 100 payloads
+func newMockSink() *mockSink {
+	sink := new(mockSink)
+	sink.outputChan = make(chan *message.Payload, 100)
+	return sink
+}
+
+// Channel returns the sink's output channel
+func (m *mockSink) Channel() chan *message.Payload { return m.outputChan }
+
+// mockLogsStream implements StatefulLogsService_LogsStreamClient for testing
+type mockLogsStream struct {
+	grpc.ClientStream // embedded to satisfy the interface; newMockLogsStream leaves it nil
+
+	mu sync.Mutex // guards sendErr, recvErr and sentBatches
+
+	// Channels for communication
+	sendCh chan *statefulpb.StatefulBatch // Batches sent by client
+	recvCh chan *statefulpb.BatchStatus   // Acks to send to client
+	errCh  chan error                     // To inject immediate errors in Recv()
+
+	// Error control
+	sendErr error // While non-nil, every Send() returns this error
+	recvErr error // While non-nil, every Recv() returns this error
+
+	// Track sent batches, in the order Send() accepted them
+	sentBatches []*statefulpb.StatefulBatch
+
+	// Context whose cancellation unblocks Send() and Recv()
+	ctx context.Context
+}
+
+// newMockLogsStream returns a mock stream bound to ctx, with buffered send/recv channels and a one-slot error channel
+func newMockLogsStream(ctx context.Context) *mockLogsStream {
+	stream := &mockLogsStream{ctx: ctx}
+	stream.sendCh = make(chan *statefulpb.StatefulBatch, 100)
+	stream.recvCh = make(chan *statefulpb.BatchStatus, 100)
+	stream.errCh = make(chan error, 1)
+	stream.sentBatches = make([]*statefulpb.StatefulBatch, 0)
+	return stream
+}
+
+// Send fails with the injected sendErr while one is set; otherwise it queues the batch on sendCh and records it, or fails once ctx is done
+func (m *mockLogsStream) Send(batch *statefulpb.StatefulBatch) error {
+	m.mu.Lock()
+	injected := m.sendErr
+	m.mu.Unlock()
+	if injected != nil {
+		return injected
+	}
+
+	select {
+	case <-m.ctx.Done():
+		return m.ctx.Err()
+	case m.sendCh <- batch:
+	}
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	m.sentBatches = append(m.sentBatches, batch)
+	return nil
+}
+
+// Recv returns the injected recvErr while one is set; otherwise it blocks for a queued ack, an injected error, or ctx cancellation
+func (m *mockLogsStream) Recv() (*statefulpb.BatchStatus, error) {
+	m.mu.Lock()
+	injected := m.recvErr
+	m.mu.Unlock()
+	if injected != nil {
+		return nil, injected
+	}
+
+	select {
+	case <-m.ctx.Done():
+		return nil, m.ctx.Err()
+	case injectedErr := <-m.errCh:
+		return nil, injectedErr
+	case status := <-m.recvCh:
+		return status, nil
+	}
+}
+
+// CloseSend does nothing in the mock and always
+// reports success.
+func (m *mockLogsStream) CloseSend() error { return nil }
+
+// setSendError installs (or, with nil, clears) the error that Send returns
+func (m *mockLogsStream) setSendError(err error) {
+	m.mu.Lock()
+	m.sendErr = err
+	m.mu.Unlock()
+}
+
+// sendAck queues an ack for batchID on recvCh, where a later Recv() picks it
+// up; it blocks if recvCh's buffer is full
+func (m *mockLogsStream) sendAck(batchID int32) {
+	ack := &statefulpb.BatchStatus{BatchId: batchID}
+	m.recvCh <- ack
+}
+
+// injectRecvError queues err on errCh (capacity 1) for a blocked or later Recv() to return; blocks while an earlier error is still unread
+func (m *mockLogsStream) injectRecvError(err error) {
+	m.errCh <- err
+}
+
+// getSentBatchCount returns how many batches Send() has recorded so far (read under mu)
+func (m *mockLogsStream) getSentBatchCount() int {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	return len(m.sentBatches)
+}
+
+// getSentBatch returns the recorded batch at index, or nil when index is out of range
+func (m *mockLogsStream) getSentBatch(index int) *statefulpb.StatefulBatch {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	if 0 <= index && index < len(m.sentBatches) {
+		return m.sentBatches[index]
+	}
+	return nil
+}
+
+// mockLogsClient implements StatefulLogsServiceClient for testing; all fields are accessed under mu
+type mockLogsClient struct {
+	mu sync.Mutex
+
+	// Control stream creation; the last three fields describe the most recently created stream
+	createStreamErr       error // While non-nil, LogsStream() returns this error
+	failStreamCreationFor int   // Fail the next N stream creation attempts, then clear createStreamErr
+	currentStream         *mockLogsStream
+	streamCtx             context.Context
+	streamCancel          context.CancelFunc
+}
+
+// newMockLogsClient returns a client with no failures configured and no
+// stream created yet.
+func newMockLogsClient() *mockLogsClient { return new(mockLogsClient) }
+
+// LogsStream returns a fresh mockLogsStream bound to a child of ctx, unless a
+// failure is configured. While failStreamCreationFor is positive each call
+// consumes one failure and returns createStreamErr (cleared together with the
+// last failure); otherwise a non-nil createStreamErr fails every call.
+func (m *mockLogsClient) LogsStream(ctx context.Context, _ ...grpc.CallOption) (statefulpb.StatefulLogsService_LogsStreamClient, error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	switch {
+	case m.failStreamCreationFor > 0:
+		// Counted failures take precedence over the sticky error.
+		failure := m.createStreamErr
+		m.failStreamCreationFor--
+		if m.failStreamCreationFor == 0 {
+			m.createStreamErr = nil
+		}
+		return nil, failure
+	case m.createStreamErr != nil:
+		return nil, m.createStreamErr
+	}
+
+	m.streamCtx, m.streamCancel = context.WithCancel(ctx)
+	m.currentStream = newMockLogsStream(m.streamCtx)
+	return m.currentStream, nil
+}
+
+// failNextStreamCreations makes the next count LogsStream() calls fail with
+// err; for a positive count, creation succeeds again once they are used up
+func (m *mockLogsClient) failNextStreamCreations(count int, err error) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	m.failStreamCreationFor, m.createStreamErr = count, err
+}
+
+// getCurrentStream returns the stream created by the most recent successful LogsStream() call, or nil if there has been none
+func (m *mockLogsClient) getCurrentStream() *mockLogsStream {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	return m.currentStream
+}
+
+// testFixture holds the mocks and settings a streamWorker test needs; worker stays nil until createWorker* is called
+type testFixture struct {
+	t              *testing.T
+	mockClock      *clock.Mock
+	mockClient     *mockLogsClient
+	mockSink       *mockSink
+	inputChan      chan *message.Payload
+	outputChan     chan *message.Payload
+	destCtx        *client.DestinationsContext
+	endpoint       config.Endpoint
+	streamLifetime time.Duration
+	worker         *streamWorker
+}
+
+// newTestFixture assembles the dependencies a streamWorker test needs: a mock
+// gRPC client, a mock sink, a buffered input channel, a started destinations
+// context, backoff settings and a mock clock. No worker is created here; call
+// createWorker or createWorkerWithInflight for that, and defer cleanup() to
+// release everything.
+func newTestFixture(t *testing.T) *testFixture {
+	// The sink is built first because two fixture fields refer to it.
+	sink := newMockSink()
+
+	// The destinations context is started here and stopped by cleanup().
+	destCtx := client.NewDestinationsContext()
+	destCtx.Start()
+
+	// Backoff settings handed to the worker's backoff policy.
+	backoffSettings := config.Endpoint{
+		BackoffFactor:    2.0,
+		BackoffBase:      1.0,
+		BackoffMax:       10.0,
+		RecoveryInterval: 2,
+		RecoveryReset:    false,
+	}
+
+	return &testFixture{
+		// Test handle and mocks.
+		t:          t,
+		mockClock:  clock.NewMock(),
+		mockClient: newMockLogsClient(),
+		mockSink:   sink,
+
+		// Payloads enter through inputChan; acked ones come out of the
+		// sink's channel, exposed as outputChan for convenience.
+		inputChan:  make(chan *message.Payload, 100),
+		outputChan: sink.outputChan,
+
+		// Worker configuration: where its contexts come from, how it backs
+		// off, and the lifetime passed to it for stream rotation.
+		destCtx:        destCtx,
+		endpoint:       backoffSettings,
+		streamLifetime: 10 * time.Second,
+	}
+}
+
+// createWorker creates a streamWorker from the fixture's components, letting the worker build its default inflight tracker
+func (f *testFixture) createWorker() *streamWorker {
+	return f.createWorkerWithInflight(nil) // nil = use default maxInflight
+}
+
+// createWorkerWithInflight builds a streamWorker from the fixture's mocks and the given inflight
+// tracker (nil selects the default), remembers it in f.worker so cleanup() can stop it, and returns it.
+func (f *testFixture) createWorkerWithInflight(inflight *inflightTracker) *streamWorker {
+	f.worker = newStreamWorkerWithClock(
+		"test-worker",
+		f.inputChan,
+		f.destCtx,
+		nil, // conn not needed with mock client
+		f.mockClient,
+		f.mockSink,
+		f.endpoint,
+		f.streamLifetime,
+		f.mockClock,
+		inflight,
+	)
+	return f.worker
+}
+
+// cleanup stops the worker, unless its supervisor has already exited, and
+// then stops the destinations context. Intended to be deferred by each test.
+func (f *testFixture) cleanup() {
+	if w := f.worker; w != nil {
+		select {
+		case <-w.done:
+			// supervisor already gone; a second stop() would close stopChan twice
+		default:
+			w.stop()
+		}
+	}
+	if ctx := f.destCtx; ctx != nil {
+		ctx.Stop()
+	}
+}
+
+// createWorkerTestPayload builds a payload whose Encoded bytes are content and
+// which carries a single metadata entry recording len(content) as RawDataLen.
+func createWorkerTestPayload(content string) *message.Payload {
+	// one metadata entry describing the whole payload
+	meta := &message.MessageMetadata{RawDataLen: len(content)}
+
+	return &message.Payload{
+		Encoded:      []byte(content),
+		MessageMetas: []*message.MessageMetadata{meta},
+	}
+}
+
+// TestStreamWorkerBasicStartStop verifies the basic lifecycle: start, reach the active state with a stream created on the mock client, then stop and observe worker.done.
+func TestStreamWorkerBasicStartStop(t *testing.T) {
+	fixture := newTestFixture(t)
+	defer fixture.cleanup()
+
+	worker := fixture.createWorker()
+
+	// Start the worker
+	worker.start()
+
+	// Wait for stream to become active (mocked stream creation should be quick). NOTE(review): streamState is polled from the test goroutine - confirm the worker synchronizes it, otherwise -race may flag this read.
+	require.Eventually(t, func() bool {
+		return worker.streamState == active
+	}, testTimeout, testTickInterval, "Worker should transition to active state")
+
+	// Verify a stream was created on the mock client
+	stream := fixture.mockClient.getCurrentStream()
+	require.NotNil(t, stream, "Stream should be created")
+
+	// Stop the worker explicitly; the deferred cleanup() then finds worker.done readable and skips a second stop()
+	worker.stop()
+
+	// Verify clean shutdown: worker.done must become readable within testTimeout
+	select {
+	case <-worker.done:
+		// Success
+	case <-time.After(testTimeout):
+		t.Fatal("Worker did not shut down in time")
+	}
+}
+
+// TestStreamWorkerSendReceive tests basic message flow: a payload written to inputChan is sent on the stream and reaches outputChan once batch 1 is acked
+func TestStreamWorkerSendReceive(t *testing.T) {
+	fixture := newTestFixture(t)
+	defer fixture.cleanup()
+
+	worker := fixture.createWorker()
+	worker.start()
+
+	// Wait for active state
+	require.Eventually(t, func() bool {
+		return worker.streamState == active
+	}, testTimeout, testTickInterval)
+
+	stream := fixture.mockClient.getCurrentStream()
+	require.NotNil(t, stream)
+
+	// Send one message through the worker's input channel
+	payload := createWorkerTestPayload("test message")
+	fixture.inputChan <- payload
+
+	// Wait for the message to be sent to the stream (exactly one batch recorded by the mock)
+	require.Eventually(t, func() bool {
+		return stream.getSentBatchCount() == 1
+	}, testTimeout, testTickInterval)
+
+	// Send ack for batch 1
+	stream.sendAck(1)
+
+	// Verify the very same payload appears in the output channel after the ack
+	select {
+	case output := <-fixture.outputChan:
+		require.Equal(t, payload, output)
+	case <-time.After(testTimeout):
+		t.Fatal("Message should appear in outputChan after ack")
+	}
+}
+
+// TestStreamWorkerReceiverFailureRotation tests stream rotation on receiver failure (io.EOF from Recv)
+// with an unacked inflight message that must be re-sent, and then acked, on the new stream
+func TestStreamWorkerReceiverFailureRotation(t *testing.T) {
+	fixture := newTestFixture(t)
+	defer fixture.cleanup()
+
+	worker := fixture.createWorker()
+	worker.start()
+
+	// Wait for active state
+	require.Eventually(t, func() bool {
+		return worker.streamState == active
+	}, testTimeout, testTickInterval)
+
+	stream1 := fixture.mockClient.getCurrentStream()
+	require.NotNil(t, stream1)
+
+	// Send 1 message
+	payload := createWorkerTestPayload("test message")
+	fixture.inputChan <- payload
+
+	// Wait for message to be sent to stream1
+	require.Eventually(t, func() bool {
+		return stream1.getSentBatchCount() == 1
+	}, testTimeout, testTickInterval)
+
+	// Give receiverLoop time to enter Recv() and block (real-time sleep, not the mock clock)
+	time.Sleep(testShortWait)
+
+	// Inject receiver error (this unblocks Recv() and triggers stream rotation)
+	// Note: We do NOT send an ack, so the message stays inflight
+	stream1.injectRecvError(io.EOF)
+
+	// Wait for rotation to complete (stream changes and state is active again)
+	// Note: Rotation is very fast with mocks, so we only check the end result, not intermediate states
+	var stream2 *mockLogsStream
+	require.Eventually(t, func() bool {
+		stream2 = fixture.mockClient.getCurrentStream()
+		return stream2 != nil && stream2 != stream1 && worker.streamState == active
+	}, testTimeout, testTickInterval, "Should complete stream rotation with new stream")
+
+	// The inflight message should be re-sent on the new stream (after rotation reset, it's batch 1 again)
+	require.Eventually(t, func() bool {
+		return stream2.getSentBatchCount() == 1
+	}, testTimeout, testTickInterval, "Inflight message should be re-sent on new stream")
+
+	// Send ack for batch 1 on new stream
+	stream2.sendAck(1)
+
+	// Verify the original payload appears in the output channel
+	select {
+	case output := <-fixture.outputChan:
+		require.Equal(t, payload, output)
+	case <-time.After(testTimeout):
+		t.Fatal("Message should appear in outputChan after ack on new stream")
+	}
+}
+
+// TestStreamWorkerStreamTimeout tests stream rotation triggered by stream timer expiration while nothing is inflight
+func TestStreamWorkerStreamTimeout(t *testing.T) {
+	fixture := newTestFixture(t)
+	defer fixture.cleanup()
+
+	worker := fixture.createWorker()
+	worker.start()
+
+	// Wait for active state
+	require.Eventually(t, func() bool {
+		return worker.streamState == active
+	}, testTimeout, testTickInterval)
+
+	stream1 := fixture.mockClient.getCurrentStream()
+	require.NotNil(t, stream1)
+
+	// Advance the mock clock past the stream lifetime to trigger stream timeout
+	fixture.mockClock.Add(fixture.streamLifetime + time.Second)
+
+	// Wait for rotation to complete (new stream created and active)
+	var stream2 *mockLogsStream
+	require.Eventually(t, func() bool {
+		stream2 = fixture.mockClient.getCurrentStream()
+		return stream2 != nil && stream2 != stream1 && worker.streamState == active
+	}, testTimeout, testTickInterval, "Should rotate to new stream after timer expires")
+
+	// Send a message on the new stream
+	payload := createWorkerTestPayload("test on stream2")
+	fixture.inputChan <- payload
+
+	// Wait for message to be sent on stream2
+	require.Eventually(t, func() bool {
+		return stream2.getSentBatchCount() == 1
+	}, testTimeout, testTickInterval, "Message should be sent on new stream")
+
+	// Send ack for batch 1 on stream2
+	stream2.sendAck(1)
+
+	// Verify the payload appears in the output channel
+	select {
+	case output := <-fixture.outputChan:
+		require.Equal(t, payload, output)
+	case <-time.After(testTimeout):
+		t.Fatal("Message should appear in outputChan after ack")
+	}
+}
+
+// TestStreamWorkerStreamTimeoutWithDrain tests graceful rotation when the stream timer expires with inflight messages: the worker drains, buffers new input, and rotates once the pending ack arrives
+func TestStreamWorkerStreamTimeoutWithDrain(t *testing.T) {
+	fixture := newTestFixture(t)
+	defer fixture.cleanup()
+
+	worker := fixture.createWorker()
+	worker.start()
+
+	// Wait for active state
+	require.Eventually(t, func() bool {
+		return worker.streamState == active
+	}, testTimeout, testTickInterval)
+
+	stream1 := fixture.mockClient.getCurrentStream()
+	require.NotNil(t, stream1)
+
+	// Step 1: Send 1 message on stream1, don't send ack
+	payload1 := createWorkerTestPayload("message 1")
+	fixture.inputChan <- payload1
+
+	// Wait for message to be sent on stream1
+	require.Eventually(t, func() bool {
+		return stream1.getSentBatchCount() == 1
+	}, testTimeout, testTickInterval)
+
+	// Step 2 & 3: Advance the mock clock to trigger stream timeout, verify draining state
+	fixture.mockClock.Add(fixture.streamLifetime + time.Second)
+
+	// Should transition to draining (not connecting) because there's an unacked message
+	require.Eventually(t, func() bool {
+		return worker.streamState == draining
+	}, testTimeout, testTickInterval, "Should transition to draining state with unacked messages")
+
+	// Step 4: Send another message, verify it's buffered (NOT sent on stream1)
+	payload2 := createWorkerTestPayload("message 2")
+	fixture.inputChan <- payload2
+
+	// Give time for message to be processed if it was going to be sent (real-time sleep)
+	time.Sleep(testShortWait)
+
+	// stream1 should still only have 1 batch sent
+	require.Equal(t, 1, stream1.getSentBatchCount(), "Message 2 should be buffered, not sent on stream1")
+
+	// Step 5 & 6 & 7: Send ack for batch 1, verify message 1 appears in output
+	stream1.sendAck(1)
+
+	select {
+	case output := <-fixture.outputChan:
+		require.Equal(t, payload1, output, "First message should appear in output")
+	case <-time.After(testTimeout):
+		t.Fatal("Message 1 should appear in outputChan after ack")
+	}
+
+	// Step 8: Verify stream2 is created (draining → connecting → active)
+	var stream2 *mockLogsStream
+	require.Eventually(t, func() bool {
+		stream2 = fixture.mockClient.getCurrentStream()
+		return stream2 != nil && stream2 != stream1 && worker.streamState == active
+	}, testTimeout, testTickInterval, "Should complete rotation to new stream after ack received")
+
+	// Step 9: Verify message 2 is sent on stream2 (batch ID resets to 1 after rotation)
+	require.Eventually(t, func() bool {
+		return stream2.getSentBatchCount() == 1
+	}, testTimeout, testTickInterval, "Buffered message 2 should be sent on new stream")
+
+	// Send ack for batch 1 on stream2; the payload that comes out must be the second message
+	stream2.sendAck(1)
+
+	select {
+	case output := <-fixture.outputChan:
+		require.Equal(t, payload2, output, "Second message should appear in output")
+	case <-time.After(testTimeout):
+		t.Fatal("Message 2 should appear in outputChan after ack on stream2")
+	}
+}
+
+// TestStreamWorkerDrainTimeout tests forced rotation when the drain timer expires without receiving all acks; the unacked message must be replayed on the new stream
+func TestStreamWorkerDrainTimeout(t *testing.T) {
+	fixture := newTestFixture(t)
+	defer fixture.cleanup()
+
+	worker := fixture.createWorker()
+	worker.start()
+
+	// Wait for active state
+	require.Eventually(t, func() bool {
+		return worker.streamState == active
+	}, testTimeout, testTickInterval)
+
+	stream1 := fixture.mockClient.getCurrentStream()
+	require.NotNil(t, stream1)
+
+	// Step 1: Send message on stream1, don't send ack (stays inflight)
+	payload := createWorkerTestPayload("message 1")
+	fixture.inputChan <- payload
+
+	// Wait for message to be sent on stream1
+	require.Eventually(t, func() bool {
+		return stream1.getSentBatchCount() == 1
+	}, testTimeout, testTickInterval)
+
+	// Step 2: Advance the mock clock to trigger stream timeout → enter draining
+	fixture.mockClock.Add(fixture.streamLifetime + time.Second)
+
+	require.Eventually(t, func() bool {
+		return worker.streamState == draining
+	}, testTimeout, testTickInterval, "Should transition to draining state")
+
+	// Step 3: Advance the mock clock past drainTimeout (without sending ack) → force rotation
+	fixture.mockClock.Add(drainTimeout + time.Second)
+
+	// Step 4: Verify stream2 is created (draining → connecting → active)
+	var stream2 *mockLogsStream
+	require.Eventually(t, func() bool {
+		stream2 = fixture.mockClient.getCurrentStream()
+		return stream2 != nil && stream2 != stream1 && worker.streamState == active
+	}, testTimeout, testTickInterval, "Should complete rotation to new stream after drain timeout")
+
+	// Step 5: Verify batch 1 is re-sent on stream2 (inflight message replayed)
+	require.Eventually(t, func() bool {
+		return stream2.getSentBatchCount() == 1
+	}, testTimeout, testTickInterval, "Inflight message should be re-sent on new stream")
+
+	// Send ack for batch 1 on stream2
+	stream2.sendAck(1)
+
+	// Verify the original payload appears in the output channel
+	select {
+	case output := <-fixture.outputChan:
+		require.Equal(t, payload, output)
+	case <-time.After(testTimeout):
+		t.Fatal("Message should appear in outputChan after ack on new stream")
+	}
+}
+
+// TestStreamWorkerBackoff tests recovery after a stream creation failure: the worker goes disconnected, then becomes active once the backoff delay elapses on the mock clock
+func TestStreamWorkerBackoff(t *testing.T) {
+	fixture := newTestFixture(t)
+	defer fixture.cleanup()
+
+	worker := fixture.createWorker()
+
+	// Configure mock to fail stream creation once, then succeed
+	testErr := errors.New("simulated stream creation failure")
+	fixture.mockClient.failNextStreamCreations(1, testErr)
+
+	// Start worker (will attempt to create stream and should fail)
+	worker.start()
+
+	// Should fail to create stream and enter disconnected state
+	require.Eventually(t, func() bool {
+		return worker.streamState == disconnected
+	}, testTimeout, testTickInterval, "Should transition to disconnected state after stream creation failure")
+
+	// Verify no stream was created
+	require.Nil(t, fixture.mockClient.getCurrentStream(), "No stream should be created on error")
+
+	// Advance the mock clock by 500ms on every poll so the backoff timer eventually fires, and verify the stream is established
+	// For first error, backoff is between 1-2 seconds (base=1s, factor=2, jitter) - NOTE(review): range not verifiable from this file; confirm against the backoff policy
+	var stream *mockLogsStream
+	require.Eventually(t, func() bool {
+		fixture.mockClock.Add(500 * time.Millisecond)
+		stream = fixture.mockClient.getCurrentStream()
+		return stream != nil && worker.streamState == active
+	}, testTimeout, testTickInterval, "Should transition to active state after backoff expires")
+
+	// Verify we can send a message on the new stream
+	payload := createWorkerTestPayload("test message")
+	fixture.inputChan <- payload
+
+	require.Eventually(t, func() bool {
+		return stream.getSentBatchCount() == 1
+	}, testTimeout, testTickInterval, "Message should be sent on new stream")
+
+	stream.sendAck(1)
+
+	select {
+	case output := <-fixture.outputChan:
+		require.Equal(t, payload, output)
+	case <-time.After(testTimeout):
+		t.Fatal("Message should appear in outputChan after ack")
+	}
+}
+
+// TestStreamWorkerBackpressure verifies that the worker stops consuming inputChan when inflight is full and resumes once an ack frees space
+func TestStreamWorkerBackpressure(t *testing.T) {
+	fixture := newTestFixture(t)
+	defer fixture.cleanup()
+
+	// Use small inflight capacity (5) so the tracker fills after a handful of messages
+	smallInflight := newInflightTracker(5)
+	worker := fixture.createWorkerWithInflight(smallInflight)
+	worker.start()
+
+	// Wait for active state
+	require.Eventually(t, func() bool {
+		return worker.streamState == active
+	}, testTimeout, testTickInterval)
+
+	stream := fixture.mockClient.getCurrentStream()
+	require.NotNil(t, stream)
+
+	// Send 5 messages (don't send acks, so they stay in "sent" state and fill inflight)
+	for i := 0; i < 5; i++ {
+		fixture.inputChan <- createWorkerTestPayload("test")
+	}
+
+	// Wait for inflight to be full
+	require.Eventually(t, func() bool {
+		return !worker.inflight.hasSpace()
+	}, testTimeout, testTickInterval, "Inflight should be full")
+
+	// Verify backpressure: send one more message, it should NOT be consumed (it stays queued in the buffered inputChan)
+	fixture.inputChan <- createWorkerTestPayload("blocked")
+	time.Sleep(testShortWait)
+	require.Equal(t, 1, len(fixture.inputChan), "Message should remain in inputChan due to backpressure")
+
+	// Send ack for batch 1 to free up space
+	stream.sendAck(1)
+
+	// Verify backpressure released: the queued message should now be consumed
+	require.Eventually(t, func() bool {
+		return len(fixture.inputChan) == 0
+	}, testTimeout, testTickInterval, "Message should be consumed after ack frees space")
+}
+
+// TestStreamWorkerErrorRecovery tests that Send() and Recv() failures each trigger rotation to a new stream and that the affected message is retried there
+func TestStreamWorkerErrorRecovery(t *testing.T) {
+	fixture := newTestFixture(t)
+	defer fixture.cleanup()
+
+	worker := fixture.createWorker()
+	worker.start()
+
+	// Wait for initial stream to be active
+	var stream1 *mockLogsStream
+	require.Eventually(t, func() bool {
+		stream1 = fixture.mockClient.getCurrentStream()
+		return stream1 != nil && worker.streamState == active
+	}, testTimeout, testTickInterval, "Worker should reach active state")
+
+	// Part 1: Inject send error BEFORE sending message
+	stream1.setSendError(errors.New("simulated send failure"))
+
+	// Send message - this will trigger Send() failure and rotation
+	payload := createWorkerTestPayload("test message")
+	fixture.inputChan <- payload
+
+	// Wait for stream rotation (new stream created)
+	var stream2 *mockLogsStream
+	require.Eventually(t, func() bool {
+		stream2 = fixture.mockClient.getCurrentStream()
+		return stream2 != nil && stream2 != stream1 && worker.streamState == active
+	}, testTimeout, testTickInterval, "Worker should rotate to new stream after send error")
+
+	// New stream should have retried the message (batch 1)
+	require.Eventually(t, func() bool {
+		return stream2.getSentBatchCount() == 1
+	}, testTimeout, testTickInterval, "Message should be retried on new stream")
+
+	// Send ack on new stream
+	stream2.sendAck(1)
+
+	// Verify the first payload appears in outputChan
+	select {
+	case output := <-fixture.outputChan:
+		require.Equal(t, payload, output)
+	case <-time.After(testTimeout):
+		t.Fatal("Message should appear in outputChan after rotation and ack")
+	}
+
+	// Part 2: Test injectRecvError with retriable gRPC error
+	// Inject recv error (codes.Unavailable falls into default case -> rotation)
+	stream2.injectRecvError(status.Error(codes.Unavailable, "simulated unavailable error"))
+
+	// Send another message (it may be accepted before or after the rotation; either way it must end up on the new stream)
+	payload2 := createWorkerTestPayload("test message 2")
+	fixture.inputChan <- payload2
+
+	// Wait for stream rotation (new stream created)
+	var stream3 *mockLogsStream
+	require.Eventually(t, func() bool {
+		stream3 = fixture.mockClient.getCurrentStream()
+		return stream3 != nil && stream3 != stream2 && worker.streamState == active
+	}, testTimeout, testTickInterval, "Worker should rotate to new stream after recv error")
+
+	// New stream should carry the message (batch 1 - reset after rotation)
+	require.Eventually(t, func() bool {
+		return stream3.getSentBatchCount() == 1
+	}, testTimeout, testTickInterval, "Message should be retried on new stream after recv error")
+
+	// Send ack on new stream
+	stream3.sendAck(1)
+
+	// Verify the second payload appears in outputChan
+	select {
+	case output := <-fixture.outputChan:
+		require.Equal(t, payload2, output)
+	case <-time.After(testTimeout):
+		t.Fatal("Message should appear in outputChan after recv error rotation and ack")
+	}
+}
+
+// Helper functions to create Datum objects for testing
+
+// createPatternDefine returns a Datum wrapping a PatternDefine with the given id and template.
+func createPatternDefine(id uint64, template string) *statefulpb.Datum {
+	define := &statefulpb.PatternDefine{PatternId: id, Template: template}
+	return &statefulpb.Datum{
+		Data: &statefulpb.Datum_PatternDefine{PatternDefine: define},
+	}
+}
+
+// createPatternDelete returns a Datum wrapping a PatternDelete for the given pattern id.
+func createPatternDelete(id uint64) *statefulpb.Datum {
+	del := &statefulpb.PatternDelete{PatternId: id}
+	return &statefulpb.Datum{
+		Data: &statefulpb.Datum_PatternDelete{PatternDelete: del},
+	}
+}
+
+// createDictEntryDefine returns a Datum wrapping a DictEntryDefine with the given id and value.
+func createDictEntryDefine(id uint64, value string) *statefulpb.Datum {
+	define := &statefulpb.DictEntryDefine{Id: id, Value: value}
+	return &statefulpb.Datum{
+		Data: &statefulpb.Datum_DictEntryDefine{DictEntryDefine: define},
+	}
+}
+
+// createDictEntryDelete returns a Datum wrapping a DictEntryDelete for the given entry id.
+func createDictEntryDelete(id uint64) *statefulpb.Datum {
+	del := &statefulpb.DictEntryDelete{Id: id}
+	return &statefulpb.Datum{
+		Data: &statefulpb.Datum_DictEntryDelete{DictEntryDelete: del},
+	}
+}
+
+// createPayloadWithState builds a test payload for content and, when stateChanges is
+// non-empty, attaches them as the payload's StatefulExtra.
+func createPayloadWithState(content string, stateChanges []*statefulpb.Datum) *message.Payload {
+	result := createWorkerTestPayload(content)
+	if len(stateChanges) == 0 {
+		// No state changes: StatefulExtra is left unset
+		return result
+	}
+	result.StatefulExtra = &StatefulExtra{StateChanges: stateChanges}
+	return result
+}
+
+// verifySnapshotContents asserts that batch is a snapshot (batch ID 0) whose Data decodes to a
+// DatumSequence containing exactly the expected pattern and dictionary definitions.
+// Any datum other than PatternDefine or DictEntryDefine fails the test immediately.
+func verifySnapshotContents(t *testing.T, batch *statefulpb.StatefulBatch, expectedPatterns map[uint64]string, expectedDictEntries map[uint64]string) {
+	// Mark as a helper so failures are reported at the calling test's line.
+	t.Helper()
+
+	require.NotNil(t, batch)
+	require.Equal(t, uint32(0), batch.BatchId, "Snapshot should have batch ID 0")
+
+	// Deserialize the snapshot data (it's a DatumSequence)
+	var datumSeq statefulpb.DatumSequence
+	err := proto.Unmarshal(batch.Data, &datumSeq)
+	require.NoError(t, err)
+
+	// Collect definitions by id; a repeated id keeps the last definition seen
+	foundPatterns := make(map[uint64]string)
+	foundDictEntries := make(map[uint64]string)
+
+	for _, datum := range datumSeq.Data {
+		switch d := datum.Data.(type) {
+		case *statefulpb.Datum_PatternDefine:
+			foundPatterns[d.PatternDefine.PatternId] = d.PatternDefine.Template
+		case *statefulpb.Datum_DictEntryDefine:
+			foundDictEntries[d.DictEntryDefine.Id] = d.DictEntryDefine.Value
+		default:
+			t.Fatalf("Snapshot should only contain PatternDefine and DictEntryDefine, got: %T", datum.Data)
+		}
+	}
+
+	require.Equal(t, expectedPatterns, foundPatterns, "Snapshot patterns mismatch")
+	require.Equal(t, expectedDictEntries, foundDictEntries, "Snapshot dict entries mismatch")
+}
+
+// TestStreamWorkerSnapshot tests the snapshot functionality across stream rotations: after each
+// rotation the new stream must first carry a batch-0 snapshot of the state built from acked
+// batches, followed by the retransmitted unacked batch.
+func TestStreamWorkerSnapshot(t *testing.T) {
+	fixture := newTestFixture(t)
+	defer fixture.cleanup()
+
+	// Override stream lifetime for this test (must be set before the worker is created)
+	fixture.streamLifetime = time.Second
+	worker := fixture.createWorker()
+	worker.start()
+
+	// Wait for initial stream to be ready
+	var stream1 *mockLogsStream
+	require.Eventually(t, func() bool {
+		stream1 = fixture.mockClient.getCurrentStream()
+		return stream1 != nil && worker.streamState == active
+	}, testTimeout, testTickInterval, "Initial stream should be established")
+
+	// === Step 1: Send Batch 1 (4 state changes: define p1, d1, p2, d2) ===
+	batch1StateChanges := []*statefulpb.Datum{
+		createPatternDefine(1, "pattern1"),
+		createDictEntryDefine(1, "value1"),
+		createPatternDefine(2, "pattern2"),
+		createDictEntryDefine(2, "value2"),
+	}
+	batch1Payload := createPayloadWithState("log with p1/d1", batch1StateChanges)
+	fixture.inputChan <- batch1Payload
+
+	// Wait for batch 1 to be sent
+	require.Eventually(t, func() bool {
+		return stream1.getSentBatchCount() == 1
+	}, testTimeout, testTickInterval, "Batch 1 should be sent")
+
+	// === Step 2: Ack Batch 1 ===
+	stream1.sendAck(1)
+
+	// Verify batch 1 appears in outputChan
+	select {
+	case output := <-fixture.outputChan:
+		require.Equal(t, batch1Payload, output)
+	case <-time.After(testTimeout):
+		t.Fatal("Batch 1 should appear in outputChan")
+	}
+
+	// === Step 3: Send Batch 2 (4 state changes: delete p1, d1; define p3, d3) ===
+	batch2StateChanges := []*statefulpb.Datum{
+		createPatternDelete(1),
+		createDictEntryDelete(1),
+		createPatternDefine(3, "pattern3"),
+		createDictEntryDefine(3, "value3"),
+	}
+	batch2Payload := createPayloadWithState("log with p2/d2 and p3/d3", batch2StateChanges)
+	fixture.inputChan <- batch2Payload
+
+	// Wait for batch 2 to be sent
+	require.Eventually(t, func() bool {
+		return stream1.getSentBatchCount() == 2
+	}, testTimeout, testTickInterval, "Batch 2 should be sent")
+
+	// === Step 4: Cut stream with recv failure (before acking batch 2) ===
+	stream1.injectRecvError(io.EOF)
+
+	// Wait for stream rotation
+	var stream2 *mockLogsStream
+	require.Eventually(t, func() bool {
+		stream2 = fixture.mockClient.getCurrentStream()
+		return stream2 != nil && stream2 != stream1 && worker.streamState == active
+	}, testTimeout, testTickInterval, "Stream should rotate after recv failure")
+
+	// === Step 5: Verify snapshot on new stream ===
+	// Snapshot should contain state BEFORE batch 2: {p1, p2, d1, d2}
+	require.Eventually(t, func() bool {
+		return stream2.getSentBatchCount() >= 1
+	}, testTimeout, testTickInterval, "Snapshot should be sent on new stream")
+
+	snapshotBatch := stream2.getSentBatch(0)
+	expectedPatterns1 := map[uint64]string{
+		1: "pattern1",
+		2: "pattern2",
+	}
+	expectedDictEntries1 := map[uint64]string{
+		1: "value1",
+		2: "value2",
+	}
+	verifySnapshotContents(t, snapshotBatch, expectedPatterns1, expectedDictEntries1)
+
+	// === Step 6: Ack snapshot (batch 0) ===
+	stream2.sendAck(0)
+
+	// === Step 7: Verify Batch 2 is retransmitted ===
+	require.Eventually(t, func() bool {
+		return stream2.getSentBatchCount() == 2
+	}, testTimeout, testTickInterval, "Batch 2 should be retransmitted")
+
+	// Batch IDs restart on the new stream, so the retransmission carries ID 1
+	batch2Retransmitted := stream2.getSentBatch(1)
+	require.Equal(t, uint32(1), batch2Retransmitted.BatchId)
+
+	// === Step 8: Ack Batch 2 ===
+	stream2.sendAck(1)
+
+	// Verify batch 2 appears in outputChan
+	select {
+	case output := <-fixture.outputChan:
+		require.Equal(t, batch2Payload, output)
+	case <-time.After(testTimeout):
+		t.Fatal("Batch 2 should appear in outputChan")
+	}
+
+	// === Step 9: Send Batch 3 (2 state changes: define p4, d4) ===
+	batch3StateChanges := []*statefulpb.Datum{
+		createPatternDefine(4, "pattern4"),
+		createDictEntryDefine(4, "value4"),
+	}
+	batch3Payload := createPayloadWithState("log with p4/d4", batch3StateChanges)
+	fixture.inputChan <- batch3Payload
+
+	// Wait for batch 3 to be sent (third batch on stream2: snapshot, batch 2, batch 3)
+	require.Eventually(t, func() bool {
+		return stream2.getSentBatchCount() == 3
+	}, testTimeout, testTickInterval, "Batch 3 should be sent")
+
+	// === Step 10: Stream timer expires ===
+	// Use the fixture's (overridden) lifetime rather than repeating the literal
+	fixture.mockClock.Add(fixture.streamLifetime)
+
+	// Worker should enter draining state (batch 3 is still inflight)
+	require.Eventually(t, func() bool {
+		return worker.streamState == draining
+	}, testTimeout, testTickInterval, "Worker should enter draining state")
+
+	// === Step 11: Drain timer expires (force rotation) ===
+	// Use the package constant so the test follows any change to the drain timeout
+	fixture.mockClock.Add(drainTimeout)
+
+	// Wait for new stream to be created
+	var stream3 *mockLogsStream
+	require.Eventually(t, func() bool {
+		stream3 = fixture.mockClient.getCurrentStream()
+		return stream3 != nil && stream3 != stream2 && worker.streamState == active
+	}, testTimeout, testTickInterval, "Stream should rotate after drain timeout")
+
+	// === Step 12: Verify snapshot on new stream ===
+	// Snapshot should contain state AFTER batch 2, BEFORE batch 3: {p2, p3, d2, d3}
+	// (p1/d1 were deleted in batch 2)
+	require.Eventually(t, func() bool {
+		return stream3.getSentBatchCount() >= 1
+	}, testTimeout, testTickInterval, "Snapshot should be sent on new stream")
+
+	snapshotBatch2 := stream3.getSentBatch(0)
+	expectedPatterns2 := map[uint64]string{
+		2: "pattern2",
+		3: "pattern3",
+	}
+	expectedDictEntries2 := map[uint64]string{
+		2: "value2",
+		3: "value3",
+	}
+	verifySnapshotContents(t, snapshotBatch2, expectedPatterns2, expectedDictEntries2)
+
+	// Ack the snapshot, then wait for batch 3 to be retransmitted before acking it
+	// (same sequence as steps 6-8, so the ack can never precede the batch it acknowledges)
+	stream3.sendAck(0)
+	require.Eventually(t, func() bool {
+		return stream3.getSentBatchCount() == 2
+	}, testTimeout, testTickInterval, "Batch 3 should be retransmitted")
+	stream3.sendAck(1)
+
+	// Verify batch 3 appears in outputChan
+	select {
+	case output := <-fixture.outputChan:
+		require.Equal(t, batch3Payload, output)
+	case <-time.After(testTimeout):
+		t.Fatal("Batch 3 should appear in outputChan")
+	}
+}
diff --git a/pkg/logs/sender/grpc/streamstate_string.go b/pkg/logs/sender/grpc/streamstate_string.go
new file mode 100644
index 000000000000..6081ed22284e
--- /dev/null
+++ b/pkg/logs/sender/grpc/streamstate_string.go
@@ -0,0 +1,27 @@
+// Code generated by "stringer -type=streamState"; DO NOT EDIT.
+
+package grpc
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[disconnected-0]
+	_ = x[connecting-1]
+	_ = x[active-2]
+	_ = x[draining-3]
+}
+
+const _streamState_name = "disconnectedconnectingactivedraining" // all state names concatenated, sliced via _streamState_index
+
+var _streamState_index = [...]uint8{0, 12, 22, 28, 36} // start offset of each name in _streamState_name, plus the final end offset
+
+func (i streamState) String() string {
+	idx := int(i) - 0
+	if i < 0 || idx >= len(_streamState_index)-1 { // out-of-range values are rendered numerically as "streamState(N)"
+		return "streamState(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _streamState_name[_streamState_index[idx]:_streamState_index[idx+1]]
+}
diff --git a/pkg/logs/sender/message_buffer.go b/pkg/logs/sender/message_buffer.go
index ad2112193eaa..b1185e16ade7 100644
--- a/pkg/logs/sender/message_buffer.go
+++ b/pkg/logs/sender/message_buffer.go
@@ -28,8 +28,15 @@ func NewMessageBuffer(batchSizeLimit int, contentSizeLimit int) *MessageBuffer {
 // returns true if the message was added.
 func (p *MessageBuffer) AddMessage(message *message.Message) bool {
 	contentSize := len(message.GetContent())
+	return p.AddMessageWithSize(&message.MessageMetadata, contentSize) // message is the parameter here (it shadows the package), so this passes the address of its MessageMetadata field; AddMessageWithSize copies the metadata before storing it
+}
+
+// AddMessageWithSize adds a message to the buffer if there is still some free space,
+// returns true if the message was added.
+// As input it takes directly metadata and content size, instead of a message.
+func (p *MessageBuffer) AddMessageWithSize(metadata *message.MessageMetadata, contentSize int) bool {
 	if len(p.messageBuffer) < cap(p.messageBuffer) && p.contentSize+contentSize <= p.contentSizeLimit {
-		meta := message.MessageMetadata // Copy metadata instead of taking reference
+		meta := *metadata // Copy metadata instead of taking reference
 		p.messageBuffer = append(p.messageBuffer, &meta)
 		p.contentSize += contentSize
 		return true
diff --git a/pkg/proto/datadog/stateful/stateful_encoding.proto b/pkg/proto/datadog/stateful/stateful_encoding.proto
new file mode 100644
index 000000000000..6696e971b163
--- /dev/null
+++ b/pkg/proto/datadog/stateful/stateful_encoding.proto
@@ -0,0 +1,122 @@
+syntax = "proto3";
+
+package datadog.intake.stateful;
+
+option go_package = "pkg/proto/pbgo/statefulpb";
+
+// ---------------------------------------------------------------------------
+// Dictionary-encoded
+// ---------------------------------------------------------------------------
+
+message DictEntryDefine {
+  uint64 id = 1;    // identifier of the dictionary entry (presumably what DynamicValue.dict_index refers to - confirm)
+  string value = 2; // string stored under that id
+}
+
+message DictEntryDelete {
+  uint64 id = 1; // id of the dictionary entry to remove
+}
+
+// ---------------------------------------------------------------------------
+// Pattern dictionary
+// ---------------------------------------------------------------------------
+
+// PatternDefine defines a template under pattern_id. pos_list indicates where dynamic values
+// should be inserted into the template; it is more accurate than an in-band marker.
+message PatternDefine {
+  uint64 pattern_id = 1;
+  string template = 2;
+  uint32 param_count = 3;
+  repeated uint32 pos_list = 4;
+}
+
+message PatternDelete {
+  uint64 pattern_id = 1; // pattern_id of the pattern to remove
+}
+
+// ---------------------------------------------------------------------------
+// Log payload
+// ---------------------------------------------------------------------------
+
+message Tag {
+  DynamicValue key = 1;   // key and value are DynamicValues, so each may be an inline value or a dict_index
+  DynamicValue value = 2;
+}
+
+message Log {
+  uint64 timestamp = 1;
+  oneof content {
+    StructuredLog structured = 2;
+    string raw = 3;
+  }
+  // TODO: right now we are assuming tags are attached to each log - in the future we may have common tags in the stream
+  // state and auto-populate them downstream.
+  // Required tags: `service`, `hostname`.
+  // Other tags on agent payload: `status`, `source`.
+  // All other tags are sent as `ddtags`.
+  repeated Tag tags = 4;
+}
+
+message StructuredLog {
+  uint64 pattern_id = 1;                    // pattern this log refers to (presumably a PatternDefine.pattern_id - confirm)
+  repeated DynamicValue dynamic_values = 2; // values for the pattern's dynamic positions (ordering relative to pos_list not shown here - confirm)
+}
+
+// TODO: not sure we need the numeric types (presumably int_value / float_value - confirm)
+message DynamicValue {
+  oneof value {
+    int64 int_value = 1;
+    double float_value = 2;
+    string string_value = 3;
+    uint64 dict_index = 4; // presumably the id of a DictEntryDefine - confirm
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Streaming envelope
+// ---------------------------------------------------------------------------
+
+message Datum {
+  oneof data {
+    PatternDefine pattern_define = 1;
+    PatternDelete pattern_delete = 2;
+    DictEntryDefine dict_entry_define = 3;
+    DictEntryDelete dict_entry_delete = 4;
+    Log logs = 5; // a single Log despite the plural field name
+  }
+}
+
+// DatumSequence wraps a sequence of Datum messages.
+// Used for serialization in application-level compression; its serialized bytes are what StatefulBatch.data carries.
+message DatumSequence {
+  repeated Datum data = 1;
+}
+
+// StatefulBatch carries a sequence of pattern/dictionary changes and logs.
+// The ordering is significant: the data must be processed in order.
+message StatefulBatch {
+  uint32 batch_id = 1;
+
+  // Bytes of a serialized DatumSequence. Eventually this will also be compressed.
+  // This allows for Datums to be compressed while they are buffered in memory before being acked by the server.
+  bytes data = 2;
+}
+
+message BatchStatus {
+  uint32 batch_id = 1; // presumably the batch_id of the StatefulBatch being acknowledged - confirm
+
+  // TODO: only OK is used right now - should we just remove this enum?
+  enum Status {
+    UNKNOWN = 0;
+    OK = 1;
+  }
+  Status status = 2;
+}
+
+// ---------------------------------------------------------------------------
+// gRPC service definition (bi-directional streaming)
+// ---------------------------------------------------------------------------
+
+service StatefulLogsService {
+  rpc LogsStream(stream StatefulBatch) returns (stream BatchStatus); // client streams StatefulBatch messages; server streams BatchStatus messages back
+}
diff --git a/pkg/proto/pbgo/statefulpb/stateful_encoding.pb.go b/pkg/proto/pbgo/statefulpb/stateful_encoding.pb.go
new file mode 100644
index 000000000000..c70bb84bea12
--- /dev/null
+++ b/pkg/proto/pbgo/statefulpb/stateful_encoding.pb.go
@@ -0,0 +1,1159 @@
+// Code generated by protoc-gen-go. DO NOT EDIT.
+// versions:
+// 	protoc-gen-go v1.36.10
+// 	protoc        v5.29.3
+// source: datadog/stateful/stateful_encoding.proto
+
+package statefulpb
+
+import (
+	context "context"
+	grpc "google.golang.org/grpc"
+	codes "google.golang.org/grpc/codes"
+	status "google.golang.org/grpc/status"
+	protoreflect "google.golang.org/protobuf/reflect/protoreflect"
+	protoimpl "google.golang.org/protobuf/runtime/protoimpl"
+	reflect "reflect"
+	sync "sync"
+	unsafe "unsafe"
+)
+
+const (
+	// Verify that this generated code is sufficiently up-to-date.
+	_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
+	// Verify that runtime/protoimpl is sufficiently up-to-date.
+	_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
+)
+
+// TODO: only OK is used right now - should we just remove this enum?
+type BatchStatus_Status int32
+
+const (
+	BatchStatus_UNKNOWN BatchStatus_Status = 0
+	BatchStatus_OK      BatchStatus_Status = 1
+)
+
+// Enum value maps for BatchStatus_Status.
+var (
+	BatchStatus_Status_name = map[int32]string{
+		0: "UNKNOWN",
+		1: "OK",
+	}
+	BatchStatus_Status_value = map[string]int32{
+		"UNKNOWN": 0,
+		"OK":      1,
+	}
+)
+
+func (x BatchStatus_Status) Enum() *BatchStatus_Status {
+	p := new(BatchStatus_Status)
+	*p = x
+	return p
+}
+
+func (x BatchStatus_Status) String() string {
+	return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
+}
+
+func (BatchStatus_Status) Descriptor() protoreflect.EnumDescriptor {
+	return file_datadog_stateful_stateful_encoding_proto_enumTypes[0].Descriptor()
+}
+
+func (BatchStatus_Status) Type() protoreflect.EnumType {
+	return &file_datadog_stateful_stateful_encoding_proto_enumTypes[0]
+}
+
+func (x BatchStatus_Status) Number() protoreflect.EnumNumber {
+	return protoreflect.EnumNumber(x)
+}
+
+// Deprecated: Use BatchStatus_Status.Descriptor instead.
+func (BatchStatus_Status) EnumDescriptor() ([]byte, []int) {
+	return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{11, 0}
+}
+
+type DictEntryDefine struct {
+	state         protoimpl.MessageState `protogen:"open.v1"`
+	Id            uint64                 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"`
+	Value         string                 `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *DictEntryDefine) Reset() {
+	*x = DictEntryDefine{}
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[0]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *DictEntryDefine) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*DictEntryDefine) ProtoMessage() {}
+
+func (x *DictEntryDefine) ProtoReflect() protoreflect.Message {
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[0]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use DictEntryDefine.ProtoReflect.Descriptor instead.
+func (*DictEntryDefine) Descriptor() ([]byte, []int) {
+	return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{0}
+}
+
+func (x *DictEntryDefine) GetId() uint64 {
+	if x != nil {
+		return x.Id
+	}
+	return 0
+}
+
+func (x *DictEntryDefine) GetValue() string {
+	if x != nil {
+		return x.Value
+	}
+	return ""
+}
+
+type DictEntryDelete struct {
+	state         protoimpl.MessageState `protogen:"open.v1"`
+	Id            uint64                 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *DictEntryDelete) Reset() {
+	*x = DictEntryDelete{}
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[1]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *DictEntryDelete) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*DictEntryDelete) ProtoMessage() {}
+
+func (x *DictEntryDelete) ProtoReflect() protoreflect.Message {
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[1]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use DictEntryDelete.ProtoReflect.Descriptor instead.
+func (*DictEntryDelete) Descriptor() ([]byte, []int) {
+	return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{1}
+}
+
+func (x *DictEntryDelete) GetId() uint64 {
+	if x != nil {
+		return x.Id
+	}
+	return 0
+}
+
+// pos_list is used to indicate where dynamic values should be inserted
+// it's more accurate than a marker
+type PatternDefine struct {
+	state         protoimpl.MessageState `protogen:"open.v1"`
+	PatternId     uint64                 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"`
+	Template      string                 `protobuf:"bytes,2,opt,name=template,proto3" json:"template,omitempty"`
+	ParamCount    uint32                 `protobuf:"varint,3,opt,name=param_count,json=paramCount,proto3" json:"param_count,omitempty"`
+	PosList       []uint32               `protobuf:"varint,4,rep,packed,name=pos_list,json=posList,proto3" json:"pos_list,omitempty"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *PatternDefine) Reset() {
+	*x = PatternDefine{}
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[2]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *PatternDefine) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*PatternDefine) ProtoMessage() {}
+
+func (x *PatternDefine) ProtoReflect() protoreflect.Message {
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[2]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use PatternDefine.ProtoReflect.Descriptor instead.
+func (*PatternDefine) Descriptor() ([]byte, []int) {
+	return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{2}
+}
+
+func (x *PatternDefine) GetPatternId() uint64 {
+	if x != nil {
+		return x.PatternId
+	}
+	return 0
+}
+
+func (x *PatternDefine) GetTemplate() string {
+	if x != nil {
+		return x.Template
+	}
+	return ""
+}
+
+func (x *PatternDefine) GetParamCount() uint32 {
+	if x != nil {
+		return x.ParamCount
+	}
+	return 0
+}
+
+func (x *PatternDefine) GetPosList() []uint32 {
+	if x != nil {
+		return x.PosList
+	}
+	return nil
+}
+
+type PatternDelete struct {
+	state         protoimpl.MessageState `protogen:"open.v1"`
+	PatternId     uint64                 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *PatternDelete) Reset() {
+	*x = PatternDelete{}
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[3]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *PatternDelete) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*PatternDelete) ProtoMessage() {}
+
+func (x *PatternDelete) ProtoReflect() protoreflect.Message {
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[3]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use PatternDelete.ProtoReflect.Descriptor instead.
+func (*PatternDelete) Descriptor() ([]byte, []int) {
+	return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{3}
+}
+
+func (x *PatternDelete) GetPatternId() uint64 {
+	if x != nil {
+		return x.PatternId
+	}
+	return 0
+}
+
+type Tag struct {
+	state         protoimpl.MessageState `protogen:"open.v1"`
+	Key           *DynamicValue          `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"`
+	Value         *DynamicValue          `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *Tag) Reset() {
+	*x = Tag{}
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[4]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *Tag) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*Tag) ProtoMessage() {}
+
+func (x *Tag) ProtoReflect() protoreflect.Message {
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[4]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use Tag.ProtoReflect.Descriptor instead.
+func (*Tag) Descriptor() ([]byte, []int) {
+	return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{4}
+}
+
+func (x *Tag) GetKey() *DynamicValue {
+	if x != nil {
+		return x.Key
+	}
+	return nil
+}
+
+func (x *Tag) GetValue() *DynamicValue {
+	if x != nil {
+		return x.Value
+	}
+	return nil
+}
+
+type Log struct {
+	state     protoimpl.MessageState `protogen:"open.v1"`
+	Timestamp uint64                 `protobuf:"varint,1,opt,name=timestamp,proto3" json:"timestamp,omitempty"`
+	// Types that are valid to be assigned to Content:
+	//
+	//	*Log_Structured
+	//	*Log_Raw
+	Content isLog_Content `protobuf_oneof:"content"`
+	// TODO: right now we are assuming logs are attached per tag - in the future we may have common tags in the stream
+	// state and auto-populate them downstream.
+	// Required tags: `service`, `hostname`,
+	// Other tags on agent payload: `status`, `source`
+	// All other tags are sent as `ddtags`
+	Tags          []*Tag `protobuf:"bytes,4,rep,name=tags,proto3" json:"tags,omitempty"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *Log) Reset() {
+	*x = Log{}
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[5]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *Log) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*Log) ProtoMessage() {}
+
+func (x *Log) ProtoReflect() protoreflect.Message {
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[5]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use Log.ProtoReflect.Descriptor instead.
+func (*Log) Descriptor() ([]byte, []int) {
+	return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{5}
+}
+
+func (x *Log) GetTimestamp() uint64 {
+	if x != nil {
+		return x.Timestamp
+	}
+	return 0
+}
+
+func (x *Log) GetContent() isLog_Content {
+	if x != nil {
+		return x.Content
+	}
+	return nil
+}
+
+func (x *Log) GetStructured() *StructuredLog {
+	if x != nil {
+		if x, ok := x.Content.(*Log_Structured); ok {
+			return x.Structured
+		}
+	}
+	return nil
+}
+
+func (x *Log) GetRaw() string {
+	if x != nil {
+		if x, ok := x.Content.(*Log_Raw); ok {
+			return x.Raw
+		}
+	}
+	return ""
+}
+
+func (x *Log) GetTags() []*Tag {
+	if x != nil {
+		return x.Tags
+	}
+	return nil
+}
+
+type isLog_Content interface {
+	isLog_Content()
+}
+
+type Log_Structured struct {
+	Structured *StructuredLog `protobuf:"bytes,2,opt,name=structured,proto3,oneof"`
+}
+
+type Log_Raw struct {
+	Raw string `protobuf:"bytes,3,opt,name=raw,proto3,oneof"`
+}
+
+func (*Log_Structured) isLog_Content() {}
+
+func (*Log_Raw) isLog_Content() {}
+
+type StructuredLog struct {
+	state         protoimpl.MessageState `protogen:"open.v1"`
+	PatternId     uint64                 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"`
+	DynamicValues []*DynamicValue        `protobuf:"bytes,2,rep,name=dynamic_values,json=dynamicValues,proto3" json:"dynamic_values,omitempty"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *StructuredLog) Reset() {
+	*x = StructuredLog{}
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[6]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *StructuredLog) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*StructuredLog) ProtoMessage() {}
+
+func (x *StructuredLog) ProtoReflect() protoreflect.Message {
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[6]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use StructuredLog.ProtoReflect.Descriptor instead.
+func (*StructuredLog) Descriptor() ([]byte, []int) {
+	return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{6}
+}
+
+func (x *StructuredLog) GetPatternId() uint64 {
+	if x != nil {
+		return x.PatternId
+	}
+	return 0
+}
+
+func (x *StructuredLog) GetDynamicValues() []*DynamicValue {
+	if x != nil {
+		return x.DynamicValues
+	}
+	return nil
+}
+
+// TODO not sure we need numeric type
+type DynamicValue struct {
+	state protoimpl.MessageState `protogen:"open.v1"`
+	// Types that are valid to be assigned to Value:
+	//
+	//	*DynamicValue_IntValue
+	//	*DynamicValue_FloatValue
+	//	*DynamicValue_StringValue
+	//	*DynamicValue_DictIndex
+	Value         isDynamicValue_Value `protobuf_oneof:"value"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *DynamicValue) Reset() {
+	*x = DynamicValue{}
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[7]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *DynamicValue) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*DynamicValue) ProtoMessage() {}
+
+func (x *DynamicValue) ProtoReflect() protoreflect.Message {
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[7]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use DynamicValue.ProtoReflect.Descriptor instead.
+func (*DynamicValue) Descriptor() ([]byte, []int) {
+	return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{7}
+}
+
+func (x *DynamicValue) GetValue() isDynamicValue_Value {
+	if x != nil {
+		return x.Value
+	}
+	return nil
+}
+
+func (x *DynamicValue) GetIntValue() int64 {
+	if x != nil {
+		if x, ok := x.Value.(*DynamicValue_IntValue); ok {
+			return x.IntValue
+		}
+	}
+	return 0
+}
+
+func (x *DynamicValue) GetFloatValue() float64 {
+	if x != nil {
+		if x, ok := x.Value.(*DynamicValue_FloatValue); ok {
+			return x.FloatValue
+		}
+	}
+	return 0
+}
+
+func (x *DynamicValue) GetStringValue() string {
+	if x != nil {
+		if x, ok := x.Value.(*DynamicValue_StringValue); ok {
+			return x.StringValue
+		}
+	}
+	return ""
+}
+
+func (x *DynamicValue) GetDictIndex() uint64 {
+	if x != nil {
+		if x, ok := x.Value.(*DynamicValue_DictIndex); ok {
+			return x.DictIndex
+		}
+	}
+	return 0
+}
+
+type isDynamicValue_Value interface {
+	isDynamicValue_Value()
+}
+
+type DynamicValue_IntValue struct {
+	IntValue int64 `protobuf:"varint,1,opt,name=int_value,json=intValue,proto3,oneof"`
+}
+
+type DynamicValue_FloatValue struct {
+	FloatValue float64 `protobuf:"fixed64,2,opt,name=float_value,json=floatValue,proto3,oneof"`
+}
+
+type DynamicValue_StringValue struct {
+	StringValue string `protobuf:"bytes,3,opt,name=string_value,json=stringValue,proto3,oneof"`
+}
+
+type DynamicValue_DictIndex struct {
+	DictIndex uint64 `protobuf:"varint,4,opt,name=dict_index,json=dictIndex,proto3,oneof"`
+}
+
+func (*DynamicValue_IntValue) isDynamicValue_Value() {}
+
+func (*DynamicValue_FloatValue) isDynamicValue_Value() {}
+
+func (*DynamicValue_StringValue) isDynamicValue_Value() {}
+
+func (*DynamicValue_DictIndex) isDynamicValue_Value() {}
+
+type Datum struct {
+	state protoimpl.MessageState `protogen:"open.v1"`
+	// Types that are valid to be assigned to Data:
+	//
+	//	*Datum_PatternDefine
+	//	*Datum_PatternDelete
+	//	*Datum_DictEntryDefine
+	//	*Datum_DictEntryDelete
+	//	*Datum_Logs
+	Data          isDatum_Data `protobuf_oneof:"data"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *Datum) Reset() {
+	*x = Datum{}
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[8]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *Datum) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*Datum) ProtoMessage() {}
+
+func (x *Datum) ProtoReflect() protoreflect.Message {
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[8]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use Datum.ProtoReflect.Descriptor instead.
+func (*Datum) Descriptor() ([]byte, []int) {
+	return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{8}
+}
+
+func (x *Datum) GetData() isDatum_Data {
+	if x != nil {
+		return x.Data
+	}
+	return nil
+}
+
+func (x *Datum) GetPatternDefine() *PatternDefine {
+	if x != nil {
+		if x, ok := x.Data.(*Datum_PatternDefine); ok {
+			return x.PatternDefine
+		}
+	}
+	return nil
+}
+
+func (x *Datum) GetPatternDelete() *PatternDelete {
+	if x != nil {
+		if x, ok := x.Data.(*Datum_PatternDelete); ok {
+			return x.PatternDelete
+		}
+	}
+	return nil
+}
+
+func (x *Datum) GetDictEntryDefine() *DictEntryDefine {
+	if x != nil {
+		if x, ok := x.Data.(*Datum_DictEntryDefine); ok {
+			return x.DictEntryDefine
+		}
+	}
+	return nil
+}
+
+func (x *Datum) GetDictEntryDelete() *DictEntryDelete {
+	if x != nil {
+		if x, ok := x.Data.(*Datum_DictEntryDelete); ok {
+			return x.DictEntryDelete
+		}
+	}
+	return nil
+}
+
+func (x *Datum) GetLogs() *Log {
+	if x != nil {
+		if x, ok := x.Data.(*Datum_Logs); ok {
+			return x.Logs
+		}
+	}
+	return nil
+}
+
+type isDatum_Data interface {
+	isDatum_Data()
+}
+
+type Datum_PatternDefine struct {
+	PatternDefine *PatternDefine `protobuf:"bytes,1,opt,name=pattern_define,json=patternDefine,proto3,oneof"`
+}
+
+type Datum_PatternDelete struct {
+	PatternDelete *PatternDelete `protobuf:"bytes,2,opt,name=pattern_delete,json=patternDelete,proto3,oneof"`
+}
+
+type Datum_DictEntryDefine struct {
+	DictEntryDefine *DictEntryDefine `protobuf:"bytes,3,opt,name=dict_entry_define,json=dictEntryDefine,proto3,oneof"`
+}
+
+type Datum_DictEntryDelete struct {
+	DictEntryDelete *DictEntryDelete `protobuf:"bytes,4,opt,name=dict_entry_delete,json=dictEntryDelete,proto3,oneof"`
+}
+
+type Datum_Logs struct {
+	Logs *Log `protobuf:"bytes,5,opt,name=logs,proto3,oneof"`
+}
+
+func (*Datum_PatternDefine) isDatum_Data() {}
+
+func (*Datum_PatternDelete) isDatum_Data() {}
+
+func (*Datum_DictEntryDefine) isDatum_Data() {}
+
+func (*Datum_DictEntryDelete) isDatum_Data() {}
+
+func (*Datum_Logs) isDatum_Data() {}
+
+// DatumSequence wraps a sequence of Datum messages
+// Used for serialization in application-level compression
+type DatumSequence struct {
+	state         protoimpl.MessageState `protogen:"open.v1"`
+	Data          []*Datum               `protobuf:"bytes,1,rep,name=data,proto3" json:"data,omitempty"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *DatumSequence) Reset() {
+	*x = DatumSequence{}
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[9]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *DatumSequence) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*DatumSequence) ProtoMessage() {}
+
+func (x *DatumSequence) ProtoReflect() protoreflect.Message {
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[9]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use DatumSequence.ProtoReflect.Descriptor instead.
+func (*DatumSequence) Descriptor() ([]byte, []int) {
+	return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{9}
+}
+
+func (x *DatumSequence) GetData() []*Datum {
+	if x != nil {
+		return x.Data
+	}
+	return nil
+}
+
+// data is sequence of pattern/dictionary changes + logs
+// the ordering is significant, must be processed in order
+type StatefulBatch struct {
+	state   protoimpl.MessageState `protogen:"open.v1"`
+	BatchId uint32                 `protobuf:"varint,1,opt,name=batch_id,json=batchId,proto3" json:"batch_id,omitempty"`
+	// Bytes of a serialized DatumSequence. Eventually this will also be compressed.
+	// This allows for Datums to be compressed while they are buffered in memory before being acked by the server.
+	Data          []byte `protobuf:"bytes,2,opt,name=data,proto3" json:"data,omitempty"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *StatefulBatch) Reset() {
+	*x = StatefulBatch{}
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[10]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *StatefulBatch) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*StatefulBatch) ProtoMessage() {}
+
+func (x *StatefulBatch) ProtoReflect() protoreflect.Message {
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[10]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use StatefulBatch.ProtoReflect.Descriptor instead.
+func (*StatefulBatch) Descriptor() ([]byte, []int) {
+	return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{10}
+}
+
+func (x *StatefulBatch) GetBatchId() uint32 {
+	if x != nil {
+		return x.BatchId
+	}
+	return 0
+}
+
+func (x *StatefulBatch) GetData() []byte {
+	if x != nil {
+		return x.Data
+	}
+	return nil
+}
+
+type BatchStatus struct {
+	state         protoimpl.MessageState `protogen:"open.v1"`
+	BatchId       uint32                 `protobuf:"varint,1,opt,name=batch_id,json=batchId,proto3" json:"batch_id,omitempty"`
+	Status        BatchStatus_Status     `protobuf:"varint,2,opt,name=status,proto3,enum=datadog.intake.stateful.BatchStatus_Status" json:"status,omitempty"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *BatchStatus) Reset() {
+	*x = BatchStatus{}
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[11]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *BatchStatus) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*BatchStatus) ProtoMessage() {}
+
+func (x *BatchStatus) ProtoReflect() protoreflect.Message {
+	mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[11]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use BatchStatus.ProtoReflect.Descriptor instead.
+func (*BatchStatus) Descriptor() ([]byte, []int) {
+	return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{11}
+}
+
+func (x *BatchStatus) GetBatchId() uint32 {
+	if x != nil {
+		return x.BatchId
+	}
+	return 0
+}
+
+func (x *BatchStatus) GetStatus() BatchStatus_Status {
+	if x != nil {
+		return x.Status
+	}
+	return BatchStatus_UNKNOWN
+}
+
+var File_datadog_stateful_stateful_encoding_proto protoreflect.FileDescriptor
+
+const file_datadog_stateful_stateful_encoding_proto_rawDesc = "" +
+	"\n" +
+	"(datadog/stateful/stateful_encoding.proto\x12\x17datadog.intake.stateful\"7\n" +
+	"\x0fDictEntryDefine\x12\x0e\n" +
+	"\x02id\x18\x01 \x01(\x04R\x02id\x12\x14\n" +
+	"\x05value\x18\x02 \x01(\tR\x05value\"!\n" +
+	"\x0fDictEntryDelete\x12\x0e\n" +
+	"\x02id\x18\x01 \x01(\x04R\x02id\"\x86\x01\n" +
+	"\rPatternDefine\x12\x1d\n" +
+	"\n" +
+	"pattern_id\x18\x01 \x01(\x04R\tpatternId\x12\x1a\n" +
+	"\btemplate\x18\x02 \x01(\tR\btemplate\x12\x1f\n" +
+	"\vparam_count\x18\x03 \x01(\rR\n" +
+	"paramCount\x12\x19\n" +
+	"\bpos_list\x18\x04 \x03(\rR\aposList\".\n" +
+	"\rPatternDelete\x12\x1d\n" +
+	"\n" +
+	"pattern_id\x18\x01 \x01(\x04R\tpatternId\"{\n" +
+	"\x03Tag\x127\n" +
+	"\x03key\x18\x01 \x01(\v2%.datadog.intake.stateful.DynamicValueR\x03key\x12;\n" +
+	"\x05value\x18\x02 \x01(\v2%.datadog.intake.stateful.DynamicValueR\x05value\"\xbe\x01\n" +
+	"\x03Log\x12\x1c\n" +
+	"\ttimestamp\x18\x01 \x01(\x04R\ttimestamp\x12H\n" +
+	"\n" +
+	"structured\x18\x02 \x01(\v2&.datadog.intake.stateful.StructuredLogH\x00R\n" +
+	"structured\x12\x12\n" +
+	"\x03raw\x18\x03 \x01(\tH\x00R\x03raw\x120\n" +
+	"\x04tags\x18\x04 \x03(\v2\x1c.datadog.intake.stateful.TagR\x04tagsB\t\n" +
+	"\acontent\"|\n" +
+	"\rStructuredLog\x12\x1d\n" +
+	"\n" +
+	"pattern_id\x18\x01 \x01(\x04R\tpatternId\x12L\n" +
+	"\x0edynamic_values\x18\x02 \x03(\v2%.datadog.intake.stateful.DynamicValueR\rdynamicValues\"\x9f\x01\n" +
+	"\fDynamicValue\x12\x1d\n" +
+	"\tint_value\x18\x01 \x01(\x03H\x00R\bintValue\x12!\n" +
+	"\vfloat_value\x18\x02 \x01(\x01H\x00R\n" +
+	"floatValue\x12#\n" +
+	"\fstring_value\x18\x03 \x01(\tH\x00R\vstringValue\x12\x1f\n" +
+	"\n" +
+	"dict_index\x18\x04 \x01(\x04H\x00R\tdictIndexB\a\n" +
+	"\x05value\"\x95\x03\n" +
+	"\x05Datum\x12O\n" +
+	"\x0epattern_define\x18\x01 \x01(\v2&.datadog.intake.stateful.PatternDefineH\x00R\rpatternDefine\x12O\n" +
+	"\x0epattern_delete\x18\x02 \x01(\v2&.datadog.intake.stateful.PatternDeleteH\x00R\rpatternDelete\x12V\n" +
+	"\x11dict_entry_define\x18\x03 \x01(\v2(.datadog.intake.stateful.DictEntryDefineH\x00R\x0fdictEntryDefine\x12V\n" +
+	"\x11dict_entry_delete\x18\x04 \x01(\v2(.datadog.intake.stateful.DictEntryDeleteH\x00R\x0fdictEntryDelete\x122\n" +
+	"\x04logs\x18\x05 \x01(\v2\x1c.datadog.intake.stateful.LogH\x00R\x04logsB\x06\n" +
+	"\x04data\"C\n" +
+	"\rDatumSequence\x122\n" +
+	"\x04data\x18\x01 \x03(\v2\x1e.datadog.intake.stateful.DatumR\x04data\">\n" +
+	"\rStatefulBatch\x12\x19\n" +
+	"\bbatch_id\x18\x01 \x01(\rR\abatchId\x12\x12\n" +
+	"\x04data\x18\x02 \x01(\fR\x04data\"\x8c\x01\n" +
+	"\vBatchStatus\x12\x19\n" +
+	"\bbatch_id\x18\x01 \x01(\rR\abatchId\x12C\n" +
+	"\x06status\x18\x02 \x01(\x0e2+.datadog.intake.stateful.BatchStatus.StatusR\x06status\"\x1d\n" +
+	"\x06Status\x12\v\n" +
+	"\aUNKNOWN\x10\x00\x12\x06\n" +
+	"\x02OK\x10\x012u\n" +
+	"\x13StatefulLogsService\x12^\n" +
+	"\n" +
+	"LogsStream\x12&.datadog.intake.stateful.StatefulBatch\x1a$.datadog.intake.stateful.BatchStatus(\x010\x01B\x1bZ\x19pkg/proto/pbgo/statefulpbb\x06proto3"
+
+var (
+	file_datadog_stateful_stateful_encoding_proto_rawDescOnce sync.Once
+	file_datadog_stateful_stateful_encoding_proto_rawDescData []byte
+)
+
+func file_datadog_stateful_stateful_encoding_proto_rawDescGZIP() []byte {
+	file_datadog_stateful_stateful_encoding_proto_rawDescOnce.Do(func() {
+		file_datadog_stateful_stateful_encoding_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_datadog_stateful_stateful_encoding_proto_rawDesc), len(file_datadog_stateful_stateful_encoding_proto_rawDesc)))
+	})
+	return file_datadog_stateful_stateful_encoding_proto_rawDescData
+}
+
+var file_datadog_stateful_stateful_encoding_proto_enumTypes = make([]protoimpl.EnumInfo, 1)
+var file_datadog_stateful_stateful_encoding_proto_msgTypes = make([]protoimpl.MessageInfo, 12)
+var file_datadog_stateful_stateful_encoding_proto_goTypes = []any{
+	(BatchStatus_Status)(0), // 0: datadog.intake.stateful.BatchStatus.Status
+	(*DictEntryDefine)(nil), // 1: datadog.intake.stateful.DictEntryDefine
+	(*DictEntryDelete)(nil), // 2: datadog.intake.stateful.DictEntryDelete
+	(*PatternDefine)(nil),   // 3: datadog.intake.stateful.PatternDefine
+	(*PatternDelete)(nil),   // 4: datadog.intake.stateful.PatternDelete
+	(*Tag)(nil),             // 5: datadog.intake.stateful.Tag
+	(*Log)(nil),             // 6: datadog.intake.stateful.Log
+	(*StructuredLog)(nil),   // 7: datadog.intake.stateful.StructuredLog
+	(*DynamicValue)(nil),    // 8: datadog.intake.stateful.DynamicValue
+	(*Datum)(nil),           // 9: datadog.intake.stateful.Datum
+	(*DatumSequence)(nil),   // 10: datadog.intake.stateful.DatumSequence
+	(*StatefulBatch)(nil),   // 11: datadog.intake.stateful.StatefulBatch
+	(*BatchStatus)(nil),     // 12: datadog.intake.stateful.BatchStatus
+}
+var file_datadog_stateful_stateful_encoding_proto_depIdxs = []int32{
+	8,  // 0: datadog.intake.stateful.Tag.key:type_name -> datadog.intake.stateful.DynamicValue
+	8,  // 1: datadog.intake.stateful.Tag.value:type_name -> datadog.intake.stateful.DynamicValue
+	7,  // 2: datadog.intake.stateful.Log.structured:type_name -> datadog.intake.stateful.StructuredLog
+	5,  // 3: datadog.intake.stateful.Log.tags:type_name -> datadog.intake.stateful.Tag
+	8,  // 4: datadog.intake.stateful.StructuredLog.dynamic_values:type_name -> datadog.intake.stateful.DynamicValue
+	3,  // 5: datadog.intake.stateful.Datum.pattern_define:type_name -> datadog.intake.stateful.PatternDefine
+	4,  // 6: datadog.intake.stateful.Datum.pattern_delete:type_name -> datadog.intake.stateful.PatternDelete
+	1,  // 7: datadog.intake.stateful.Datum.dict_entry_define:type_name -> datadog.intake.stateful.DictEntryDefine
+	2,  // 8: datadog.intake.stateful.Datum.dict_entry_delete:type_name -> datadog.intake.stateful.DictEntryDelete
+	6,  // 9: datadog.intake.stateful.Datum.logs:type_name -> datadog.intake.stateful.Log
+	9,  // 10: datadog.intake.stateful.DatumSequence.data:type_name -> datadog.intake.stateful.Datum
+	0,  // 11: datadog.intake.stateful.BatchStatus.status:type_name -> datadog.intake.stateful.BatchStatus.Status
+	11, // 12: datadog.intake.stateful.StatefulLogsService.LogsStream:input_type -> datadog.intake.stateful.StatefulBatch
+	12, // 13: datadog.intake.stateful.StatefulLogsService.LogsStream:output_type -> datadog.intake.stateful.BatchStatus
+	13, // [13:14] is the sub-list for method output_type
+	12, // [12:13] is the sub-list for method input_type
+	12, // [12:12] is the sub-list for extension type_name
+	12, // [12:12] is the sub-list for extension extendee
+	0,  // [0:12] is the sub-list for field type_name
+}
+
+// init triggers construction of this file's protobuf descriptor at package load.
+func init() { file_datadog_stateful_stateful_encoding_proto_init() }
+// file_datadog_stateful_stateful_encoding_proto_init builds
+// File_datadog_stateful_stateful_encoding_proto from the raw descriptor and the
+// Go type tables declared in this file. It returns immediately when the file
+// descriptor has already been set.
+func file_datadog_stateful_stateful_encoding_proto_init() {
+	if File_datadog_stateful_stateful_encoding_proto != nil {
+		return
+	}
+	// Attach the Go wrapper types for each oneof to its message entry
+	// (the Log_*, DynamicValue_* and Datum_* wrappers at indexes 5, 7 and 8).
+	file_datadog_stateful_stateful_encoding_proto_msgTypes[5].OneofWrappers = []any{
+		(*Log_Structured)(nil),
+		(*Log_Raw)(nil),
+	}
+	file_datadog_stateful_stateful_encoding_proto_msgTypes[7].OneofWrappers = []any{
+		(*DynamicValue_IntValue)(nil),
+		(*DynamicValue_FloatValue)(nil),
+		(*DynamicValue_StringValue)(nil),
+		(*DynamicValue_DictIndex)(nil),
+	}
+	file_datadog_stateful_stateful_encoding_proto_msgTypes[8].OneofWrappers = []any{
+		(*Datum_PatternDefine)(nil),
+		(*Datum_PatternDelete)(nil),
+		(*Datum_DictEntryDefine)(nil),
+		(*Datum_DictEntryDelete)(nil),
+		(*Datum_Logs)(nil),
+	}
+	// x is a throwaway local type used only to obtain this package's import
+	// path through reflection.
+	type x struct{}
+	out := protoimpl.TypeBuilder{
+		File: protoimpl.DescBuilder{
+			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
+			// View the raw descriptor string as a byte slice without copying it.
+			RawDescriptor: unsafe.Slice(unsafe.StringData(file_datadog_stateful_stateful_encoding_proto_rawDesc), len(file_datadog_stateful_stateful_encoding_proto_rawDesc)),
+			NumEnums:      1,
+			NumMessages:   12,
+			NumExtensions: 0,
+			NumServices:   1,
+		},
+		GoTypes:           file_datadog_stateful_stateful_encoding_proto_goTypes,
+		DependencyIndexes: file_datadog_stateful_stateful_encoding_proto_depIdxs,
+		EnumInfos:         file_datadog_stateful_stateful_encoding_proto_enumTypes,
+		MessageInfos:      file_datadog_stateful_stateful_encoding_proto_msgTypes,
+	}.Build()
+	File_datadog_stateful_stateful_encoding_proto = out.File
+	// Drop the references to the build-time tables; presumably they are not
+	// needed once Build has run (verify against protoimpl before relying on it).
+	file_datadog_stateful_stateful_encoding_proto_goTypes = nil
+	file_datadog_stateful_stateful_encoding_proto_depIdxs = nil
+}
+
+// Blank references that keep the context and grpc imports in use even when
+// nothing else in this file refers to them.
+var (
+	_ context.Context
+	_ grpc.ClientConnInterface
+)
+
+// Compile-time assertion that the grpc package this file is compiled against
+// is compatible with the generated code.
+const _ = grpc.SupportPackageIsVersion6
+
+// StatefulLogsServiceClient is the client API for StatefulLogsService service.
+//
+// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream.
+type StatefulLogsServiceClient interface {
+	// LogsStream opens the LogsStream streaming RPC and returns the stream
+	// used to send StatefulBatch messages and receive BatchStatus messages.
+	LogsStream(ctx context.Context, opts ...grpc.CallOption) (StatefulLogsService_LogsStreamClient, error)
+}
+
+// statefulLogsServiceClient implements StatefulLogsServiceClient on top of a
+// grpc client connection.
+type statefulLogsServiceClient struct {
+	cc grpc.ClientConnInterface
+}
+
+// NewStatefulLogsServiceClient returns a StatefulLogsServiceClient that issues
+// its RPCs over cc.
+func NewStatefulLogsServiceClient(cc grpc.ClientConnInterface) StatefulLogsServiceClient {
+	return &statefulLogsServiceClient{cc: cc}
+}
+
+// LogsStream opens a new stream for the LogsStream method using the first
+// stream descriptor of _StatefulLogsService_serviceDesc and wraps it in the
+// typed client stream. It returns the error from NewStream unchanged.
+func (c *statefulLogsServiceClient) LogsStream(ctx context.Context, opts ...grpc.CallOption) (StatefulLogsService_LogsStreamClient, error) {
+	const fullMethod = "/datadog.intake.stateful.StatefulLogsService/LogsStream"
+	desc := &_StatefulLogsService_serviceDesc.Streams[0]
+	raw, err := c.cc.NewStream(ctx, desc, fullMethod, opts...)
+	if err != nil {
+		return nil, err
+	}
+	return &statefulLogsServiceLogsStreamClient{ClientStream: raw}, nil
+}
+
+// StatefulLogsService_LogsStreamClient is the client-side stream of the
+// LogsStream RPC: it sends StatefulBatch messages and receives BatchStatus
+// messages, in addition to the generic grpc.ClientStream methods.
+type StatefulLogsService_LogsStreamClient interface {
+	// Send writes one StatefulBatch to the stream.
+	Send(*StatefulBatch) error
+	// Recv reads the next BatchStatus from the stream.
+	Recv() (*BatchStatus, error)
+	grpc.ClientStream
+}
+
+// statefulLogsServiceLogsStreamClient wraps a grpc.ClientStream and adds the
+// typed Send and Recv methods of StatefulLogsService_LogsStreamClient.
+type statefulLogsServiceLogsStreamClient struct {
+	grpc.ClientStream
+}
+
+// Send forwards m to the embedded client stream's SendMsg and returns its error.
+func (x *statefulLogsServiceLogsStreamClient) Send(m *StatefulBatch) error {
+	return x.SendMsg(m)
+}
+
+// Recv reads the next message from the embedded client stream into a fresh
+// BatchStatus. On failure it returns a nil message together with the error
+// from RecvMsg.
+func (x *statefulLogsServiceLogsStreamClient) Recv() (*BatchStatus, error) {
+	reply := &BatchStatus{}
+	err := x.RecvMsg(reply)
+	if err != nil {
+		return nil, err
+	}
+	return reply, nil
+}
+
+// StatefulLogsServiceServer is the server API for StatefulLogsService service.
+type StatefulLogsServiceServer interface {
+	// LogsStream serves one LogsStream call over the given stream, which
+	// yields StatefulBatch messages and accepts BatchStatus messages.
+	LogsStream(StatefulLogsService_LogsStreamServer) error
+}
+
+// UnimplementedStatefulLogsServiceServer can be embedded to have forward compatible implementations.
+// Its methods answer with a codes.Unimplemented status error.
+type UnimplementedStatefulLogsServiceServer struct {
+}
+
+// LogsStream is the placeholder implementation: it ignores the stream and
+// always returns a codes.Unimplemented status error.
+func (*UnimplementedStatefulLogsServiceServer) LogsStream(StatefulLogsService_LogsStreamServer) error {
+	const msg = "method LogsStream not implemented"
+	return status.Error(codes.Unimplemented, msg)
+}
+
+// RegisterStatefulLogsServiceServer registers srv on s as the implementation
+// of the service described by _StatefulLogsService_serviceDesc.
+func RegisterStatefulLogsServiceServer(s *grpc.Server, srv StatefulLogsServiceServer) {
+	desc := &_StatefulLogsService_serviceDesc
+	s.RegisterService(desc, srv)
+}
+
+// _StatefulLogsService_LogsStream_Handler is the stream handler referenced by
+// the service descriptor. It wraps the raw server stream in the typed stream
+// and hands it to srv's LogsStream method. The type assertion is unchecked, so
+// it panics if srv does not implement StatefulLogsServiceServer.
+func _StatefulLogsService_LogsStream_Handler(srv interface{}, stream grpc.ServerStream) error {
+	impl := srv.(StatefulLogsServiceServer)
+	typed := &statefulLogsServiceLogsStreamServer{ServerStream: stream}
+	return impl.LogsStream(typed)
+}
+
+// StatefulLogsService_LogsStreamServer is the server-side stream of the
+// LogsStream RPC: it sends BatchStatus messages and receives StatefulBatch
+// messages, in addition to the generic grpc.ServerStream methods.
+type StatefulLogsService_LogsStreamServer interface {
+	// Send writes one BatchStatus to the stream.
+	Send(*BatchStatus) error
+	// Recv reads the next StatefulBatch from the stream.
+	Recv() (*StatefulBatch, error)
+	grpc.ServerStream
+}
+
+// statefulLogsServiceLogsStreamServer wraps a grpc.ServerStream and adds the
+// typed Send and Recv methods of StatefulLogsService_LogsStreamServer.
+type statefulLogsServiceLogsStreamServer struct {
+	grpc.ServerStream
+}
+
+// Send forwards m to the embedded server stream's SendMsg and returns its error.
+func (x *statefulLogsServiceLogsStreamServer) Send(m *BatchStatus) error {
+	return x.SendMsg(m)
+}
+
+// Recv reads the next message from the embedded server stream into a fresh
+// StatefulBatch. On failure it returns a nil message together with the error
+// from RecvMsg.
+func (x *statefulLogsServiceLogsStreamServer) Recv() (*StatefulBatch, error) {
+	batch := &StatefulBatch{}
+	err := x.RecvMsg(batch)
+	if err != nil {
+		return nil, err
+	}
+	return batch, nil
+}
+
+// _StatefulLogsService_serviceDesc describes StatefulLogsService to grpc: no
+// unary methods and a single stream, LogsStream, with both client and server
+// streaming enabled. The client stub addresses that stream as Streams[0], so
+// the order of Streams must not change.
+var _StatefulLogsService_serviceDesc = grpc.ServiceDesc{
+	ServiceName: "datadog.intake.stateful.StatefulLogsService",
+	HandlerType: (*StatefulLogsServiceServer)(nil),
+	Methods:     []grpc.MethodDesc{},
+	Streams: []grpc.StreamDesc{
+		{
+			StreamName:    "LogsStream",
+			Handler:       _StatefulLogsService_LogsStream_Handler,
+			ServerStreams: true,
+			ClientStreams: true,
+		},
+	},
+	Metadata: "datadog/stateful/stateful_encoding.proto",
+}
diff --git a/tasks/protobuf.py b/tasks/protobuf.py
index 78ef3e412f06..cf1e7a3fa320 100644
--- a/tasks/protobuf.py
+++ b/tasks/protobuf.py
@@ -21,6 +21,7 @@
     'remoteagent': False,
     'autodiscovery': False,
     'trace/idx': False,
+    'stateful': False,
 }
 
 CLI_EXTRAS = {