diff --git a/comp/logs/agent/config/config.go b/comp/logs/agent/config/config.go index 44d9c98a4303..69e932c3f49b 100644 --- a/comp/logs/agent/config/config.go +++ b/comp/logs/agent/config/config.go @@ -124,7 +124,7 @@ func BuildEndpointsWithConfig(coreConfig pkgconfigmodel.Reader, logsConfig *Logs if logsDDURL, defined := logsConfig.logsDDURL(); defined { haveHTTPProxy = strings.HasPrefix(logsDDURL, "http://") || strings.HasPrefix(logsDDURL, "https://") } - if logsConfig.isForceHTTPUse() || haveHTTPProxy || logsConfig.obsPipelineWorkerEnabled() || (bool(httpConnectivity) && !(logsConfig.isForceTCPUse() || logsConfig.isSocks5ProxySet() || logsConfig.hasAdditionalEndpoints())) { + if logsConfig.isGRPCUse() || logsConfig.isForceHTTPUse() || haveHTTPProxy || logsConfig.obsPipelineWorkerEnabled() || (bool(httpConnectivity) && !(logsConfig.isForceTCPUse() || logsConfig.isSocks5ProxySet() || logsConfig.hasAdditionalEndpoints())) { return BuildHTTPEndpointsWithConfig(coreConfig, logsConfig, endpointPrefix, intakeTrackType, intakeProtocol, intakeOrigin) } log.Warnf("You are currently sending Logs to Datadog through TCP (either because %s or %s is set or the HTTP connectivity test has failed) "+ @@ -373,7 +373,7 @@ func buildHTTPEndpoints(coreConfig pkgconfigmodel.Reader, logsConfig *LogsConfig batchMaxContentSize := logsConfig.batchMaxContentSize() inputChanSize := logsConfig.inputChanSize() - return NewEndpointsWithBatchSettings(main, additionals, false, true, batchWait, batchMaxConcurrentSend, batchMaxSize, batchMaxContentSize, inputChanSize), nil + return NewEndpointsWithBatchSettings(main, additionals, false, true, logsConfig.isGRPCUse(), batchWait, batchMaxConcurrentSend, batchMaxSize, batchMaxContentSize, inputChanSize), nil } type defaultParseAddressFunc func(string) (host string, port int, err error) @@ -447,6 +447,11 @@ func TaggerWarmupDuration(coreConfig pkgconfigmodel.Reader) time.Duration { return defaultLogsConfigKeys(coreConfig).taggerWarmupDuration() } +// 
StreamLifetime returns the duration for gRPC stream lifetime before rotation. +func StreamLifetime(coreConfig pkgconfigmodel.Reader) time.Duration { + return defaultLogsConfigKeys(coreConfig).streamLifetime() +} + // AggregationTimeout is used when performing aggregation operations func AggregationTimeout(coreConfig pkgconfigmodel.Reader) time.Duration { return defaultLogsConfigKeys(coreConfig).aggregationTimeout() diff --git a/comp/logs/agent/config/config_keys.go b/comp/logs/agent/config/config_keys.go index bf6f9313c28c..110243d41068 100644 --- a/comp/logs/agent/config/config_keys.go +++ b/comp/logs/agent/config/config_keys.go @@ -101,6 +101,10 @@ func (l *LogsConfigKeys) isForceHTTPUse() bool { l.getConfig().GetBool(l.getConfigKey("force_use_http")) } +func (l *LogsConfigKeys) isGRPCUse() bool { + return l.getConfig().GetBool(l.getConfigKey("use_grpc")) +} + func (l *LogsConfigKeys) logsNoSSL() bool { return l.getConfig().GetBool(l.getConfigKey("logs_no_ssl")) } @@ -292,6 +296,16 @@ func (l *LogsConfigKeys) senderRecoveryReset() bool { return l.getConfig().GetBool(l.getConfigKey("sender_recovery_reset")) } +func (l *LogsConfigKeys) streamLifetime() time.Duration { + key := l.getConfigKey("stream_lifetime") + streamLifetime := l.getConfig().GetInt(key) + if streamLifetime <= 0 { + log.Warnf("Invalid %s: %v should be > 0, fallback on %v", key, streamLifetime, pkgconfigsetup.DefaultLogsStreamLifetime) + return time.Duration(pkgconfigsetup.DefaultLogsStreamLifetime) * time.Second + } + return time.Duration(streamLifetime) * time.Second +} + // AggregationTimeout is used when performing aggregation operations func (l *LogsConfigKeys) aggregationTimeout() time.Duration { return l.getConfig().GetDuration(l.getConfigKey("aggregation_timeout")) * time.Millisecond diff --git a/comp/logs/agent/config/config_test.go b/comp/logs/agent/config/config_test.go index ee9049f48d9a..1de098e78668 100644 --- a/comp/logs/agent/config/config_test.go +++ 
b/comp/logs/agent/config/config_test.go @@ -287,7 +287,7 @@ func (suite *ConfigTestSuite) TestMultipleHttpEndpointsEnvVar() { isReliable: true, } - expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) + expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, false, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) endpoints, err := BuildHTTPEndpoints(suite.config, "test-track", "test-proto", "test-source") suite.Nil(err) @@ -414,7 +414,7 @@ func (suite *ConfigTestSuite) TestMultipleHttpEndpointsInConfig() { isReliable: true, } - expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) + expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, false, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) endpoints, err := BuildHTTPEndpoints(suite.config, "test-track", "test-proto", "test-source") suite.Nil(err) @@ -504,7 +504,7 @@ func (suite *ConfigTestSuite) TestMultipleHttpEndpointsInConfig2() { isReliable: true, } - expectedEndpoints := 
NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) + expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, false, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) endpoints, err := BuildHTTPEndpoints(suite.config, "test-track", "test-proto", "test-source") suite.Nil(err) diff --git a/comp/logs/agent/config/endpoints.go b/comp/logs/agent/config/endpoints.go index 6771f20d6d25..238c222c38ff 100644 --- a/comp/logs/agent/config/endpoints.go +++ b/comp/logs/agent/config/endpoints.go @@ -343,6 +343,7 @@ type Endpoints struct { Endpoints []Endpoint UseProto bool UseHTTP bool + UseGRPC bool BatchWait time.Duration BatchMaxConcurrentSend int BatchMaxSize int @@ -369,6 +370,23 @@ func NewEndpoints(main Endpoint, additionalEndpoints []Endpoint, useProto bool, additionalEndpoints, useProto, useHTTP, + false, // useGRPC defaults to false for backward compatibility + pkgconfigsetup.DefaultBatchWait, + pkgconfigsetup.DefaultBatchMaxConcurrentSend, + pkgconfigsetup.DefaultBatchMaxSize, + pkgconfigsetup.DefaultBatchMaxContentSize, + pkgconfigsetup.DefaultInputChanSize, + ) +} + +// NewEndpointsWithGRPC returns a new endpoints composite with gRPC support +func NewEndpointsWithGRPC(main Endpoint, additionalEndpoints []Endpoint, useProto bool, useHTTP bool, useGRPC bool) *Endpoints { + return NewEndpointsWithBatchSettings( + main, + additionalEndpoints, + useProto, + useHTTP, + useGRPC, pkgconfigsetup.DefaultBatchWait, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, @@ 
-378,12 +396,13 @@ func NewEndpoints(main Endpoint, additionalEndpoints []Endpoint, useProto bool, } // NewEndpointsWithBatchSettings returns a new endpoints composite with non-default batching settings specified -func NewEndpointsWithBatchSettings(main Endpoint, additionalEndpoints []Endpoint, useProto bool, useHTTP bool, batchWait time.Duration, batchMaxConcurrentSend int, batchMaxSize int, batchMaxContentSize int, inputChanSize int) *Endpoints { +func NewEndpointsWithBatchSettings(main Endpoint, additionalEndpoints []Endpoint, useProto bool, useHTTP bool, useGRPC bool, batchWait time.Duration, batchMaxConcurrentSend int, batchMaxSize int, batchMaxContentSize int, inputChanSize int) *Endpoints { return &Endpoints{ Main: main, Endpoints: append([]Endpoint{main}, additionalEndpoints...), UseProto: useProto, UseHTTP: useHTTP, + UseGRPC: useGRPC, BatchWait: batchWait, BatchMaxConcurrentSend: batchMaxConcurrentSend, BatchMaxSize: batchMaxSize, diff --git a/comp/logs/agent/config/endpoints_test.go b/comp/logs/agent/config/endpoints_test.go index 831e7b52113c..cfdba0590321 100644 --- a/comp/logs/agent/config/endpoints_test.go +++ b/comp/logs/agent/config/endpoints_test.go @@ -135,6 +135,24 @@ func (suite *EndpointsTestSuite) TestBuildEndpointsShouldSucceedWithValidHTTPCon suite.Equal("agent-http-intake.logs.datadoghq.com.", endpoint.Host) } +func (suite *EndpointsTestSuite) TestBuildEndpointsShouldSucceedWithValidGRPCConfig() { + var endpoints *Endpoints + var endpoint Endpoint + var err error + + suite.config.SetWithoutSource("logs_config.use_grpc", true) + + endpoints, err = BuildEndpoints(suite.config, HTTPConnectivityFailure, "test-track", "test-proto", "test-source") + suite.Nil(err) + suite.True(endpoints.UseGRPC) + suite.False(endpoints.UseHTTP) + suite.Equal(endpoints.BatchWait, 5*time.Second) + + endpoint = endpoints.Main + suite.True(endpoint.UseSSL()) + suite.Equal("agent-http-intake.logs.datadoghq.com.", endpoint.Host) +} + func (suite *EndpointsTestSuite) 
TestBuildEndpointsShouldSucceedWithValidHTTPConfigAndCompression() { var endpoints *Endpoints var endpoint Endpoint @@ -259,6 +277,7 @@ func (suite *EndpointsTestSuite) TestBuildEndpointsShouldTakeIntoAccountHTTPConn suite.config.SetWithoutSource("logs_config.force_use_tcp", "false") suite.config.SetWithoutSource("logs_config.use_http", "false") suite.config.SetWithoutSource("logs_config.force_use_http", "false") + suite.config.SetWithoutSource("logs_config.use_grpc", "false") suite.config.SetWithoutSource("logs_config.socks5_proxy_address", "") suite.config.SetWithoutSource("logs_config.additional_endpoints", []map[string]interface{}{}) } @@ -329,6 +348,19 @@ func (suite *EndpointsTestSuite) TestBuildEndpointsShouldTakeIntoAccountHTTPConn suite.config.SetWithoutSource("logs_config.socks5_proxy_address", "") }) + suite.Run("When use_grpc is true always create gRPC endpoints", func() { + defer resetHTTPConfigValuesToFalse() + suite.config.SetWithoutSource("logs_config.use_grpc", "true") + endpoints, err := BuildEndpoints(suite.config, HTTPConnectivitySuccess, "test-track", "test-proto", "test-source") + suite.Nil(err) + suite.True(endpoints.UseGRPC) + suite.False(endpoints.UseHTTP) + endpoints, err = BuildEndpoints(suite.config, HTTPConnectivityFailure, "test-track", "test-proto", "test-source") + suite.Nil(err) + suite.True(endpoints.UseGRPC) + suite.False(endpoints.UseHTTP) + }) + suite.Run("When additional_endpoints is not empty always create TCP endpoints", func() { defer resetHTTPConfigValuesToFalse() suite.config.SetWithoutSource("logs_config.additional_endpoints", []map[string]interface{}{ diff --git a/pkg/config/setup/config.go b/pkg/config/setup/config.go index bd96231222bd..835c4ee90a75 100644 --- a/pkg/config/setup/config.go +++ b/pkg/config/setup/config.go @@ -118,6 +118,9 @@ const ( // DefaultLogsSenderBackoffRecoveryInterval is the default logs sender backoff recovery interval DefaultLogsSenderBackoffRecoveryInterval = 2 + // DefaultLogsStreamLifetime 
is the default gRPC stream lifetime in seconds (15 minutes) + DefaultLogsStreamLifetime = 900 + // maxExternalMetricsProviderChunkSize ensures batch queries are limited in size. maxExternalMetricsProviderChunkSize = 35 @@ -2728,6 +2731,7 @@ func bindEnvAndSetLogsConfigKeys(config pkgconfigmodel.Setup, prefix string) { config.BindEnvAndSetDefault(prefix+"sender_backoff_max", DefaultLogsSenderBackoffMax) config.BindEnvAndSetDefault(prefix+"sender_recovery_interval", DefaultForwarderRecoveryInterval) config.BindEnvAndSetDefault(prefix+"sender_recovery_reset", false) + config.BindEnvAndSetDefault(prefix+"stream_lifetime", DefaultLogsStreamLifetime) config.BindEnvAndSetDefault(prefix+"use_v2_api", true) config.SetKnown(prefix + "dev_mode_no_ssl") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' } diff --git a/pkg/logs/message/message.go b/pkg/logs/message/message.go index 4f91ef10ffc4..ec40a34361b6 100644 --- a/pkg/logs/message/message.go +++ b/pkg/logs/message/message.go @@ -12,6 +12,7 @@ import ( "time" "github.com/DataDog/datadog-agent/pkg/logs/sources" + "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" "github.com/DataDog/datadog-agent/pkg/util/log" ) @@ -38,6 +39,8 @@ type Payload struct { Encoding string // The size of the unencoded payload UnencodedSize int + // Extra information for Stateful gRPC streaming (batch-level state changes) + StatefulExtra any } // NewPayload creates a new payload with the given message metadata, encoded content, encoding type and unencoded size @@ -70,6 +73,13 @@ type Message struct { MessageMetadata } +// StatefulMessage represents a log message for gRPC stateful streaming +// It contains a Datum (from stateful_encoding.proto) and associated metadata +type StatefulMessage struct { + Datum *statefulpb.Datum + Metadata *MessageMetadata +} + // MessageMetadata contains metadata information about a log message // //nolint:revive // exported: ignore package name struct conflict @@ -125,7 +135,7 @@ type 
MessageContent struct { //nolint:revive content []byte // structured content structuredContent StructuredContent - State MessageContentState + State MessageContentState } // MessageContentState is used to represent the MessageContent state. diff --git a/pkg/logs/patterns/automaton/rules.go b/pkg/logs/patterns/automaton/rules.go new file mode 100644 index 000000000000..5d21f7bc81b2 --- /dev/null +++ b/pkg/logs/patterns/automaton/rules.go @@ -0,0 +1,485 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package automaton provides terminal rules for token classification. +package automaton + +import ( + "fmt" + "regexp" + "sort" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +// Priority constants for rule evaluation order +// +// Rules are sorted by priority (highest first) and evaluated sequentially until the first match. +// Priority is based on the specificity of the pattern. The more specific the pattern, the higher the priority. +// Higher priority = evaluated first = more specific classification. 
+const ( + PriorityHigh = 3 // Very specific patterns like IPv4, IPv6, Email + PriorityMedium = 2 // Structured patterns like URI, Dates, HTTPStatus + PriorityLow = 1 // Generic fallback patterns like Numeric +) + +// TerminalRule represents a classification rule +type TerminalRule struct { + Name string + Pattern *regexp.Regexp + TokenType token.TokenType + Priority int // Use PriorityHigh/Medium/Low constants - higher values evaluated first + Category string + Description string + Examples []string +} + +// RuleCategory represents a grouping of rules +type RuleCategory struct { + Name string + Description string + Rules []*TerminalRule +} + +// RuleManager manages terminal rules +type RuleManager struct { + rules []*TerminalRule + categories map[string]*RuleCategory +} + +// NewRuleManager creates a new rule manager +func NewRuleManager() *RuleManager { + return &RuleManager{ + rules: make([]*TerminalRule, 0), + categories: make(map[string]*RuleCategory), + } +} + +// AddRule adds a new terminal rule +func (rm *RuleManager) AddRule(name, pattern, category, description string, tokenType token.TokenType, priority int, examples []string) error { + // Check for duplicate rule name + if rm.GetRule(name) != nil { + return fmt.Errorf("rule '%s' already exists", name) + } + + // Compile and validate regex pattern + regex, err := regexp.Compile(pattern) + if err != nil { + return fmt.Errorf("invalid regex pattern '%s': %v", pattern, err) + } + + // Validate examples match the pattern + for _, example := range examples { + if !regex.MatchString(example) { + return fmt.Errorf("example '%s' does not match pattern '%s'", example, pattern) + } + } + + // Create and insert rule + rule := &TerminalRule{ + Name: name, + Pattern: regex, + TokenType: tokenType, + Priority: priority, + Category: category, + Description: description, + Examples: examples, + } + + rm.insertRuleByPriority(rule) + rm.addToCategory(rule) + + return nil +} + +// RemoveRule removes a rule by name +func (rm 
*RuleManager) RemoveRule(name string) bool { + for i, rule := range rm.rules { + if rule.Name == name { + // Remove from rules list + rm.rules = append(rm.rules[:i], rm.rules[i+1:]...) + + // Remove from category + rm.removeFromCategory(rule) + return true + } + } + return false +} + +// ApplyRules applies terminal rules in priority order to classify a token +// Returns TokenWord if no rule matches (generic word fallback) +func (rm *RuleManager) ApplyRules(value string) token.TokenType { + for _, rule := range rm.rules { + if rule.Pattern.MatchString(value) { + return rule.TokenType + } + } + return token.TokenWord +} + +// LoadPredefinedRules loads predefined rules +func (rm *RuleManager) LoadPredefinedRules() error { + predefined := GetPredefinedRules() + + for _, rule := range predefined { + err := rm.AddRule( + rule.Name, + rule.Pattern.String(), + rule.Category, + rule.Description, + rule.TokenType, + rule.Priority, + rule.Examples, + ) + if err != nil { + return fmt.Errorf("failed to load rule '%s': %v", rule.Name, err) + } + } + + return nil +} + +// ================================================ +// Helper methods +// ================================================ + +func (rm *RuleManager) insertRuleByPriority(rule *TerminalRule) { + // Insert in priority order (higher priority first) + inserted := false + for i, existing := range rm.rules { + if rule.Priority > existing.Priority { + // Insert at position i + rm.rules = append(rm.rules[:i], append([]*TerminalRule{rule}, rm.rules[i:]...)...) 
+ inserted = true + break + } + } + + if !inserted { + rm.rules = append(rm.rules, rule) + } +} + +func (rm *RuleManager) addToCategory(rule *TerminalRule) { + if rm.categories[rule.Category] == nil { + rm.categories[rule.Category] = &RuleCategory{ + Name: rule.Category, + Description: fmt.Sprintf("Rules for %s tokens", rule.Category), + Rules: make([]*TerminalRule, 0), + } + } + + rm.categories[rule.Category].Rules = append(rm.categories[rule.Category].Rules, rule) +} + +func (rm *RuleManager) removeFromCategory(rule *TerminalRule) { + if category, exists := rm.categories[rule.Category]; exists { + for i, r := range category.Rules { + if r.Name == rule.Name { + category.Rules = append(category.Rules[:i], category.Rules[i+1:]...) + break + } + } + + // Remove category if empty + if len(category.Rules) == 0 { + delete(rm.categories, rule.Category) + } + } +} + +// GetRule retrieves a rule by name +func (rm *RuleManager) GetRule(name string) *TerminalRule { + for _, rule := range rm.rules { + if rule.Name == name { + return rule + } + } + return nil +} + +// ListRules returns all rules sorted by priority +func (rm *RuleManager) ListRules() []*TerminalRule { + // Return a copy to prevent external modification + result := make([]*TerminalRule, len(rm.rules)) + copy(result, rm.rules) + return result +} + +// GetRulesByCategory returns rules in a specific category +func (rm *RuleManager) GetRulesByCategory(category string) []*TerminalRule { + if cat, exists := rm.categories[category]; exists { + result := make([]*TerminalRule, len(cat.Rules)) + copy(result, cat.Rules) + return result + } + return []*TerminalRule{} +} + +// GetCategories returns all rule categories +func (rm *RuleManager) GetCategories() []string { + categories := make([]string, 0, len(rm.categories)) + for name := range rm.categories { + categories = append(categories, name) + } + sort.Strings(categories) + return categories +} + +// GetRuleStats returns statistics about the rule system +func (rm 
*RuleManager) GetRuleStats() RuleStats { + stats := RuleStats{ + TotalRules: len(rm.rules), + Categories: len(rm.categories), + ByCategory: make(map[string]int), + ByTokenType: make(map[token.TokenType]int), + } + + for _, rule := range rm.rules { + stats.ByCategory[rule.Category]++ + stats.ByTokenType[rule.TokenType]++ + } + + return stats +} + +// RuleStats contains statistics about the rule system +type RuleStats struct { + TotalRules int + Categories int + ByCategory map[string]int + ByTokenType map[token.TokenType]int +} + +// GetPredefinedRules returns the standard set of terminal rules +func GetPredefinedRules() []*TerminalRule { + rules := []*TerminalRule{ + + // ============================================================================= + // DATE & TIME PATTERNS (Priority: High to Medium) + // Based on multiline aggregation patterns for comprehensive coverage + // ============================================================================= + + // High Priority - Modern Standards with Timezone Support + { + Name: "RFC3339DateTime", + Pattern: regexp.MustCompile(`^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(\.\d+)?(Z|[\+\-]\d{2}:?\d{2})?`), + TokenType: token.TokenDate, + Priority: PriorityHigh, + Category: "time", + Description: "Matches RFC3339 datetime format with timezone", + Examples: []string{"2024-01-15T10:30:45Z", "2024-01-15T10:30:45.123Z", "2024-01-15T10:30:45+02:00"}, + }, + { + Name: "RFC3339NanoDateTime", + Pattern: regexp.MustCompile(`^(\d+)-(\d+)-(\d+)([A-Za-z_]+)(\d+):(\d+):(\d+)\.(\d+)([A-Za-z_]+)(\d+):(\d+)`), + TokenType: token.TokenDate, + Priority: PriorityHigh, + Category: "time", + Description: "Supplementary pattern from multiline handler for edge-case RFC3339 formats", + Examples: []string{"2024-12-25T14:30:00.123456789Z07:00"}, + }, + { + Name: "StandardTimestamp", + Pattern: regexp.MustCompile(`^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})(,\d+)?`), + TokenType: token.TokenDate, + Priority: PriorityHigh, + Category: 
"time", + Description: "Matches standard timestamp format with optional milliseconds", + Examples: []string{"2024-01-15 10:30:45", "2024-01-15 10:30:45,123"}, + }, + + // Medium Priority - Legacy RFC Standards + { + Name: "RFC1123DateTime", + Pattern: regexp.MustCompile(`^([A-Za-z_]+), (\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+):(\d+) ([A-Za-z_]+)`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches RFC1123 datetime format", + Examples: []string{"Mon, 02 Jan 2006 15:04:05 MST", "Wed, 25 Dec 2024 14:30:00 UTC"}, + }, + { + Name: "RFC1123ZDateTime", + Pattern: regexp.MustCompile(`^([A-Za-z_]+), (\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+):(\d+) (-\d+)`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Supplementary pattern from multiline handler for RFC1123Z edge cases", + Examples: []string{"Mon, 02 Jan 2006 15:04:05 -0700", "Wed, 25 Dec 2024 14:30:00 -0800"}, + }, + { + Name: "RFC850DateTime", + Pattern: regexp.MustCompile(`^([A-Za-z_]+), (\d+)-([A-Za-z_]+)-(\d+) (\d+):(\d+):(\d+) ([A-Za-z_]+)`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches RFC850 datetime format", + Examples: []string{"Monday, 02-Jan-06 15:04:05 MST", "Wednesday, 25-Dec-24 14:30:00 UTC"}, + }, + { + Name: "RFC822DateTime", + Pattern: regexp.MustCompile(`^(\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+) ([A-Za-z_]+)`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches RFC822 datetime format", + Examples: []string{"02 Jan 06 15:04 MST", "25 Dec 24 14:30 UTC"}, + }, + { + Name: "RFC822ZDateTime", + Pattern: regexp.MustCompile(`^(\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+) (-\d+)`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Supplementary pattern from multiline handler for RFC822Z edge cases", + Examples: []string{"02 Jan 06 15:04 -0700", "25 Dec 24 14:30 -0800"}, + }, + + // Medium 
Priority - Unix/System Formats + { + Name: "ANSICDateTime", + Pattern: regexp.MustCompile(`^([A-Za-z_]+) ([A-Za-z_]+) +(\d+) (\d+):(\d+):(\d+) (\d+)`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches ANSIC datetime format", + Examples: []string{"Mon Jan 2 15:04:05 2006", "Wed Dec 25 14:30:00 2024"}, + }, + { + Name: "UnixDateTime", + Pattern: regexp.MustCompile(`^([A-Za-z_]+) ([A-Za-z_]+) +(\d+) (\d+):(\d+):(\d+)( [A-Za-z_]+ (\d+))?`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches Unix datetime format with optional timezone", + Examples: []string{"Mon Jan 2 15:04:05 2006", "Mon Jan 2 15:04:05 MST 2006"}, + }, + { + Name: "RubyDateTime", + Pattern: regexp.MustCompile(`^([A-Za-z_]+) ([A-Za-z_]+) (\d+) (\d+):(\d+):(\d+) ([\-\+]\d+) (\d+)`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches Ruby datetime format with timezone offset", + Examples: []string{"Mon Jan 02 15:04:05 -0700 2006", "Wed Dec 25 14:30:00 +0200 2024"}, + }, + + // Medium Priority - Application-Specific Formats + { + Name: "JavaSimpleFormatter", + Pattern: regexp.MustCompile(`^([A-Za-z_]+) (\d+), (\d{4}) (\d+):(\d+):(\d+) (AM|PM)`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches Java SimpleFormatter date format", + Examples: []string{"January 15, 2024 2:30:45 PM", "December 31, 2023 11:59:59 AM"}, + }, + { + Name: "SlashDateTime", + Pattern: regexp.MustCompile(`^(\d{4})/(\d{2})/(\d{2}) (\d{2}):(\d{2}):(\d{2})`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches slash-separated datetime format", + Examples: []string{"2024/01/15 10:30:45", "2024/12/31 23:59:59"}, + }, + { + Name: "SimpleDate", + Pattern: regexp.MustCompile(`^(\d{4})-(1[012]|0?[1-9])-([12][0-9]|3[01]|0?[1-9])$`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + 
Category: "time", + Description: "Matches YYYY-MM-DD date format with validation", + Examples: []string{"2024-01-15", "2024-12-31", "2024-02-29"}, + }, + + // ============================================================================= + // NETWORK PATTERNS (Priority: High) + // ============================================================================= + + { + Name: "IPv4Address", + Pattern: regexp.MustCompile(`^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`), + TokenType: token.TokenIPv4, + Priority: PriorityHigh, + Category: "network", + Description: "Matches IPv4 addresses in dotted decimal notation", + Examples: []string{"192.168.1.1", "10.0.0.1", "255.255.255.255", "0.0.0.0"}, + }, + { + Name: "IPv6Address", + Pattern: regexp.MustCompile(`^([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}$`), + TokenType: token.TokenIPv6, + Priority: PriorityHigh, + Category: "network", + Description: "Matches basic IPv6 addresses", + Examples: []string{"2001:0db8:85a3:0000:0000:8a2e:0370:7334", "fe80:0000:0000:0000:0000:0000:0000:0001"}, + }, + { + Name: "EmailAddress", + Pattern: regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`), + TokenType: token.TokenEmail, + Priority: PriorityHigh, + Category: "network", + Description: "Matches email addresses", + Examples: []string{"user@example.com", "test.email+tag@domain.org", "admin@company.co.uk"}, + }, + { + Name: "URI", + Pattern: regexp.MustCompile(`^https?://[^\s]+$`), + TokenType: token.TokenURI, + Priority: PriorityMedium, + Category: "network", + Description: "Matches HTTP and HTTPS URIs", + Examples: []string{"http://example.com", "https://api.domain.com/v1/users", "https://cdn.example.org/assets/style.css"}, + }, + + // ============================================================================= + // HTTP PATTERNS (Priority: Medium) + // ============================================================================= + + { + Name: "HTTPStatus", + Pattern: 
regexp.MustCompile(`^[1-5][0-9][0-9]$`), + TokenType: token.TokenHTTPStatus, + Priority: PriorityMedium, + Category: "http", + Description: "Matches HTTP status codes", + Examples: []string{"200", "404", "500", "301", "403"}, + }, + + // ============================================================================= + // FILESYSTEM PATTERNS (Priority: Medium) + // ============================================================================= + + { + Name: "AbsolutePath", + Pattern: regexp.MustCompile(`^/[^\s]+$`), + TokenType: token.TokenAbsolutePath, + Priority: PriorityMedium, + Category: "filesystem", + Description: "Matches absolute file/URL paths", + Examples: []string{"/api/users", "/var/log/app.log", "/home/user/documents"}, + }, + + // ============================================================================= + // NUMERIC PATTERNS (Priority: Low - Fallback) + // ============================================================================= + + { + Name: "Numeric", + Pattern: regexp.MustCompile(`^\d+$`), + TokenType: token.TokenNumeric, + Priority: PriorityLow, + Category: "numeric", + Description: "Matches pure numeric values", + Examples: []string{"123", "0", "999999", "42"}, + }, + } + + return rules +} diff --git a/pkg/logs/patterns/automaton/rules_test.go b/pkg/logs/patterns/automaton/rules_test.go new file mode 100644 index 000000000000..78f70c5abf90 --- /dev/null +++ b/pkg/logs/patterns/automaton/rules_test.go @@ -0,0 +1,287 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +package automaton + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +// TestNewRuleManager tests the creation of a new rule manager +func TestNewRuleManager(t *testing.T) { + rm := NewRuleManager() + + assert.NotNil(t, rm.rules, "Expected rules slice to be initialized") + assert.NotNil(t, rm.categories, "Expected categories map to be initialized") + assert.Equal(t, 0, len(rm.rules), "Expected empty rules slice") +} + +// TestRuleManager_AddRule tests the addition of a new rule +func TestRuleManager_AddRule(t *testing.T) { + rm := NewRuleManager() + + err := rm.AddRule( + "TestIPv4", + `^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`, + "network", + "Test IPv4 pattern", + token.TokenIPv4, + 100, + []string{"192.168.1.1", "10.0.0.1"}, + ) + + assert.NoError(t, err, "Failed to add rule") + assert.Equal(t, 1, len(rm.rules), "Expected 1 rule") + + rule := rm.rules[0] + assert.Equal(t, "TestIPv4", rule.Name, "Expected rule name 'TestIPv4'") + assert.Equal(t, token.TokenIPv4, rule.TokenType, "Expected token type TokenIPv4") + assert.Equal(t, 100, rule.Priority, "Expected priority 100") + assert.Equal(t, "network", rule.Category, "Expected category 'network'") +} + +// TestRuleManager_AddRule_InvalidPattern tests the addition of a new rule with an invalid regex pattern +func TestRuleManager_AddRule_InvalidPattern(t *testing.T) { + rm := NewRuleManager() + + err := rm.AddRule( + "BadRule", + `[invalid(regex`, + "test", + "Invalid regex", + token.TokenWord, + 50, + []string{}, + ) + + assert.Error(t, err, "Expected error for invalid regex pattern") +} + +// TestRuleManager_AddRule_InvalidExample tests the addition of a new rule with an invalid example +func TestRuleManager_AddRule_InvalidExample(t *testing.T) { + rm := NewRuleManager() + + err := rm.AddRule( + "TestRule", + `^\d+$`, + "test", + "Numeric pattern", + token.TokenNumeric, + 50, + 
[]string{"123", "abc"}, // "abc" doesn't match ^\d+$ + ) + + assert.Error(t, err, "Expected error for example that doesn't match pattern") +} + +// TestRuleManager_AddRule_Duplicate tests the addition of a duplicate rule +func TestRuleManager_AddRule_Duplicate(t *testing.T) { + rm := NewRuleManager() + + // Add first rule + err := rm.AddRule("TestRule", `^\d+$`, "test", "Numeric", token.TokenNumeric, 50, []string{"123"}) + assert.NoError(t, err, "Failed to add first rule") + + // Try to add duplicate rule + err = rm.AddRule("TestRule", `^[a-z]+$`, "test", "Alpha", token.TokenWord, 50, []string{"abc"}) + assert.Error(t, err, "Expected error when adding duplicate rule name") + assert.Contains(t, err.Error(), "already exists", "Expected 'already exists' error") +} + +// TestRuleManager_RemoveRule tests the removal of a rule +func TestRuleManager_RemoveRule(t *testing.T) { + rm := NewRuleManager() + + // Add a rule first + rm.AddRule("TestRule", `^\d+$`, "test", "Test", token.TokenNumeric, 50, []string{"123"}) + + assert.Equal(t, 1, len(rm.rules), "Expected 1 rule before removal") + + // Remove the rule + removed := rm.RemoveRule("TestRule") + assert.True(t, removed, "Expected RemoveRule to return true") + assert.Equal(t, 0, len(rm.rules), "Expected 0 rules after removal") + + // Try to remove non-existent rule + removed = rm.RemoveRule("NonExistent") + assert.False(t, removed, "Expected RemoveRule to return false for non-existent rule") +} + +// TestRuleManager_GetRule tests the retrieval of a rule by name +func TestRuleManager_GetRule(t *testing.T) { + rm := NewRuleManager() + + rm.AddRule("TestRule", `^\d+$`, "test", "Test", token.TokenNumeric, 50, []string{"123"}) + + rule := rm.GetRule("TestRule") + assert.NotNil(t, rule, "Expected to find rule 'TestRule'") + if rule != nil { + assert.Equal(t, "TestRule", rule.Name, "Expected rule name 'TestRule'") + } + + notFound := rm.GetRule("NonExistent") + assert.Nil(t, notFound, "Expected nil for non-existent rule") +} + 
+// TestRuleManager_PriorityOrdering tests the ordering of rules by priority +func TestRuleManager_PriorityOrdering(t *testing.T) { + rm := NewRuleManager() + + // Add rules in different priority order + rm.AddRule("Low", `low`, "test", "Low priority", token.TokenWord, 10, []string{"low"}) + rm.AddRule("High", `high`, "test", "High priority", token.TokenWord, 100, []string{"high"}) + rm.AddRule("Medium", `medium`, "test", "Medium priority", token.TokenWord, 50, []string{"medium"}) + + rules := rm.ListRules() + assert.Equal(t, 3, len(rules), "Expected 3 rules") + + // Should be ordered by priority (highest first) + expectedOrder := []string{"High", "Medium", "Low"} + expectedPriorities := []int{100, 50, 10} + + for i, rule := range rules { + assert.Equal(t, expectedOrder[i], rule.Name, "Rule %d name mismatch", i) + assert.Equal(t, expectedPriorities[i], rule.Priority, "Rule %d priority mismatch", i) + } +} + +// TestRuleManager_ApplyRules tests the application of rules to a value +func TestRuleManager_ApplyRules(t *testing.T) { + rm := NewRuleManager() + + // Add rules with different priorities + rm.AddRule("IPv4", `^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`, + "network", "IPv4", token.TokenIPv4, 100, []string{"192.168.1.1"}) + rm.AddRule("Numeric", `^\d+$`, "numeric", "Numbers", token.TokenNumeric, 30, []string{"123"}) + + tests := []struct { + input string + expected token.TokenType + }{ + {"192.168.1.1", token.TokenIPv4}, // Higher priority rule should match + {"123", token.TokenNumeric}, + {"999.999.999.999", token.TokenWord}, // Invalid IPv4, no rule matches - generic word + {"abc", token.TokenWord}, // No rule matches - generic word + } + + for _, test := range tests { + result := rm.ApplyRules(test.input) + assert.Equal(t, test.expected, result, "ApplyRules('%s') mismatch", test.input) + } +} + +// TestRuleManager_GetRulesByCategory tests the retrieval of rules by category +func 
TestRuleManager_GetRulesByCategory(t *testing.T) { + rm := NewRuleManager() + + rm.AddRule("IPv4", `ipv4`, "network", "IPv4", token.TokenIPv4, 100, []string{"ipv4"}) + rm.AddRule("Email", `email`, "network", "Email", token.TokenEmail, 90, []string{"email"}) + rm.AddRule("Numeric", `num`, "numeric", "Number", token.TokenNumeric, 50, []string{"num"}) + + networkRules := rm.GetRulesByCategory("network") + assert.Equal(t, 2, len(networkRules), "Expected 2 network rules") + + numericRules := rm.GetRulesByCategory("numeric") + assert.Equal(t, 1, len(numericRules), "Expected 1 numeric rule") + + emptyRules := rm.GetRulesByCategory("nonexistent") + assert.Equal(t, 0, len(emptyRules), "Expected 0 rules for nonexistent category") +} + +// TestRuleManager_GetCategories tests the retrieval of categories +func TestRuleManager_GetCategories(t *testing.T) { + rm := NewRuleManager() + + rm.AddRule("Rule1", `r1`, "network", "Rule 1", token.TokenWord, 50, []string{"r1"}) + rm.AddRule("Rule2", `r2`, "time", "Rule 2", token.TokenWord, 50, []string{"r2"}) + rm.AddRule("Rule3", `r3`, "network", "Rule 3", token.TokenWord, 50, []string{"r3"}) + + categories := rm.GetCategories() + assert.Equal(t, 2, len(categories), "Expected 2 categories") + + // Categories should be sorted + expectedCategories := []string{"network", "time"} + for i, expected := range expectedCategories { + if assert.Less(t, i, len(categories), "Category %d should exist", i) { + assert.Equal(t, expected, categories[i], "Expected category %d to be '%s'", i, expected) + } + } +} + +// TestRuleManager_GetRuleStats tests the retrieval of rule statistics +func TestRuleManager_GetRuleStats(t *testing.T) { + rm := NewRuleManager() + + rm.AddRule("IPv4", `ipv4`, "network", "IPv4", token.TokenIPv4, 100, []string{"ipv4"}) + rm.AddRule("Email", `email`, "network", "Email", token.TokenEmail, 90, []string{"email"}) + rm.AddRule("Numeric", `num`, "numeric", "Number", token.TokenNumeric, 50, []string{"num"}) + + stats := 
rm.GetRuleStats() + + assert.Equal(t, 3, stats.TotalRules, "Expected TotalRules=3") + assert.Equal(t, 2, stats.Categories, "Expected Categories=2") + assert.Equal(t, 2, stats.ByCategory["network"], "Expected 2 network rules") + assert.Equal(t, 1, stats.ByCategory["numeric"], "Expected 1 numeric rule") + assert.Equal(t, 1, stats.ByTokenType[token.TokenIPv4], "Expected 1 IPv4 token rule") +} + +// TestGetPredefinedRules tests the retrieval of predefined rules +func TestGetPredefinedRules(t *testing.T) { + rules := GetPredefinedRules() + + assert.NotEqual(t, 0, len(rules), "Expected predefined rules to be non-empty") + + // Check that we have the expected rule types + foundRules := make(map[string]bool) + for _, rule := range rules { + foundRules[rule.Name] = true + + // Validate rule structure + assert.NotNil(t, rule.Pattern, "Rule '%s' has nil pattern", rule.Name) + assert.NotEqual(t, "", rule.Name, "Found rule with empty name") + assert.NotEqual(t, "", rule.Category, "Rule '%s' has empty category", rule.Name) + assert.NotEqual(t, 0, len(rule.Examples), "Rule '%s' has no examples", rule.Name) + + // Test examples against pattern + for _, example := range rule.Examples { + assert.True(t, rule.Pattern.MatchString(example), + "Rule '%s': example '%s' doesn't match pattern", rule.Name, example) + } + } + + expectedRules := []string{"IPv4Address", "EmailAddress", "URI", "HTTPStatus", "Numeric"} + for _, expected := range expectedRules { + assert.True(t, foundRules[expected], "Expected predefined rule '%s' not found", expected) + } +} + +// TestRuleManager_LoadPredefinedRules tests the loading of predefined rules +func TestRuleManager_LoadPredefinedRules(t *testing.T) { + rm := NewRuleManager() + + err := rm.LoadPredefinedRules() + assert.NoError(t, err, "Failed to load predefined rules") + + rules := rm.ListRules() + assert.NotEqual(t, 0, len(rules), "Expected predefined rules to be loaded") + + // Verify some key rules exist + ipv4Rule := rm.GetRule("IPv4Address") + 
assert.NotNil(t, ipv4Rule, "Expected IPv4Address rule to be loaded") + + emailRule := rm.GetRule("EmailAddress") + assert.NotNil(t, emailRule, "Expected EmailAddress rule to be loaded") + + // Test that rules are working + result := rm.ApplyRules("192.168.1.1") + assert.Equal(t, token.TokenIPv4, result, "Expected IPv4 token for '192.168.1.1'") + + result = rm.ApplyRules("test@example.com") + assert.Equal(t, token.TokenEmail, result, "Expected Email token for 'test@example.com'") +} diff --git a/pkg/logs/patterns/automaton/tokenizer.go b/pkg/logs/patterns/automaton/tokenizer.go new file mode 100644 index 000000000000..ab9f0b514965 --- /dev/null +++ b/pkg/logs/patterns/automaton/tokenizer.go @@ -0,0 +1,312 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package automaton provides log message tokenization using finite state automaton +// and pattern matching for semantic token classification. +package automaton + +import ( + "fmt" + "unicode" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +// TokenizerState represents the current state of the FSA +type TokenizerState int + +const ( + StateStart TokenizerState = iota // StateStart is the initial state + StateWord // StateWord is letters, digits, and common separators for structured tokens + StateNumeric // StateNumeric is pure numbers + StateWhitespace // StateWhitespace is spaces, tabs, newlines + StateSpecial // StateSpecial is operators, punctuation, symbols +) + +const ( + // These numbers could be ran with some more testing on more log samples to optimize these values. + // tokenizerBufferCapacity is the initial capacity for the rune buffer. 
+ tokenizerBufferCapacity = 128 + + // tokenizerTokensCapacity is the initial capacity for the tokens slice. + tokenizerTokensCapacity = 24 +) + +// Tokenizer implements a finite state automaton for log tokenization +type Tokenizer struct { + input string + pos int + length int + state TokenizerState + buffer []rune + tokens []token.Token +} + +// NewTokenizer creates a new tokenizer for the given input +func NewTokenizer(input string) *Tokenizer { + return &Tokenizer{ + input: input, + pos: 0, + length: len(input), + state: StateStart, + buffer: make([]rune, 0, tokenizerBufferCapacity), + tokens: make([]token.Token, 0, tokenizerTokensCapacity), + } +} + +// Tokenize processes the input string and returns a TokenList +func (t *Tokenizer) Tokenize() *token.TokenList { + for t.pos < t.length { + if !t.processNextToken() { + break + } + } + + t.handleLastToken() + t.classifyTokens() + + return token.NewTokenListWithTokens(t.tokens) +} + +// classifyTokens upgrades generic tokens to specific types. +// The FSA first creates generic tokens (TokenWord, TokenNumeric), then this function uses +// pattern matching to identify structured types: +// - "192.168.1.1" → TokenNumeric upgraded to TokenIPv4 +// - "user@example.com" → TokenWord upgraded to TokenEmail +// - "GET" → TokenWord upgraded to TokenHTTPMethod +func (t *Tokenizer) classifyTokens() { + for i, tok := range t.tokens { + // Skip if not eligible for classification + if !t.shouldClassify(&tok) { + continue + } + + // identify specific structured types (IP, Email, Date, HTTP, etc.) + // fallback to word token if can't upgrade to specific type + classifiedType, err := t.classifyToken(tok.Value) + if err != nil { + log.Warnf("Failed to classify token '%s': %v. 
Falling back to word token type", tok.Value, err) + continue + } + + // fallback to word token if can't upgrade to specific type + if classifiedType == token.TokenWord { + continue + } + + // Upgrade token to the more specific type + t.tokens[i].Type = classifiedType + t.tokens[i].Wildcard = getWildcardPotential(classifiedType) + } +} + +// shouldClassify determines if a token is eligible for pattern-based classification. +// Returns true only for generic Word/Numeric tokens that are PotentialWildcard. +// Excludes: whitespace, punctuation (NotWildcard) +func (t *Tokenizer) shouldClassify(tok *token.Token) bool { + isGenericType := tok.Type == token.TokenWord || tok.Type == token.TokenNumeric + canVary := tok.Wildcard != token.NotWildcard + + return isGenericType && canVary +} + +// processNextToken advances the automaton by one token +func (t *Tokenizer) processNextToken() bool { + if t.pos >= t.length { + return false + } + + char := rune(t.input[t.pos]) + + switch t.state { + case StateStart: + return t.handleStartState(char) + case StateWord: + return t.handleWordState(char) + case StateNumeric: + return t.handleNumericState(char) + case StateWhitespace: + return t.handleWhitespaceState(char) + case StateSpecial: + return t.handleSpecialState(char) + default: + return t.handleStartState(char) // Fallback + } +} + +// handleStartState determines initial state based on character type +func (t *Tokenizer) handleStartState(char rune) bool { + switch { + case unicode.IsSpace(char): + t.setState(StateWhitespace) + case unicode.IsDigit(char): + t.setState(StateNumeric) + case unicode.IsLetter(char) || char == '/': + t.setState(StateWord) + default: + t.setState(StateSpecial) + } + + t.addToBuffer(char) + t.pos++ + return true +} + +// handleWordState processes word tokens +func (t *Tokenizer) handleWordState(char rune) bool { + if unicode.IsLetter(char) || unicode.IsDigit(char) || char == '_' || char == '-' || + char == '.' 
|| char == '@' || char == '/' || + (char == ':' && t.isURLScheme()) { + t.addToBuffer(char) + t.pos++ + return true + } + + t.createWordToken() + t.setState(StateStart) + return true +} + +// handleNumericState processes numeric tokens +// Allows digits and special chars for dates (2024-01-15), times (10:30:45) or IPs (192.168.1.1) +func (t *Tokenizer) handleNumericState(char rune) bool { + switch { + case unicode.IsDigit(char), char == '.', char == '-', char == '/', char == ':': + t.addToBuffer(char) + t.pos++ + return true + default: + t.createNumericToken() + t.setState(StateStart) + return true + } +} + +// handleWhitespaceState processes whitespace +func (t *Tokenizer) handleWhitespaceState(char rune) bool { + switch { + case unicode.IsSpace(char): + t.addToBuffer(char) + t.pos++ + return true + default: + t.createWhitespaceToken() + t.setState(StateStart) + return true + } +} + +// handleSpecialState processes special characters +func (t *Tokenizer) handleSpecialState(_ rune) bool { + // The special character is already in buffer from handleStartState + // Just create the token and reset state + t.createSpecialToken() + t.setState(StateStart) + return true +} + +// classifyToken attempts to classify a single token's type using trie and terminal rules. 
+func (t *Tokenizer) classifyToken(value string) (token.TokenType, error) { + if len(value) == 0 { + return token.TokenUnknown, fmt.Errorf("cannot classify empty string token value") + } + return globalTrie.Match(value), nil +} + +// getWildcardPotential determines if a token type can potentially become a wildcard +// Returns either NotWildcard (0%) or PotentialWildcard (50%) +// Note: IsWildcard (100%) is only set during pattern merging, never during tokenization +func getWildcardPotential(tokenType token.TokenType) token.WildcardStatus { + // Only whitespace cannot become a wildcard + if tokenType == token.TokenWhitespace { + return token.NotWildcard + } + + // Everything else can potentially become wildcards + return token.PotentialWildcard +} + +// ================================================ +// Helper functions +// ================================================ + +// isURLScheme checks if current buffer looks like a URL scheme +func (t *Tokenizer) isURLScheme() bool { + buffer := string(t.buffer) + return buffer == "http" || buffer == "https" +} + +// State management helpers + +func (t *Tokenizer) setState(newState TokenizerState) { + t.state = newState +} + +func (t *Tokenizer) addToBuffer(char rune) { + t.buffer = append(t.buffer, char) +} + +func (t *Tokenizer) clearBuffer() { + t.buffer = t.buffer[:0] // Keep capacity, reset length +} + +func (t *Tokenizer) bufferToString() string { + return string(t.buffer) +} + +func (t *Tokenizer) handleLastToken() { + if len(t.buffer) > 0 { + // Create token from remaining buffer content based on current state + switch t.state { + case StateNumeric: + t.createNumericToken() + case StateWhitespace: + t.createWhitespaceToken() + case StateSpecial: + t.createSpecialToken() + default: + t.createWordToken() + } + } +} + +// ================================================ +// Token creation methods +// ================================================ + +func (t *Tokenizer) createWordToken() { + value := 
t.bufferToString() + // Create as basic Word type - classification happens later in classifyTokens() + tok := token.NewToken(token.TokenWord, value, token.PotentialWildcard) + t.tokens = append(t.tokens, tok) + t.clearBuffer() +} + +func (t *Tokenizer) createNumericToken() { + value := t.bufferToString() + // Numeric tokens are potential wildcards - will be classified later + tok := token.NewToken(token.TokenNumeric, value, token.PotentialWildcard) + t.tokens = append(t.tokens, tok) + t.clearBuffer() +} + +func (t *Tokenizer) createWhitespaceToken() { + // Normalize all whitespace (tabs, spaces, newlines, multiple spaces) to single space + value := " " + // Whitespace never becomes wildcard + tok := token.NewToken(token.TokenWhitespace, value, token.NotWildcard) + t.tokens = append(t.tokens, tok) + t.clearBuffer() +} + +func (t *Tokenizer) createSpecialToken() { + value := t.bufferToString() + // Special characters (punctuation, symbols) should not wildcard - only merge if identical + // Examples: ":", "[", "@" - structural markers that must stay consistent + tok := token.NewToken(token.TokenWord, value, token.NotWildcard) + t.tokens = append(t.tokens, tok) + t.clearBuffer() +} diff --git a/pkg/logs/patterns/automaton/tokenizer_test.go b/pkg/logs/patterns/automaton/tokenizer_test.go new file mode 100644 index 000000000000..5fdf716836bb --- /dev/null +++ b/pkg/logs/patterns/automaton/tokenizer_test.go @@ -0,0 +1,460 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +package automaton + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +// TestTokenizer_SimpleTokenization tests basic tokenization and type classification +func TestTokenizer_SimpleTokenization(t *testing.T) { + input := "GET /api 200" + tokenizer := NewTokenizer(input) + tokenList := tokenizer.Tokenize() + + assert.NotEqual(t, 0, tokenList.Length(), "Expected tokens, got empty list") + + // Should have: GET, whitespace, /api, whitespace, 200 + assert.Equal(t, 5, tokenList.Length(), "Expected 5 tokens") + + // Verify token types + expectedTypes := []token.TokenType{ + token.TokenHTTPMethod, // GET + token.TokenWhitespace, // space + token.TokenAbsolutePath, // /api + token.TokenWhitespace, // space + token.TokenHTTPStatus, // 200 + } + + for i, expected := range expectedTypes { + if assert.Less(t, i, tokenList.Length(), "Token %d should exist", i) { + assert.Equal(t, expected, tokenList.Tokens[i].Type, + "Token %d (value: '%s') should be type %v", i, tokenList.Tokens[i].Value, expected) + } + } +} + +// TestTokenizer_StateTransitions tests the state transitions of the tokenizer +func TestTokenizer_StateTransitions(t *testing.T) { + tests := []struct { + input string + expectedStates []TokenizerState + description string + }{ + {"GET", []TokenizerState{StateStart, StateWord}, "Simple word"}, + {"123", []TokenizerState{StateStart, StateNumeric}, "Simple numeric"}, + {" ", []TokenizerState{StateStart, StateWhitespace}, "Single whitespace"}, + {"/path", []TokenizerState{StateStart, StateWord}, "Path starts as word character"}, + {"192.168.1.1", []TokenizerState{StateStart, StateNumeric}, "IPv4 stays in numeric state initially"}, + } + + for _, test := range tests { + tokenizer := NewTokenizer(test.input) + + // Capture state transitions + var states []TokenizerState + states = append(states, tokenizer.state) + + for tokenizer.pos < tokenizer.length { + if !tokenizer.processNextToken() 
{ + break + } + states = append(states, tokenizer.state) + } + + // For simple cases, check exact sequence + if test.input != "192.168.1.1" { + assert.GreaterOrEqual(t, len(states), len(test.expectedStates), + "Input '%s' (%s): expected at least %d states", test.input, test.description, len(test.expectedStates)) + + // Check that expected states appear in sequence + for i, expected := range test.expectedStates { + if assert.Less(t, i, len(states), "State %d should exist for input '%s'", i, test.input) { + assert.Equal(t, expected, states[i], + "Input '%s' (%s): expected state %d to be %v", test.input, test.description, i, expected) + } + } + } else { + // For IPv4 with simplified FSA, check basic state transitions + hasStart := false + hasNumeric := false + + for _, state := range states { + switch state { + case StateStart: + hasStart = true + case StateNumeric: + hasNumeric = true + } + } + + assert.True(t, hasStart, "IPv4 test: expected to see StateStart") + assert.True(t, hasNumeric, "IPv4 test: expected to see StateNumeric") + } + } +} + +// TestTokenTypePreservation tests that TokenNumeric stays TokenNumeric when no pattern matches +// This is critical: classification should upgrade OR preserve, never downgrade +func TestTokenTypePreservation(t *testing.T) { + // Test that generic number stays TokenNumeric (not downgraded to TokenWord) + tokenList := TokenizeString("User 12345 logged in") + + // Find the numeric token + var numericToken *token.Token + for i := range tokenList.Tokens { + if tokenList.Tokens[i].Value == "12345" { + numericToken = &tokenList.Tokens[i] + break + } + } + + assert.NotNil(t, numericToken, "Expected to find numeric token '12345'") + + // Should stay TokenNumeric, not become TokenWord + if numericToken != nil { + assert.Equal(t, token.TokenNumeric, numericToken.Type, + "Token '12345' should stay TokenNumeric") + } + + // Test that numeric upgrades when pattern matches + tokenList = TokenizeString("User 192.168.1.1 logged in") + + // 
Find the IP token + var ipToken *token.Token + for i := range tokenList.Tokens { + if tokenList.Tokens[i].Value == "192.168.1.1" { + ipToken = &tokenList.Tokens[i] + break + } + } + + assert.NotNil(t, ipToken, "Expected to find IP token '192.168.1.1'") + + // Should be upgraded to TokenIPv4 + if ipToken != nil { + assert.Equal(t, token.TokenIPv4, ipToken.Type, + "Token '192.168.1.1' should be TokenIPv4") + } +} + +// TestWildcardStatus tests that tokens are correctly marked as NotWildcard or PotentialWildcard +func TestWildcardStatus(t *testing.T) { + tests := []struct { + input string + tokenValue string + expectedWildcard token.WildcardStatus + description string + }{ + {" ", " ", token.NotWildcard, "Whitespace should be NotWildcard"}, + {":", ":", token.NotWildcard, "Punctuation should be NotWildcard"}, + {"hello", "hello", token.PotentialWildcard, "Generic word should be PotentialWildcard"}, + {"12345", "12345", token.PotentialWildcard, "Generic number should be PotentialWildcard"}, + {"INFO User logged in", "INFO", token.PotentialWildcard, "Severity level should be PotentialWildcard"}, + } + + for _, test := range tests { + t.Run(test.description, func(t *testing.T) { + tokenList := TokenizeString(test.input) + + var targetToken *token.Token + for i := range tokenList.Tokens { + if tokenList.Tokens[i].Value == test.tokenValue { + targetToken = &tokenList.Tokens[i] + break + } + } + + assert.NotNil(t, targetToken, "Expected to find token '%s'", test.tokenValue) + + if targetToken != nil { + assert.Equal(t, test.expectedWildcard, targetToken.Wildcard, test.description) + } + }) + } +} + +// Test the complete data flow +func TestArchitectureCompliance(t *testing.T) { + // Test the exact call graph + // automaton.TokenizeString → NewTokenizer → Tokenizer.Tokenize → processNextToken → classifyToken → globalTrie.Match + + input := "GET /api/users 200" + + // Step 1: automaton.TokenizeString (main entry point) + tokenList := TokenizeString(input) + + // Verify 
TokenList creation + assert.NotNil(t, tokenList, "TokenizeString returned nil") + + // Step 2: Verify token classification used globalTrie.Match + var httpMethod, httpStatus, path *token.Token + + for i := range tokenList.Tokens { + switch tokenList.Tokens[i].Type { + case token.TokenHTTPMethod: + httpMethod = &tokenList.Tokens[i] + case token.TokenHTTPStatus: + httpStatus = &tokenList.Tokens[i] + case token.TokenAbsolutePath: + path = &tokenList.Tokens[i] + } + } + + if assert.NotNil(t, httpMethod, "HTTP method token not found - trie classification failed") { + assert.Equal(t, "GET", httpMethod.Value, "Expected HTTP method 'GET'") + } + + if assert.NotNil(t, httpStatus, "HTTP status token not found - trie classification failed") { + assert.Equal(t, "200", httpStatus.Value, "Expected HTTP status '200'") + } + + if assert.NotNil(t, path, "Path token not found - state machine failed") { + assert.Equal(t, "/api/users", path.Value, "Expected path '/api/users'") + } + + // Step 3: Verify signature generation works + signature := token.NewSignature(tokenList) + assert.False(t, signature.IsEmpty(), "Signature generation failed") + + expectedPosition := "HTTPMethod|Whitespace|AbsolutePath|Whitespace|HTTPStatus" + assert.Equal(t, expectedPosition, signature.Position, "Signature position mismatch") +} + +// TestComplexLogScenarios tests complex log scenarios +func TestComplexLogScenarios(t *testing.T) { + tests := []struct { + name string + input string + expected []token.TokenType + }{ + { + name: "HTTP Request", + input: "GET /api/users 200", + expected: []token.TokenType{ + token.TokenHTTPMethod, token.TokenWhitespace, + token.TokenAbsolutePath, token.TokenWhitespace, + token.TokenHTTPStatus, + }, + }, + { + name: "Error Message", + input: "ERROR Database connection failed", + expected: []token.TokenType{ + token.TokenSeverityLevel, token.TokenWhitespace, + token.TokenWord, token.TokenWhitespace, + token.TokenWord, token.TokenWhitespace, + token.TokenWord, + }, + }, + { + 
name: "User Login", + input: "INFO User 12345 logged in", + expected: []token.TokenType{ + token.TokenSeverityLevel, token.TokenWhitespace, + token.TokenWord, token.TokenWhitespace, + token.TokenNumeric, token.TokenWhitespace, + token.TokenWord, token.TokenWhitespace, + token.TokenWord, + }, + }, + { + name: "Complex with Email and IP", + input: "user@domain.com from 192.168.1.1", + expected: []token.TokenType{ + token.TokenEmail, token.TokenWhitespace, + token.TokenWord, token.TokenWhitespace, + token.TokenIPv4, + }, + }, + { + name: "URL with Scheme", + input: "Visit https://example.com/docs", + expected: []token.TokenType{ + token.TokenWord, token.TokenWhitespace, + token.TokenURI, + }, + }, + { + name: "Date in Context", + input: "Event on 2024-01-15", + expected: []token.TokenType{ + token.TokenWord, token.TokenWhitespace, + token.TokenWord, token.TokenWhitespace, + token.TokenDate, + }, + }, + { + name: "False Positive - Single @ is not Email", + input: "Price @ $10 each", + expected: []token.TokenType{ + token.TokenWord, // Price + token.TokenWhitespace, // space + token.TokenWord, // @ + token.TokenWhitespace, // space + token.TokenWord, // $ + token.TokenNumeric, // 10 + token.TokenWhitespace, // space + token.TokenWord, // each + }, + }, + { + name: "False Positive - Division operator is not Path", + input: "Calculate 10 / 2 = 5", + expected: []token.TokenType{ + token.TokenWord, // Calculate + token.TokenWhitespace, // space + token.TokenNumeric, // 10 + token.TokenWhitespace, // space + token.TokenWord, // / + token.TokenWhitespace, // space + token.TokenNumeric, // 2 + token.TokenWhitespace, // space + token.TokenWord, // = + token.TokenWhitespace, // space + token.TokenNumeric, // 5 + }, + }, + { + name: "False Positive - Phone number is not Date", + input: "Phone: 123-456-7890", + expected: []token.TokenType{ + token.TokenWord, // Phone + token.TokenWord, // : + token.TokenWhitespace, // space + token.TokenNumeric, // 123-456-7890 stays numeric, not 
date + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + tokenList := TokenizeString(test.input) + + assert.Equal(t, len(test.expected), tokenList.Length(), + "Expected %d tokens, got: %v", len(test.expected), tokenTypesToString(tokenList.Tokens)) + + for i, expected := range test.expected { + if assert.Less(t, i, tokenList.Length(), "Token %d should exist", i) { + assert.Equal(t, expected, tokenList.Tokens[i].Type, + "Token %d (value: '%s') should be type %v", i, tokenList.Tokens[i].Value, expected) + } + } + }) + } +} + +// TestWhitespaceNormalization tests that all whitespace types are normalized to single space +func TestWhitespaceNormalization(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "Single space", + input: "Error: message", + expected: " ", + }, + { + name: "Tab character", + input: "Error:\tmessage", + expected: " ", + }, + { + name: "Multiple spaces", + input: "Error: message", + expected: " ", + }, + { + name: "Multiple tabs", + input: "Error:\t\tmessage", + expected: " ", + }, + { + name: "Mixed tabs and spaces", + input: "Error: \t message", + expected: " ", + }, + { + name: "Newline", + input: "Error:\nmessage", + expected: " ", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + tokenList := TokenizeString(test.input) + + // Find whitespace token + var whitespaceToken *token.Token + for i := range tokenList.Tokens { + if tokenList.Tokens[i].Type == token.TokenWhitespace { + whitespaceToken = &tokenList.Tokens[i] + break + } + } + + assert.NotNil(t, whitespaceToken, "Expected to find whitespace token") + + if whitespaceToken != nil { + assert.Equal(t, test.expected, whitespaceToken.Value, + "Whitespace should be normalized to single space") + assert.Equal(t, token.NotWildcard, whitespaceToken.Wildcard, + "Whitespace should be NotWildcard") + } + }) + } +} + +// TestWhitespaceNormalization_Signature tests if whitespace 
normalization would allows logs with different whitespace to merge into the same pattern +func TestWhitespaceNormalization_Signature(t *testing.T) { + // These logs differ only in whitespace - they should tokenize identically + log1 := "Error: connection failed" // single space + log2 := "Error:\tconnection failed" // tab + log3 := "Error: connection failed" // double space + + tl1 := TokenizeString(log1) + tl2 := TokenizeString(log2) + tl3 := TokenizeString(log3) + + // All should have same token count + assert.Equal(t, tl1.Length(), tl2.Length(), "Token counts should match") + assert.Equal(t, tl1.Length(), tl3.Length(), "Token counts should match") + + // All whitespace tokens should be normalized to single space + for i := 0; i < tl1.Length(); i++ { + if tl1.Tokens[i].Type == token.TokenWhitespace { + assert.Equal(t, " ", tl1.Tokens[i].Value, "Whitespace in log1 should be normalized") + assert.Equal(t, " ", tl2.Tokens[i].Value, "Whitespace in log2 should be normalized") + assert.Equal(t, " ", tl3.Tokens[i].Value, "Whitespace in log3 should be normalized") + } + } + + // Signatures should be identical + sig1 := token.NewSignature(tl1) + sig2 := token.NewSignature(tl2) + sig3 := token.NewSignature(tl3) + + assert.True(t, sig1.Equals(sig2), "Signatures should be equal after normalization") + assert.True(t, sig1.Equals(sig3), "Signatures should be equal after normalization") +} + +// =============================== +// Helper functions +// =============================== +func tokenTypesToString(tokens []token.Token) []string { + result := make([]string, len(tokens)) + for i, tok := range tokens { + result[i] = tok.String() + } + return result +} diff --git a/pkg/logs/patterns/automaton/trie.go b/pkg/logs/patterns/automaton/trie.go new file mode 100644 index 000000000000..540b73a588f3 --- /dev/null +++ b/pkg/logs/patterns/automaton/trie.go @@ -0,0 +1,194 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License 
Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package automaton provides log message tokenization using finite state automaton +// and trie-based pattern matching for token classification. +package automaton + +import ( + "strings" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +// TrieNode represents a node in the classification trie +type TrieNode struct { + children map[rune]*TrieNode + tokenType token.TokenType + isTerminal bool +} + +// Trie implements a prefix tree for token classification +type Trie struct { + root *TrieNode +} + +// GlobalRuleManager manages terminal rules +var globalRuleManager *RuleManager + +// globalTrie is the singleton trie instance +var globalTrie *Trie + +// init initializes the global trie and rule manager +// todo: componentize this eventually +func init() { + globalTrie = NewTrie() + globalRuleManager = NewRuleManager() + globalRuleManager.LoadPredefinedRules() + globalTrie.buildPredefinedPatterns() +} + +// NewTrie creates a new trie +func NewTrie() *Trie { + return &Trie{ + root: &TrieNode{ + children: make(map[rune]*TrieNode), + }, + } +} + +// Match performs token classification +func (trie *Trie) Match(value string) token.TokenType { + if len(value) == 0 { + return token.TokenUnknown + } + + if tokenType := trie.exactMatch(value); tokenType != token.TokenUnknown { + return tokenType + } + + return trie.applyTerminalRules(value) +} + +// exactMatch performs exact string matching +func (trie *Trie) exactMatch(value string) token.TokenType { + node := trie.root + + for _, char := range value { + child, exists := node.children[char] + if !exists { + return token.TokenUnknown + } + node = child + } + + if node.isTerminal { + return node.tokenType + } + + return token.TokenUnknown +} + +// applyTerminalRules applies regex-based terminal rules +func (trie *Trie) applyTerminalRules(value string) token.TokenType { 
+ return globalRuleManager.ApplyRules(value) +} + +// AddExactPattern adds an exact string pattern to the trie +func (trie *Trie) AddExactPattern(pattern string, tokenType token.TokenType) { + node := trie.root + + for _, char := range pattern { + if _, exists := node.children[char]; !exists { + node.children[char] = &TrieNode{ + children: make(map[rune]*TrieNode), + } + } + node = node.children[char] + } + + node.isTerminal = true + node.tokenType = tokenType +} + +// buildPredefinedPatterns populates the trie with exact-match patterns +// for fast classification of known strings (HTTP methods, severity levels, whitespace). +// Regex-based terminal rules are handled by globalRuleManager via LoadPredefinedRules(). +func (trie *Trie) buildPredefinedPatterns() { + // HTTP methods - exact string matching + httpMethods := []string{"GET", "POST", "PUT", "DELETE", "HEAD", "OPTIONS", "PATCH", "TRACE", "CONNECT"} + for _, method := range httpMethods { + trie.AddExactPattern(method, token.TokenHTTPMethod) + } + + // Severity levels - exact string matching (both uppercase and lowercase) + severityLevels := []string{"TRACE", "DEBUG", "INFO", "WARN", "WARNING", "ERROR", "FATAL", "PANIC", "EMERGENCY", "ALERT", "CRITICAL", "NOTICE"} + for _, level := range severityLevels { + trie.AddExactPattern(level, token.TokenSeverityLevel) + trie.AddExactPattern(strings.ToLower(level), token.TokenSeverityLevel) + } + + // Whitespace - exact character matching + trie.AddExactPattern(" ", token.TokenWhitespace) + trie.AddExactPattern("\t", token.TokenWhitespace) + trie.AddExactPattern("\n", token.TokenWhitespace) + trie.AddExactPattern("\r\n", token.TokenWhitespace) +} + +// TokenizeString is the main entry point +func TokenizeString(input string) *token.TokenList { + if len(input) == 0 { + return token.NewTokenList() + } + + tokenizer := NewTokenizer(input) + return tokenizer.Tokenize() +} + +// Statistics + +// TrieStats is the stats of the trie +type TrieStats struct { + ExactPatterns int 
+ TerminalRules int + TrieNodes int + MaxDepth int +} + +// GetStats returns trie statistics for testing purposes +func (trie *Trie) GetStats() TrieStats { + nodeCount, maxDepth := trie.countNodes(trie.root, 0) + + // Terminal rules are managed by globalRuleManager, not the trie itself + terminalRuleCount := 0 + if globalRuleManager != nil { + terminalRuleCount = len(globalRuleManager.rules) + } + + return TrieStats{ + ExactPatterns: trie.countExactPatterns(trie.root), + TerminalRules: terminalRuleCount, + TrieNodes: nodeCount, + MaxDepth: maxDepth, + } +} + +func (trie *Trie) countNodes(node *TrieNode, depth int) (int, int) { + count := 1 + maxDepth := depth + + for _, child := range node.children { + childCount, childDepth := trie.countNodes(child, depth+1) + count += childCount + if childDepth > maxDepth { + maxDepth = childDepth + } + } + + return count, maxDepth +} + +func (trie *Trie) countExactPatterns(node *TrieNode) int { + count := 0 + if node.isTerminal { + count = 1 + } + + for _, child := range node.children { + count += trie.countExactPatterns(child) + } + + return count +} diff --git a/pkg/logs/patterns/automaton/trie_test.go b/pkg/logs/patterns/automaton/trie_test.go new file mode 100644 index 000000000000..8ae73f8d5d4c --- /dev/null +++ b/pkg/logs/patterns/automaton/trie_test.go @@ -0,0 +1,210 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +package automaton + +import ( + "testing" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +func TestGlobalTrie_ExactMatch(t *testing.T) { + tests := []struct { + input string + expected token.TokenType + }{ + {"GET", token.TokenHTTPMethod}, + {"POST", token.TokenHTTPMethod}, + {"ERROR", token.TokenSeverityLevel}, + {"INFO", token.TokenSeverityLevel}, + {"debug", token.TokenSeverityLevel}, // lowercase + {" ", token.TokenWhitespace}, + {"\t", token.TokenWhitespace}, + {"unknown", token.TokenWord}, // no rule matches - generic word + } + + for _, test := range tests { + result := globalTrie.Match(test.input) + if result != test.expected { + t.Errorf("globalTrie.Match('%s'): expected %v, got %v", + test.input, test.expected, result) + } + } +} + +func TestGlobalTrie_TerminalRules(t *testing.T) { + tests := []struct { + input string + expected token.TokenType + }{ + {"200", token.TokenHTTPStatus}, + {"404", token.TokenHTTPStatus}, + {"500", token.TokenHTTPStatus}, + {"192.168.1.1", token.TokenIPv4}, + {"10.0.0.1", token.TokenIPv4}, + {"test@example.com", token.TokenEmail}, + {"user@domain.org", token.TokenEmail}, + {"/api/users", token.TokenAbsolutePath}, + {"/var/log/app.log", token.TokenAbsolutePath}, + {"2023-12-25", token.TokenDate}, + {"2023-12-25T14:30:00", token.TokenDate}, + {"1234", token.TokenNumeric}, // 4 digits won't match HTTP status + {"0", token.TokenNumeric}, + {"https://example.com", token.TokenURI}, + {"http://api.domain.com/path", token.TokenURI}, + } + + for _, test := range tests { + result := globalTrie.Match(test.input) + if result != test.expected { + t.Errorf("globalTrie.Match('%s'): expected %v, got %v", + test.input, test.expected, result) + } + } +} + +func TestTrieStats(t *testing.T) { + stats := globalTrie.GetStats() + + if stats.ExactPatterns == 0 { + t.Error("Expected some exact patterns in trie") + } + if stats.TerminalRules == 0 { + t.Error("Expected some terminal rules") + } + if stats.TrieNodes == 0 { + 
t.Error("Expected some trie nodes") + } + + t.Logf("Trie Stats: %d exact patterns, %d terminal rules, %d nodes, max depth %d", + stats.ExactPatterns, stats.TerminalRules, stats.TrieNodes, stats.MaxDepth) +} + +func TestTrie_AddExactPattern(t *testing.T) { + // Create a new trie for testing + testTrie := NewTrie() + + // Add a custom pattern + testTrie.AddExactPattern("CUSTOM", token.TokenWord) + + // Test that it matches + result := testTrie.Match("CUSTOM") + if result != token.TokenWord { + t.Errorf("Expected TokenWord for 'CUSTOM', got %v", result) + } + + // Test that unknown patterns fall back to TokenWord (generic word) + result = testTrie.Match("unknown") + if result != token.TokenWord { + t.Errorf("Expected TokenWord for 'unknown', got %v", result) + } +} + +func TestTrie_AddTerminalRule(t *testing.T) { + // Test adding terminal rule to global rule manager + err := globalRuleManager.AddRule( + "TestRule", + `^TEST\d+$`, + "test", + "Test rule for testing", + token.TokenNumeric, + PriorityHigh, // Higher priority than existing rules + []string{"TEST123"}, + ) + if err != nil { + t.Fatalf("Failed to add terminal rule: %v", err) + } + + // Test that it matches using global trie + result := globalTrie.Match("TEST123") + if result != token.TokenNumeric { + t.Errorf("Expected TokenNumeric for 'TEST123', got %v", result) + } + + // Test that non-matching patterns don't match + result = globalTrie.Match("TESTXYZ") + if result == token.TokenNumeric { + t.Error("Should not match non-numeric pattern") + } + + // Clean up - remove the test rule + globalRuleManager.RemoveRule("TestRule") +} + +func TestTrie_InvalidTerminalRule(t *testing.T) { + // Try to add invalid regex to global rule manager + err := globalRuleManager.AddRule( + "InvalidRule", + `[invalid(regex`, + "test", + "Invalid rule", + token.TokenWord, + PriorityMedium, + []string{}, + ) + if err == nil { + t.Error("Expected error for invalid regex pattern") + } +} + +func TestTrie_ExactMatchPriority(t 
*testing.T) { + testTrie := NewTrie() + + // Add exact pattern + testTrie.AddExactPattern("TEST", token.TokenWord) + + // Add terminal rule that would also match + globalRuleManager.AddRule( + "ExactMatchTestRule", + `^TEST$`, + "test", + "Test rule for exact match priority", + token.TokenNumeric, + PriorityHigh, + []string{"TEST"}, + ) + + // Exact match should take priority + result := testTrie.Match("TEST") + if result != token.TokenWord { + t.Errorf("Exact match should take priority, expected TokenWord, got %v", result) + } + + // Clean up + globalRuleManager.RemoveRule("ExactMatchTestRule") +} + +func TestTrie_EmptyInput(t *testing.T) { + result := globalTrie.Match("") + if result != token.TokenUnknown { + t.Errorf("Empty input should return TokenUnknown, got %v", result) + } +} + +func TestTrieNodeStructure(t *testing.T) { + testTrie := NewTrie() + testTrie.AddExactPattern("ABC", token.TokenWord) + + // Verify trie structure + stats := testTrie.GetStats() + if stats.TrieNodes < 4 { // root + A + B + C + t.Errorf("Expected at least 4 trie nodes, got %d", stats.TrieNodes) + } + if stats.ExactPatterns < 1 { + t.Errorf("Expected at least 1 exact pattern, got %d", stats.ExactPatterns) + } +} + +func TestTrieDepthCalculation(t *testing.T) { + testTrie := NewTrie() + testTrie.AddExactPattern("A", token.TokenWord) + testTrie.AddExactPattern("ABCDEFGHIJ", token.TokenWord) // 10 chars deep + + stats := testTrie.GetStats() + if stats.MaxDepth < 10 { + t.Errorf("Expected max depth >= 10, got %d", stats.MaxDepth) + } +} diff --git a/pkg/logs/patterns/clustering/cluster.go b/pkg/logs/patterns/clustering/cluster.go new file mode 100644 index 000000000000..5c00ddde9222 --- /dev/null +++ b/pkg/logs/patterns/clustering/cluster.go @@ -0,0 +1,163 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). 
+// Copyright 2016-present Datadog, Inc. + +// Package clustering provides clustering functionality for grouping similar TokenLists +// and identifying wildcard positions for pattern extraction. +package clustering + +import ( + "strings" + "time" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering/merging" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +// Cluster represents a cluster with a group of TokenLists that have identical signatures. +// A cluster may contain multiple patterns if token lists with the same signature cannot be merged since structural Fidelity is Valuable. +// Examples: +// "Status: OK" → HTTP response format +// "Status; OK" → CSV-like format +// "Status OK" → Plain text format +// These are different log formats, even if semantically similar → we need to keep them separate. +type Cluster struct { + Signature token.Signature + Patterns []*Pattern // Multiple patterns per cluster + + // Timestamp tracking for the cluster itself + CreatedAt time.Time // When cluster was first created + UpdatedAt time.Time // When cluster was last modified (any pattern changed) +} + +// NewCluster creates a new cluster. +func NewCluster(signature token.Signature, tokenList *token.TokenList) *Cluster { + now := time.Now() + return &Cluster{ + Signature: signature, + Patterns: nil, // Will be generated when needed + CreatedAt: now, + UpdatedAt: now, + } +} + +// ============================================================================= +// Core Clustering Logic +// ============================================================================= + +// AddTokenListToPatterns adds a TokenList to the appropriate pattern in the cluster. +// If no matching pattern exists, creates a new one. 
+func (c *Cluster) AddTokenListToPatterns(tokenList *token.TokenList) *Pattern { + // Ensure patterns are generated + if len(c.Patterns) == 0 { + // No patterns yet, create first one + patternID := generatePatternID() + pattern := newPattern(tokenList, patternID) + + c.Patterns = []*Pattern{pattern} + // Update the cluster's new pattern at timestamp + c.UpdatedAt = time.Now() + return pattern + } + + // Try to find a matching pattern + for _, p := range c.Patterns { + // Check if this TokenList can merge with this pattern's sample + if p.Sample != nil && merging.CanMergeTokenLists(tokenList, p.Sample) { + // CRITICAL: Also verify it can merge with the template + // If template has evolved differently, regeneratePattern will fail + // and we should create a new pattern instead + // Note: CanMergeTokenLists is not symmetric, so check both directions + if p.Template != nil { + templateCompatible1 := merging.CanMergeTokenLists(p.Template, tokenList) + templateCompatible2 := merging.CanMergeTokenLists(tokenList, p.Template) + templateCompatible := templateCompatible1 || templateCompatible2 + if !templateCompatible { + // Log matches sample but not template - template has evolved incompatibly + // Skip this pattern and continue searching or create new one + // This will create a new pattern instead + continue + } + } + + // Merge into existing pattern (same PatternID is preserved) + p.LogCount++ + p.UpdatedAt = time.Now() + c.UpdatedAt = time.Now() + + // Incrementally merge the new token list into the pattern template + // regeneratePattern will update template if merge succeeds + if c.regeneratePattern(p, tokenList) { + return p // Return existing pattern with updated template + } + // regeneratePattern failed - template couldn't merge with tokenList + // This shouldn't happen if we checked above, but handle it gracefully + // Create a new pattern instead + break + } + } + + // No matching pattern found, create a new one + patternID := generatePatternID() + pattern := 
newPattern(tokenList, patternID) + c.Patterns = append(c.Patterns, pattern) + c.UpdatedAt = time.Now() + return pattern +} + +// regeneratePattern incrementally merges a new token list into the pattern. +// Returns true if merge succeeded, false if merge failed. +func (c *Cluster) regeneratePattern(p *Pattern, newTokenList *token.TokenList) bool { + if p.Template == nil { + return false + } + + // Incremental merge: merge new log with existing template + merged := merging.MergeTokenLists(p.Template, newTokenList) + if merged == nil { + // Merge failed - template and newTokenList are incompatible + return false + } + + p.Template = merged + p.Positions = make([]int, 0, merged.Length()) + + // Build wildcard positions list when 2 tokenlists are mergable. + for i := 0; i < merged.Length(); i++ { + tok := merged.Tokens[i] + if tok.Wildcard == token.IsWildcard { + p.Positions = append(p.Positions, i) + + // Special handling for path wildcards + if tok.Type == token.TokenAbsolutePath && p.Sample != nil && i < p.Sample.Length() { + firstPath := p.Sample.Tokens[i].Value + merged.Tokens[i].Value = getPathPattern(firstPath) + } + } + } + + p.UpdatedAt = time.Now() + return true +} + +// getPathPattern converts a path to hierarchical wildcard pattern +func getPathPattern(path string) string { + if path == "/" { + return "/" + } + + // Remove leading/trailing slashes and split + trimmed := strings.Trim(path, "/") + if trimmed == "" { + return "/" + } + + parts := strings.Split(trimmed, "/") + result := "" + for i := 0; i < len(parts); i++ { + result += "/*" + } + + return result +} diff --git a/pkg/logs/patterns/clustering/cluster_manager.go b/pkg/logs/patterns/clustering/cluster_manager.go new file mode 100644 index 000000000000..61e4bda83254 --- /dev/null +++ b/pkg/logs/patterns/clustering/cluster_manager.go @@ -0,0 +1,121 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. 
+// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package clustering provides clustering functionality for grouping similar TokenLists +// and identifying wildcard positions for pattern extraction. +package clustering + +import ( + "crypto/rand" + "encoding/binary" + "sync" + "time" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering/merging" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" + "github.com/DataDog/datadog-agent/pkg/trace/log" +) + +// PatternChangeType indicates what changed when adding a TokenList to the cluster manager +type PatternChangeType int + +const ( + // PatternNoChange means the TokenList was added to an existing cluster without structural changes + PatternNoChange PatternChangeType = iota + // PatternNew means a brand new pattern was created (first time seeing this signature) + PatternNew + // PatternUpdated means an existing pattern's structure changed (more wildcards added) + PatternUpdated +) + +// ClusterManager manages the clustering of TokenLists using hash-based bucketing. +type ClusterManager struct { + mu sync.RWMutex + hashBuckets map[uint64][]*Cluster +} + +// NewClusterManager creates a new ClusterManager. +func NewClusterManager() *ClusterManager { + return &ClusterManager{ + hashBuckets: make(map[uint64][]*Cluster), + } +} + +// Add processes a TokenList and adds it to the appropriate cluster. +// Returns the pattern that was created/updated and a PatternChangeType indicating what changed. +func (cm *ClusterManager) Add(tokenList *token.TokenList) (*Pattern, PatternChangeType) { + if tokenList == nil || tokenList.IsEmpty() { + log.Errorf("Cluster Manager failed to add log: %v for patterning. 
Token list is empty or nil.", tokenList.String()) + return nil, PatternNoChange + } + + cm.mu.Lock() + defer cm.mu.Unlock() + + // Create new signature and hash it + signature := token.NewSignature(tokenList) + hash := signature.Hash + + // Get hash bucket + clusters := cm.hashBuckets[hash] + + // Look for existing cluster with matching signature + for _, cluster := range clusters { + if !cluster.Signature.Equals(signature) { + continue + } + + // Find which pattern within the cluster the tokenList will match + var matchedPattern *Pattern + var oldWildcardCount int + for _, p := range cluster.Patterns { + if p.Sample != nil && merging.CanMergeTokenLists(tokenList, p.Sample) { + matchedPattern = p + oldWildcardCount = p.GetWildcardCount() + break + } + } + + // Add the tokenList to the cluster (merges or creates new pattern) + pattern := cluster.AddTokenListToPatterns(tokenList) + + // Check if a new pattern was created (no match found or merge failed) + if matchedPattern == nil || matchedPattern.PatternID != pattern.PatternID { + return pattern, PatternNew + } + + // Check if wildcard count changed (pattern evolved) + if pattern.GetWildcardCount() != oldWildcardCount { + return pattern, PatternUpdated + } + + return pattern, PatternNoChange + } + + // If no matching pattern was found, create a new cluster and pattern. + newCluster := NewCluster(signature, tokenList) + // Add the token list to create the first pattern + pattern := newCluster.AddTokenListToPatterns(tokenList) + cm.hashBuckets[hash] = append(clusters, newCluster) + + return pattern, PatternNew +} + +// Clear removes all clusters. 
// generatePatternID returns a pattern identifier built from 8 bytes read from
// crypto/rand, interpreted as a big-endian uint64. If the random source reports
// an error, the current time in nanoseconds is used instead.
func generatePatternID() uint64 {
	var raw [8]byte
	if _, err := rand.Read(raw[:]); err == nil {
		return binary.BigEndian.Uint64(raw[:])
	}
	return uint64(time.Now().UnixNano())
}
+ } + } + + return allPatterns +} + +func TestClusterManager_NewClusterManager(t *testing.T) { + cm := NewClusterManager() + + assert.NotNil(t, cm, "ClusterManager should not be nil") +} + +func TestClusterManager_Add_NewCluster(t *testing.T) { + cm := NewClusterManager() + + // Create TokenList + tokens := []token.Token{ + {Value: "GET", Type: token.TokenHTTPMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api", Type: token.TokenAbsolutePath}, + } + tokenList := token.NewTokenListWithTokens(tokens) + + pattern, changeType := cm.Add(tokenList) + + assert.NotNil(t, pattern, "Should return a pattern") + assert.Equal(t, 1, pattern.LogCount, "Pattern should have log count 1") + assert.Equal(t, PatternNew, changeType, "Expected PatternNew for first add") +} + +func TestClusterManager_Add_ExistingCluster(t *testing.T) { + cm := NewClusterManager() + + // Create two TokenLists with same signature + tokens1 := []token.Token{ + {Value: "GET", Type: token.TokenHTTPMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api", Type: token.TokenAbsolutePath}, + } + tokens2 := []token.Token{ + {Value: "POST", Type: token.TokenHTTPMethod}, // Different value, same type + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/users", Type: token.TokenAbsolutePath}, // Different value, same type + } + + tokenList1 := token.NewTokenListWithTokens(tokens1) + tokenList2 := token.NewTokenListWithTokens(tokens2) + + pattern1, changeType1 := cm.Add(tokenList1) + pattern2, changeType2 := cm.Add(tokenList2) + + // Should be the same pattern (same cluster, merged together) + assert.Equal(t, pattern1.PatternID, pattern2.PatternID, "TokenLists with same signature should merge into same pattern") + assert.Equal(t, 2, pattern2.LogCount, "Pattern should have log count 2") + assert.Equal(t, PatternNew, changeType1, "Expected PatternNew for first add") + + // With eager pattern generation, adding the second token list creates wildcards (pattern update) + assert.Equal(t, 
PatternUpdated, changeType2, "Expected PatternUpdated for second add (creates wildcards)") +} + +func TestClusterManager_Add_DifferentSignatures(t *testing.T) { + cm := NewClusterManager() + + // Create TokenLists with different signatures + tokens1 := []token.Token{ + {Value: "GET", Type: token.TokenHTTPMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api", Type: token.TokenAbsolutePath}, + } + tokens2 := []token.Token{ + {Value: "ERROR", Type: token.TokenSeverityLevel}, // Different type + {Value: " ", Type: token.TokenWhitespace}, + {Value: "failed", Type: token.TokenWord}, // Different type + } + + tokenList1 := token.NewTokenListWithTokens(tokens1) + tokenList2 := token.NewTokenListWithTokens(tokens2) + + pattern1, _ := cm.Add(tokenList1) + pattern2, _ := cm.Add(tokenList2) + + // Should be different patterns (different clusters) + assert.NotEqual(t, pattern1.PatternID, pattern2.PatternID, "TokenLists with different signatures should create different patterns") +} + +func TestClusterManager_GetCluster(t *testing.T) { + cm := NewClusterManager() + + // Create and add TokenList + tokens := []token.Token{ + {Value: "GET", Type: token.TokenHTTPMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api", Type: token.TokenAbsolutePath}, + } + tokenList := token.NewTokenListWithTokens(tokens) + signature := token.NewSignature(tokenList) + + addedPattern, _ := cm.Add(tokenList) + + // Retrieve cluster by signature + retrievedCluster := getCluster(cm, signature) + + assert.NotNil(t, retrievedCluster, "Should retrieve cluster by signature") + assert.Equal(t, 1, len(retrievedCluster.Patterns), "Cluster should have 1 pattern") + assert.Equal(t, addedPattern.PatternID, retrievedCluster.Patterns[0].PatternID, "Pattern IDs should match") + + // Try to get non-existent cluster + differentTokens := []token.Token{ + {Value: "ERROR", Type: token.TokenSeverityLevel}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "failed", Type: 
token.TokenWord}, + } + differentTokenList := token.NewTokenListWithTokens(differentTokens) + differentSignature := token.NewSignature(differentTokenList) + + nonExistentCluster := getCluster(cm, differentSignature) + assert.Nil(t, nonExistentCluster, "Should return nil for non-existent cluster") +} + +func TestClusterManager_Clear(t *testing.T) { + cm := NewClusterManager() + + // Add some data + tokens := []token.Token{ + {Value: "GET", Type: token.TokenHTTPMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api", Type: token.TokenAbsolutePath}, + } + tokenList := token.NewTokenListWithTokens(tokens) + signature := token.NewSignature(tokenList) + + cm.Add(tokenList) + + // Verify data exists + assert.NotNil(t, getCluster(cm, signature), "Should have cluster before clear") + + // Clear + cm.Clear() + + // Verify data is gone + assert.Nil(t, getCluster(cm, signature), "Should have no cluster after clear") +} + +func TestClusterManager_GetAllPatterns(t *testing.T) { + cm := NewClusterManager() + + // Initially empty + patterns := getAllPatterns(cm) + assert.Equal(t, 0, len(patterns), "Should have no patterns initially") + + // Add pattern 1 (signature 1) + tokens1 := []token.Token{ + {Value: "GET", Type: token.TokenHTTPMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api", Type: token.TokenAbsolutePath}, + } + pattern1, _ := cm.Add(token.NewTokenListWithTokens(tokens1)) + + // Add pattern 2 (same signature, should merge into pattern 1) + tokens2 := []token.Token{ + {Value: "POST", Type: token.TokenHTTPMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/users", Type: token.TokenAbsolutePath}, + } + pattern2, _ := cm.Add(token.NewTokenListWithTokens(tokens2)) + + // Add pattern 3 (different signature) + tokens3 := []token.Token{ + {Value: "ERROR", Type: token.TokenSeverityLevel}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "failed", Type: token.TokenWord}, + } + pattern3, _ := 
cm.Add(token.NewTokenListWithTokens(tokens3)) + + // Get all patterns + allPatterns := getAllPatterns(cm) + + // Should have 2 patterns: pattern1 (merged with pattern2) and pattern3 + assert.Equal(t, 2, len(allPatterns), "Should have 2 patterns total") + + // Verify we have both pattern IDs + patternIDs := make(map[uint64]bool) + for _, p := range allPatterns { + patternIDs[p.PatternID] = true + } + assert.True(t, patternIDs[pattern1.PatternID], "Should include pattern 1") + assert.True(t, patternIDs[pattern3.PatternID], "Should include pattern 3") + assert.Equal(t, pattern1.PatternID, pattern2.PatternID, "Pattern 1 and 2 should be the same (merged)") +} + +func TestClusterManager_PatternChangeType(t *testing.T) { + cm := NewClusterManager() + + // Create token lists with same signature (HTTP method, space, path) + tokens1 := []token.Token{ + {Value: "GET", Type: token.TokenHTTPMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api/users", Type: token.TokenAbsolutePath}, + } + tokens2 := []token.Token{ + {Value: "POST", Type: token.TokenHTTPMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api/orders", Type: token.TokenAbsolutePath}, + } + tokens3 := []token.Token{ + {Value: "PUT", Type: token.TokenHTTPMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api/items", Type: token.TokenAbsolutePath}, + } + tokens4 := []token.Token{ + {Value: "DELETE", Type: token.TokenHTTPMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api/products", Type: token.TokenAbsolutePath}, + } + + tokenList1 := token.NewTokenListWithTokens(tokens1) + tokenList2 := token.NewTokenListWithTokens(tokens2) + tokenList3 := token.NewTokenListWithTokens(tokens3) + tokenList4 := token.NewTokenListWithTokens(tokens4) + + // First add - should create a new pattern + pattern1, changeType1 := cm.Add(tokenList1) + assert.Equal(t, PatternNew, changeType1, "Expected PatternNew for first add") + t.Logf("✅ Add #1: PatternNew (created pattern with 
PatternID=%d)", pattern1.PatternID) + + // Second add - same signature, adding to existing pattern creates wildcards (pattern update) + pattern2, changeType2 := cm.Add(tokenList2) + assert.Equal(t, PatternUpdated, changeType2, "Expected PatternUpdated for second add (creates wildcards)") + assert.Equal(t, pattern1.PatternID, pattern2.PatternID, "Should return same pattern for same signature") + t.Logf("✅ Add #2: PatternUpdated (wildcards created, logCount=%d)", pattern2.LogCount) + t.Logf(" Pattern after 2 logs: '%s'", pattern2.GetPatternString()) + + // Third add - pattern exists but wildcard count unchanged (still 2 wildcards) + pattern3, changeType3 := cm.Add(tokenList3) + assert.Equal(t, PatternNoChange, changeType3, "Expected PatternNoChange for third add (wildcard count unchanged)") + assert.Equal(t, pattern1.PatternID, pattern3.PatternID, "Should return same pattern for same signature") + t.Logf("✅ Add #3: PatternNoChange (wildcard count unchanged, logCount=%d)", pattern3.LogCount) + t.Logf(" Pattern after 3 logs: '%s'", pattern3.GetPatternString()) + + // Fourth add - pattern exists, wildcard count still unchanged + pattern4, changeType4 := cm.Add(tokenList4) + assert.Equal(t, PatternNoChange, changeType4, "Expected PatternNoChange for fourth add (wildcard count unchanged)") + t.Logf("✅ Add #4: PatternNoChange (wildcard count unchanged, logCount=%d)", pattern4.LogCount) + + // Final pattern (eagerly generated by Add) + t.Logf(" Final pattern after 4 logs: '%s'", pattern4.GetPatternString()) + + // Verify all returned the same pattern + assert.Equal(t, 4, pattern4.LogCount, "Expected pattern log count 4") +} diff --git a/pkg/logs/patterns/clustering/cluster_test.go b/pkg/logs/patterns/clustering/cluster_test.go new file mode 100644 index 000000000000..350ec8be5a0d --- /dev/null +++ b/pkg/logs/patterns/clustering/cluster_test.go @@ -0,0 +1,456 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License 
Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package clustering + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering/merging" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +func TestCluster_NewCluster(t *testing.T) { + // Create a simple TokenList + tokens := []token.Token{ + {Value: "GET", Type: token.TokenHTTPMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api", Type: token.TokenAbsolutePath}, + } + tokenList := token.NewTokenListWithTokens(tokens) + signature := token.NewSignature(tokenList) + + cluster := NewCluster(signature, tokenList) + + assert.Equal(t, 0, clusterSize(cluster), "Expected cluster size 0 initially") + assert.True(t, cluster.Signature.Equals(signature), "Cluster signature doesn't match expected signature") + assert.Empty(t, cluster.Patterns, "Patterns should be empty initially (computed lazily)") +} + +func TestCluster_AddTokenListToPatterns(t *testing.T) { + // Create first TokenList + tokens1 := []token.Token{ + {Value: "GET", Type: token.TokenHTTPMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api", Type: token.TokenAbsolutePath}, + } + tokenList1 := token.NewTokenListWithTokens(tokens1) + signature1 := token.NewSignature(tokenList1) + + cluster := NewCluster(signature1, tokenList1) + cluster.AddTokenListToPatterns(tokenList1) + + assert.Equal(t, 1, clusterSize(cluster), "Expected initial cluster size 1") + + // Create second TokenList with same signature but different values + tokens2 := []token.Token{ + {Value: "POST", Type: token.TokenHTTPMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/users", Type: token.TokenAbsolutePath}, + } + tokenList2 := token.NewTokenListWithTokens(tokens2) + + // Add tokenList with matching signature + cluster.AddTokenListToPatterns(tokenList2) + + assert.Equal(t, 
2, clusterSize(cluster), "Expected cluster size 2 after adding") + assert.NotEmpty(t, cluster.Patterns, "Expected patterns to exist after adding TokenLists") +} + +func TestCluster_SinglePattern_SingleLog(t *testing.T) { + // When a cluster has only one log, it creates one pattern with no wildcards + tokens := []token.Token{ + {Value: "ERROR", Type: token.TokenSeverityLevel}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "failed", Type: token.TokenWord}, + } + tokenList := token.NewTokenListWithTokens(tokens) + signature := token.NewSignature(tokenList) + + cluster := NewCluster(signature, tokenList) + cluster.AddTokenListToPatterns(tokenList) + + // Should have exactly one pattern (which is also the primary) + assert.Equal(t, 1, len(cluster.Patterns), "Should have exactly one pattern") + + mostCommon := getMostCommonPattern(cluster) + assert.NotNil(t, mostCommon, "Most common pattern should not be nil") + + pattern := mostCommon.Template + assert.NotNil(t, pattern, "Pattern template should not be nil") + assert.False(t, hasWildcards(cluster), "Single log should not have wildcards") + assert.Equal(t, tokenList.Length(), pattern.Length(), "Pattern length should match original TokenList") + + for i, tok := range pattern.Tokens { + assert.Equal(t, tokenList.Tokens[i].Value, tok.Value, + "Pattern token %d value mismatch", i) + } +} + +func TestCluster_MultiplePatterns_SpecialCharVariation(t *testing.T) { + // This is the key test for multi-pattern clusters! 
+ // TokenLists with same signature but different special characters should create multiple patterns + // Note: Whitespace variations now merge (normalized to single space) + + signature := token.Signature{ + Position: "Error|Word|Whitespace|Word|Word|Word", + Length: 6, + Hash: 1234, + } + + cluster := NewCluster(signature, nil) + + // Create TokenLists with different special characters (cannot merge - structural difference) + tokens1 := []token.Token{ + {Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard}, // Protected first word + {Value: ":", Type: token.TokenWord, Wildcard: token.NotWildcard}, // Colon + {Value: " ", Type: token.TokenWhitespace}, + {Value: "connection", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "failed", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + } + tokens2 := []token.Token{ + {Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: ";", Type: token.TokenWord, Wildcard: token.NotWildcard}, // Semicolon - DIFFERENT! 
+ {Value: " ", Type: token.TokenWhitespace}, + {Value: "connection", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "timeout", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + } + tokens3 := []token.Token{ + {Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: ":", Type: token.TokenWord, Wildcard: token.NotWildcard}, // Colon - matches tokens1 + {Value: " ", Type: token.TokenWhitespace}, + {Value: "database", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "error", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + } + + tokenList1 := token.NewTokenListWithTokens(tokens1) + tokenList2 := token.NewTokenListWithTokens(tokens2) + tokenList3 := token.NewTokenListWithTokens(tokens3) + + cluster.AddTokenListToPatterns(tokenList1) + cluster.AddTokenListToPatterns(tokenList2) // Different special char - new pattern + cluster.AddTokenListToPatterns(tokenList3) // Same special char as tokens1 - same pattern + + // Should have 2 patterns (one for colon, one for semicolon) + assert.Len(t, cluster.Patterns, 2, "Expected 2 patterns due to special character variation") + + // Verify pattern sizes + pattern1Size := cluster.Patterns[0].size() + pattern2Size := cluster.Patterns[1].size() + + // One pattern should have 2 token lists, the other should have 1 + validSizes := (pattern1Size == 2 && pattern2Size == 1) || (pattern1Size == 1 && pattern2Size == 2) + assert.True(t, validSizes, "Expected pattern sizes [2, 1], got [%d, %d]", pattern1Size, pattern2Size) + + t.Logf("✅ Multi-pattern cluster created: %d patterns", len(cluster.Patterns)) + t.Logf(" Pattern 1: %d token lists", cluster.Patterns[0].size()) + t.Logf(" Pattern 2: %d token lists", cluster.Patterns[1].size()) +} + +func TestCluster_FindMatchingPattern(t *testing.T) { + signature := token.Signature{ + Position: "Error|Word|Whitespace|Word", + 
Length: 4, + Hash: 5678, + } + + cluster := NewCluster(signature, nil) + + tokens1 := []token.Token{ + {Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: ":", Type: token.TokenWord}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "failed", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + } + tokens2 := []token.Token{ + {Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: ":", Type: token.TokenWord}, + {Value: " ", Type: token.TokenWhitespace}, // Different whitespace + {Value: "timeout", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + } + + tokenList1 := token.NewTokenListWithTokens(tokens1) + tokenList2 := token.NewTokenListWithTokens(tokens2) + + pattern1 := cluster.AddTokenListToPatterns(tokenList1) + pattern2 := cluster.AddTokenListToPatterns(tokenList2) + + // Should return different patterns + assert.NotEqual(t, pattern1, pattern2, "Should create different patterns for different whitespace") + + // findMatchingPattern should return the correct pattern for each token list + found1 := findMatchingPattern(cluster, tokenList1) + found2 := findMatchingPattern(cluster, tokenList2) + + assert.Equal(t, pattern1, found1, "Should find the first pattern for tokenList1") + assert.Equal(t, pattern2, found2, "Should find the second pattern for tokenList2") +} + +func TestCluster_GetMostCommonPattern(t *testing.T) { + signature := token.Signature{ + Position: "Word|Whitespace|Word", + Length: 3, + Hash: 9999, + } + + cluster := NewCluster(signature, nil) + + // Add multiple token lists that split into different patterns + // Pattern 1: 3 logs (should be most common) + for i := 0; i < 3; i++ { + tokens := []token.Token{ + {Value: "Service", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "started", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + } + tokenList := token.NewTokenListWithTokens(tokens) + 
cluster.AddTokenListToPatterns(tokenList) + } + + // Pattern 2: 1 log (less common) + tokens2 := []token.Token{ + {Value: "Service", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: " ", Type: token.TokenWhitespace}, // Different whitespace + {Value: "stopped", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + } + tokenList2 := token.NewTokenListWithTokens(tokens2) + cluster.AddTokenListToPatterns(tokenList2) + + mostCommon := getMostCommonPattern(cluster) + assert.NotNil(t, mostCommon, "Most common pattern should not be nil") + assert.Equal(t, 3, mostCommon.LogCount, "Most common pattern should have 3 logs") +} + +func TestCluster_GetAllPatterns(t *testing.T) { + signature := token.Signature{ + Position: "Word|Whitespace|Numeric", + Length: 3, + Hash: 1111, + } + + cluster := NewCluster(signature, nil) + + // Create 3 different patterns via whitespace variation + tokens1 := []token.Token{ + {Value: "Count", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "42", Type: token.TokenNumeric}, + } + tokens2 := []token.Token{ + {Value: "Count", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: " ", Type: token.TokenWhitespace}, // Different + {Value: "100", Type: token.TokenNumeric}, + } + tokens3 := []token.Token{ + {Value: "Count", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: " ", Type: token.TokenWhitespace}, // Different + {Value: "200", Type: token.TokenNumeric}, + } + + cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens1)) + cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens2)) + cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens3)) + + allPatterns := cluster.Patterns + assert.Len(t, allPatterns, 3, "Expected 3 patterns") +} + +func TestCluster_ExtractWildcardValues_MultiPattern(t *testing.T) { + signature := token.Signature{ + Position: "Error|Word|Whitespace|Word", + Length: 4, + Hash: 2222, + } + + 
cluster := NewCluster(signature, nil) + + tokens1 := []token.Token{ + {Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: ":", Type: token.TokenWord}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "connection", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + } + tokens2 := []token.Token{ + {Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: ":", Type: token.TokenWord}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "timeout", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + } + + tokenList1 := token.NewTokenListWithTokens(tokens1) + tokenList2 := token.NewTokenListWithTokens(tokens2) + + cluster.AddTokenListToPatterns(tokenList1) + cluster.AddTokenListToPatterns(tokenList2) + + // Both should merge into same pattern + // Extract wildcard values from tokenList2 + values := extractWildcardValues(cluster, tokenList2) + + // Should have one wildcard value for the last word token + assert.Len(t, values, 1, "Expected 1 wildcard value") + if len(values) > 0 { + assert.Equal(t, "timeout", values[0], "Expected wildcard value 'timeout'") + } +} + +func TestCluster_Size_MultiPattern(t *testing.T) { + signature := token.Signature{ + Position: "Word|Whitespace|Word", + Length: 3, + Hash: 3333, + } + + cluster := NewCluster(signature, nil) + + // Add 2 token lists to pattern 1 + for i := 0; i < 2; i++ { + tokens := []token.Token{ + {Value: "Test", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "passed", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + } + cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens)) + } + + // Add 3 token lists to pattern 2 (different whitespace) + for i := 0; i < 3; i++ { + tokens := []token.Token{ + {Value: "Test", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: " ", Type: token.TokenWhitespace}, // Different + {Value: "failed", Type: token.TokenWord, 
Wildcard: token.PotentialWildcard}, + } + cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens)) + } + + // Total size should be 5 (2 + 3) + assert.Equal(t, 5, clusterSize(cluster), "Expected cluster size 5 (2 + 3)") +} + +func TestCluster_BackwardCompatibility(t *testing.T) { + // Test that old API methods still work (GetPatternString, GetWildcardPositions, etc.) + signature := token.Signature{ + Position: "Word|Whitespace|Word", + Length: 3, + Hash: 4444, + } + + cluster := NewCluster(signature, nil) + + tokens1 := []token.Token{ + {Value: "Service", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "started", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + } + tokens2 := []token.Token{ + {Value: "Service", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "stopped", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + } + + cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens1)) + cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens2)) + + patternString := getPatternString(cluster) + assert.NotEmpty(t, patternString, "getPatternString should return a valid pattern string") + + wildcards := hasWildcards(cluster) + assert.True(t, wildcards, "Should have wildcards") + + wildcardPositions := getWildcardPositions(cluster) + assert.NotEmpty(t, wildcardPositions, "Should have wildcard positions") + + t.Logf("✅ Backward compatibility: pattern='%s', wildcards=%v", patternString, wildcardPositions) +} + +// ============================================================================= +// Test Helper Functions +// ============================================================================= + +// getMostCommonPattern returns the pattern with the highest log count in the cluster. 
+func getMostCommonPattern(c *Cluster) *Pattern { + if len(c.Patterns) == 0 { + return nil + } + + mostCommonIdx := 0 + maxLogCount := c.Patterns[0].LogCount + for idx, p := range c.Patterns { + if p.LogCount > maxLogCount { + maxLogCount = p.LogCount + mostCommonIdx = idx + } + } + return c.Patterns[mostCommonIdx] +} + +// getPatternString returns a string representation of the most common pattern. +func getPatternString(c *Cluster) string { + mostCommon := getMostCommonPattern(c) + if mostCommon == nil { + return "" + } + return mostCommon.GetPatternString() +} + +// getWildcardPositions returns wildcard token positions for the most common pattern. +func getWildcardPositions(c *Cluster) []int { + mostCommon := getMostCommonPattern(c) + if mostCommon == nil { + return nil + } + return mostCommon.Positions +} + +// hasWildcards returns true if any pattern in this cluster contains wildcard positions. +func hasWildcards(c *Cluster) bool { + for _, p := range c.Patterns { + if len(p.Positions) > 0 { + return true + } + } + return false +} + +// extractWildcardValues extracts wildcard values from a TokenList using the matching pattern. +func extractWildcardValues(c *Cluster, tokenList *token.TokenList) []string { + p := findMatchingPattern(c, tokenList) + if p == nil { + return []string{} + } + return p.GetWildcardValues(tokenList) +} + +// findMatchingPattern finds the Pattern that matches the given TokenList. +func findMatchingPattern(c *Cluster, tokenList *token.TokenList) *Pattern { + if len(c.Patterns) == 0 { + return nil + } + + // Try to find a Pattern where the TokenList can merge + for _, p := range c.Patterns { + // Check if this TokenList can merge with the pattern's sample + if p.Sample != nil && merging.CanMergeTokenLists(tokenList, p.Sample) { + return p + } + } + + // No matching pattern found + return nil +} + +// clusterSize returns the total number of logs across all patterns in the cluster. 
+func clusterSize(c *Cluster) int { + total := 0 + for _, p := range c.Patterns { + total += p.LogCount + } + return total +} diff --git a/pkg/logs/patterns/clustering/merging/merging.go b/pkg/logs/patterns/clustering/merging/merging.go new file mode 100644 index 000000000000..a778cb0d8f03 --- /dev/null +++ b/pkg/logs/patterns/clustering/merging/merging.go @@ -0,0 +1,102 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package merging provides intelligent mergeability logic for pattern generation. +// It determines which TokenLists can be merged into unified patterns with wildcards, +// and enforces protection rules to maintain semantic quality. +package merging + +import ( + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +// shouldProtectPosition determines if the token at this position is the first word token. +// The first word token is protected from wildcarding to preserve semantic meaning, +// regardless of what position it appears at (e.g., after timestamps/dates). +func shouldProtectPosition(position int, tokenType token.TokenType, tl *token.TokenList) bool { + // Only word tokens can be protected + if tokenType != token.TokenWord { + return false + } + + // Check if any word token appears before this position + for i := 0; i < position; i++ { + if tl.Tokens[i].Type == token.TokenWord { + return false // Not the first word token + } + } + + // This is the first word token + return true +} + +// CanMergeTokenLists checks if incoming log (tl2) can merge with existing pattern's sample (tl1). +// Returns true only if all token positions are either identical or mergeable according +// to their comparison results and protection rules. 
+func CanMergeTokenLists(tl1, tl2 *token.TokenList) bool { + if tl1.Length() != tl2.Length() { + return false + } + + for i := 0; i < tl1.Length(); i++ { + tok1 := &tl1.Tokens[i] + tok2 := &tl2.Tokens[i] + + result := tok1.Compare(tok2) + + // If tokens conflict, reject + if result == token.Conflict { + return false + } + + // If tokens are identical, continue + if result == token.Identical { + continue + } + + // For wildcard result, check first word protection rule + if result == token.Wildcard && shouldProtectPosition(i, tok1.Type, tl1) { + return false + } + } + + return true +} + +// MergeTokenLists performs the actual merge of two TokenLists, creating a new TokenList +// with wildcards at positions where tokens differ but are mergeable. +// Returns nil if the TokenLists cannot be merged. +func MergeTokenLists(tl1, tl2 *token.TokenList) *token.TokenList { + if tl1.Length() != tl2.Length() { + return nil + } + + merged := token.NewTokenList() + + for i := 0; i < tl1.Length(); i++ { + tok1 := &tl1.Tokens[i] + tok2 := &tl2.Tokens[i] + + result := tok1.Compare(tok2) + + switch result { + case token.Conflict: + return nil // Abort entire merge + + case token.Identical: + merged.Add(*tok1) // Keep same + + case token.Wildcard: + // Check protection rules before wildcarding + if shouldProtectPosition(i, tok1.Type, tl1) { + return nil + } + // Create wildcard, preserving the first token's value as representative + merged.AddToken(tok1.Type, tok1.Value, token.IsWildcard) + } + } + + return merged +} diff --git a/pkg/logs/patterns/clustering/merging/merging_test.go b/pkg/logs/patterns/clustering/merging/merging_test.go new file mode 100644 index 000000000000..510fdc169c12 --- /dev/null +++ b/pkg/logs/patterns/clustering/merging/merging_test.go @@ -0,0 +1,324 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). 
+// Copyright 2016-present Datadog, Inc. + +package merging + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +func TestShouldProtectPosition(t *testing.T) { + tests := []struct { + name string + tokens []token.Token + position int + expected bool + }{ + { + name: "First word at position 0 should be protected", + tokens: []token.Token{ + token.NewToken(token.TokenWord, "ERROR", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "failed", token.NotWildcard), + }, + position: 0, + expected: true, + }, + { + name: "First numeric at position 0 should not be protected", + tokens: []token.Token{ + token.NewToken(token.TokenNumeric, "2025", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "ERROR", token.NotWildcard), + }, + position: 0, + expected: false, + }, + { + name: "Second word should not be protected", + tokens: []token.Token{ + token.NewToken(token.TokenWord, "ERROR", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "failed", token.NotWildcard), + }, + position: 2, + expected: false, + }, + { + name: "First word after timestamp should be protected", + tokens: []token.Token{ + token.NewToken(token.TokenNumeric, "2025-11-16", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenNumeric, "07:03:09", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "ERROR", token.NotWildcard), + }, + position: 4, + expected: true, + }, + { + name: "Whitespace should not be protected", + tokens: []token.Token{ + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "ERROR", token.NotWildcard), + }, + position: 0, + expected: 
false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tl := token.NewTokenListWithTokens(tt.tokens) + result := shouldProtectPosition(tt.position, tt.tokens[tt.position].Type, tl) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestCanMergeTokenLists_IdenticalLists(t *testing.T) { + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "hello", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "world", token.NotWildcard), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "hello", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "world", token.NotWildcard), + }) + + assert.True(t, CanMergeTokenLists(tl1, tl2)) +} + +func TestCanMergeTokenLists_PossiblyWildcardTokens(t *testing.T) { + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "logged", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "user123", token.PotentialWildcard), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "logged", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "admin456", token.PotentialWildcard), + }) + + assert.True(t, CanMergeTokenLists(tl1, tl2)) +} + +func TestCanMergeTokenLists_GenericWords(t *testing.T) { + // Generic words without possiblyWildcard flag should not merge + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "bob", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "likes", token.NotWildcard), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "cat", 
token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "likes", token.NotWildcard), + }) + + assert.False(t, CanMergeTokenLists(tl1, tl2)) +} + +func TestCanMergeTokenLists_DifferentLengths(t *testing.T) { + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "hello", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "world", token.NotWildcard), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "hello", token.NotWildcard), + }) + + assert.False(t, CanMergeTokenLists(tl1, tl2)) +} + +func TestCanMergeTokenLists_FirstWordProtection(t *testing.T) { + // First word protection should prevent merge even with possiblyWildcard + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "user123", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "logged", token.NotWildcard), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "admin456", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "logged", token.NotWildcard), + }) + + assert.False(t, CanMergeTokenLists(tl1, tl2), "First word should be protected from wildcarding") +} + +func TestMergeTokenLists_CreateWildcard(t *testing.T) { + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "logged", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "user123", token.PotentialWildcard), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "logged", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, 
"admin456", token.PotentialWildcard), + }) + + merged := MergeTokenLists(tl1, tl2) + assert.NotNil(t, merged) + assert.Equal(t, 3, merged.Length()) + assert.Equal(t, "logged", merged.Tokens[0].Value) + assert.Equal(t, token.NotWildcard, merged.Tokens[0].Wildcard) + assert.Equal(t, " ", merged.Tokens[1].Value) + // Wildcard token has empty value - the Wildcard field tracks status + assert.Equal(t, token.IsWildcard, merged.Tokens[2].Wildcard) + assert.Equal(t, token.TokenWord, merged.Tokens[2].Type) +} + +func TestMergeTokenLists_UnmergeableReturnsNil(t *testing.T) { + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "bob", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "likes", token.NotWildcard), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "cat", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "likes", token.NotWildcard), + }) + + merged := MergeTokenLists(tl1, tl2) + assert.Nil(t, merged, "Unmergeable TokenLists should return nil") +} + +func TestMergeTokenLists_ProtectionRulesEnforced(t *testing.T) { + // Try to merge when first token is a word but differs + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "Login", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "successful", token.NotWildcard), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "Logout", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "successful", token.NotWildcard), + }) + + // Should fail because first word is protected + merged := MergeTokenLists(tl1, tl2) + assert.Nil(t, merged, "Should not merge when first word differs (protected)") +} 
+ +func TestCanMergeTokenLists_TimestampPrefixedLogs(t *testing.T) { + // Test that first WORD (not severity level) after timestamp is protected + // Severity levels CAN wildcard, but the first actual word is protected + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenNumeric, "2025-11-16", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenNumeric, "07:03:09", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenSeverityLevel, "ERROR", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "Failed", token.NotWildcard), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenNumeric, "2025-11-16", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenNumeric, "07:03:11", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenSeverityLevel, "WARN", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "Memory", token.NotWildcard), + }) + + // Should NOT merge because first word (Failed vs Memory) differs and is protected + // Note: Severity levels (ERROR vs WARN) CAN wildcard - they're not the "first word" + assert.False(t, CanMergeTokenLists(tl1, tl2), "First word token (after severity) should be protected") +} + +func TestMergeTokenLists_TimestampPrefixedLogsSameFirstWord(t *testing.T) { + // Test that logs with same first word can merge, even with different timestamps and severity levels + // Pattern: * * * Failed * + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenNumeric, "2025-11-15", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", 
token.NotWildcard), + token.NewToken(token.TokenNumeric, "07:03:09", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenSeverityLevel, "ERROR", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "Failed", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "user123", token.PotentialWildcard), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenNumeric, "2025-11-16", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenNumeric, "07:03:11", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenSeverityLevel, "WARN", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "Failed", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "admin456", token.PotentialWildcard), + }) + + // Should merge - timestamps wildcard, severity wildcard, "Failed" is protected but identical, last word wildcards + merged := MergeTokenLists(tl1, tl2) + assert.NotNil(t, merged, "Should merge when first word matches") + assert.Equal(t, token.IsWildcard, merged.Tokens[0].Wildcard, "Date should be wildcarded") + assert.Equal(t, token.IsWildcard, merged.Tokens[2].Wildcard, "Time should be wildcarded") + assert.Equal(t, token.IsWildcard, merged.Tokens[4].Wildcard, "Severity level should be wildcarded") + assert.Equal(t, "Failed", merged.Tokens[6].Value, "Failed (first word) should be preserved") + assert.Equal(t, token.NotWildcard, merged.Tokens[6].Wildcard, "Failed should not be wildcarded (protected)") + assert.Equal(t, token.IsWildcard, merged.Tokens[8].Wildcard, "Last word should be 
wildcarded") +} + +func TestMergeTokenLists_ProgressiveMerging(t *testing.T) { + // Test merging multiple TokenLists progressively + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "Request", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenNumeric, "123", token.PotentialWildcard), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "Request", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenNumeric, "456", token.PotentialWildcard), + }) + + tl3 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "Request", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenNumeric, "789", token.PotentialWildcard), + }) + + // Merge first two + merged12 := MergeTokenLists(tl1, tl2) + assert.NotNil(t, merged12) + assert.Equal(t, token.IsWildcard, merged12.Tokens[2].Wildcard) + + // Merge result with third + merged123 := MergeTokenLists(merged12, tl3) + assert.NotNil(t, merged123) + assert.Equal(t, 3, merged123.Length()) + assert.Equal(t, "Request", merged123.Tokens[0].Value) + // Wildcard token has empty value - the Wildcard field tracks status + assert.Equal(t, token.IsWildcard, merged123.Tokens[2].Wildcard) + assert.Equal(t, token.TokenNumeric, merged123.Tokens[2].Type) +} diff --git a/pkg/logs/patterns/clustering/pattern.go b/pkg/logs/patterns/clustering/pattern.go new file mode 100644 index 000000000000..788582678d8c --- /dev/null +++ b/pkg/logs/patterns/clustering/pattern.go @@ -0,0 +1,149 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +// Package clustering provides clustering functionality for grouping similar TokenLists +// and identifying wildcard positions for pattern extraction. +package clustering + +import ( + "strings" + "time" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering/merging" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +// Pattern represents a single pattern within a cluster. +// A cluster with the same signature may contain multiple incompatible patterns +// (e.g., different non-identical special characters that cannot merge). +type Pattern struct { + Template *token.TokenList // The pattern template with wildcards (matches proto "template") + Positions []int // Token indices that are wildcards (matches proto "pos_list") + PatternID uint64 // Unique pattern ID (matches proto "pattern_id") + Sample *token.TokenList // First log sample (for multi-pattern matching) + LogCount int // Total number of logs that matched this pattern + + // Timestamp tracking for stateful encoding + CreatedAt time.Time // When pattern was first created + UpdatedAt time.Time // When pattern was last modified +} + +// newPattern creates a new pattern from a single token list. +func newPattern(tokenList *token.TokenList, patternID uint64) *Pattern { + now := time.Now() + return &Pattern{ + Template: tokenList, // First log becomes initial template + Positions: []int{}, // No wildcards yet + PatternID: patternID, + Sample: tokenList, // Store first log as sample + LogCount: 1, // First log + CreatedAt: now, + UpdatedAt: now, + } +} + +// size returns the number of logs in this pattern. +func (p *Pattern) size() int { + return p.LogCount +} + +// GetPatternString returns the pattern template. 
+// Pattern template has no wildcard placeholders and wildcard tokens are completely omitted +func (p *Pattern) GetPatternString() string { + if p.Template == nil { + return "" + } + + var parts []string + for _, tok := range p.Template.Tokens { + // Skip wildcard tokens entirely + if tok.Wildcard == token.IsWildcard { + continue + } + // Only use printable ASCII/UTF-8 characters in the template + cleaned := sanitizeForTemplate(tok.Value) + if cleaned != "" { + parts = append(parts, cleaned) + } + } + return strings.Join(parts, "") +} + +// hasWildcards returns true if this pattern contains wildcard positions. +func (p *Pattern) hasWildcards() bool { + return len(p.Positions) > 0 +} + +// GetWildcardCount returns the number of wildcard positions in this pattern. +// This matches the ParamCount that will be sent in PatternDefine. +func (p *Pattern) GetWildcardCount() int { + return len(p.Positions) +} + +// GetWildcardCharPositions returns character indices where dynamic values should be injected. +// The template does NOT contain wildcard placeholders - wildcards are omitted entirely. +// Positions mark the injection points in the template string. +// Example: Template "User logged" (wildcard omitted) returns [5] (inject after "User ") +func (p *Pattern) GetWildcardCharPositions() []int { + if p.Template == nil { + return nil + } + + var charPositions []int + currentPos := 0 + + for _, tok := range p.Template.Tokens { + cleaned := sanitizeForTemplate(tok.Value) + + if tok.Wildcard == token.IsWildcard { + // Mark the injection point (current position in template which excludes wildcards) + charPositions = append(charPositions, currentPos) + // Wildcard tokens are NOT in the template, so don't advance currentPos + } else if cleaned != "" { + // Add the length of the cleaned token value + currentPos += len(cleaned) + } + } + + return charPositions +} + +// GetWildcardValues extracts the wildcard values from a specific TokenList. 
+func (p *Pattern) GetWildcardValues(tokenList *token.TokenList) []string { + if p.Template == nil || len(p.Positions) == 0 { + return []string{} + } + + // Check if tokenList matches p.Template structure + templateMatches := merging.CanMergeTokenLists(p.Template, tokenList) || merging.CanMergeTokenLists(tokenList, p.Template) + if !templateMatches { + return nil + } + + wildcardValues := make([]string, len(p.Positions)) + + for i, templatePos := range p.Positions { + if templatePos < tokenList.Length() { + wildcardValues[i] = tokenList.Tokens[templatePos].Value + } else { + wildcardValues[i] = "" + } + } + + return wildcardValues +} + +// sanitizeForTemplate removes non-printable characters from template strings +func sanitizeForTemplate(s string) string { + runes := []rune(s) + result := make([]rune, 0, len(runes)) + for _, r := range runes { + // Keep only printable characters (space and above, excluding DEL) + if r >= ' ' && r != 0x7F && r < 0xFFFD { + result = append(result, r) + } + } + return string(result) +} diff --git a/pkg/logs/patterns/clustering/pattern_test.go b/pkg/logs/patterns/clustering/pattern_test.go new file mode 100644 index 000000000000..474c72aa82b4 --- /dev/null +++ b/pkg/logs/patterns/clustering/pattern_test.go @@ -0,0 +1,432 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +package clustering + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +// Test-only helper functions + +func TestNewPattern(t *testing.T) { + // Create a simple token list + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) + + patternID := uint64(12345) + pattern := newPattern(tl, patternID) + + assert.NotNil(t, pattern) + assert.Equal(t, patternID, pattern.PatternID) + assert.Equal(t, tl, pattern.Template, "Template should be the initial token list") + assert.Equal(t, tl, pattern.Sample, "Sample should be the initial token list") + assert.Equal(t, 1, pattern.LogCount, "LogCount should be 1 for first log") + assert.Equal(t, 0, len(pattern.Positions), "No wildcards initially") + assert.False(t, pattern.CreatedAt.IsZero(), "CreatedAt should be set") + assert.False(t, pattern.UpdatedAt.IsZero(), "UpdatedAt should be set") +} + +func TestAddTokenList(t *testing.T) { + // Note: addTokenList() was inlined into Cluster.AddTokenListToPatterns() + // This test now verifies that LogCount and UpdatedAt can be modified directly + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + initialLogCount := pattern.LogCount + initialUpdatedAt := pattern.UpdatedAt + + // Simulate what cluster does when adding to existing pattern + time.Sleep(1 * time.Millisecond) // Ensure time difference + pattern.LogCount++ + pattern.UpdatedAt = time.Now() + + assert.Equal(t, initialLogCount+1, pattern.LogCount, "LogCount should increment") + assert.True(t, pattern.UpdatedAt.After(initialUpdatedAt), "UpdatedAt should be updated") +} + +func TestSize(t *testing.T) { + tl := token.NewTokenList() + 
tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + assert.Equal(t, 1, pattern.size()) + + // Simulate adding more logs (what cluster does) + pattern.LogCount++ + assert.Equal(t, 2, pattern.size()) + + pattern.LogCount++ + assert.Equal(t, 3, pattern.size()) +} + +func TestGetPatternString_NoWildcards(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + result := pattern.GetPatternString() + + assert.Equal(t, "Service started", result) +} + +func TestGetPatternString_WithWildcards(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + + pattern := newPattern(tl, 12345) + pattern.Positions = []int{2} + result := pattern.GetPatternString() + + // Wildcard tokens are omitted from the template + assert.Equal(t, "Service ", result) +} + +func TestGetPatternString_NilTemplate(t *testing.T) { + pattern := &Pattern{ + Template: nil, + } + result := pattern.GetPatternString() + + assert.Equal(t, "", result) +} + +func TestHasWildcards(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + + // No wildcards initially + assert.False(t, pattern.hasWildcards()) + + // Add wildcard positions + pattern.Positions = []int{1, 3} + assert.True(t, pattern.hasWildcards()) +} + +func TestGetWildcardPositions(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + pattern.Positions = 
[]int{1, 3, 5} + + assert.Equal(t, []int{1, 3, 5}, pattern.Positions) +} + +// getParamCount returns the number of parameters/wildcards in a pattern. +func getParamCount(p *Pattern) int { + return len(p.Positions) +} + +func TestGetParamCount(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + + // No wildcards + assert.Equal(t, 0, getParamCount(pattern)) + + // Add wildcard positions + pattern.Positions = []int{1, 3, 5} + assert.Equal(t, 3, getParamCount(pattern)) +} + +func TestGetWildcardCharPositions(t *testing.T) { + // Create pattern: "Service " (wildcard omitted from template) + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + + pattern := newPattern(tl, 12345) + pattern.Positions = []int{2} + + charPositions := pattern.GetWildcardCharPositions() + // "Service " = 8 chars, wildcard injection point is at position 8 + assert.Equal(t, []int{8}, charPositions) +} + +func TestGetWildcardCharPositions_MultipleWildcards(t *testing.T) { + // Create pattern: "Error in " (both wildcards omitted from template) + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Error", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "code", token.IsWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "in", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "module", token.IsWildcard)) + + pattern := newPattern(tl, 12345) + pattern.Positions = []int{2, 6} + + charPositions := pattern.GetWildcardCharPositions() + // Template is "Error in " (wildcards omitted): 
"Error " (6 chars) + " in " (4 chars) = 10 chars + // First wildcard injection at position 6 (after "Error ") + // Second wildcard injection at position 10 (after "Error in ") + assert.Equal(t, []int{6, 10}, charPositions) +} + +func TestGetWildcardCharPositions_NilTemplate(t *testing.T) { + pattern := &Pattern{ + Template: nil, + } + + charPositions := pattern.GetWildcardCharPositions() + assert.Nil(t, charPositions) +} + +func TestGetWildcardValues(t *testing.T) { + // Create sample log: "Service started" + sample := token.NewTokenList() + sample.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + sample.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + sample.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) + + // Create template with wildcard: "Service *" + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + + pattern := newPattern(sample, 12345) + pattern.Template = tl + pattern.Positions = []int{2} + + values := pattern.GetWildcardValues(sample) + assert.Equal(t, []string{"started"}, values) +} + +func TestGetWildcardValues_NilTemplate(t *testing.T) { + sample := token.NewTokenList() + sample.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) + + pattern := newPattern(sample, 12345) + pattern.Template = nil + + values := pattern.GetWildcardValues(sample) + assert.Empty(t, values) +} + +func TestGetWildcardValues_NilSample(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.IsWildcard)) + + pattern := newPattern(tl, 12345) + pattern.Sample = nil + pattern.Positions = []int{0} + + // Test with the template itself since sample is nil + values := pattern.GetWildcardValues(tl) + assert.Equal(t, []string{"Test"}, values) +} + +func TestExtractWildcardValues(t 
*testing.T) { + // Create template: "Service *" + template := token.NewTokenList() + template.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + + pattern := newPattern(template, 12345) + pattern.Template = template + pattern.Positions = []int{2} + + // Create incoming log: "Service crashed" + incoming := token.NewTokenList() + incoming.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + incoming.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + incoming.Add(token.NewToken(token.TokenWord, "crashed", token.PotentialWildcard)) + + values := pattern.GetWildcardValues(incoming) + assert.Equal(t, []string{"crashed"}, values) +} + +func TestExtractWildcardValues_MultipleWildcards(t *testing.T) { + // Create template: "* in * at *" + template := token.NewTokenList() + template.Add(token.NewToken(token.TokenWord, "value1", token.IsWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "in", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "value2", token.IsWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "at", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "value3", token.IsWildcard)) + + pattern := newPattern(template, 12345) + pattern.Template = template + pattern.Positions = []int{0, 4, 8} + + // Create incoming log: "Error in module at line" + incoming := token.NewTokenList() + incoming.Add(token.NewToken(token.TokenWord, "Error", token.PotentialWildcard)) + incoming.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + 
incoming.Add(token.NewToken(token.TokenWord, "in", token.NotWildcard)) + incoming.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + incoming.Add(token.NewToken(token.TokenWord, "module", token.PotentialWildcard)) + incoming.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + incoming.Add(token.NewToken(token.TokenWord, "at", token.NotWildcard)) + incoming.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + incoming.Add(token.NewToken(token.TokenWord, "line", token.PotentialWildcard)) + + values := pattern.GetWildcardValues(incoming) + assert.Equal(t, []string{"Error", "module", "line"}, values) +} + +func TestExtractWildcardValues_NilTemplate(t *testing.T) { + pattern := &Pattern{ + Template: nil, + Positions: []int{0}, + } + + incoming := token.NewTokenList() + incoming.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) + + values := pattern.GetWildcardValues(incoming) + assert.Equal(t, []string{}, values) +} + +func TestExtractWildcardValues_NoPositions(t *testing.T) { + template := token.NewTokenList() + template.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) + + pattern := newPattern(template, 12345) + pattern.Positions = []int{} // No wildcards + + incoming := token.NewTokenList() + incoming.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) + + values := pattern.GetWildcardValues(incoming) + assert.Equal(t, []string{}, values) +} + +func TestExtractWildcardValues_PositionOutOfBounds(t *testing.T) { + template := token.NewTokenList() + template.Add(token.NewToken(token.TokenWord, "Test", token.IsWildcard)) + + pattern := newPattern(template, 12345) + pattern.Positions = []int{0, 5} // Position 5 is out of bounds + + incoming := token.NewTokenList() + incoming.Add(token.NewToken(token.TokenWord, "Value", token.PotentialWildcard)) + + values := pattern.GetWildcardValues(incoming) + // CRITICAL: Must return same length as Positions to match ParamCount + // Out-of-bounds positions 
are filled with empty strings + assert.Equal(t, []string{"Value", ""}, values, "Should maintain Positions length with empty strings for out-of-bounds") +} + +func TestSanitizeForTemplate_PrintableChars(t *testing.T) { + input := "Hello World 123" + result := sanitizeForTemplate(input) + assert.Equal(t, "Hello World 123", result) +} + +func TestSanitizeForTemplate_NonPrintableChars(t *testing.T) { + // Include null byte, bell, backspace + input := "Hello\x00\x07\x08World" + result := sanitizeForTemplate(input) + assert.Equal(t, "HelloWorld", result, "Non-printable characters should be removed") +} + +func TestSanitizeForTemplate_DELCharacter(t *testing.T) { + input := "Hello\x7FWorld" + result := sanitizeForTemplate(input) + assert.Equal(t, "HelloWorld", result, "DEL character should be removed") +} + +func TestSanitizeForTemplate_SpecialChars(t *testing.T) { + input := "Service: Error! @user #tag" + result := sanitizeForTemplate(input) + assert.Equal(t, "Service: Error! @user #tag", result, "Special chars should be kept") +} + +func TestSanitizeForTemplate_EmptyString(t *testing.T) { + input := "" + result := sanitizeForTemplate(input) + assert.Equal(t, "", result) +} + +func TestSanitizeForTemplate_UnicodeChars(t *testing.T) { + input := "Hello 世界 🌍" + result := sanitizeForTemplate(input) + // Emoji (🌍) is above 0xFFFD and gets filtered out by sanitizeForTemplate + // CJK characters (世界) are within the acceptable range + assert.Equal(t, "Hello 世界 ", result, "CJK chars preserved, emoji filtered") +} + +func TestPattern_IntegrationScenario(t *testing.T) { + // Simulate a realistic pattern lifecycle + + // 1. 
First log arrives + log1 := token.NewTokenList() + log1.Add(token.NewToken(token.TokenWord, "ERROR", token.NotWildcard)) + log1.Add(token.NewToken(token.TokenWord, ":", token.NotWildcard)) + log1.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + log1.Add(token.NewToken(token.TokenWord, "Database", token.PotentialWildcard)) + log1.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + log1.Add(token.NewToken(token.TokenWord, "connection", token.PotentialWildcard)) + log1.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + log1.Add(token.NewToken(token.TokenWord, "failed", token.PotentialWildcard)) + + pattern := newPattern(log1, 9999) + + assert.Equal(t, 1, pattern.LogCount) + assert.False(t, pattern.hasWildcards()) + assert.Equal(t, "ERROR: Database connection failed", pattern.GetPatternString()) + + // 2. Pattern updated with wildcards (simulated) + template := token.NewTokenList() + template.Add(token.NewToken(token.TokenWord, "ERROR", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, ":", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + + pattern.Template = template + pattern.Positions = []int{3, 5, 7} + pattern.LogCount++ // Simulate second log being added + pattern.UpdatedAt = time.Now() + + assert.Equal(t, 2, pattern.LogCount) + assert.True(t, pattern.hasWildcards()) + assert.Equal(t, 3, getParamCount(pattern)) + // Wildcard tokens are omitted from template, leaving: "ERROR: " + " " + " " = "ERROR: " + assert.Equal(t, "ERROR: ", pattern.GetPatternString()) + + // 3. 
Extract wildcard values from new log + log2 := token.NewTokenList() + log2.Add(token.NewToken(token.TokenWord, "ERROR", token.NotWildcard)) + log2.Add(token.NewToken(token.TokenWord, ":", token.NotWildcard)) + log2.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + log2.Add(token.NewToken(token.TokenWord, "Network", token.PotentialWildcard)) + log2.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + log2.Add(token.NewToken(token.TokenWord, "timeout", token.PotentialWildcard)) + log2.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + log2.Add(token.NewToken(token.TokenWord, "reached", token.PotentialWildcard)) + + values := pattern.GetWildcardValues(log2) + assert.Equal(t, []string{"Network", "timeout", "reached"}, values) +} diff --git a/pkg/logs/patterns/token/signature.go b/pkg/logs/patterns/token/signature.go new file mode 100644 index 000000000000..7f8411d39b56 --- /dev/null +++ b/pkg/logs/patterns/token/signature.go @@ -0,0 +1,95 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package token provides data structures and utilities for tokenizing log messages. 
+package token + +import ( + "fmt" + "hash/fnv" + "strings" +) + +// Signature represents a structural signature of a TokenList +type Signature struct { + Position string + Length int + Hash uint64 +} + +// NewSignature creates a signature from a TokenList +func NewSignature(tl *TokenList) Signature { + if tl.IsEmpty() { + return Signature{ + Position: "", + Length: 0, + Hash: 0, + } + } + + position := positionSignature(tl) + + // Include first word token value in signature if it exists + // This prevents messages with different first words but similar signature from being in the same cluster + // eg: I love burger vs You love burger + if len(tl.Tokens) > 0 && tl.Tokens[0].Type == TokenWord { + firstWordValue := tl.Tokens[0].Value + position = firstWordValue + position + } + + hash := computeHash(position) + return Signature{ + Position: position, + Length: len(tl.Tokens), + Hash: hash, + } +} + +// Equals checks if two signatures are identical +func (s *Signature) Equals(other Signature) bool { + return s.Position == other.Position && + s.Length == other.Length +} + +// computeHash generates a hash for the signature +func computeHash(input string) uint64 { + hash := fnv.New64a() + hash.Write([]byte(input)) + return hash.Sum64() +} + +// String returns a string representation of the signature +func (s *Signature) String() string { + return fmt.Sprintf("Sig{pos:%s, len:%d, hash:%x}", + s.Position, s.Length, s.Hash) +} + +// IsEmpty returns true if the signature represents an empty TokenList +func (s *Signature) IsEmpty() bool { + return s.Length == 0 +} + +// HasSameStructure checks if two signatures have the same positional structure +func (s *Signature) HasSameStructure(other Signature) bool { + return s.Position == other.Position && s.Length == other.Length +} + +// GetHashBucket returns the hash bucket for efficient clustering +func (s *Signature) GetHashBucket() uint64 { + return s.Hash +} + +// positionSignature generates position-based signature +func 
positionSignature(tl *TokenList) string { + if tl.IsEmpty() { + return "" + } + + var positionParts []string + for _, token := range tl.Tokens { + positionParts = append(positionParts, token.Type.String()) + } + return strings.Join(positionParts, "|") +} diff --git a/pkg/logs/patterns/token/signature_test.go b/pkg/logs/patterns/token/signature_test.go new file mode 100644 index 000000000000..885062fa90c4 --- /dev/null +++ b/pkg/logs/patterns/token/signature_test.go @@ -0,0 +1,241 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package token + +import ( + "testing" +) + +func TestNewSignature(t *testing.T) { + // Empty TokenList + emptyTL := NewTokenList() + emptySig := NewSignature(emptyTL) + if emptySig.Position != "" || emptySig.Length != 0 || emptySig.Hash != 0 { + t.Error("Empty TokenList should have empty signature") + } + + // Non-empty TokenList + tokens := []Token{ + {Type: TokenHTTPMethod, Value: "GET"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenAbsolutePath, Value: "/api"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenHTTPStatus, Value: "200"}, + } + tl := NewTokenListWithTokens(tokens) + sig := NewSignature(tl) + + expectedPosition := "HTTPMethod|Whitespace|AbsolutePath|Whitespace|HTTPStatus" + if sig.Position != expectedPosition { + t.Errorf("Expected position signature '%s', got '%s'", expectedPosition, sig.Position) + } + + if sig.Length != 5 { + t.Errorf("Expected length 5, got %d", sig.Length) + } + + if sig.Hash == 0 { + t.Error("Hash should not be 0 for non-empty TokenList") + } +} + +func TestSignature_Equals(t *testing.T) { + // Test 1: Same structure, SAME first word, different other values → EQUAL signatures + tokens1 := []Token{ + {Type: TokenWord, Value: "hello"}, + {Type: TokenWhitespace, Value: " "}, + {Type: 
TokenWord, Value: "world"}, + } + tokens2 := []Token{ + {Type: TokenWord, Value: "hello"}, // Same first word! + {Type: TokenWhitespace, Value: " "}, + {Type: TokenWord, Value: "universe"}, // Different second word + } + tl1 := NewTokenListWithTokens(tokens1) + tl2 := NewTokenListWithTokens(tokens2) + sig1 := NewSignature(tl1) + sig2 := NewSignature(tl2) + + if !sig1.Equals(sig2) { + t.Error("TokenLists with same first word and structure should have equal signatures") + } + + // Test 2: Same structure, DIFFERENT first word → DIFFERENT signatures + tokens3 := []Token{ + {Type: TokenWord, Value: "goodbye"}, // Different first word + {Type: TokenWhitespace, Value: " "}, + {Type: TokenWord, Value: "world"}, + } + tl3 := NewTokenListWithTokens(tokens3) + sig3 := NewSignature(tl3) + + if sig1.Equals(sig3) { + t.Error("TokenLists with different first word should NOT have equal signatures") + } + + // Test 3: Different structure (different types) → DIFFERENT signatures + tokens4 := []Token{ + {Type: TokenWord, Value: "hello"}, + {Type: TokenNumeric, Value: "123"}, // Different type + } + tl4 := NewTokenListWithTokens(tokens4) + sig4 := NewSignature(tl4) + + if sig1.Equals(sig4) { + t.Error("TokenLists with different structure should not have equal signatures") + } + + // Test 4: Signature equality with itself + if !sig1.Equals(sig1) { + t.Error("Signature should equal itself") + } +} + +func TestSignature_String(t *testing.T) { + tokens := []Token{ + {Type: TokenWord, Value: "test"}, + } + tl := NewTokenListWithTokens(tokens) + sig := NewSignature(tl) + + str := sig.String() + if str == "" { + t.Error("Signature string should not be empty") + } + + // Should contain key components + if !containsAll(str, []string{"pos:", "len:", "hash:"}) { + t.Errorf("Signature string should contain all components, got: %s", str) + } +} + +func TestSignature_IsEmpty(t *testing.T) { + // Empty signature + emptyTL := NewTokenList() + emptySig := NewSignature(emptyTL) + if !emptySig.IsEmpty() 
{ + t.Error("Empty signature should return true for IsEmpty()") + } + + // Non-empty signature + tokens := []Token{{Type: TokenWord, Value: "test"}} + tl := NewTokenListWithTokens(tokens) + sig := NewSignature(tl) + if sig.IsEmpty() { + t.Error("Non-empty signature should return false for IsEmpty()") + } +} + +func TestSignature_HasSameStructure(t *testing.T) { + // Same structure, different values + tokens1 := []Token{ + {Type: TokenHTTPMethod, Value: "GET"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenAbsolutePath, Value: "/api"}, + } + tokens2 := []Token{ + {Type: TokenHTTPMethod, Value: "POST"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenAbsolutePath, Value: "/users"}, + } + + tl1 := NewTokenListWithTokens(tokens1) + tl2 := NewTokenListWithTokens(tokens2) + sig1 := NewSignature(tl1) + sig2 := NewSignature(tl2) + + if !sig1.HasSameStructure(sig2) { + t.Error("Signatures with same structure should return true") + } + + // Different structure + tokens3 := []Token{ + {Type: TokenWord, Value: "different"}, + {Type: TokenNumeric, Value: "123"}, + } + tl3 := NewTokenListWithTokens(tokens3) + sig3 := NewSignature(tl3) + + if sig1.HasSameStructure(sig3) { + t.Error("Signatures with different structure should return false") + } +} + +func TestSignature_GetHashBucket(t *testing.T) { + tokens := []Token{ + {Type: TokenWord, Value: "test"}, + } + tl := NewTokenListWithTokens(tokens) + sig := NewSignature(tl) + + hashBucket := sig.GetHashBucket() + if hashBucket != sig.Hash { + t.Error("GetHashBucket should return the signature hash") + } + if hashBucket == 0 { + t.Error("Hash bucket should not be 0 for non-empty signature") + } +} + +func TestComputeHash(t *testing.T) { + // Test that same input produces same hash + input1 := "test input" + input2 := "test input" + input3 := "different input" + + hash1 := computeHash(input1) + hash2 := computeHash(input2) + hash3 := computeHash(input3) + + if hash1 != hash2 { + t.Error("Same input should produce same 
hash") + } + if hash1 == hash3 { + t.Error("Different input should produce different hash (very likely)") + } + if hash1 == 0 { + t.Error("Hash should not be 0") + } +} + +func TestSignature_ConsistentHashing(t *testing.T) { + // Test that identical TokenLists produce identical signatures with same hash + tokens := []Token{ + {Type: TokenHTTPMethod, Value: "GET"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenAbsolutePath, Value: "/api"}, + } + + tl1 := NewTokenListWithTokens(tokens) + tl2 := NewTokenListWithTokens(tokens) + + sig1 := NewSignature(tl1) + sig2 := NewSignature(tl2) + + if sig1.Hash != sig2.Hash { + t.Error("Identical TokenLists should produce identical signature hashes") + } + if !sig1.Equals(sig2) { + t.Error("Identical TokenLists should produce equal signatures") + } +} + +// Helper function to check if string contains all substrings +func containsAll(str string, substrings []string) bool { + for _, substr := range substrings { + found := false + for i := 0; i <= len(str)-len(substr); i++ { + if str[i:i+len(substr)] == substr { + found = true + break + } + } + if !found { + return false + } + } + return true +} diff --git a/pkg/logs/patterns/token/token.go b/pkg/logs/patterns/token/token.go new file mode 100644 index 000000000000..89b0dcdea904 --- /dev/null +++ b/pkg/logs/patterns/token/token.go @@ -0,0 +1,131 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package token provides data structures and utilities for tokenizing log messages. 
+package token + +import ( + "fmt" +) + +//go:generate stringer -type=TokenType -trimprefix=Token + +// TokenType.String() method is auto-generated by stringer +// Run: go generate ./pkg/logs/patterns/token to regenerate the stringer file if you make changes to the TokenType enum + +// TokenType represents the type of a token +type TokenType int + +const ( + // Basic token types + TokenUnknown TokenType = iota // TokenUnknown is the unknown token type + TokenWord // TokenWord is the word token type + TokenNumeric // TokenNumeric is the numeric token type + TokenWhitespace // TokenWhitespace is the whitespace token type + + // Network-related tokens + TokenIPv4 // TokenIPv4 is the IPv4 token type + TokenIPv6 // TokenIPv6 is the IPv6 token type + TokenEmail // TokenEmail is the email token type + TokenURI // TokenURI is the URI token type + TokenAbsolutePath // TokenAbsolutePath is the absolute path token type + + // HTTP-related tokens + TokenHTTPMethod // TokenHTTPMethod is the HTTP method token type + TokenHTTPStatus // TokenHTTPStatus is the HTTP status token type + + // Log-related tokens + TokenSeverityLevel // TokenSeverityLevel is the severity level token type + TokenDate // TokenDate is the date token type +) + +// WildcardStatus describes a token's potential to become a wildcard +type WildcardStatus int + +const ( + // NotWildcard - This token cannot become a wildcard + // Examples: whitespace or first word token + NotWildcard WildcardStatus = iota + + // PotentialWildcard - This token can become a wildcard + // Examples: all non white space tokens + PotentialWildcard + + // IsWildcard - This token is already a wildcard + IsWildcard +) + +// MergeResult describes the result of comparing two tokens +type MergeResult int + +const ( + // Conflict - Tokens cannot merge, abort pattern creation + // Examples: different types, words with different values + Conflict MergeResult = iota + + // Identical - Tokens are the same, keep as-is + // Examples: "Error" vs 
"Error", wildcard vs any value of same type + Identical + + // Wildcard - Tokens can merge into wildcard + // Examples: "connection" vs "replication", "user123" vs "admin456", "GET" vs "POST" + Wildcard +) + +// Token represents a single token in a log message +type Token struct { + Type TokenType + Value string + Wildcard WildcardStatus +} + +// NewToken creates a token with the specified wildcard status +func NewToken(tokenType TokenType, value string, wildcard WildcardStatus) Token { + return Token{ + Type: tokenType, + Value: value, + Wildcard: wildcard, + } +} + +// String returns a string representation of the token +func (t *Token) String() string { + return fmt.Sprintf("%s(%s)", t.Type, t.Value) +} + +// Compare checks if two tokens can merge +func (t *Token) Compare(t2 *Token) MergeResult { + // Different types cannot merge + if t.Type != t2.Type { + return Conflict + } + + // Same type same value - check this first before type-specific logic + if t.Value == t2.Value { + return Identical + } + + // t is wildcard - matches any value of same type + if t.Wildcard == IsWildcard { + return Identical + } + + // Whitespace never wildcards (structural) + if t.Type == TokenWhitespace { + return Conflict + } + + // Words only wildcard if both are PotentialWildcard + if t.Type == TokenWord { + if t.Wildcard == PotentialWildcard && t2.Wildcard == PotentialWildcard { + return Wildcard + } + return Conflict + } + + // Structured types (HTTP, IP, Numeric, Date, etc.) wildcard if same type + // Same TokenDate type means same format structure (e.g., both RFC3339) + return Wildcard +} diff --git a/pkg/logs/patterns/token/token_test.go b/pkg/logs/patterns/token/token_test.go new file mode 100644 index 000000000000..b9a81b68479e --- /dev/null +++ b/pkg/logs/patterns/token/token_test.go @@ -0,0 +1,103 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. 
+// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package token + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestNewToken(t *testing.T) { + token := NewToken(TokenWord, "test", PotentialWildcard) + + assert.Equal(t, TokenWord, token.Type, "Expected TokenWord") + assert.Equal(t, "test", token.Value, "Expected 'test'") + assert.Equal(t, PotentialWildcard, token.Wildcard, "Expected PotentialWildcard") +} + +func TestToken_Compare_DifferentTypes(t *testing.T) { + word := NewToken(TokenWord, "hello", PotentialWildcard) + number := NewToken(TokenNumeric, "123", PotentialWildcard) + + result := word.Compare(&number) + assert.Equal(t, Conflict, result, "Different types should return Conflict") +} + +func TestToken_Compare_SameValue(t *testing.T) { + token1 := NewToken(TokenWord, "hello", PotentialWildcard) + token2 := NewToken(TokenWord, "hello", PotentialWildcard) + + result := token1.Compare(&token2) + assert.Equal(t, Identical, result, "Same values should return Identical") +} + +func TestToken_Compare_WildcardMatches(t *testing.T) { + wildcard := NewToken(TokenWord, "anything", IsWildcard) + concrete := NewToken(TokenWord, "hello", PotentialWildcard) + + result := wildcard.Compare(&concrete) + assert.Equal(t, Identical, result, "Wildcard should match any value of same type") +} + +func TestToken_Compare_WhitespaceConflict(t *testing.T) { + space1 := NewToken(TokenWhitespace, " ", NotWildcard) + space2 := NewToken(TokenWhitespace, " ", NotWildcard) + + result := space1.Compare(&space2) + assert.Equal(t, Conflict, result, "Different whitespace should return Conflict") +} + +func TestToken_Compare_WordsWithDifferentValues(t *testing.T) { + // Both PotentialWildcard - should merge to wildcard + word1 := NewToken(TokenWord, "hello", PotentialWildcard) + word2 := NewToken(TokenWord, "world", PotentialWildcard) + + result := word1.Compare(&word2) + assert.Equal(t, 
Wildcard, result, "Different PotentialWildcard words should return Wildcard") + + // One is NotWildcard - should conflict + word3 := NewToken(TokenWord, "INFO", NotWildcard) + word4 := NewToken(TokenWord, "ERROR", PotentialWildcard) + + result2 := word3.Compare(&word4) + assert.Equal(t, Conflict, result2, "Words with NotWildcard should return Conflict") +} + +func TestToken_Compare_StructuredTypes(t *testing.T) { + // Different IPs should merge to wildcard + ip1 := NewToken(TokenIPv4, "192.168.1.1", PotentialWildcard) + ip2 := NewToken(TokenIPv4, "10.0.0.1", PotentialWildcard) + + result := ip1.Compare(&ip2) + assert.Equal(t, Wildcard, result, "Different structured types (same type) should return Wildcard") + + // Different numbers should merge to wildcard + num1 := NewToken(TokenNumeric, "123", PotentialWildcard) + num2 := NewToken(TokenNumeric, "456", PotentialWildcard) + + result2 := num1.Compare(&num2) + assert.Equal(t, Wildcard, result2, "Different numeric values should return Wildcard") + + // Different dates should merge to wildcard + date1 := NewToken(TokenDate, "2023-01-01", PotentialWildcard) + date2 := NewToken(TokenDate, "2023-12-31", PotentialWildcard) + + result3 := date1.Compare(&date2) + assert.Equal(t, Wildcard, result3, "Different dates should return Wildcard") +} + +func TestToken_String(t *testing.T) { + // Regular token + token := Token{Type: TokenWord, Value: "hello"} + expected := "Word(hello)" + assert.Equal(t, expected, token.String(), "Token String() should format correctly") + + // Wildcard token - still shows the value, not "*" + wildcardToken := Token{Type: TokenWord, Value: "test", Wildcard: IsWildcard} + expectedWildcard := "Word(test)" + assert.Equal(t, expectedWildcard, wildcardToken.String(), "Wildcard token String() should show value") +} diff --git a/pkg/logs/patterns/token/tokenlist.go b/pkg/logs/patterns/token/tokenlist.go new file mode 100644 index 000000000000..8cdbfa915fe7 --- /dev/null +++ 
b/pkg/logs/patterns/token/tokenlist.go @@ -0,0 +1,59 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package token provides data structures and utilities for tokenizing log messages. +package token + +import ( + "strings" +) + +// TokenList represents a sequence of tokens +type TokenList struct { + Tokens []Token +} + +// NewTokenList creates a new empty TokenList +func NewTokenList() *TokenList { + return &TokenList{Tokens: make([]Token, 0)} +} + +// NewTokenListWithTokens creates a new TokenList with the provided tokens +func NewTokenListWithTokens(tokens []Token) *TokenList { + return &TokenList{Tokens: tokens} +} + +// Add appends one or more tokens to the list +func (tl *TokenList) Add(tokens ...Token) { + tl.Tokens = append(tl.Tokens, tokens...) +} + +// AddToken creates and adds a new token with the given type and value +func (tl *TokenList) AddToken(tokenType TokenType, value string, wildcard WildcardStatus) { + tl.Tokens = append(tl.Tokens, NewToken(tokenType, value, wildcard)) +} + +// Length returns the number of tokens +func (tl *TokenList) Length() int { + return len(tl.Tokens) +} + +// IsEmpty returns true if the list is empty +func (tl *TokenList) IsEmpty() bool { + return len(tl.Tokens) == 0 +} + +// String returns a string representation +func (tl *TokenList) String() string { + if tl.IsEmpty() { + return "[]" + } + + var parts []string + for _, token := range tl.Tokens { + parts = append(parts, token.String()) + } + return "[" + strings.Join(parts, ", ") + "]" +} diff --git a/pkg/logs/patterns/token/tokenlist_test.go b/pkg/logs/patterns/token/tokenlist_test.go new file mode 100644 index 000000000000..8a6b571f6f3d --- /dev/null +++ b/pkg/logs/patterns/token/tokenlist_test.go @@ -0,0 +1,115 @@ +// Unless explicitly stated otherwise all files 
in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package token + +import ( + "testing" +) + +func TestTokenList_NewTokenList(t *testing.T) { + // Empty token list + tl := NewTokenList() + if tl == nil { + t.Fatal("NewTokenList should not return nil") + } + if !tl.IsEmpty() { + t.Error("New TokenList should be empty") + } + if tl.Length() != 0 { + t.Errorf("New TokenList should have length 0, got %d", tl.Length()) + } + + // Token list with initial tokens + tokens := []Token{ + {Type: TokenWord, Value: "hello"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenWord, Value: "world"}, + } + tl2 := NewTokenListWithTokens(tokens) + if tl2.Length() != 3 { + t.Errorf("Expected length 3, got %d", tl2.Length()) + } + if tl2.IsEmpty() { + t.Error("TokenList with tokens should not be empty") + } +} + +func TestTokenList_Add(t *testing.T) { + tl := NewTokenList() + + token1 := Token{Type: TokenWord, Value: "hello"} + tl.Add(token1) + + if tl.Length() != 1 { + t.Errorf("Expected length 1, got %d", tl.Length()) + } + if tl.IsEmpty() { + t.Error("TokenList should not be empty after adding token") + } + if tl.Tokens[0].Value != "hello" { + t.Errorf("Expected token value 'hello', got '%s'", tl.Tokens[0].Value) + } +} + +func TestTokenList_String(t *testing.T) { + // Empty list + tl := NewTokenList() + if tl.String() != "[]" { + t.Errorf("Empty TokenList string should be '[]', got '%s'", tl.String()) + } + + // Non-empty list + tl.Add(Token{Type: TokenWord, Value: "hello"}) + tl.Add(Token{Type: TokenWhitespace, Value: " "}) + tl.Add(Token{Type: TokenWord, Value: "world"}) + + expected := "[Word(hello), Whitespace( ), Word(world)]" + if tl.String() != expected { + t.Errorf("Expected '%s', got '%s'", expected, tl.String()) + } +} + +func TestTokenList_PositionSignature(t *testing.T) { + // Empty token list + emptyTL := 
NewTokenList() + if positionSignature(emptyTL) != "" { + t.Error("Empty TokenList should have empty position signature") + } + + // Non-empty token list + tokens := []Token{ + {Type: TokenHTTPMethod, Value: "GET"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenAbsolutePath, Value: "/api"}, + } + tl := NewTokenListWithTokens(tokens) + + expectedPosition := "HTTPMethod|Whitespace|AbsolutePath" + if positionSignature(tl) != expectedPosition { + t.Errorf("Expected position signature '%s', got '%s'", expectedPosition, positionSignature(tl)) + } +} + +func TestTokenList_Signature(t *testing.T) { + // Test that TokenList.Signature() creates a proper signature + tokens := []Token{ + {Type: TokenHTTPMethod, Value: "GET"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenAbsolutePath, Value: "/api"}, + } + tl := NewTokenListWithTokens(tokens) + sig := NewSignature(tl) + + if sig.Length != 3 { + t.Errorf("Expected signature length 3, got %d", sig.Length) + } + if sig.Hash == 0 { + t.Error("Signature hash should not be 0") + } + if sig.Position == "" { + t.Error("Signature position should not be empty") + } +} diff --git a/pkg/logs/patterns/token/tokentype_string.go b/pkg/logs/patterns/token/tokentype_string.go new file mode 100644 index 000000000000..91b756894d27 --- /dev/null +++ b/pkg/logs/patterns/token/tokentype_string.go @@ -0,0 +1,36 @@ +// Code generated by "stringer -type=TokenType -trimprefix=Token"; DO NOT EDIT. + +package token + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[TokenUnknown-0] + _ = x[TokenWord-1] + _ = x[TokenNumeric-2] + _ = x[TokenWhitespace-3] + _ = x[TokenIPv4-4] + _ = x[TokenIPv6-5] + _ = x[TokenEmail-6] + _ = x[TokenURI-7] + _ = x[TokenAbsolutePath-8] + _ = x[TokenHTTPMethod-9] + _ = x[TokenHTTPStatus-10] + _ = x[TokenSeverityLevel-11] + _ = x[TokenDate-12] +} + +const _TokenType_name = "UnknownWordNumericWhitespaceIPv4IPv6EmailURIAbsolutePathHTTPMethodHTTPStatusSeverityLevelDate" + +var _TokenType_index = [...]uint8{0, 7, 11, 18, 28, 32, 36, 41, 44, 56, 66, 76, 89, 93} + +func (i TokenType) String() string { + idx := int(i) - 0 + if i < 0 || idx >= len(_TokenType_index)-1 { + return "TokenType(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _TokenType_name[_TokenType_index[idx]:_TokenType_index[idx+1]] +} diff --git a/pkg/logs/pipeline/pipeline.go b/pkg/logs/pipeline/pipeline.go index 499209b0d313..9f36a896cc28 100644 --- a/pkg/logs/pipeline/pipeline.go +++ b/pkg/logs/pipeline/pipeline.go @@ -20,7 +20,9 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/metrics" "github.com/DataDog/datadog-agent/pkg/logs/processor" "github.com/DataDog/datadog-agent/pkg/logs/sender" + grpcsender "github.com/DataDog/datadog-agent/pkg/logs/sender/grpc" compressioncommon "github.com/DataDog/datadog-agent/pkg/util/compression" + "github.com/DataDog/datadog-agent/pkg/util/log" ) // Pipeline processes and sends messages to the backend @@ -54,6 +56,10 @@ func NewPipeline( } else { encoder = processor.JSONServerlessInitEncoder } + } else if endpoints.UseGRPC { + // Throwaway code to test with existing pipelines + // TODO change to real encoder once State component is ready + encoder = grpcsender.MockEncoder } else if endpoints.UseHTTP { encoder = processor.JSONEncoder } else if endpoints.UseProto { @@ -105,13 +111,20 @@ func getStrategy( compressor logscompression.Component, instanceID string, ) sender.Strategy { - if endpoints.UseHTTP || serverlessMeta.IsEnabled() { + if endpoints.UseGRPC || 
endpoints.UseHTTP || serverlessMeta.IsEnabled() { var encoder compressioncommon.Compressor encoder = compressor.NewCompressor(compressioncommon.NoneKind, 0) if endpoints.Main.UseCompression { encoder = compressor.NewCompressor(endpoints.Main.CompressionKind, endpoints.Main.CompressionLevel) } + if endpoints.UseGRPC { + translator := grpcsender.NewMessageTranslator() + // TODO: Consider sharing cluster manager across pipelines for better pattern clustering: + // translator := grpcsender.NewMessageTranslator(getSharedClusterManager()) + statefulInputChan := translator.Start(inputChan, pkgconfigsetup.Datadog().GetInt("logs_config.message_channel_size")) + return grpcsender.NewBatchStrategy(statefulInputChan, outputChan, flushChan, endpoints.BatchWait, endpoints.BatchMaxSize, endpoints.BatchMaxContentSize, "logs", encoder, pipelineMonitor, instanceID) + } return sender.NewBatchStrategy( inputChan, outputChan, @@ -125,5 +138,7 @@ func getStrategy( pipelineMonitor, instanceID) } + + log.Infof("Pipeline: Using StreamStrategy (default)") return sender.NewStreamStrategy(inputChan, outputChan, compressor.NewCompressor(compressioncommon.NoneKind, 0)) } diff --git a/pkg/logs/pipeline/provider.go b/pkg/logs/pipeline/provider.go index 9737f8a5c007..ad8c3002b800 100644 --- a/pkg/logs/pipeline/provider.go +++ b/pkg/logs/pipeline/provider.go @@ -22,6 +22,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/message" "github.com/DataDog/datadog-agent/pkg/logs/metrics" "github.com/DataDog/datadog-agent/pkg/logs/sender" + grpcsender "github.com/DataDog/datadog-agent/pkg/logs/sender/grpc" httpsender "github.com/DataDog/datadog-agent/pkg/logs/sender/http" tcpsender "github.com/DataDog/datadog-agent/pkg/logs/sender/tcp" "github.com/DataDog/datadog-agent/pkg/logs/status/statusinterface" @@ -88,7 +89,9 @@ func NewProvider( var senderImpl sender.PipelineComponent serverlessMeta := sender.NewServerlessMeta(serverless) - if endpoints.UseHTTP { + if endpoints.UseGRPC { + senderImpl = 
grpcsender.NewSender(numberOfPipelines, cfg, sink, endpoints, destinationsContext) + } else if endpoints.UseHTTP { senderImpl = httpSender(numberOfPipelines, cfg, sink, endpoints, destinationsContext, serverlessMeta, legacyMode) } else { senderImpl = tcpSender(numberOfPipelines, cfg, sink, endpoints, destinationsContext, status, serverlessMeta, legacyMode) diff --git a/pkg/logs/processor/processor.go b/pkg/logs/processor/processor.go index 6bff04fd564a..a4583b491aae 100644 --- a/pkg/logs/processor/processor.go +++ b/pkg/logs/processor/processor.go @@ -18,6 +18,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/diagnostic" "github.com/DataDog/datadog-agent/pkg/logs/message" "github.com/DataDog/datadog-agent/pkg/logs/metrics" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering" "github.com/DataDog/datadog-agent/pkg/util/log" ) @@ -51,6 +52,9 @@ type Processor struct { configChan chan failoverConfig failoverConfig failoverConfig + // Pattern extraction components + clusterManager *clustering.ClusterManager + // Telemetry pipelineMonitor metrics.PipelineMonitor utilization metrics.UtilizationMonitor diff --git a/pkg/logs/sender/grpc/batch_strategy.go b/pkg/logs/sender/grpc/batch_strategy.go new file mode 100644 index 000000000000..3bac15e1c61b --- /dev/null +++ b/pkg/logs/sender/grpc/batch_strategy.go @@ -0,0 +1,260 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +//nolint:revive // TODO(AML) Fix revive linter +package grpc + +import ( + "time" + + "github.com/benbjohnson/clock" + "google.golang.org/protobuf/proto" + + "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/logs/metrics" + "github.com/DataDog/datadog-agent/pkg/logs/sender" + "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" + "github.com/DataDog/datadog-agent/pkg/telemetry" + "github.com/DataDog/datadog-agent/pkg/util/compression" + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +var ( + tlmDroppedTooLarge = telemetry.NewCounter("logs_sender_grpc_batch_strategy", "dropped_too_large", []string{"pipeline"}, "Number of payloads dropped due to being too large") +) + +// StatefulExtra holds state changes (non-Log datums) from a batch +// Used by inflight tracker to maintain snapshot state for stream rotation +type StatefulExtra struct { + StateChanges []*statefulpb.Datum +} + +// isStateDatum returns true if the datum represents a state change +// (pattern/dict define/delete operations) +func isStateDatum(datum *statefulpb.Datum) bool { + switch datum.Data.(type) { + case *statefulpb.Datum_PatternDefine, *statefulpb.Datum_PatternDelete, + *statefulpb.Datum_DictEntryDefine, *statefulpb.Datum_DictEntryDelete: + return true + default: + return false + } +} + +// batchStrategy contains batching logic for gRPC sender without serializer +// It collects Datum objects from StatefulMessages and creates Payload with serialized DatumSequence +// Note: Serverless logs are not supported in this PoC implementation +type batchStrategy struct { + inputChan chan *message.StatefulMessage + outputChan chan *message.Payload + flushChan chan struct{} + buffer *sender.MessageBuffer + pipelineName string + batchWait time.Duration + compression compression.Compressor + stopChan chan struct{} // closed when the goroutine has finished + clock clock.Clock + + // For gRPC: store Datums separately since MessageBuffer only stores 
metadata + grpcDatums []*statefulpb.Datum + + // Telemetry + pipelineMonitor metrics.PipelineMonitor + utilization metrics.UtilizationMonitor + instanceID string +} + +// NewBatchStrategy returns a new gRPC batch strategy +func NewBatchStrategy(inputChan chan *message.StatefulMessage, + outputChan chan *message.Payload, + flushChan chan struct{}, + batchWait time.Duration, + maxBatchSize int, + maxContentSize int, + pipelineName string, + compression compression.Compressor, + pipelineMonitor metrics.PipelineMonitor, + instanceID string, +) sender.Strategy { + return newBatchStrategyWithClock(inputChan, outputChan, flushChan, batchWait, maxBatchSize, maxContentSize, pipelineName, clock.New(), compression, pipelineMonitor, instanceID) +} + +func newBatchStrategyWithClock(inputChan chan *message.StatefulMessage, + outputChan chan *message.Payload, + flushChan chan struct{}, + batchWait time.Duration, + maxBatchSize int, + maxContentSize int, + pipelineName string, + clock clock.Clock, + compression compression.Compressor, + pipelineMonitor metrics.PipelineMonitor, + instanceID string, +) sender.Strategy { + + return &batchStrategy{ + inputChan: inputChan, + outputChan: outputChan, + flushChan: flushChan, + buffer: sender.NewMessageBuffer(maxBatchSize, maxContentSize), + batchWait: batchWait, + compression: compression, + stopChan: make(chan struct{}), + pipelineName: pipelineName, + clock: clock, + grpcDatums: make([]*statefulpb.Datum, 0), + pipelineMonitor: pipelineMonitor, + utilization: pipelineMonitor.MakeUtilizationMonitor(metrics.StrategyTlmName, instanceID), + instanceID: instanceID, + } +} + +// Mostly copy/pasted from sender/bactch_strategy.go +func (s *batchStrategy) Stop() { + close(s.inputChan) + <-s.stopChan +} + +// Mostly copy/pasted from sender/bactch_strategy.go +func (s *batchStrategy) Start() { + go func() { + flushTicker := s.clock.Ticker(s.batchWait) + defer func() { + s.flushBuffer(s.outputChan) + flushTicker.Stop() + close(s.stopChan) + }() + 
for { + select { + case m, isOpen := <-s.inputChan: + if !isOpen { + // inputChan has been closed, no more payloads are expected + return + } + s.processMessage(m, s.outputChan) + case <-flushTicker.C: + // flush the payloads at a regular interval so pending messages don't wait here for too long. + s.flushBuffer(s.outputChan) + case <-s.flushChan: + // flush payloads on demand, used for infrequently running serverless functions + s.flushBuffer(s.outputChan) + } + } + }() +} + +func (s *batchStrategy) addMessage(m *message.StatefulMessage) bool { + // No utilization tracking here - just trivial slice operations + // Real work (proto marshaling) is tracked in sendMessagesWithDatums() + + // Defensive check - should never happen with proper message construction + if m.Datum == nil { + return false + } + + // Try to add to buffer + if s.buffer.AddMessageWithSize(m.Metadata, m.Metadata.RawDataLen) { + s.grpcDatums = append(s.grpcDatums, m.Datum) + return true + } + + // Buffer full (not an error) + return false +} + +// Mostly copy/pasted from batch.go +func (s *batchStrategy) processMessage(m *message.StatefulMessage, outputChan chan *message.Payload) { + // Track latency stats from metadata + if m.Metadata.Origin != nil { + m.Metadata.Origin.LogSource.LatencyStats.Add(m.Metadata.GetLatency()) + } + + added := s.addMessage(m) + if !added || s.buffer.IsFull() { + s.flushBuffer(outputChan) + } + if !added { + // it's possible that the m could not be added because the buffer was full + // so we need to retry once again + added = s.addMessage(m) + if !added { + log.Warnf("Dropped message in pipeline=%s reason=too-large ContentLength=%d ContentSizeLimit=%d", s.pipelineName, m.Metadata.RawDataLen, s.buffer.ContentSizeLimit()) + tlmDroppedTooLarge.Inc(s.pipelineName) + } + } +} + +// flushBuffer sends all the messages that are stored in the buffer and forwards them +// to the next stage of the pipeline. 
+func (s *batchStrategy) flushBuffer(outputChan chan *message.Payload) { + if s.buffer.IsEmpty() { + return + } + + s.utilization.Start() + + messagesMetadata := s.buffer.GetMessages() + s.buffer.Clear() + + // Use the collected Datums and clear them + grpcDatums := s.grpcDatums + s.grpcDatums = make([]*statefulpb.Datum, 0) + + s.sendMessagesWithDatums(messagesMetadata, grpcDatums, outputChan) +} + +func (s *batchStrategy) sendMessagesWithDatums(messagesMetadata []*message.MessageMetadata, grpcDatums []*statefulpb.Datum, outputChan chan *message.Payload) { + defer s.utilization.Stop() + + unencodedSize := 0 + for _, msgMeta := range messagesMetadata { + unencodedSize += msgMeta.RawDataLen + } + + // Extract all state changes from this batch for snapshot management + var stateChanges []*statefulpb.Datum + for _, datum := range grpcDatums { + if isStateDatum(datum) { + stateChanges = append(stateChanges, datum) + } + } + + // Create DatumSequence and marshal to bytes + datumSeq := &statefulpb.DatumSequence{ + Data: grpcDatums, + } + + serialized, err := proto.Marshal(datumSeq) + if err != nil { + log.Errorf("Failed to marshal DatumSequence: %v", err) + return + } + + // Compress the serialized protobuf data + compressed, err := s.compression.Compress(serialized) + if err != nil { + log.Errorf("Failed to compress DatumSequence: %v", err) + return + } + + // Create payload with compressed data + p := &message.Payload{ + MessageMetas: messagesMetadata, + Encoded: compressed, + Encoding: s.compression.ContentEncoding(), + UnencodedSize: unencodedSize, + } + + // Store batch-level state changes in payload + if len(stateChanges) > 0 { + p.StatefulExtra = &StatefulExtra{ + StateChanges: stateChanges, + } + } + + outputChan <- p + s.pipelineMonitor.ReportComponentEgress(p, metrics.StrategyTlmName, s.instanceID) + s.pipelineMonitor.ReportComponentIngress(p, metrics.SenderTlmName, metrics.SenderTlmInstanceID) +} diff --git a/pkg/logs/sender/grpc/batch_strategy_test.go 
// createTestStatefulMessage builds a StatefulMessage for tests. Its metadata
// comes from a fresh message.Message whose RawDataLen is set to len(content),
// and its Datum is a raw log entry carrying content with a fixed timestamp.
func createTestStatefulMessage(content string) *message.StatefulMessage {
	msg := message.NewMessage([]byte(content), nil, "", 0)
	msg.MessageMetadata.RawDataLen = len(content)

	datum := &statefulpb.Datum{
		Data: &statefulpb.Datum_Logs{
			Logs: &statefulpb.Log{
				Timestamp: 12345,
				Content: &statefulpb.Log_Raw{
					Raw: content,
				},
			},
		},
	}

	return &message.StatefulMessage{
		Metadata: &msg.MessageMetadata,
		Datum:    datum,
	}
}

// TestBatchStrategySendsPayloadWhenBufferIsFull sends two messages to a
// strategy whose batch size is 2 and expects a single payload holding both,
// encoded as an uncompressed ("identity") DatumSequence.
func TestBatchStrategySendsPayloadWhenBufferIsFull(t *testing.T) {
	input := make(chan *message.StatefulMessage)
	output := make(chan *message.Payload)
	flushChan := make(chan struct{})

	s := NewBatchStrategy(
		input,
		output,
		flushChan,
		100*time.Millisecond,
		2, // maxBatchSize
		1000,
		"test",
		compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1),
		metrics.NewNoopPipelineMonitor(""),
		"test")
	s.Start()

	message1 := createTestStatefulMessage("a")
	input <- message1

	message2 := createTestStatefulMessage("b")
	input <- message2

	// Expect payload to be sent because buffer is full
	payload := <-output
	assert.Equal(t, 2, len(payload.MessageMetas))
	assert.Equal(t, message1.Metadata, payload.MessageMetas[0])
	assert.Equal(t, message2.Metadata, payload.MessageMetas[1])
	assert.Equal(t, "identity", payload.Encoding)
	// Each message sets RawDataLen to 1 ("a" and "b"), hence 2 in total.
	assert.Equal(t, 2, payload.UnencodedSize)

	// Verify the payload contains a valid DatumSequence
	var datumSeq statefulpb.DatumSequence
	err := proto.Unmarshal(payload.Encoded, &datumSeq)
	require.NoError(t, err)
	assert.Equal(t, 2, len(datumSeq.Data))
	assert.Equal(t, "a", datumSeq.Data[0].GetLogs().GetRaw())
	assert.Equal(t, "b", datumSeq.Data[1].GetLogs().GetRaw())

	s.Stop()

	// After Stop the test expects the input channel to have been closed.
	if _, isOpen := <-input; isOpen {
		assert.Fail(t, "input should be closed")
	}
}

// TestBatchStrategySendsPayloadWhenBufferIsOutdated uses a mock clock to check
// that a batch far below the size limit is still flushed once the clock is
// advanced past the flush interval. The cycle is repeated three times.
func TestBatchStrategySendsPayloadWhenBufferIsOutdated(t *testing.T) {
	input := make(chan *message.StatefulMessage)
	output := make(chan *message.Payload)
	flushChan := make(chan struct{})
	timerInterval := 100 * time.Millisecond

	clk := clock.NewMock()
	s := newBatchStrategyWithClock(
		input,
		output,
		flushChan,
		timerInterval,
		100, // maxBatchSize
		1000,
		"test",
		clk,
		compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1),
		metrics.NewNoopPipelineMonitor(""),
		"test")
	s.Start()

	for round := 0; round < 3; round++ {
		m := createTestStatefulMessage("test")
		input <- m

		// It should flush in this time
		clk.Add(2 * timerInterval)

		payload := <-output
		assert.EqualValues(t, m.Metadata, payload.MessageMetas[0])

		// Verify the payload contains a valid DatumSequence
		var datumSeq statefulpb.DatumSequence
		err := proto.Unmarshal(payload.Encoded, &datumSeq)
		require.NoError(t, err)
		assert.Equal(t, 1, len(datumSeq.Data))
	}

	s.Stop()
	if _, isOpen := <-input; isOpen {
		assert.Fail(t, "input should be closed")
	}
}

// TestBatchStrategySendsPayloadWhenClosingInput checks that stopping the
// strategy flushes a partially filled batch. The mock clock is never advanced,
// so the payload can only come from the stop path.
func TestBatchStrategySendsPayloadWhenClosingInput(t *testing.T) {
	input := make(chan *message.StatefulMessage)
	output := make(chan *message.Payload)
	flushChan := make(chan struct{})

	clk := clock.NewMock()
	s := newBatchStrategyWithClock(
		input,
		output,
		flushChan,
		100*time.Millisecond,
		2,
		1000,
		"test",
		clk,
		compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1),
		metrics.NewNoopPipelineMonitor(""),
		"test")
	s.Start()

	// NOTE(review): this local shadows the imported `message` package for the
	// rest of the function.
	message := createTestStatefulMessage("test")
	input <- message

	// Stop runs in its own goroutine because output is unbuffered and is only
	// read further down.
	go func() {
		s.Stop()
	}()

	if _, isOpen := <-input; isOpen {
		assert.Fail(t, "input should be closed")
	}

	// Expect payload to be sent before timer, so we never advance the clock; if this
	// doesn't work, the test will hang
	payload := <-output
	assert.Equal(t, message.Metadata, payload.MessageMetas[0])
}

// TestBatchStrategyShouldNotBlockWhenStoppingGracefully is the same scenario as
// the closing-input test but with the real clock (NewBatchStrategy): Stop must
// close the input and still deliver the buffered message.
func TestBatchStrategyShouldNotBlockWhenStoppingGracefully(t *testing.T) {
	input := make(chan *message.StatefulMessage)
	output := make(chan *message.Payload)
	flushChan := make(chan struct{})

	s := NewBatchStrategy(
		input,
		output,
		flushChan,
		100*time.Millisecond,
		2,
		1000,
		"test",
		compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1),
		metrics.NewNoopPipelineMonitor(""),
		"test")
	s.Start()

	// NOTE(review): this local shadows the imported `message` package for the
	// rest of the function.
	message := createTestStatefulMessage("test")
	input <- message

	go func() {
		s.Stop()
	}()

	if _, isOpen := <-input; isOpen {
		assert.Fail(t, "input should be closed")
	}

	payload := <-output
	assert.Equal(t, message.Metadata, payload.MessageMetas[0])
}

// TestBatchStrategySynchronousFlush buffers three messages under limits that
// cannot trigger a flush on their own, then checks that Stop emits exactly one
// payload containing all of them.
func TestBatchStrategySynchronousFlush(t *testing.T) {
	input := make(chan *message.StatefulMessage)
	output := make(chan *message.Payload)
	flushChan := make(chan struct{})

	// Batch size is large so it will not flush until we trigger it manually
	// Flush time is large so it won't automatically trigger during this test
	strategy := NewBatchStrategy(
		input,
		output,
		flushChan,
		time.Hour,
		100,
		10000,
		"test",
		compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1),
		metrics.NewNoopPipelineMonitor(""),
		"test")
	strategy.Start()

	// All of these messages will get buffered
	messages := []*message.StatefulMessage{
		createTestStatefulMessage("a"),
		createTestStatefulMessage("b"),
		createTestStatefulMessage("c"),
	}

	messageMeta := make([]*message.MessageMetadata, len(messages))
	for idx, m := range messages {
		input <- m
		messageMeta[idx] = m.Metadata
	}

	// Since the batch size is large there should be nothing on the output yet
	select {
	case <-output:
		assert.Fail(t, "there should be nothing on the output channel yet")
	default:
	}

	go func() {
		// Stop triggers the flush; the payload is read from output below
		strategy.Stop()
	}()

	if _, isOpen := <-input; isOpen {
		assert.Fail(t, "input should be closed")
	}

	payload := <-output
	assert.ElementsMatch(t, messageMeta, payload.MessageMetas)

	// A single payload is expected; nothing else may be pending
	select {
	case <-output:
		assert.Fail(t, "the output channel should still be empty")
	default:
	}
}

// TestBatchStrategyFlushChannel buffers three messages and checks that a
// signal on flushChan emits exactly one payload containing all of them.
func TestBatchStrategyFlushChannel(t *testing.T) {
	input := make(chan *message.StatefulMessage)
	output := make(chan *message.Payload)
	flushChan := make(chan struct{})

	// Batch size is large so it will not flush until we trigger it manually
	// Flush time is large so it won't automatically trigger during this test
	strategy := NewBatchStrategy(
		input,
		output,
		flushChan,
		time.Hour,
		100,
		10000,
		"test",
		compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1),
		metrics.NewNoopPipelineMonitor(""),
		"test")
	strategy.Start()

	// All of these messages will get buffered
	messages := []*message.StatefulMessage{
		createTestStatefulMessage("a"),
		createTestStatefulMessage("b"),
		createTestStatefulMessage("c"),
	}
	messageMeta := make([]*message.MessageMetadata, len(messages))
	for idx, m := range messages {
		input <- m
		messageMeta[idx] = m.Metadata
	}

	// Since the batch size is large there should be nothing on the output yet
	select {
	case <-output:
		assert.Fail(t, "there should be nothing on the output channel yet")
	default:
	}

	// Trigger a manual flush
	flushChan <- struct{}{}

	payload := <-output
	assert.ElementsMatch(t, messageMeta, payload.MessageMetas)

	// Ensure we read all of the messages
	select {
	case <-output:
		assert.Fail(t, "the output channel should still be empty")
	default:
	}

	// End the test strategy
	go func() {
		// Everything was already flushed above; Stop only shuts the strategy down
		strategy.Stop()
	}()

	if _, isOpen := <-input; isOpen {
		assert.Fail(t, "input should be closed")
	}
}

// TestBatchStrategyMessageTooLarge sends one small message and one that
// exceeds the content size limit of 10, then checks that the flushed payload
// contains only the small one.
func TestBatchStrategyMessageTooLarge(t *testing.T) {
	input := make(chan *message.StatefulMessage)
	output := make(chan *message.Payload, 10) // Buffered to prevent deadlock
	flushChan := make(chan struct{})

	strategy := NewBatchStrategy(
		input,
		output,
		flushChan,
		time.Hour,
		100,
		10, // Small content size limit
		"test",
		compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1),
		metrics.NewNoopPipelineMonitor(""),
		"test")
	strategy.Start()

	// Send a message that fits
	normalMessage := createTestStatefulMessage("small")
	input <- normalMessage

	// Send a message that's too large (will be dropped)
	largeMessage := createTestStatefulMessage("this message is way too large for the content size limit")
	input <- largeMessage

	// Trigger flush
	flushChan <- struct{}{}

	// Should only receive the normal message
	payload := <-output
	assert.Equal(t, 1, len(payload.MessageMetas))
	assert.Equal(t, normalMessage.Metadata, payload.MessageMetas[0])

	// Verify no more payloads
	select {
	case <-output:
		assert.Fail(t, "should not receive more payloads")
	default:
	}

	strategy.Stop()
}

// TestBatchStrategyInvalidDatum sends a message whose Datum is nil followed by
// a valid one and checks that the flushed payload contains only the valid
// message.
func TestBatchStrategyInvalidDatum(t *testing.T) {
	input := make(chan *message.StatefulMessage)
	output := make(chan *message.Payload, 10) // Buffered to prevent deadlock
	flushChan := make(chan struct{})

	strategy := NewBatchStrategy(
		input,
		output,
		flushChan,
		time.Hour,
		100,
		1000,
		"test",
		compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1),
		metrics.NewNoopPipelineMonitor(""),
		"test")
	strategy.Start()

	// Send message with nil Datum
	msg1 := message.NewMessage([]byte("test"), nil, "", 0)
	invalidMsg1 := &message.StatefulMessage{
		Metadata: &msg1.MessageMetadata,
		Datum:    nil,
	}
	input <- invalidMsg1

	// Note: With strongly-typed Datum field, wrong type is prevented at compile time

	// Send a valid message
	validMsg := createTestStatefulMessage("valid")
	input <- validMsg

	// Trigger flush
	flushChan <- struct{}{}

	// Should only receive the valid message
	payload := <-output
	assert.Equal(t, 1, len(payload.MessageMetas))
	assert.Equal(t, validMsg.Metadata, payload.MessageMetas[0])

	strategy.Stop()
}

// TestBatchStrategyCompression flushes five identical messages through the
// identity compressor and checks the payload's encoding label, that it is
// non-empty, and that it unmarshals back to the five original logs.
func TestBatchStrategyCompression(t *testing.T) {
	input := make(chan *message.StatefulMessage)
	output := make(chan *message.Payload, 10) // Buffered to prevent deadlock
	flushChan := make(chan struct{})

	// Use identity (no-op) compression for simplicity
	// Testing actual compression behavior is covered by the compression package tests
	compressor := compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1)

	strategy := NewBatchStrategy(
		input,
		output,
		flushChan,
		time.Hour,
		100,
		10000,
		"test",
		compressor,
		metrics.NewNoopPipelineMonitor(""),
		"test")
	strategy.Start()

	// Send several messages
	for i := 0; i < 5; i++ {
		msg := createTestStatefulMessage("test message")
		input <- msg
	}

	// Trigger flush
	flushChan <- struct{}{}

	payload := <-output
	assert.Equal(t, 5, len(payload.MessageMetas))
	assert.Equal(t, "identity", payload.Encoding)
	assert.NotEmpty(t, payload.Encoded)

	// Verify the payload contains valid DatumSequence (identity compression = no compression)
	var datumSeq statefulpb.DatumSequence
	err := proto.Unmarshal(payload.Encoded, &datumSeq)
	require.NoError(t, err)
	assert.Equal(t, 5, len(datumSeq.Data))
	for _, datum := range datumSeq.Data {
		assert.Equal(t, "test message", datum.GetLogs().GetRaw())
	}

	strategy.Stop()
}

// TestBatchStrategyStatefulExtra tests that state changes are correctly tracked in StatefulExtra.
// Three timer-flushed batches mix log messages with pattern/dictionary
// define and delete messages; each payload's StatefulExtra must list only the
// state-changing datums of that batch, in the order they were sent.
func TestBatchStrategyStatefulExtra(t *testing.T) {
	input := make(chan *message.StatefulMessage)
	output := make(chan *message.Payload, 10) // Buffered to prevent blocking
	flushChan := make(chan struct{})
	timerInterval := 100 * time.Millisecond

	clk := clock.NewMock()
	strategy := newBatchStrategyWithClock(
		input,
		output,
		flushChan,
		timerInterval,
		10, // maxBatchSize - large enough to not trigger size-based flush
		1000,
		"test",
		clk,
		compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1),
		metrics.NewNoopPipelineMonitor(""),
		"test")
	strategy.Start()

	// Helpers to create state change messages. They all carry empty content
	// and a RawDataLen of 0; only the Datum differs.
	createPatternDefineMsg := func(id uint64, template string) *message.StatefulMessage {
		msg := message.NewMessage([]byte(""), nil, "", 0)
		msg.MessageMetadata.RawDataLen = 0
		return &message.StatefulMessage{
			Metadata: &msg.MessageMetadata,
			Datum: &statefulpb.Datum{
				Data: &statefulpb.Datum_PatternDefine{
					PatternDefine: &statefulpb.PatternDefine{
						PatternId: id,
						Template:  template,
					},
				},
			},
		}
	}

	createDictEntryDefineMsg := func(id uint64, value string) *message.StatefulMessage {
		msg := message.NewMessage([]byte(""), nil, "", 0)
		msg.MessageMetadata.RawDataLen = 0
		return &message.StatefulMessage{
			Metadata: &msg.MessageMetadata,
			Datum: &statefulpb.Datum{
				Data: &statefulpb.Datum_DictEntryDefine{
					DictEntryDefine: &statefulpb.DictEntryDefine{
						Id:    id,
						Value: value,
					},
				},
			},
		}
	}

	createPatternDeleteMsg := func(id uint64) *message.StatefulMessage {
		msg := message.NewMessage([]byte(""), nil, "", 0)
		msg.MessageMetadata.RawDataLen = 0
		return &message.StatefulMessage{
			Metadata: &msg.MessageMetadata,
			Datum: &statefulpb.Datum{
				Data: &statefulpb.Datum_PatternDelete{
					PatternDelete: &statefulpb.PatternDelete{
						PatternId: id,
					},
				},
			},
		}
	}

	createDictEntryDeleteMsg := func(id uint64) *message.StatefulMessage {
		msg := message.NewMessage([]byte(""), nil, "", 0)
		msg.MessageMetadata.RawDataLen = 0
		return &message.StatefulMessage{
			Metadata: &msg.MessageMetadata,
			Datum: &statefulpb.Datum{
				Data: &statefulpb.Datum_DictEntryDelete{
					DictEntryDelete: &statefulpb.DictEntryDelete{
						Id: id,
					},
				},
			},
		}
	}

	// Same construction as createTestStatefulMessage, kept local to the test.
	createLogMsg := func(content string) *message.StatefulMessage {
		msg := message.NewMessage([]byte(content), nil, "", 0)
		msg.MessageMetadata.RawDataLen = len(content)
		return &message.StatefulMessage{
			Metadata: &msg.MessageMetadata,
			Datum: &statefulpb.Datum{
				Data: &statefulpb.Datum_Logs{
					Logs: &statefulpb.Log{
						Timestamp: 12345,
						Content: &statefulpb.Log_Raw{
							Raw: content,
						},
					},
				},
			},
		}
	}

	// Send all 14 events in sequence (5 + 6 + 3 across the three batches)
	// Batch 1 (5 entries): add p1, add d1, log, add p2, add d2
	input <- createPatternDefineMsg(1, "pattern1")
	input <- createDictEntryDefineMsg(1, "value1")
	input <- createLogMsg("log with p1/d1")
	input <- createPatternDefineMsg(2, "pattern2")
	input <- createDictEntryDefineMsg(2, "value2")

	// Advance clock to trigger timer-based flush for batch 1
	clk.Add(2 * timerInterval)

	// Receive and verify Batch 1
	payload1 := <-output
	require.Equal(t, 5, len(payload1.MessageMetas), "Batch 1 should have 5 messages")

	// Verify StatefulExtra for Batch 1 (the log entry is not a state change)
	require.NotNil(t, payload1.StatefulExtra, "Batch 1 should have StatefulExtra")
	extra1, ok := payload1.StatefulExtra.(*StatefulExtra)
	require.True(t, ok, "StatefulExtra should be of type *StatefulExtra")
	require.Equal(t, 4, len(extra1.StateChanges), "Batch 1 should have 4 state changes")

	// Check specific state changes in Batch 1
	assert.Equal(t, uint64(1), extra1.StateChanges[0].GetPatternDefine().PatternId)
	assert.Equal(t, "pattern1", extra1.StateChanges[0].GetPatternDefine().Template)
	assert.Equal(t, uint64(1), extra1.StateChanges[1].GetDictEntryDefine().Id)
	assert.Equal(t, "value1", extra1.StateChanges[1].GetDictEntryDefine().Value)
	assert.Equal(t, uint64(2), extra1.StateChanges[2].GetPatternDefine().PatternId)
	assert.Equal(t, "pattern2", extra1.StateChanges[2].GetPatternDefine().Template)
	assert.Equal(t, uint64(2), extra1.StateChanges[3].GetDictEntryDefine().Id)
	assert.Equal(t, "value2", extra1.StateChanges[3].GetDictEntryDefine().Value)

	// Batch 2 (6 entries): log, del p1, del d1, add p3, add d3, log
	input <- createLogMsg("log with p2/d2")
	input <- createPatternDeleteMsg(1)
	input <- createDictEntryDeleteMsg(1)
	input <- createPatternDefineMsg(3, "pattern3")
	input <- createDictEntryDefineMsg(3, "value3")
	input <- createLogMsg("log with p3/d3")

	// Advance clock to trigger timer-based flush for batch 2
	clk.Add(2 * timerInterval)

	// Receive and verify Batch 2
	payload2 := <-output
	require.Equal(t, 6, len(payload2.MessageMetas), "Batch 2 should have 6 messages")

	// Verify StatefulExtra for Batch 2 (two logs excluded, four state changes)
	require.NotNil(t, payload2.StatefulExtra, "Batch 2 should have StatefulExtra")
	extra2, ok := payload2.StatefulExtra.(*StatefulExtra)
	require.True(t, ok, "StatefulExtra should be of type *StatefulExtra")
	require.Equal(t, 4, len(extra2.StateChanges), "Batch 2 should have 4 state changes")

	// Check specific state changes in Batch 2
	assert.Equal(t, uint64(1), extra2.StateChanges[0].GetPatternDelete().PatternId)
	assert.Equal(t, uint64(1), extra2.StateChanges[1].GetDictEntryDelete().Id)
	assert.Equal(t, uint64(3), extra2.StateChanges[2].GetPatternDefine().PatternId)
	assert.Equal(t, "pattern3", extra2.StateChanges[2].GetPatternDefine().Template)
	assert.Equal(t, uint64(3), extra2.StateChanges[3].GetDictEntryDefine().Id)
	assert.Equal(t, "value3", extra2.StateChanges[3].GetDictEntryDefine().Value)

	// Batch 3 (3 entries): add p4, add d4, log
	input <- createPatternDefineMsg(4, "pattern4")
	input <- createDictEntryDefineMsg(4, "value4")
	input <- createLogMsg("log with p4/d4")

	// Advance clock to trigger timer-based flush for batch 3
	clk.Add(2 * timerInterval)

	// Receive and verify Batch 3
	payload3 := <-output
	require.Equal(t, 3, len(payload3.MessageMetas), "Batch 3 should have 3 messages")

	// Verify StatefulExtra for Batch 3
	require.NotNil(t, payload3.StatefulExtra, "Batch 3 should have StatefulExtra")
	extra3, ok := payload3.StatefulExtra.(*StatefulExtra)
	require.True(t, ok, "StatefulExtra should be of type *StatefulExtra")
	require.Equal(t, 2, len(extra3.StateChanges), "Batch 3 should have 2 state changes")

	// Check specific state changes in Batch 3
	assert.Equal(t, uint64(4), extra3.StateChanges[0].GetPatternDefine().PatternId)
	assert.Equal(t, "pattern4", extra3.StateChanges[0].GetPatternDefine().Template)
	assert.Equal(t, uint64(4), extra3.StateChanges[1].GetDictEntryDefine().Id)
	assert.Equal(t, "value4", extra3.StateChanges[1].GetDictEntryDefine().Value)

	strategy.Stop()
}
+ +package grpc + +import ( + "google.golang.org/protobuf/proto" + + "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" +) + +// inflightTracker is a bounded FIFO queue that tracks payloads in two regions: +// 1. Sent but awaiting acknowledgment (head to sentTail) +// 2. Buffered but not yet sent to the network (sentTail to tail) +// +// Queue Layout: +// [--sent awaiting ack--][--buffered not sent--] +// ^ ^ ^ +// head sentTail tail +// +// BatchID tracking: +// - Sent payloads have sequential batchIDs: [headBatchID, headBatchID+1, ..., headBatchID+sentSize-1] +// - Only tracks headBatchID (oldest sent) and nextBatchID (next to be assigned) +// +// Snapshot State: +// - Maintains accumulated state changes for stream bootstrapping +// - Represents the state "before" the first payload in the queue +// - Updated when payloads are acknowledged (popped) +type inflightTracker struct { + items []*message.Payload + head int // Index of the oldest sent item (awaiting ack) + sentTail int // Index of the first buffered item that's not yet sent + tail int // Index of the next available slot for new buffered items + cap int // Maximum total capacity of the tracker + headBatchID uint32 // BatchID of the oldest sent payload (at head) + batchIDCounter uint32 // Next batchID to be assigned when markSent is called + snapshot *snapshotState // Accumulated state for new streams +} + +// newInflightTracker creates a new bounded inflight tracker with the given capacity +// Allocates capacity+1 slots to implement the "waste one slot" ring buffer pattern +func newInflightTracker(capacity int) *inflightTracker { + return &inflightTracker{ + items: make([]*message.Payload, capacity+1), + cap: capacity, + snapshot: newSnapshotState(), + } +} + +// hasSpace returns true if there is at least one free slot +func (t *inflightTracker) hasSpace() bool { + return t.totalCount() < t.cap +} + +// append adds a new payload to the 
buffered region (not yet sent) +// Returns true if the payload was added, false if the tracker is full +func (t *inflightTracker) append(payload *message.Payload) bool { + if !t.hasSpace() { + return false + } + t.items[t.tail] = payload + t.tail = (t.tail + 1) % len(t.items) + return true +} + +// pop removes and returns the oldest sent payload (at head) after receiving an ack +// Returns nil if there are no sent payloads +// Also applies any state changes from the payload to the snapshot state +func (t *inflightTracker) pop() *message.Payload { + if t.head == t.sentTail { + return nil + } + payload := t.items[t.head] + t.items[t.head] = nil // Allow GC + t.head = (t.head + 1) % len(t.items) + + // Apply state changes from this payload to snapshot + if payload.StatefulExtra != nil { + if extra, ok := payload.StatefulExtra.(*StatefulExtra); ok { + t.snapshot.apply(extra) + } + } + + // Advance headBatchID for the next payload + if t.head != t.sentTail { + t.headBatchID++ + } + + return payload +} + +// hasUnacked returns true if there are sent payloads awaiting acknowledgment +func (t *inflightTracker) hasUnacked() bool { + return t.head != t.sentTail +} + +// hasUnSent returns true if there are buffered payloads not yet sent +func (t *inflightTracker) hasUnSent() bool { + return t.sentTail != t.tail +} + +// getHeadBatchID returns the expected batchID at the head (oldest sent payload) +// Caller must check hasUnacked() first to ensure there are sent payloads +func (t *inflightTracker) getHeadBatchID() uint32 { + return t.headBatchID +} + +// nextBatchID returns the batchID that will be assigned to the next sent item +// This is a peek operation (idempotent, no mutation) +func (t *inflightTracker) nextBatchID() uint32 { + return t.batchIDCounter +} + +// markSent moves a buffered payload to the sent region and assigns it a batchID +// Returns true if successful, false if there are no buffered payloads +func (t *inflightTracker) markSent() bool { + if t.sentTail == 
t.tail { + return false + } + + // If this is the first sent item, set headBatchID + if t.head == t.sentTail { + t.headBatchID = t.batchIDCounter + } + + t.sentTail = (t.sentTail + 1) % len(t.items) + t.batchIDCounter++ // Increment counter for next batch + return true +} + +// nextToSend returns the next buffered payload ready to be sent (without removing it) +// Returns nil if there are no buffered payloads +func (t *inflightTracker) nextToSend() *message.Payload { + if t.sentTail == t.tail { + return nil + } + return t.items[t.sentTail] +} + +// sentCount returns the number of sent payloads awaiting ack +func (t *inflightTracker) sentCount() int { + return (t.sentTail - t.head + len(t.items)) % len(t.items) +} + +// totalCount returns the total number of tracked payloads +func (t *inflightTracker) totalCount() int { + return (t.tail - t.head + len(t.items)) % len(t.items) +} + +// resetOnRotation set any un-acked payload as un-sent and reset the batchID. +func (t *inflightTracker) resetOnRotation() { + // Move all sent items back to buffered region by resetting sentTail to head + // This makes all items [head, tail) buffered again + t.sentTail = t.head + + // Reset batchID counter for the new stream + // Make the first batchID be 1, 0 is reserved for the snapshot state + t.headBatchID = 1 + t.batchIDCounter = 1 +} + +// getSnapshot returns the current snapshot state for stream bootstrapping +// Returns serialized bytes (marshaled DatumSequence) or nil if empty +func (t *inflightTracker) getSnapshot() []byte { + return t.snapshot.serialize() +} + +// snapshotState maintains the accumulated state changes for stream bootstrapping +// It represents the state "before" the first payload in the inflight queue +type snapshotState struct { + dictMap map[uint64]*statefulpb.DictEntryDefine + patternMap map[uint64]*statefulpb.PatternDefine +} + +// newSnapshotState creates a new empty snapshot state +func newSnapshotState() *snapshotState { + return &snapshotState{ + 
dictMap: make(map[uint64]*statefulpb.DictEntryDefine), + patternMap: make(map[uint64]*statefulpb.PatternDefine), + } +} + +// apply updates the snapshot state by processing state changes from a payload +func (s *snapshotState) apply(extra *StatefulExtra) { + if extra == nil { + return + } + + for _, datum := range extra.StateChanges { + switch d := datum.Data.(type) { + case *statefulpb.Datum_PatternDefine: + s.patternMap[d.PatternDefine.PatternId] = d.PatternDefine + case *statefulpb.Datum_PatternDelete: + delete(s.patternMap, d.PatternDelete.PatternId) + case *statefulpb.Datum_DictEntryDefine: + s.dictMap[d.DictEntryDefine.Id] = d.DictEntryDefine + case *statefulpb.Datum_DictEntryDelete: + delete(s.dictMap, d.DictEntryDelete.Id) + } + } +} + +// serialize returns the current snapshot state as serialized bytes +// Returns the marshaled DatumSequence containing all pattern and dictionary definitions +// Used to send snapshot on new stream creation +func (s *snapshotState) serialize() []byte { + // Calculate total datums needed + totalSize := len(s.patternMap) + len(s.dictMap) + + if totalSize == 0 { + return nil + } + + datums := make([]*statefulpb.Datum, 0, totalSize) + + for _, pattern := range s.patternMap { + datums = append(datums, &statefulpb.Datum{ + Data: &statefulpb.Datum_PatternDefine{PatternDefine: pattern}, + }) + } + for _, entry := range s.dictMap { + datums = append(datums, &statefulpb.Datum{ + Data: &statefulpb.Datum_DictEntryDefine{DictEntryDefine: entry}, + }) + } + + datumSeq := &statefulpb.DatumSequence{ + Data: datums, + } + + serialized, _ := proto.Marshal(datumSeq) + return serialized +} diff --git a/pkg/logs/sender/grpc/inflight_test.go b/pkg/logs/sender/grpc/inflight_test.go new file mode 100644 index 000000000000..653db10b44c7 --- /dev/null +++ b/pkg/logs/sender/grpc/inflight_test.go @@ -0,0 +1,482 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. 
// createTestPayload returns a payload whose Encoded bytes are content and
// whose other fields are left at their zero values.
func createTestPayload(content string) *message.Payload {
	return &message.Payload{
		Encoded: []byte(content),
	}
}

// TestNewInflightTracker checks the initial state of a fresh tracker: all
// indices and batch IDs are zero, it has space, and both regions are empty.
func TestNewInflightTracker(t *testing.T) {
	tracker := newInflightTracker(10)

	assert.NotNil(t, tracker)
	assert.Equal(t, 10, tracker.cap)
	assert.Equal(t, 0, tracker.head)
	assert.Equal(t, 0, tracker.sentTail)
	assert.Equal(t, 0, tracker.tail)
	assert.Equal(t, uint32(0), tracker.headBatchID)
	assert.Equal(t, uint32(0), tracker.batchIDCounter)
	assert.True(t, tracker.hasSpace())
	assert.False(t, tracker.hasUnacked())
	assert.False(t, tracker.hasUnSent())
}

// TestInflightTrackerAppend checks that appended payloads land in the buffered
// (unsent) region and are reflected in totalCount.
func TestInflightTrackerAppend(t *testing.T) {
	tracker := newInflightTracker(10)

	// Append first payload
	payload1 := createTestPayload("test1")
	assert.True(t, tracker.append(payload1))
	assert.Equal(t, 1, tracker.totalCount())
	assert.True(t, tracker.hasUnSent())
	assert.False(t, tracker.hasUnacked())

	// Append second payload
	payload2 := createTestPayload("test2")
	assert.True(t, tracker.append(payload2))
	assert.Equal(t, 2, tracker.totalCount())
	assert.True(t, tracker.hasSpace())

	// Append third payload
	payload3 := createTestPayload("test3")
	assert.True(t, tracker.append(payload3))
	assert.Equal(t, 3, tracker.totalCount())
}

// TestInflightTrackerAppendWhenFull fills a capacity-3 tracker and checks that
// a fourth append is rejected without changing the count.
func TestInflightTrackerAppendWhenFull(t *testing.T) {
	// Test filling buffer to absolute capacity from empty state
	tracker := newInflightTracker(3)

	// Fill to capacity (3 items)
	assert.True(t, tracker.append(createTestPayload("test1")))
	assert.Equal(t, 1, tracker.totalCount())
	assert.True(t, tracker.hasSpace())

	assert.True(t, tracker.append(createTestPayload("test2")))
	assert.Equal(t, 2, tracker.totalCount())
	assert.True(t, tracker.hasSpace())

	assert.True(t, tracker.append(createTestPayload("test3")))
	assert.Equal(t, 3, tracker.totalCount())
	assert.False(t, tracker.hasSpace())

	// Append should fail when full
	assert.False(t, tracker.append(createTestPayload("test4")))
	assert.Equal(t, 3, tracker.totalCount())
}

// TestInflightTrackerMarkSent checks that markSent moves buffered payloads to
// the sent region one at a time, advances nextBatchID, leaves headBatchID at
// the first sent ID, and fails once nothing is buffered.
func TestInflightTrackerMarkSent(t *testing.T) {
	tracker := newInflightTracker(5)

	// Add buffered payloads
	payload1 := createTestPayload("test1")
	payload2 := createTestPayload("test2")
	tracker.append(payload1)
	tracker.append(payload2)

	assert.Equal(t, 0, tracker.sentCount())
	assert.True(t, tracker.hasUnSent())
	assert.False(t, tracker.hasUnacked())

	// Mark first as sent
	assert.True(t, tracker.markSent())
	assert.Equal(t, 1, tracker.sentCount())
	assert.Equal(t, uint32(0), tracker.getHeadBatchID())
	assert.Equal(t, uint32(1), tracker.nextBatchID())
	assert.True(t, tracker.hasUnacked())
	assert.True(t, tracker.hasUnSent())

	// Mark second as sent
	assert.True(t, tracker.markSent())
	assert.Equal(t, 2, tracker.sentCount())
	assert.Equal(t, uint32(0), tracker.getHeadBatchID())
	assert.Equal(t, uint32(2), tracker.nextBatchID())
	assert.True(t, tracker.hasUnacked())
	assert.False(t, tracker.hasUnSent())

	// Try to mark sent when no buffered items
	assert.False(t, tracker.markSent())
}

// TestInflightTrackerPop checks that pop returns sent payloads in FIFO order,
// advances headBatchID while sent payloads remain, and returns nil when the
// sent region is empty.
func TestInflightTrackerPop(t *testing.T) {
	tracker := newInflightTracker(5)

	// Add and mark payloads as sent
	payload1 := createTestPayload("test1")
	payload2 := createTestPayload("test2")
	tracker.append(payload1)
	tracker.append(payload2)
	tracker.markSent()
	tracker.markSent()

	assert.Equal(t, 2, tracker.sentCount())
	assert.Equal(t, uint32(0), tracker.getHeadBatchID())

	// Pop first payload
	popped1 := tracker.pop()
	assert.Equal(t, payload1, popped1)
	assert.Equal(t, 1, tracker.sentCount())
	assert.Equal(t, uint32(1), tracker.getHeadBatchID())
	assert.True(t, tracker.hasUnacked())

	// Pop second payload
	popped2 := tracker.pop()
	assert.Equal(t, payload2, popped2)
	assert.Equal(t, 0, tracker.sentCount())
	assert.False(t, tracker.hasUnacked())

	// Pop when empty should return nil
	poppedNil := tracker.pop()
	assert.Nil(t, poppedNil)
}

// TestInflightTrackerNextToSend checks that nextToSend peeks at the oldest
// buffered payload without removing it and returns nil when none is buffered.
func TestInflightTrackerNextToSend(t *testing.T) {
	tracker := newInflightTracker(5)

	// NextToSend on empty tracker should return nil
	assert.Nil(t, tracker.nextToSend())

	// Add buffered payloads
	payload1 := createTestPayload("test1")
	payload2 := createTestPayload("test2")
	tracker.append(payload1)
	tracker.append(payload2)

	// NextToSend should return first buffered payload
	next := tracker.nextToSend()
	assert.Equal(t, payload1, next)

	// Mark first as sent
	tracker.markSent()

	// NextToSend should return second buffered payload
	next = tracker.nextToSend()
	assert.Equal(t, payload2, next)

	// Mark second as sent
	tracker.markSent()

	// NextToSend should return nil when no buffered payloads
	next = tracker.nextToSend()
	assert.Nil(t, next)
}

// TestInflightTrackerBatchIDSequence checks that batch IDs are handed out
// sequentially starting at 0 and that headBatchID follows acknowledgments.
func TestInflightTrackerBatchIDSequence(t *testing.T) {
	tracker := newInflightTracker(5)

	// Buffer three payloads (nothing is sent yet)
	for i := 0; i < 3; i++ {
		payload := createTestPayload("test")
		tracker.append(payload)
	}

	// Initial batchIDCounter should be 0
	assert.Equal(t, uint32(0), tracker.nextBatchID())

	// Mark first as sent
	tracker.markSent()
	assert.Equal(t, uint32(0), tracker.getHeadBatchID())
	assert.Equal(t, uint32(1), tracker.nextBatchID())

	// Mark second as sent
	tracker.markSent()
	assert.Equal(t, uint32(0), tracker.getHeadBatchID())
	assert.Equal(t, uint32(2), tracker.nextBatchID())

	// Mark third as sent
	tracker.markSent()
	assert.Equal(t, uint32(0), tracker.getHeadBatchID())
	assert.Equal(t, uint32(3), tracker.nextBatchID())

	// Pop first - headBatchID should advance
	tracker.pop()
	assert.Equal(t, uint32(1), tracker.getHeadBatchID())

	// Pop second - headBatchID should advance
	tracker.pop()
	assert.Equal(t, uint32(2), tracker.getHeadBatchID())
}

// TestInflightTrackerResetOnRotation checks that a rotation turns every
// un-acked payload back into a buffered one and restarts batch IDs at 1.
func TestInflightTrackerResetOnRotation(t *testing.T) {
	tracker := newInflightTracker(5)

	// Add three payloads, marking each as sent right away
	for i := 0; i < 3; i++ {
		payload := createTestPayload("test")
		tracker.append(payload)
		tracker.markSent()
	}

	// Pop one ack
	tracker.pop()

	// State before reset: 2 sent (awaiting ack), 0 buffered
	assert.Equal(t, 2, tracker.sentCount())
	assert.Equal(t, 0, tracker.totalCount()-tracker.sentCount())
	assert.Equal(t, uint32(1), tracker.getHeadBatchID())
	assert.Equal(t, uint32(3), tracker.nextBatchID())

	// Reset on rotation
	tracker.resetOnRotation()

	// After reset: 0 sent, 2 buffered (un-acked payloads become buffered)
	assert.Equal(t, 0, tracker.sentCount())
	assert.Equal(t, 2, tracker.totalCount())
	assert.True(t, tracker.hasUnSent())
	assert.False(t, tracker.hasUnacked())

	// Batch IDs should reset to 1
	assert.Equal(t, uint32(1), tracker.headBatchID)
	assert.Equal(t, uint32(1), tracker.nextBatchID())
}

// TestInflightTrackerWrapAround exercises the tracker after head has moved
// away from index 0.
//
// NOTE(review): newInflightTracker(6) allocates 7 slots and this sequence only
// ever reaches index 5, so the indices never actually wrap past the end of the
// ring. A smaller capacity (or more append/pop rounds) would be needed to cover
// real wrap-around.
func TestInflightTrackerWrapAround(t *testing.T) {
	// Test wrap-around behavior without filling to absolute capacity
	tracker := newInflightTracker(6)

	// Fill and empty to advance head pointer
	payload1 := createTestPayload("test1")
	payload2 := createTestPayload("test2")

	// Add, send, and ack first two to advance pointers
	tracker.append(payload1)
	tracker.markSent()
	tracker.pop()

	tracker.append(payload2)
	tracker.markSent()
	tracker.pop()

	// Now add more items past the advanced head position
	payload3 := createTestPayload("test3")
	payload4 := createTestPayload("test4")
	payload5 := createTestPayload("test5")

	assert.True(t, tracker.append(payload3))
	assert.True(t, tracker.append(payload4))
	assert.True(t, tracker.append(payload5))

	assert.Equal(t, 3, tracker.totalCount())
	assert.True(t, tracker.hasSpace())

	// Mark all as sent and pop them
	tracker.markSent()
	tracker.markSent()
	tracker.markSent()

	popped3 := tracker.pop()
	popped4 := tracker.pop()
	popped5 := tracker.pop()

	assert.Equal(t, payload3, popped3)
	assert.Equal(t, payload4, popped4)
	assert.Equal(t, payload5, popped5)
	assert.Equal(t, 0, tracker.totalCount())
}

// TestInflightTrackerSentCount checks that sentCount rises with markSent,
// falls with pop, and is unaffected by append.
func TestInflightTrackerSentCount(t *testing.T) {
	tracker := newInflightTracker(5)

	// Initially no sent items
	assert.Equal(t, 0, tracker.sentCount())

	// Add buffered payloads
	tracker.append(createTestPayload("test1"))
	tracker.append(createTestPayload("test2"))
	tracker.append(createTestPayload("test3"))

	assert.Equal(t, 0, tracker.sentCount())

	// Mark as sent
	tracker.markSent()
	assert.Equal(t, 1, tracker.sentCount())

	tracker.markSent()
	assert.Equal(t, 2, tracker.sentCount())

	// Pop one
	tracker.pop()
	assert.Equal(t, 1, tracker.sentCount())

	// Mark another as sent
	tracker.markSent()
	assert.Equal(t, 2, tracker.sentCount())
}

// TestInflightTrackerTotalCount checks that totalCount rises with append,
// falls with pop, and is unaffected by markSent.
func TestInflightTrackerTotalCount(t *testing.T) {
	tracker := newInflightTracker(5)

	// Initially empty
	assert.Equal(t, 0, tracker.totalCount())

	// Add buffered payloads
	tracker.append(createTestPayload("test1"))
	assert.Equal(t, 1, tracker.totalCount())

	tracker.append(createTestPayload("test2"))
	assert.Equal(t, 2, tracker.totalCount())

	// Mark both as sent (doesn't change total count)
	tracker.markSent()
	tracker.markSent()
	assert.Equal(t, 2, tracker.totalCount())

	// Pop reduces total count
	tracker.pop()
	assert.Equal(t, 1, tracker.totalCount())

	tracker.pop()
	assert.Equal(t, 0, tracker.totalCount())
}

// TestInflightTrackerHasSpace checks that hasSpace stays true for a tracker
// that is only partially filled, before and after one send/ack cycle.
func TestInflightTrackerHasSpace(t *testing.T) {
	tracker := newInflightTracker(10)

	// Initially has space
	assert.True(t, tracker.hasSpace())

	// Add several items
	for i := 0; i < 5; i++ {
		tracker.append(createTestPayload("test"))
	}
	assert.True(t, tracker.hasSpace())

	// Send and ack one to verify space tracking
	tracker.markSent()
	tracker.pop()
	assert.True(t, tracker.hasSpace())
}

// TestInflightTrackerMixedOperations interleaves append, markSent and pop and
// checks the counts and FIFO order at each step.
func TestInflightTrackerMixedOperations(t *testing.T) {
	// Test a realistic sequence of operations
	tracker := newInflightTracker(5)

	// Add 3 buffered payloads
	p1 := createTestPayload("msg1")
	p2 := createTestPayload("msg2")
	p3 := createTestPayload("msg3")

	tracker.append(p1)
	tracker.append(p2)
	tracker.append(p3)

	assert.Equal(t, 3, tracker.totalCount())
	assert.Equal(t, 0, tracker.sentCount())

	// Send first 2
	tracker.markSent()
	tracker.markSent()

	assert.Equal(t, 3, tracker.totalCount())
	assert.Equal(t, 2, tracker.sentCount())
	assert.True(t, tracker.hasUnacked())
	assert.True(t, tracker.hasUnSent())

	// Receive ack for first
	popped := tracker.pop()
	assert.Equal(t, p1, popped)
	assert.Equal(t, 2, tracker.totalCount())
	assert.Equal(t, 1, tracker.sentCount())

	// Add more payloads
	p4 := createTestPayload("msg4")
	p5 := createTestPayload("msg5")
	tracker.append(p4)
	tracker.append(p5)

	assert.Equal(t, 4, tracker.totalCount())
	assert.Equal(t, 1, tracker.sentCount())

	// Send remaining buffered
	tracker.markSent() // p3
	tracker.markSent() // p4
	tracker.markSent() // p5

	assert.Equal(t, 4, tracker.totalCount())
	assert.Equal(t, 4, tracker.sentCount())
	assert.False(t, tracker.hasUnSent())

	// Receive all remaining acks
	assert.Equal(t, p2, tracker.pop())
	assert.Equal(t, p3, tracker.pop())
	assert.Equal(t, p4, tracker.pop())
	assert.Equal(t, p5, tracker.pop())

	assert.Equal(t, 0, tracker.totalCount())
	assert.False(t, tracker.hasUnacked())
}

// TestInflightTrackerResetOnRotationWithBuffered checks a rotation when the
// tracker holds both sent and buffered payloads: all of them end up buffered
// and the next batch ID is 1.
func TestInflightTrackerResetOnRotationWithBuffered(t *testing.T) {
	tracker := newInflightTracker(5)

	// Mix of sent and buffered payloads
	tracker.append(createTestPayload("msg1"))
	tracker.append(createTestPayload("msg2"))
	tracker.append(createTestPayload("msg3"))
	tracker.append(createTestPayload("msg4"))

	// Send first two
	tracker.markSent()
	tracker.markSent()

	// Ack first one
	tracker.pop()

	// State: 1 sent, 2 buffered, total 3
	assert.Equal(t, 1, tracker.sentCount())
	assert.Equal(t, 3, tracker.totalCount())

	// Reset on rotation
	tracker.resetOnRotation()

	// All items should be buffered now
	assert.Equal(t, 0, tracker.sentCount())
	assert.Equal(t, 3, tracker.totalCount())
	assert.True(t, tracker.hasUnSent())
	assert.False(t, tracker.hasUnacked())

	// Batch IDs reset
	assert.Equal(t, uint32(1), tracker.nextBatchID())
}

// TestInflightTrackerBatchIDAfterRotation checks that payloads re-sent after a
// rotation are numbered from 1 rather than continuing the old sequence.
func TestInflightTrackerBatchIDAfterRotation(t *testing.T) {
	tracker := newInflightTracker(5)

	// Add and send some payloads
	tracker.append(createTestPayload("msg1"))
	tracker.append(createTestPayload("msg2"))
	tracker.markSent()
	tracker.markSent()

	assert.Equal(t, uint32(0), tracker.getHeadBatchID())
	assert.Equal(t, uint32(2), tracker.nextBatchID())

	// Reset on rotation
	tracker.resetOnRotation()

	// Batch IDs should reset to 1 (0 is reserved for snapshot)
	assert.Equal(t, uint32(1), tracker.nextBatchID())

	// Send items with new batch IDs
	tracker.markSent()
	assert.Equal(t, uint32(1), tracker.getHeadBatchID())
	assert.Equal(t, uint32(2), tracker.nextBatchID())

	tracker.markSent()
	assert.Equal(t, uint32(1), tracker.getHeadBatchID())
	assert.Equal(t, uint32(3), tracker.nextBatchID())
}
+ +package grpc + +import ( + "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/logs/processor" +) + +// MockEncoder is a no-op encoder for gRPC stateful streaming. +// This is temporary scaffolding until the real State component is ready. +// Encoding happens in StartMessageTranslator instead of the processor. +var MockEncoder processor.Encoder = &mockEncoder{} + +type mockEncoder struct{} + +// Encode is a no-op implementation that satisfies the processor.Encoder interface +func (g *mockEncoder) Encode(_ *message.Message, _ string) error { + return nil +} diff --git a/pkg/logs/sender/grpc/mock_state.go b/pkg/logs/sender/grpc/mock_state.go new file mode 100644 index 000000000000..c5b88e0938de --- /dev/null +++ b/pkg/logs/sender/grpc/mock_state.go @@ -0,0 +1,264 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +package grpc + +import ( + "strings" + "time" + "unicode/utf8" + + "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/automaton" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" + "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" +) + +const nanoToMillis = 1000000 + +// MessageTranslator handles translation of message.Message to message.StatefulMessage +// It manages pattern extraction, clustering, and stateful message creation +type MessageTranslator struct { + clusterManager *clustering.ClusterManager +} + +// NewMessageTranslator creates a new MessageTranslator instance +// If clusterManager is nil, a new one will be created +func NewMessageTranslator() *MessageTranslator { + return &MessageTranslator{ + clusterManager: clustering.NewClusterManager(), + } + + // Would be shared cluster manager instead across pipelines when implemented. + // if clusterManager == nil { + // clusterManager = clustering.NewClusterManager() + // } + // return &MessageTranslator{ + // clusterManager: clusterManager, + // } +} + +// Start starts a goroutine that translates message.Message to message.StatefulMessage +// It handles pattern extraction by: +// 1. Tokenizing the message content +// 2. Using ClusterManager to create/update patterns +// 3. Sending PatternDefine for new patterns, or PatternDelete+PatternDefine for updates +// 4. 
Sending StructuredLog with wildcard values +// Returns the output channel for StatefulMessages +func (mt *MessageTranslator) Start(inputChan chan *message.Message, bufferSize int) chan *message.StatefulMessage { + outputChan := make(chan *message.StatefulMessage, bufferSize) + go func() { + defer close(outputChan) + + for msg := range inputChan { + mt.processMessage(msg, outputChan) + } + }() + return outputChan +} + +// StartMessageTranslator is a convenience function that creates a MessageTranslator with a cluster manager +// Returns the output channel for StatefulMessages +func StartMessageTranslator(inputChan chan *message.Message, bufferSize int) chan *message.StatefulMessage { + // Use a shared cluster manager for all pipelines (patterns shared across pipelines) + translator := NewMessageTranslator() + return translator.Start(inputChan, bufferSize) +} + +// processMessage handles a single message: tokenizes, creates patterns, and sends appropriate datums +func (mt *MessageTranslator) processMessage(msg *message.Message, outputChan chan *message.StatefulMessage) { + var patternDefineSent bool + var patternDefineParamCount uint32 + + ts := getMessageTimestamp(msg) + + // Get message content + content := msg.GetContent() + if len(content) == 0 { + return + } + + // Tokenize the message content + contentStr := string(content) + tokenList := tokenizeMessage(contentStr) + + // Process tokenized log through cluster manager to get/create pattern + pattern, changeType := mt.clusterManager.Add(tokenList) + + // Extract wildcard values from the pattern + wildcardValues := pattern.GetWildcardValues(tokenList) + + // Handle pattern state changes (send PatternDefine/PatternDelete as needed) + mt.handlePatternChange(pattern, changeType, msg, outputChan, &patternDefineSent, &patternDefineParamCount) + + // Send StructuredLog with pattern_id + dynamic values + mt.sendStructuredLog(outputChan, msg, pattern, wildcardValues, ts, patternDefineSent, patternDefineParamCount) +} + 
+// getMessageTimestamp returns the timestamp for the message, preferring ServerlessExtra.Timestamp +func getMessageTimestamp(msg *message.Message) time.Time { + ts := time.Now().UTC() + if !msg.ServerlessExtra.Timestamp.IsZero() { + ts = msg.ServerlessExtra.Timestamp + } + return ts +} + +// tokenizeMessage tokenizes the message content string +func tokenizeMessage(contentStr string) *token.TokenList { + tokenizer := automaton.NewTokenizer(contentStr) + return tokenizer.Tokenize() +} + +// handlePatternChange handles pattern changes based on PatternChangeType from cluster manager +// Uses the change type to determine if we need to send PatternDefine/PatternDelete +// The snapshot mechanism in inflight.go tracks what's been sent for stream recovery +func (mt *MessageTranslator) handlePatternChange(pattern *clustering.Pattern, changeType clustering.PatternChangeType, msg *message.Message, outputChan chan *message.StatefulMessage, patternDefineSent *bool, patternDefineParamCount *uint32) { + switch changeType { + case clustering.PatternNew: + // New pattern - send PatternDefine (may have 0 wildcards initially) + mt.sendPatternDefine(pattern, msg, outputChan, patternDefineSent, patternDefineParamCount) + + case clustering.PatternUpdated: + // Pattern structure changed (e.g., 0→N wildcards, or N→M wildcards) + mt.sendPatternDelete(pattern.PatternID, msg, outputChan) + mt.sendPatternDefine(pattern, msg, outputChan, patternDefineSent, patternDefineParamCount) + + case clustering.PatternNoChange: + } +} + +// sendPatternDefine creates and sends a PatternDefine datum +func (mt *MessageTranslator) sendPatternDefine(pattern *clustering.Pattern, msg *message.Message, outputChan chan *message.StatefulMessage, patternDefineSent *bool, patternDefineParamCount *uint32) { + patternDatum := buildPatternDefine(pattern) + if pd := patternDatum.GetPatternDefine(); pd != nil { + *patternDefineParamCount = pd.ParamCount + } + outputChan <- &message.StatefulMessage{ + Datum: 
patternDatum, + Metadata: &msg.MessageMetadata, + } + *patternDefineSent = true +} + +// sendPatternDelete creates and sends a PatternDelete datum +func (mt *MessageTranslator) sendPatternDelete(patternID uint64, msg *message.Message, outputChan chan *message.StatefulMessage) { + deleteDatum := buildPatternDelete(patternID) + outputChan <- &message.StatefulMessage{ + Datum: deleteDatum, + Metadata: &msg.MessageMetadata, + } +} + +// sendRawLog creates and sends a raw log datum +func (mt *MessageTranslator) sendRawLog(outputChan chan *message.StatefulMessage, msg *message.Message, contentStr string, ts time.Time) { + logDatum := buildRawLog(contentStr, ts) + outputChan <- &message.StatefulMessage{ + Datum: logDatum, + Metadata: &msg.MessageMetadata, + } +} + +// sendStructuredLog creates and sends a StructuredLog datum +func (mt *MessageTranslator) sendStructuredLog(outputChan chan *message.StatefulMessage, msg *message.Message, pattern *clustering.Pattern, wildcardValues []string, ts time.Time, patternDefineSent bool, patternDefineParamCount uint32) { + logDatum := buildStructuredLog(pattern.PatternID, wildcardValues, ts) + outputChan <- &message.StatefulMessage{ + Datum: logDatum, + Metadata: &msg.MessageMetadata, + } +} + +// buildPatternDefine creates a PatternDefine Datum from a Pattern +func buildPatternDefine(pattern *clustering.Pattern) *statefulpb.Datum { + charPositions := pattern.GetWildcardCharPositions() + posList := make([]uint32, len(charPositions)) + for i, pos := range charPositions { + posList[i] = uint32(pos) + } + + return &statefulpb.Datum{ + Data: &statefulpb.Datum_PatternDefine{ + PatternDefine: &statefulpb.PatternDefine{ + PatternId: pattern.PatternID, + Template: pattern.GetPatternString(), + ParamCount: uint32(pattern.GetWildcardCount()), + PosList: posList, + }, + }, + } +} + +// buildPatternDelete creates a PatternDelete Datum for a pattern ID +func buildPatternDelete(patternID uint64) *statefulpb.Datum { + return &statefulpb.Datum{ + 
Data: &statefulpb.Datum_PatternDelete{ + PatternDelete: &statefulpb.PatternDelete{ + PatternId: patternID, + }, + }, + } +} + +// buildStructuredLog creates a Datum containing a StructuredLog +func buildStructuredLog(patternID uint64, wildcardValues []string, ts time.Time) *statefulpb.Datum { + // Convert wildcard values to DynamicValue format + dynamicValues := make([]*statefulpb.DynamicValue, len(wildcardValues)) + for i, value := range wildcardValues { + dynamicValues[i] = &statefulpb.DynamicValue{ + Value: &statefulpb.DynamicValue_StringValue{ + StringValue: value, + }, + } + } + + return &statefulpb.Datum{ + Data: &statefulpb.Datum_Logs{ + Logs: &statefulpb.Log{ + Timestamp: uint64(ts.UnixNano() / nanoToMillis), + Content: &statefulpb.Log_Structured{ + Structured: &statefulpb.StructuredLog{ + PatternId: patternID, + DynamicValues: dynamicValues, + }, + }, + }, + }, + } +} + +// buildRawLog creates a Datum containing a raw log (no pattern) +func buildRawLog(content string, ts time.Time) *statefulpb.Datum { + return &statefulpb.Datum{ + Data: &statefulpb.Datum_Logs{ + Logs: &statefulpb.Log{ + Timestamp: uint64(ts.UnixNano() / nanoToMillis), + Content: &statefulpb.Log_Raw{ + Raw: content, + }, + }, + }, + } +} + +// toValidUtf8 ensures all characters are UTF-8 +func toValidUtf8(data []byte) string { + if utf8.Valid(data) { + return string(data) + } + + var str strings.Builder + str.Grow(len(data)) + + for len(data) > 0 { + r, size := utf8.DecodeRune(data) + // in case of invalid utf-8, DecodeRune returns (utf8.RuneError, 1) + // and since RuneError is the same as unicode.ReplacementChar + // no need to handle the error explicitly + str.WriteRune(r) + data = data[size:] + } + return str.String() +} diff --git a/pkg/logs/sender/grpc/sender.go b/pkg/logs/sender/grpc/sender.go new file mode 100644 index 000000000000..e4dd93c4a1b1 --- /dev/null +++ b/pkg/logs/sender/grpc/sender.go @@ -0,0 +1,265 @@ +// Unless explicitly stated otherwise all files in this repository 
are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package grpc implements gRPC-based log sender +package grpc + +import ( + "context" + "crypto/tls" + "fmt" + "time" + + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/keepalive" + + "github.com/DataDog/datadog-agent/comp/logs/agent/config" + pkgconfigmodel "github.com/DataDog/datadog-agent/pkg/config/model" + "github.com/DataDog/datadog-agent/pkg/logs/client" + "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/logs/metrics" + "github.com/DataDog/datadog-agent/pkg/logs/sender" + "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" + "github.com/DataDog/datadog-agent/pkg/util/log" + "github.com/DataDog/datadog-agent/pkg/version" + + "go.uber.org/atomic" +) + +const ( + // inputChanBufferSize is the buffer size for worker input channels - may become configurable + inputChanBufferSize = 100 +) + +// headerCredentials implements credentials.PerRPCCredentials to add headers to RPC calls +type headerCredentials struct { + endpoint config.Endpoint +} + +// GetRequestMetadata adds required headers to each RPC call +func (h *headerCredentials) GetRequestMetadata(_ context.Context, _ ...string) (map[string]string, error) { + headers := map[string]string{ + "dd-api-key": h.endpoint.GetAPIKey(), + } + + // Add protocol header if specified + if h.endpoint.Protocol != "" { + headers["dd-protocol"] = string(h.endpoint.Protocol) + } + + // Add origin headers if specified + if h.endpoint.Origin != "" { + headers["dd-evp-origin"] = string(h.endpoint.Origin) + headers["dd-evp-origin-version"] = version.AgentVersion + } + + return headers, nil +} + +// RequireTransportSecurity indicates whether the credentials require transport security +func (h 
*headerCredentials) RequireTransportSecurity() bool { + return false // We handle TLS separately via WithTransportCredentials +} + +// Sender implements PipelineComponent interface for gRPC log transmission. +// It manages multiple streamWorker instances (one per pipeline) using round-robin distribution. +// It is similar to Sender/Worker architecture +type Sender struct { + // Configuration + endpoint config.Endpoint + destinationsContext *client.DestinationsContext + cfg pkgconfigmodel.Reader + numberOfWorkers int + + // Pipeline integration + pipelineMonitor metrics.PipelineMonitor + + // Stream management (similar to Sender's workers and queues) + workers []*streamWorker + queues []chan *message.Payload + idx *atomic.Uint32 + + // Auditor integration + sink sender.Sink + + // gRPC connection management (shared across all streams) + conn *grpc.ClientConn + client statefulpb.StatefulLogsServiceClient +} + +// NewSender creates a new gRPC sender that implements PipelineComponent +// numberOfPipelines determines how many streamWorker to create (same as number of pipelines) +func NewSender( + numberOfPipelines int, + cfg pkgconfigmodel.Reader, + sink sender.Sink, + endpoints *config.Endpoints, + destinationsCtx *client.DestinationsContext, +) *Sender { + + // For now, use the first reliable endpoint + // TODO: Support multiple endpoints with failover + var endpoint config.Endpoint + if len(endpoints.GetReliableEndpoints()) > 0 { + endpoint = endpoints.GetReliableEndpoints()[0] + } else { + log.Error("No reliable gRPC endpoints configured") + return nil + } + + // For the moment, we use the number of pipelines as the number of workers + numberOfWorkers := numberOfPipelines + + // Get stream lifetime from config + streamLifetime := config.StreamLifetime(cfg) + + sender := &Sender{ + endpoint: endpoint, + destinationsContext: destinationsCtx, + cfg: cfg, + numberOfWorkers: numberOfWorkers, + pipelineMonitor: metrics.NewTelemetryPipelineMonitor(), + workers: 
make([]*streamWorker, 0, numberOfWorkers), + queues: make([]chan *message.Payload, numberOfWorkers), + idx: &atomic.Uint32{}, + sink: sink, + } + + // Note: outputChan will be set in each streamWorker's start() method when sink.Channel() is available + + // Create gRPC connection (shared by all streams inside streamWorkers) + if err := sender.createConnection(); err != nil { + log.Errorf("Failed to create gRPC connection: %v", err) + return nil + } + + // Create multiple streamWorker instances (like Sender creates Workers) + for i := 0; i < numberOfWorkers; i++ { + workerID := fmt.Sprintf("worker-%d", i) + + // Create input queue for this worker (like Sender creates queues) + sender.queues[i] = make(chan *message.Payload, inputChanBufferSize) + + // Create streamWorker instance + worker := newStreamWorker( + workerID, + sender.queues[i], + destinationsCtx, + sender.conn, + sender.client, + sender.sink, + endpoint, + streamLifetime, + ) + + sender.workers = append(sender.workers, worker) + } + + log.Infof("Created gRPC sender with %d streams for endpoint %s:%d", + numberOfWorkers, endpoint.Host, endpoint.Port) + return sender +} + +// createConnection establishes the shared gRPC connection +func (s *Sender) createConnection() error { + log.Infof("Creating gRPC connection to %s:%d", s.endpoint.Host, s.endpoint.Port) + + // Build connection options + var opts []grpc.DialOption + + // Configure TLS + if s.endpoint.UseSSL() { + tlsConfig := &tls.Config{ + ServerName: s.endpoint.Host, + } + opts = append(opts, grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))) + } else { + opts = append(opts, grpc.WithTransportCredentials(insecure.NewCredentials())) + } + + // Configure keepalive + keepaliveParams := keepalive.ClientParameters{ + Time: 30 * time.Second, + Timeout: 5 * time.Second, + PermitWithoutStream: true, + } + opts = append(opts, grpc.WithKeepaliveParams(keepaliveParams)) + + // Add user agent + userAgent := fmt.Sprintf("datadog-agent/%s", 
version.AgentVersion) + opts = append(opts, grpc.WithUserAgent(userAgent)) + + // Add headers via per-RPC credentials + headerCreds := &headerCredentials{endpoint: s.endpoint} + opts = append(opts, grpc.WithPerRPCCredentials(headerCreds)) + + // Add load balancing configuration, to utilize all available LB IPs + opts = append(opts, grpc.WithDefaultServiceConfig( + `{"loadBalancingPolicy":"round_robin"}`, + )) + + // Create connection, lazy connection establishment, does not block + address := fmt.Sprintf("%s:%d", s.endpoint.Host, s.endpoint.Port) + conn, err := grpc.NewClient(address, opts...) + if err != nil { + return fmt.Errorf("failed to create gRPC connection: %w", err) + } + + s.conn = conn + s.client = statefulpb.NewStatefulLogsServiceClient(conn) + + log.Infof("Successfully created gRPC connection to %s", address) + return nil +} + +// PipelineComponent interface implementation + +// In returns the input channel using round-robin distribution (same as Sender.In()) +func (s *Sender) In() chan *message.Payload { + idx := s.idx.Inc() % uint32(len(s.queues)) + return s.queues[idx] +} + +// PipelineMonitor returns the pipeline monitor +func (s *Sender) PipelineMonitor() metrics.PipelineMonitor { + return s.pipelineMonitor +} + +// Start starts all streamWorker instances (same pattern as Sender.Start()) +func (s *Sender) Start() { + log.Infof("Starting gRPC sender with %d workers", len(s.workers)) + + for _, worker := range s.workers { + worker.start() + } + + log.Info("All streamWorkers started") +} + +// Stop stops all streamWorker instances and closes the connection +func (s *Sender) Stop() { + log.Info("Stopping gRPC sender") + + // Stop all workers (same pattern as Sender.Stop()) + for _, worker := range s.workers { + worker.stop() + } + + // Close all queues + for _, queue := range s.queues { + close(queue) + } + + // Close the shared connection + if s.conn != nil { + if err := s.conn.Close(); err != nil { + log.Warnf("Error closing gRPC connection: %v", 
err) + } + } + + log.Info("gRPC sender stopped") +} diff --git a/pkg/logs/sender/grpc/stream_worker.go b/pkg/logs/sender/grpc/stream_worker.go new file mode 100644 index 000000000000..f2cee3ad3592 --- /dev/null +++ b/pkg/logs/sender/grpc/stream_worker.go @@ -0,0 +1,712 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package grpc + +import ( + "context" + "errors" + "io" + "time" + + "github.com/benbjohnson/clock" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/connectivity" + "google.golang.org/grpc/status" + + "github.com/DataDog/datadog-agent/comp/logs/agent/config" + "github.com/DataDog/datadog-agent/pkg/logs/client" + "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/logs/sender" + "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" + "github.com/DataDog/datadog-agent/pkg/util/backoff" + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +// TODO For PoC Stage 1 +// - implement snapshot state transmission +// - better handle unrecoverable errors - auth/perm, protocol, stream-level gRPC status +// - telemetries (send/recv, failure, rotations) + +// TODO for PoC Stage 2 +// - implement more graceful shutdown, the current version we could lose some acks +// - currently, s.currentStream.stream.Send(batch) can still block, especially +// if we have a lot of buffered payloads to re-send after a stream rotation, +// especially if we are flow controlled. 
This will block the supervisor loop +// and potentially backpressure the input channel +// - implement proper "stream/ordered" backpressure + +// TODO for production +// - implement stream neotiation (state size, etc), able to downgrade to HTTP transport +// - Testing plan + +const ( + // Various constants - may become configurable + batchAckChanBuffer = 10 + maxInflight = 10000 + connectionTimeout = 10 * time.Second + drainTimeout = 5 * time.Second +) + +// streamState represents the current state of the stream worker +// +//go:generate stringer -type=streamState +type streamState int + +const ( + // disconnected is the initial state or stream creation failure backoff state + disconnected streamState = iota + // connecting is the state while waiting for asyncCreateNewStream to complete or fail + connecting + // active is the normal operating state with a valid stream + active + // draining waits for all acks to arrive before rotating to a new stream + draining +) + +// streamInfo holds all stream-related information +type streamInfo struct { + stream statefulpb.StatefulLogsService_LogsStreamClient + ctx context.Context + cancel context.CancelFunc +} + +// streamCreationResult represents the result of async stream creation +type streamCreationResult struct { + info *streamInfo + err error +} + +// batchAck wraps a batch acknowledgment with stream identity to prevent stale signals +type batchAck struct { + stream *streamInfo + status *statefulpb.BatchStatus +} + +// streamWorker manages a single gRPC bidirectional stream with Master-Slave threading model +// Architecture: One supervisor/sender goroutine + one receiver goroutine per worker +type streamWorker struct { + // Configuration + workerID string + destinationsContext *client.DestinationsContext + + // Pipeline integration + inputChan chan *message.Payload + outputChan chan *message.Payload // For auditor acknowledgments + sink sender.Sink // For getting auditor channel + + // gRPC connection management 
(shared with other streams) + conn *grpc.ClientConn + client statefulpb.StatefulLogsServiceClient + + // Stream management + currentStream *streamInfo + streamState streamState + recvFailureCh chan *streamInfo // Signal receiver failure with stream identity + batchAckCh chan *batchAck // Signal batch acknowledgments with stream identity + streamReadyCh chan streamCreationResult // Signal when async stream creation completes + streamLifetime time.Duration + streamTimer *clock.Timer // Timer for stream lifetime, trigger soft rotation + drainTimer *clock.Timer // In case of unacked payloads, drain/wait before soft rotation + backoffTimer *clock.Timer // In case of stream creation failure, backoff before retrying + + // Inflight tracking - tracks sent (awaiting ack) and buffered (not sent) payloads + inflight *inflightTracker + + // Retry backoff + backoffPolicy backoff.Policy + nbErrors int + + // Control + stopChan chan struct{} + done chan struct{} + clock clock.Clock +} + +// newStreamWorker creates a new gRPC stream worker +func newStreamWorker( + workerID string, + inputChan chan *message.Payload, + destinationsCtx *client.DestinationsContext, + conn *grpc.ClientConn, + client statefulpb.StatefulLogsServiceClient, + sink sender.Sink, + endpoint config.Endpoint, + streamLifetime time.Duration, +) *streamWorker { + return newStreamWorkerWithClock(workerID, inputChan, destinationsCtx, conn, client, sink, + endpoint, streamLifetime, clock.New(), nil) +} + +// newStreamWorkerWithClock creates a new gRPC stream worker with injectable clock for testing +func newStreamWorkerWithClock( + workerID string, + inputChan chan *message.Payload, + destinationsCtx *client.DestinationsContext, + conn *grpc.ClientConn, + client statefulpb.StatefulLogsServiceClient, + sink sender.Sink, + endpoint config.Endpoint, + streamLifetime time.Duration, + clock clock.Clock, + inflightTracker *inflightTracker, +) *streamWorker { + backoffPolicy := backoff.NewExpBackoffPolicy( + 
endpoint.BackoffFactor, + endpoint.BackoffBase, + endpoint.BackoffMax, + endpoint.RecoveryInterval, + endpoint.RecoveryReset, + ) + + // Use provided inflightTracker (testing) or create default one + if inflightTracker == nil { + inflightTracker = newInflightTracker(maxInflight) + } + + worker := &streamWorker{ + workerID: workerID, + destinationsContext: destinationsCtx, + inputChan: inputChan, + outputChan: nil, + sink: sink, + conn: conn, + client: client, + streamState: disconnected, + recvFailureCh: make(chan *streamInfo), + batchAckCh: make(chan *batchAck, batchAckChanBuffer), + streamReadyCh: make(chan streamCreationResult), + streamLifetime: streamLifetime, + inflight: inflightTracker, + backoffPolicy: backoffPolicy, + nbErrors: 0, + stopChan: make(chan struct{}), + done: make(chan struct{}), + clock: clock, + streamTimer: createStoppedTimer(clock, 0), + backoffTimer: createStoppedTimer(clock, 0), + drainTimer: createStoppedTimer(clock, 0), + } + + return worker +} + +// start begins the supervisor goroutine & creates a new stream asynchronously +func (s *streamWorker) start() { + log.Infof("Starting gRPC stream worker %s", s.workerID) + s.outputChan = s.sink.Channel() + + // Start supervisor/sender goroutine (master) + go s.supervisorLoop() + + s.asyncCreateNewStream() + + log.Infof("Worker %s: Started", s.workerID) +} + +// stop shuts down the stream worker +func (s *streamWorker) stop() { + log.Infof("Stopping gRPC stream worker %s", s.workerID) + close(s.stopChan) + <-s.done + log.Infof("Worker %s: Stopped", s.workerID) +} + +// supervisorLoop is the master goroutine that handles sending and stream lifecycle +func (s *streamWorker) supervisorLoop() { + defer close(s.done) + + // supervisor loop starts without a stream, but asyncCreateNewStream is called + // right after in streamWorker's start(), so we are in connecting state right away + s.streamState = connecting + + for { + // Conditional inputChan - only enabled when inflight tracker has space + // 
This backpressures to upstream when at capacity + var inputChan <-chan *message.Payload + if s.inflight.hasSpace() { + inputChan = s.inputChan // Enable reading + } else { + inputChan = nil // Disable reading + } + + select { + case payload := <-inputChan: + // Fires in any state (gated only by inflight capacity), payload is always + // added to the inflight tracker. But we only proceed to send if we are + // in the active state with a valid stream + s.inflight.append(payload) + s.sendPayloads() + + case ack := <-s.batchAckCh: + // Fires in any state + s.handleBatchAck(ack) + + case failedStream := <-s.recvFailureCh: + // Fires in active/draining/connecting states + s.handleRecvFailure(failedStream) + + case result := <-s.streamReadyCh: + // Fires only in connecting state + s.handleStreamReady(result) + + case <-s.streamTimer.C: + // Fires only in active state (except rare timing race, it's in connecting) + s.handleStreamTimeout() + + case <-s.drainTimer.C: + // Fires in draining state or (rarely) in connecting/active state + // If in non-draining state, it means acks arrival at the same time + // as the drain timer expiration, so we will skip the signal + s.handleDrainTimeout() + + case <-s.backoffTimer.C: + // Fires only in disconnected state + s.handleBackoffTimeout() + + case <-s.stopChan: + // Fires in any state + s.handleShutdown() + return + } + } +} + +// sendPayloads attempts to send all buffered payloads when in Active state +// the same function is used to send new payload in normal operation, and +// to send (or resend) buffered payloads after a stream rotation +func (s *streamWorker) sendPayloads() { + if s.streamState != active { + return + } + + // Send all buffered payloads in order + for { + payload := s.inflight.nextToSend() + if payload == nil { + // No more buffered payloads to send + break + } + + batchID := s.inflight.nextBatchID() + batch := createBatch(payload.Encoded, batchID) + + // TODO Send call can block, by TCP/HTTP2 flow controls + if 
err := s.currentStream.stream.Send(batch); err != nil { + log.Warnf("Worker %s: Send failed, initiating stream rotation: %v", s.workerID, err) + s.beginStreamRotation() + return // stop sending, payloads remain buffered for next rotation + } + + // Successfully sent, mark as sent in the inflight tracker + s.inflight.markSent() + } +} + +// sendSnapshot sends the snapshot state as batch 0 on a new stream +// Returns true if successful, initiates stream rotation and returns false if failed +func (s *streamWorker) sendSnapshot() bool { + serialized := s.inflight.getSnapshot() + + // Snapshot is empty means no state + if serialized == nil { + return true + } + + // Create batch with batchID 0 (reserved for snapshot) + batch := createBatch(serialized, 0) + + // Send snapshot + if err := s.currentStream.stream.Send(batch); err != nil { + log.Warnf("Worker %s: Failed to send snapshot: %v, initiating stream rotation", s.workerID, err) + s.beginStreamRotation() + return false + } + + log.Infof("Worker %s: Sent snapshot (%d bytes)", s.workerID, len(serialized)) + return true +} + +// handleBatchAck processes a BatchStatus acknowledgment from the server +func (s *streamWorker) handleBatchAck(ack *batchAck) { + // Ignore stale acks from old streams + if ack.stream != s.currentStream { + return + } + + receivedBatchID := uint32(ack.status.BatchId) + + // Handle snapshot/state ack (batch 0) - no payload to pop + if receivedBatchID == 0 { + return + } + + // The two errors below should never happen if Intake is implemented + // correctly, but we are being defensive. 
+ + // Verify we have "sent payloads" awaiting ack + if !s.inflight.hasUnacked() { + log.Errorf("Worker %s: Received ack for batch %d but no sent payloads in inflight tracker, "+ + "irrecoverable error - initiating stream rotation", s.workerID, receivedBatchID) + s.beginStreamRotation() + return + } + + // Verify batchID matches expected sequence + expectedBatchID := s.inflight.getHeadBatchID() + if receivedBatchID != expectedBatchID { + log.Errorf("Worker %s: BatchID mismatch! Expected %d, received %d. "+ + "Out-of-order or duplicate ack, irrecoverable error - initiating stream rotation", + s.workerID, expectedBatchID, receivedBatchID) + s.beginStreamRotation() + return + } + + // Pop the acknowledged payload and send to auditor + payload := s.inflight.pop() + if s.outputChan != nil { + select { + case s.outputChan <- payload: + // Successfully sent to auditor + default: + log.Warnf("Worker %s: Auditor channel full, dropping ack for batch %d", s.workerID, receivedBatchID) + } + } + + // If in Draining state and all acks received, transition to Connecting + if s.streamState == draining && !s.inflight.hasUnacked() { + log.Infof("Worker %s: All acks received in draining state, proceeding with rotation", s.workerID) + s.drainTimer.Stop() + s.beginStreamRotation() + } +} + +// handleRecvFailure processes receiver failure signals +func (s *streamWorker) handleRecvFailure(failedStream *streamInfo) { + // Ignore if: stale signal OR not in active/draining state + if failedStream != s.currentStream || (s.streamState != active && s.streamState != draining) { + return + } + + log.Infof("Worker %s: Receiver reported failure (state: %v), initiating stream rotation", s.workerID, s.streamState) + s.beginStreamRotation() +} + +// handleStreamReady processes async stream creation results +func (s *streamWorker) handleStreamReady(result streamCreationResult) { + if s.streamState != connecting { + return + } + + if result.err != nil { + s.nbErrors = 
s.backoffPolicy.IncError(s.nbErrors) + s.handleStreamCreationFailure(result.err) + } else { + s.nbErrors = s.backoffPolicy.DecError(s.nbErrors) + s.finishStreamRotation(result.info) + } +} + +// handleStreamTimeout processes stream lifetime expiration +func (s *streamWorker) handleStreamTimeout() { + if s.streamState != active { + return + } + + if s.inflight.hasUnacked() { + log.Infof("Worker %s: Stream lifetime expired with %d unacked payloads, entering Draining state", + s.workerID, s.inflight.sentCount()) + s.streamState = draining + s.drainTimer.Reset(drainTimeout) + } else { + log.Infof("Worker %s: Stream lifetime expired with no unacked payloads, rotating immediately", + s.workerID) + s.beginStreamRotation() + } +} + +// handleDrainTimeout handles drain timer expiration +func (s *streamWorker) handleDrainTimeout() { + if s.streamState != draining { + return + } + + log.Warnf("Worker %s: Drain timer expired in draining state, proceeding with rotation (may lose some acks)", + s.workerID) + s.beginStreamRotation() +} + +// handleBackoffTimeout processes backoff timer expiration and retries stream creation +func (s *streamWorker) handleBackoffTimeout() { + if s.streamState != disconnected { + return + } + + log.Infof("Worker %s: Backoff timer expired, retrying stream creation (error count: %d)", s.workerID, s.nbErrors) + s.streamState = connecting + s.asyncCreateNewStream() +} + +// handleShutdown performs graceful shutdown cleanup +func (s *streamWorker) handleShutdown() { + log.Infof("Worker %s: Shutting down", s.workerID) + s.streamTimer.Stop() + s.backoffTimer.Stop() + s.drainTimer.Stop() + s.closeStream(s.currentStream) +} + +// beginStreamRotation initiates stream rotation +// Closes current stream and starts async creation of a new stream +func (s *streamWorker) beginStreamRotation() { + log.Infof("Worker %s: Beginning stream rotation (state: %v → connecting)", s.workerID, s.streamState) + + s.closeStream(s.currentStream) + s.currentStream = nil + 
s.streamTimer.Stop() + s.drainTimer.Stop() + s.backoffTimer.Stop() + + s.streamState = connecting + s.asyncCreateNewStream() +} + +// finishStreamRotation completes stream rotation (Connecting → Active transition) +// Activates the newly created stream and starts the receiver +// Transmits the snapshot state first, then (if any) the buffered payloads +func (s *streamWorker) finishStreamRotation(streamInfo *streamInfo) { + log.Infof("Worker %s: Finishing stream rotation (state: connecting → active)", s.workerID) + + s.currentStream = streamInfo + s.streamState = active + + go s.receiverLoop(streamInfo) + + s.streamTimer.Reset(s.streamLifetime) + + // Convert all the unacked items to buffered items by resetting inflight tracker + // because we need to resend them. + s.inflight.resetOnRotation() + + log.Infof("Worker %s: Stream rotation complete, now active", s.workerID) + + // Send snapshot state first (batch 0) + if !s.sendSnapshot() { + return + } + + // Then send the remaining buffered payloads (batch 1, 2, ...) + if s.inflight.hasUnSent() { + s.sendPayloads() + } +} + +// handleStreamCreationFailure processes stream creation failures with exponential backoff +func (s *streamWorker) handleStreamCreationFailure(err error) { + backoffDuration := s.backoffPolicy.GetBackoffDuration(s.nbErrors) + + log.Warnf("Worker %s: Stream creation failed: %v. 
Backing off for %v (error count: %d)", + s.workerID, err, backoffDuration, s.nbErrors) + + s.streamState = disconnected + + if backoffDuration > 0 { + s.backoffTimer.Reset(backoffDuration) + } else { + // it shouldn't happen, but be defensive + // retry immediately by transitioning directly to connecting + log.Infof("Worker %s: Zero backoff duration, retrying immediately", s.workerID) + s.streamState = connecting + s.asyncCreateNewStream() + } +} + +// asyncCreateNewStream creates a new gRPC stream asynchronously +// Signals completion (success or failure) via streamReadyCh +func (s *streamWorker) asyncCreateNewStream() { + go func() { + log.Infof("Worker %s: Starting async stream creation", s.workerID) + + var result streamCreationResult + + // Ensure the connection is ready, can block up to connectionTimeout + err := s.ensureConnectionReady() + if err != nil { + log.Errorf("Worker %s: Async stream creation failed (connection failure) %v", s.workerID, err) + result = streamCreationResult{info: nil, err: err} + } else { + // Create per-stream context derived from destinations context + streamCtx, streamCancel := context.WithCancel(s.destinationsContext.Context()) + + // Create the stream, shouldn't block at this point. 
+ stream, err := s.client.LogsStream(streamCtx) + + if err != nil { + streamCancel() + log.Errorf("Worker %s: Async stream creation failed (post-connection): %v", s.workerID, err) + result = streamCreationResult{info: nil, err: err} + } else { + log.Infof("Worker %s: Async stream creation succeeded", s.workerID) + result = streamCreationResult{ + info: &streamInfo{ + stream: stream, + ctx: streamCtx, + cancel: streamCancel, + }, + err: nil, + } + } + } + + // Signal result to supervisor (blocks until received or stopped) + select { + case s.streamReadyCh <- result: + case <-s.stopChan: + // Worker stopped before supervisor could receive result + // We own cleanup since supervisor never got the stream + if result.info != nil { + s.closeStream(result.info) + } + } + }() +} + +func (s *streamWorker) ensureConnectionReady() error { + // Skip connection check if conn is nil (for testing with mock clients) + if s.conn == nil { + return nil + } + + connCtx, cancel := context.WithTimeout(s.destinationsContext.Context(), connectionTimeout) + defer cancel() + + // Nudge dialing if idle; doesn't block + s.conn.Connect() + + for { + state := s.conn.GetState() + switch state { + case connectivity.Ready: + return nil + case connectivity.Shutdown: + return errors.New("gRPC conn is shutdown") + } + // Wait for state change or timeout/cancel. 
+ if !s.conn.WaitForStateChange(connCtx, state) { + // context done (timeout or cancellation) + return connCtx.Err() + } + } +} + +// closeStream safely closes a stream and cancels its context +func (s *streamWorker) closeStream(streamInfo *streamInfo) { + if streamInfo != nil { + if err := streamInfo.stream.CloseSend(); err != nil { + log.Debugf("Worker %s: Error closing stream send: %v", s.workerID, err) + } + streamInfo.cancel() + } +} + +// receiverLoop runs in the receiver goroutine to process server responses for a specific stream +// The receiver is stateless - it only forwards acks/errors to the supervisor +// This goroutine exits when the stream fails (after signaling the supervisor) +func (s *streamWorker) receiverLoop(streamInfo *streamInfo) { + stream := streamInfo.stream + for { + msg, err := stream.Recv() + if err == nil { + // Normal message (batch acknowledgment) - forward to supervisor + s.signalBatchAck(streamInfo, msg) + continue + } + + // Clean inbound close (server OK in trailers): policy = signal receiver failure + if errors.Is(err, io.EOF) { + log.Warnf("Worker %s: Stream closed by server", s.workerID) + s.signalRecvFailure(streamInfo) + return + } + + // Local cancel/deadline (supervisor rotated, worker shutdown): just exit + ctxErr := streamInfo.ctx.Err() + if errors.Is(ctxErr, context.Canceled) || errors.Is(ctxErr, context.DeadlineExceeded) { + log.Infof("Worker %s: Stream context cancelled, receiver exiting", s.workerID) + return + } + + // Stream-level gRPC status (non-OK): RPC is over → signal receiver failure or block terminal + if st, ok := status.FromError(err); ok { + log.Warnf("Worker %s: recv: gRPC error (code %v): %v", s.workerID, st.Code(), err) + + switch st.Code() { + case codes.Unauthenticated, codes.PermissionDenied: + // Terminal until fixed; do not signal receiver failure here + s.handleIrrecoverableError("auth/perm: "+st.Message(), streamInfo) + return + case codes.InvalidArgument, codes.FailedPrecondition, 
codes.OutOfRange, codes.Unimplemented: + // Terminal protocol/semantic issue; do not signal receiver failure + s.handleIrrecoverableError("protocol: "+st.Message(), streamInfo) + return + default: + // All other non-OK statuses: signal receiver failure + s.signalRecvFailure(streamInfo) + return + } + } + + // Transport error without status (RST/GOAWAY/TLS, socket close): signal receiver failure + log.Warnf("Worker %s: Transport error: %v", s.workerID, err) + s.signalRecvFailure(streamInfo) + return + } +} + +// signalRecvFailure signals the supervisor to rotate the stream +func (s *streamWorker) signalRecvFailure(streamInfo *streamInfo) { + // This signaling is blocking by design; it's okay to block the receiver, + // since the receiver only gets here once its stream has ended and it exits right after. + select { + case s.recvFailureCh <- streamInfo: + case <-s.stopChan: + } +} + +// signalBatchAck forwards a batch acknowledgment to the supervisor +// If the worker is stopped, returns without delivering (shutdown is in progress anyway) +func (s *streamWorker) signalBatchAck(streamInfo *streamInfo, msg *statefulpb.BatchStatus) { + select { + case s.batchAckCh <- &batchAck{stream: streamInfo, status: msg}: + case <-s.stopChan: + } +} + +// handleIrrecoverableError handles errors that shouldn't be retried and that, +// ideally, should block ingestion until the error is resolved. +func (s *streamWorker) handleIrrecoverableError(_ string, streamInfo *streamInfo) { + // Currently this is treated as a stream error, which will trigger a stream rotation + // and a retry of the same payload, which loops forever. This is NOT the desired behavior. 
+ // TODO: Implement proper handling of irrecoverable errors, by blocking the ingestion + s.signalRecvFailure(streamInfo) +} + +// createBatch creates a StatefulBatch from serialized data and batch ID +func createBatch(data []byte, batchID uint32) *statefulpb.StatefulBatch { + return &statefulpb.StatefulBatch{ + BatchId: batchID, + Data: data, + } +} + +// createStoppedTimer creates a timer that is stopped and has its channel drained +func createStoppedTimer(clk clock.Clock, d time.Duration) *clock.Timer { + t := clk.Timer(d) + if !t.Stop() { + <-t.C + } + return t +} diff --git a/pkg/logs/sender/grpc/stream_worker_test.go b/pkg/logs/sender/grpc/stream_worker_test.go new file mode 100644 index 000000000000..59c5114646e2 --- /dev/null +++ b/pkg/logs/sender/grpc/stream_worker_test.go @@ -0,0 +1,1053 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +//go:build test + +package grpc + +import ( + "context" + "errors" + "io" + "sync" + "testing" + "time" + + "github.com/benbjohnson/clock" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "google.golang.org/protobuf/proto" + + "github.com/DataDog/datadog-agent/comp/logs/agent/config" + "github.com/DataDog/datadog-agent/pkg/logs/client" + "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" +) + +const ( + testTimeout = 100 * time.Millisecond + testTickInterval = 10 * time.Millisecond + testShortWait = 50 * time.Millisecond +) + +// mockSink implements sender.Sink for testing +type mockSink struct { + outputChan chan *message.Payload +} + +func newMockSink() *mockSink { + return &mockSink{ + outputChan: make(chan *message.Payload, 100), + } +} + +func (m *mockSink) Channel() chan *message.Payload { + return m.outputChan +} + +// mockLogsStream implements StatefulLogsService_LogsStreamClient for testing +type mockLogsStream struct { + grpc.ClientStream + + mu sync.Mutex + + // Channels for communication + sendCh chan *statefulpb.StatefulBatch // Batches sent by client + recvCh chan *statefulpb.BatchStatus // Acks to send to client + errCh chan error // To inject immediate errors in Recv() + + // Error control + sendErr error // If set, next Send() will return this error + recvErr error // If set, next Recv() will return this error + + // Track sent batches + sentBatches []*statefulpb.StatefulBatch + + // Context + ctx context.Context +} + +func newMockLogsStream(ctx context.Context) *mockLogsStream { + return &mockLogsStream{ + sendCh: make(chan *statefulpb.StatefulBatch, 100), + recvCh: make(chan *statefulpb.BatchStatus, 100), + errCh: make(chan error, 1), + sentBatches: make([]*statefulpb.StatefulBatch, 0), + ctx: ctx, + } +} + +func (m *mockLogsStream) Send(batch *statefulpb.StatefulBatch) error { + m.mu.Lock() + 
if m.sendErr != nil { + err := m.sendErr + m.mu.Unlock() + return err + } + m.mu.Unlock() + + select { + case m.sendCh <- batch: + m.mu.Lock() + m.sentBatches = append(m.sentBatches, batch) + m.mu.Unlock() + return nil + case <-m.ctx.Done(): + return m.ctx.Err() + } +} + +func (m *mockLogsStream) Recv() (*statefulpb.BatchStatus, error) { + m.mu.Lock() + if m.recvErr != nil { + err := m.recvErr + m.mu.Unlock() + return nil, err + } + m.mu.Unlock() + + select { + case ack := <-m.recvCh: + return ack, nil + case err := <-m.errCh: + return nil, err + case <-m.ctx.Done(): + return nil, m.ctx.Err() + } +} + +func (m *mockLogsStream) CloseSend() error { + return nil +} + +// Helper to set send error +func (m *mockLogsStream) setSendError(err error) { + m.mu.Lock() + defer m.mu.Unlock() + m.sendErr = err +} + +// Helper to send an ack to the client +func (m *mockLogsStream) sendAck(batchID int32) { + m.recvCh <- &statefulpb.BatchStatus{ + BatchId: batchID, + } +} + +// Helper to inject an error immediately (unblocks Recv()) +func (m *mockLogsStream) injectRecvError(err error) { + m.errCh <- err +} + +// Helper to get sent batch count +func (m *mockLogsStream) getSentBatchCount() int { + m.mu.Lock() + defer m.mu.Unlock() + return len(m.sentBatches) +} + +// Helper to get a specific sent batch by index +func (m *mockLogsStream) getSentBatch(index int) *statefulpb.StatefulBatch { + m.mu.Lock() + defer m.mu.Unlock() + if index < 0 || index >= len(m.sentBatches) { + return nil + } + return m.sentBatches[index] +} + +// mockLogsClient implements StatefulLogsServiceClient for testing +type mockLogsClient struct { + mu sync.Mutex + + // Control stream creation + createStreamErr error // If set, LogsStream() will return this error + failStreamCreationFor int // Fail the next N stream creation attempts + currentStream *mockLogsStream + streamCtx context.Context + streamCancel context.CancelFunc +} + +func newMockLogsClient() *mockLogsClient { + return &mockLogsClient{} +} + +func (m 
*mockLogsClient) LogsStream(ctx context.Context, _ ...grpc.CallOption) (statefulpb.StatefulLogsService_LogsStreamClient, error) { + m.mu.Lock() + defer m.mu.Unlock() + + // Check counter-based failure first + if m.failStreamCreationFor > 0 { + m.failStreamCreationFor-- + err := m.createStreamErr + // Clear error when counter reaches 0 + if m.failStreamCreationFor == 0 { + m.createStreamErr = nil + } + return nil, err + } + + // Check error-based failure (only if counter is not in use) + if m.createStreamErr != nil { + return nil, m.createStreamErr + } + + // Create a new stream with a child context + m.streamCtx, m.streamCancel = context.WithCancel(ctx) + m.currentStream = newMockLogsStream(m.streamCtx) + return m.currentStream, nil +} + +// Helper to fail the next N stream creation attempts +func (m *mockLogsClient) failNextStreamCreations(count int, err error) { + m.mu.Lock() + defer m.mu.Unlock() + m.failStreamCreationFor = count + m.createStreamErr = err +} + +// Helper to get current stream +func (m *mockLogsClient) getCurrentStream() *mockLogsStream { + m.mu.Lock() + defer m.mu.Unlock() + return m.currentStream +} + +// testFixture holds all the components needed for testing +type testFixture struct { + t *testing.T + mockClock *clock.Mock + mockClient *mockLogsClient + mockSink *mockSink + inputChan chan *message.Payload + outputChan chan *message.Payload + destCtx *client.DestinationsContext + endpoint config.Endpoint + streamLifetime time.Duration + worker *streamWorker +} + +// newTestFixture creates all the test infrastructure +func newTestFixture(t *testing.T) *testFixture { + // Create mock client + mockClient := newMockLogsClient() + + // Create mock sink + mockSink := newMockSink() + + // Create input channel + inputChan := make(chan *message.Payload, 100) + + // Create mock destination context + destCtx := client.NewDestinationsContext() + destCtx.Start() + + // Create endpoint config with test backoff settings + endpoint := config.Endpoint{ + 
BackoffFactor: 2.0, + BackoffBase: 1.0, + BackoffMax: 10.0, + RecoveryInterval: 2, + RecoveryReset: false, + } + + // Create mock clock + mockClock := clock.NewMock() + + fixture := &testFixture{ + t: t, + mockClock: mockClock, + mockClient: mockClient, + mockSink: mockSink, + inputChan: inputChan, + outputChan: mockSink.outputChan, + destCtx: destCtx, + endpoint: endpoint, + streamLifetime: 10 * time.Second, + } + + return fixture +} + +// createWorker creates a streamWorker with the fixture's components +func (f *testFixture) createWorker() *streamWorker { + return f.createWorkerWithInflight(nil) // nil = use default maxInflight +} + +// createWorkerWithInflight creates a streamWorker with custom inflight capacity for testing +func (f *testFixture) createWorkerWithInflight(inflight *inflightTracker) *streamWorker { + worker := newStreamWorkerWithClock( + "test-worker", + f.inputChan, + f.destCtx, + nil, // conn not needed with mock client + f.mockClient, + f.mockSink, + f.endpoint, + f.streamLifetime, + f.mockClock, + inflight, + ) + f.worker = worker + return worker +} + +// cleanup shuts down all resources +func (f *testFixture) cleanup() { + if f.worker != nil { + // Check if worker is still running before stopping + select { + case <-f.worker.done: + // Already stopped + default: + f.worker.stop() + } + } + if f.destCtx != nil { + f.destCtx.Stop() + } +} + +// Helper to create test payload for stream worker tests +func createWorkerTestPayload(content string) *message.Payload { + return &message.Payload{ + Encoded: []byte(content), + MessageMetas: []*message.MessageMetadata{ + { + RawDataLen: len(content), + }, + }, + } +} + +// TestStreamWorkerBasicStartStop tests the basic lifecycle +func TestStreamWorkerBasicStartStop(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + worker := fixture.createWorker() + + // Start the worker + worker.start() + + // Wait for stream to become active (mocked stream creation should be quick) + 
require.Eventually(t, func() bool { + return worker.streamState == active + }, testTimeout, testTickInterval, "Worker should transition to active state") + + // Verify stream was created + stream := fixture.mockClient.getCurrentStream() + require.NotNil(t, stream, "Stream should be created") + + // Stop the worker + worker.stop() + + // Verify clean shutdown + select { + case <-worker.done: + // Success + case <-time.After(testTimeout): + t.Fatal("Worker did not shut down in time") + } +} + +// TestStreamWorkerSendReceive tests basic message flow from input to output +func TestStreamWorkerSendReceive(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + worker := fixture.createWorker() + worker.start() + + // Wait for active state + require.Eventually(t, func() bool { + return worker.streamState == active + }, testTimeout, testTickInterval) + + stream := fixture.mockClient.getCurrentStream() + require.NotNil(t, stream) + + // Send one message + payload := createWorkerTestPayload("test message") + fixture.inputChan <- payload + + // Wait for message to be sent to stream + require.Eventually(t, func() bool { + return stream.getSentBatchCount() == 1 + }, testTimeout, testTickInterval) + + // Send ack for batch 1 + stream.sendAck(1) + + // Verify message appears in output channel + select { + case output := <-fixture.outputChan: + require.Equal(t, payload, output) + case <-time.After(testTimeout): + t.Fatal("Message should appear in outputChan after ack") + } +} + +// TestStreamWorkerReceiverFailureRotation tests stream rotation on receiver failure +// with an inflight message that gets re-sent on the new stream +func TestStreamWorkerReceiverFailureRotation(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + worker := fixture.createWorker() + worker.start() + + // Wait for active state + require.Eventually(t, func() bool { + return worker.streamState == active + }, testTimeout, testTickInterval) + + stream1 := 
fixture.mockClient.getCurrentStream() + require.NotNil(t, stream1) + + // Send 1 message + payload := createWorkerTestPayload("test message") + fixture.inputChan <- payload + + // Wait for message to be sent to stream1 + require.Eventually(t, func() bool { + return stream1.getSentBatchCount() == 1 + }, testTimeout, testTickInterval) + + // Give receiverLoop time to enter Recv() and block + time.Sleep(testShortWait) + + // Inject receiver error immediately (this unblocks Recv() and triggers stream rotation) + // Note: We do NOT send an ack, so the message stays inflight + stream1.injectRecvError(io.EOF) + + // Wait for rotation to complete (stream changes and state is active again) + // Note: Rotation is very fast with mocks, so we just check for the new stream + var stream2 *mockLogsStream + require.Eventually(t, func() bool { + stream2 = fixture.mockClient.getCurrentStream() + return stream2 != nil && stream2 != stream1 && worker.streamState == active + }, testTimeout, testTickInterval, "Should complete stream rotation with new stream") + + // The inflight message should be re-sent on the new stream (after rotation reset, it's batch 1 again) + require.Eventually(t, func() bool { + return stream2.getSentBatchCount() == 1 + }, testTimeout, testTickInterval, "Inflight message should be re-sent on new stream") + + // Send ack for batch 1 on new stream + stream2.sendAck(1) + + // Verify message appears in output channel + select { + case output := <-fixture.outputChan: + require.Equal(t, payload, output) + case <-time.After(testTimeout): + t.Fatal("Message should appear in outputChan after ack on new stream") + } +} + +// TestStreamWorkerStreamTimeout tests stream rotation triggered by stream timer expiration +func TestStreamWorkerStreamTimeout(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + worker := fixture.createWorker() + worker.start() + + // Wait for active state + require.Eventually(t, func() bool { + return worker.streamState == 
active + }, testTimeout, testTickInterval) + + stream1 := fixture.mockClient.getCurrentStream() + require.NotNil(t, stream1) + + // Advance clock past stream lifetime to trigger stream timeout + fixture.mockClock.Add(fixture.streamLifetime + time.Second) + + // Wait for rotation to complete (new stream created and active) + var stream2 *mockLogsStream + require.Eventually(t, func() bool { + stream2 = fixture.mockClient.getCurrentStream() + return stream2 != nil && stream2 != stream1 && worker.streamState == active + }, testTimeout, testTickInterval, "Should rotate to new stream after timer expires") + + // Send a message on the new stream + payload := createWorkerTestPayload("test on stream2") + fixture.inputChan <- payload + + // Wait for message to be sent on stream2 + require.Eventually(t, func() bool { + return stream2.getSentBatchCount() == 1 + }, testTimeout, testTickInterval, "Message should be sent on new stream") + + // Send ack + stream2.sendAck(1) + + // Verify message appears in output + select { + case output := <-fixture.outputChan: + require.Equal(t, payload, output) + case <-time.After(testTimeout): + t.Fatal("Message should appear in outputChan after ack") + } +} + +// TestStreamWorkerStreamTimeoutWithDrain tests graceful rotation when stream timer expires with inflight messages +func TestStreamWorkerStreamTimeoutWithDrain(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + worker := fixture.createWorker() + worker.start() + + // Wait for active state + require.Eventually(t, func() bool { + return worker.streamState == active + }, testTimeout, testTickInterval) + + stream1 := fixture.mockClient.getCurrentStream() + require.NotNil(t, stream1) + + // Step 1: Send 1 message on stream1, don't send ack + payload1 := createWorkerTestPayload("message 1") + fixture.inputChan <- payload1 + + // Wait for message to be sent on stream1 + require.Eventually(t, func() bool { + return stream1.getSentBatchCount() == 1 + }, testTimeout, 
testTickInterval) + + // Step 2 & 3: Advance clock to trigger stream timeout, verify draining state + fixture.mockClock.Add(fixture.streamLifetime + time.Second) + + // Should transition to draining (not connecting) because there's an unacked message + require.Eventually(t, func() bool { + return worker.streamState == draining + }, testTimeout, testTickInterval, "Should transition to draining state with unacked messages") + + // Step 4: Send another message, verify it's buffered (NOT sent on stream1) + payload2 := createWorkerTestPayload("message 2") + fixture.inputChan <- payload2 + + // Give time for message to be processed if it was going to be sent + time.Sleep(testShortWait) + + // stream1 should still only have 1 batch sent + require.Equal(t, 1, stream1.getSentBatchCount(), "Message 2 should be buffered, not sent on stream1") + + // Step 5 & 6 & 7: Send ack for batch 1, verify it appears in output + stream1.sendAck(1) + + select { + case output := <-fixture.outputChan: + require.Equal(t, payload1, output, "First message should appear in output") + case <-time.After(testTimeout): + t.Fatal("Message 1 should appear in outputChan after ack") + } + + // Step 8: Verify stream2 is created (draining → connecting → active) + var stream2 *mockLogsStream + require.Eventually(t, func() bool { + stream2 = fixture.mockClient.getCurrentStream() + return stream2 != nil && stream2 != stream1 && worker.streamState == active + }, testTimeout, testTickInterval, "Should complete rotation to new stream after ack received") + + // Step 9: Verify message 2 is sent on stream2 (batch ID resets to 1 after rotation) + require.Eventually(t, func() bool { + return stream2.getSentBatchCount() == 1 + }, testTimeout, testTickInterval, "Buffered message 2 should be sent on new stream") + + // Send ack for batch 1 on stream2 to verify it's the second message + stream2.sendAck(1) + + select { + case output := <-fixture.outputChan: + require.Equal(t, payload2, output, "Second message should 
appear in output") + case <-time.After(testTimeout): + t.Fatal("Message 2 should appear in outputChan after ack on stream2") + } +} + +// TestStreamWorkerDrainTimeout tests forced rotation when drain timer expires without receiving all acks +func TestStreamWorkerDrainTimeout(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + worker := fixture.createWorker() + worker.start() + + // Wait for active state + require.Eventually(t, func() bool { + return worker.streamState == active + }, testTimeout, testTickInterval) + + stream1 := fixture.mockClient.getCurrentStream() + require.NotNil(t, stream1) + + // Step 1: Send message on stream1, don't send ack (stays inflight) + payload := createWorkerTestPayload("message 1") + fixture.inputChan <- payload + + // Wait for message to be sent on stream1 + require.Eventually(t, func() bool { + return stream1.getSentBatchCount() == 1 + }, testTimeout, testTickInterval) + + // Step 2: Advance clock to trigger stream timeout → enter draining + fixture.mockClock.Add(fixture.streamLifetime + time.Second) + + require.Eventually(t, func() bool { + return worker.streamState == draining + }, testTimeout, testTickInterval, "Should transition to draining state") + + // Step 3: Advance clock to trigger drain timeout (without sending ack) → force rotation + fixture.mockClock.Add(drainTimeout + time.Second) + + // Step 4: Verify stream2 is created (draining → connecting → active) + var stream2 *mockLogsStream + require.Eventually(t, func() bool { + stream2 = fixture.mockClient.getCurrentStream() + return stream2 != nil && stream2 != stream1 && worker.streamState == active + }, testTimeout, testTickInterval, "Should complete rotation to new stream after drain timeout") + + // Step 5: Verify batch 1 is re-sent on stream2 (inflight message replayed) + require.Eventually(t, func() bool { + return stream2.getSentBatchCount() == 1 + }, testTimeout, testTickInterval, "Inflight message should be re-sent on new stream") + + // 
Send ack for batch 1 on stream2 + stream2.sendAck(1) + + // Verify message appears in output + select { + case output := <-fixture.outputChan: + require.Equal(t, payload, output) + case <-time.After(testTimeout): + t.Fatal("Message should appear in outputChan after ack on new stream") + } +} + +// TestStreamWorkerBackoff tests exponential backoff on stream creation failure +func TestStreamWorkerBackoff(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + worker := fixture.createWorker() + + // Configure mock to fail stream creation once, then succeed + testErr := errors.New("simulated stream creation failure") + fixture.mockClient.failNextStreamCreations(1, testErr) + + // Start worker (will attempt to create stream and should fail) + worker.start() + + // Should fail to create stream and enter disconnected state + require.Eventually(t, func() bool { + return worker.streamState == disconnected + }, testTimeout, testTickInterval, "Should transition to disconnected state after stream creation failure") + + // Verify no stream was created + require.Nil(t, fixture.mockClient.getCurrentStream(), "No stream should be created on error") + + // Advance clock gradually to trigger backoff timer and verify stream is established + // For first error, backoff is between 1-2 seconds (base=1s, factor=2, jitter) + var stream *mockLogsStream + require.Eventually(t, func() bool { + fixture.mockClock.Add(500 * time.Millisecond) + stream = fixture.mockClient.getCurrentStream() + return stream != nil && worker.streamState == active + }, testTimeout, testTickInterval, "Should transition to active state after backoff expires") + + // Verify we can send a message on the new stream + payload := createWorkerTestPayload("test message") + fixture.inputChan <- payload + + require.Eventually(t, func() bool { + return stream.getSentBatchCount() == 1 + }, testTimeout, testTickInterval, "Message should be sent on new stream") + + stream.sendAck(1) + + select { + case output 
:= <-fixture.outputChan: + require.Equal(t, payload, output) + case <-time.After(testTimeout): + t.Fatal("Message should appear in outputChan after ack") + } +} + +// TestStreamWorkerBackpressure verifies that inputChan blocks when inflight is full +func TestStreamWorkerBackpressure(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + // Use small inflight capacity for fast test + smallInflight := newInflightTracker(5) + worker := fixture.createWorkerWithInflight(smallInflight) + worker.start() + + // Wait for active state + require.Eventually(t, func() bool { + return worker.streamState == active + }, testTimeout, testTickInterval) + + stream := fixture.mockClient.getCurrentStream() + require.NotNil(t, stream) + + // Send 5 messages (don't send acks, so they stay in "sent" state and fill inflight) + for i := 0; i < 5; i++ { + fixture.inputChan <- createWorkerTestPayload("test") + } + + // Wait for inflight to be full + require.Eventually(t, func() bool { + return !worker.inflight.hasSpace() + }, testTimeout, testTickInterval, "Inflight should be full") + + // Verify backpressure: send one more message, it should NOT be consumed + fixture.inputChan <- createWorkerTestPayload("blocked") + time.Sleep(testShortWait) + require.Equal(t, 1, len(fixture.inputChan), "Message should remain in inputChan due to backpressure") + + // Send ack for batch 1 to free up space + stream.sendAck(1) + + // Verify backpressure released: the blocked message should now be consumed + require.Eventually(t, func() bool { + return len(fixture.inputChan) == 0 + }, testTimeout, testTickInterval, "Message should be consumed after ack frees space") +} + +// TestStreamWorkerErrorRecovery tests that Send() and Recv() failures trigger rotation and retry +func TestStreamWorkerErrorRecovery(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + worker := fixture.createWorker() + worker.start() + + // Wait for initial stream to be active + var stream1 
*mockLogsStream + require.Eventually(t, func() bool { + stream1 = fixture.mockClient.getCurrentStream() + return stream1 != nil && worker.streamState == active + }, testTimeout, testTickInterval, "Worker should reach active state") + + // Inject send error BEFORE sending message + stream1.setSendError(errors.New("simulated send failure")) + + // Send message - this will trigger Send() failure and rotation + payload := createWorkerTestPayload("test message") + fixture.inputChan <- payload + + // Wait for stream rotation (new stream created) + var stream2 *mockLogsStream + require.Eventually(t, func() bool { + stream2 = fixture.mockClient.getCurrentStream() + return stream2 != nil && stream2 != stream1 && worker.streamState == active + }, testTimeout, testTickInterval, "Worker should rotate to new stream after send error") + + // New stream should have retried the message (batch 1) + require.Eventually(t, func() bool { + return stream2.getSentBatchCount() == 1 + }, testTimeout, testTickInterval, "Message should be retried on new stream") + + // Send ack on new stream + stream2.sendAck(1) + + // Verify message appears in outputChan + select { + case output := <-fixture.outputChan: + require.Equal(t, payload, output) + case <-time.After(testTimeout): + t.Fatal("Message should appear in outputChan after rotation and ack") + } + + // Part 2: Test injectRecvError with retriable gRPC error + // Inject recv error (codes.Unavailable falls into default case -> rotation) + stream2.injectRecvError(status.Error(codes.Unavailable, "simulated unavailable error")) + + // Send another message + payload2 := createWorkerTestPayload("test message 2") + fixture.inputChan <- payload2 + + // Wait for stream rotation (new stream created) + var stream3 *mockLogsStream + require.Eventually(t, func() bool { + stream3 = fixture.mockClient.getCurrentStream() + return stream3 != nil && stream3 != stream2 && worker.streamState == active + }, testTimeout, testTickInterval, "Worker should rotate to 
new stream after recv error") + + // New stream should have retried the message (batch 1 - reset after rotation) + require.Eventually(t, func() bool { + return stream3.getSentBatchCount() == 1 + }, testTimeout, testTickInterval, "Message should be retried on new stream after recv error") + + // Send ack on new stream + stream3.sendAck(1) + + // Verify message appears in outputChan + select { + case output := <-fixture.outputChan: + require.Equal(t, payload2, output) + case <-time.After(testTimeout): + t.Fatal("Message should appear in outputChan after recv error rotation and ack") + } +} + +// Helper functions to create Datum objects for testing +func createPatternDefine(id uint64, template string) *statefulpb.Datum { + return &statefulpb.Datum{ + Data: &statefulpb.Datum_PatternDefine{ + PatternDefine: &statefulpb.PatternDefine{ + PatternId: id, + Template: template, + }, + }, + } +} + +func createPatternDelete(id uint64) *statefulpb.Datum { + return &statefulpb.Datum{ + Data: &statefulpb.Datum_PatternDelete{ + PatternDelete: &statefulpb.PatternDelete{ + PatternId: id, + }, + }, + } +} + +func createDictEntryDefine(id uint64, value string) *statefulpb.Datum { + return &statefulpb.Datum{ + Data: &statefulpb.Datum_DictEntryDefine{ + DictEntryDefine: &statefulpb.DictEntryDefine{ + Id: id, + Value: value, + }, + }, + } +} + +func createDictEntryDelete(id uint64) *statefulpb.Datum { + return &statefulpb.Datum{ + Data: &statefulpb.Datum_DictEntryDelete{ + DictEntryDelete: &statefulpb.DictEntryDelete{ + Id: id, + }, + }, + } +} + +// createPayloadWithState creates a payload with state changes in StatefulExtra +func createPayloadWithState(content string, stateChanges []*statefulpb.Datum) *message.Payload { + payload := createWorkerTestPayload(content) + if len(stateChanges) > 0 { + payload.StatefulExtra = &StatefulExtra{ + StateChanges: stateChanges, + } + } + return payload +} + +// verifySnapshotContents checks if a snapshot batch contains the expected state +func 
verifySnapshotContents(t *testing.T, batch *statefulpb.StatefulBatch, expectedPatterns map[uint64]string, expectedDictEntries map[uint64]string) { + require.NotNil(t, batch) + require.Equal(t, uint32(0), batch.BatchId, "Snapshot should have batch ID 0") + + // Deserialize the snapshot data (it's a DatumSequence) + var datumSeq statefulpb.DatumSequence + err := proto.Unmarshal(batch.Data, &datumSeq) + require.NoError(t, err) + + // Count what we find + foundPatterns := make(map[uint64]string) + foundDictEntries := make(map[uint64]string) + + for _, datum := range datumSeq.Data { + switch d := datum.Data.(type) { + case *statefulpb.Datum_PatternDefine: + foundPatterns[d.PatternDefine.PatternId] = d.PatternDefine.Template + case *statefulpb.Datum_DictEntryDefine: + foundDictEntries[d.DictEntryDefine.Id] = d.DictEntryDefine.Value + default: + t.Fatalf("Snapshot should only contain PatternDefine and DictEntryDefine, got: %T", datum.Data) + } + } + + require.Equal(t, expectedPatterns, foundPatterns, "Snapshot patterns mismatch") + require.Equal(t, expectedDictEntries, foundDictEntries, "Snapshot dict entries mismatch") +} + +// TestStreamWorkerSnapshot tests the snapshot functionality across stream rotations +func TestStreamWorkerSnapshot(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + // Override stream lifetime for this test + fixture.streamLifetime = time.Second + worker := fixture.createWorker() + worker.start() + + // Wait for initial stream to be ready + var stream1 *mockLogsStream + require.Eventually(t, func() bool { + stream1 = fixture.mockClient.getCurrentStream() + return stream1 != nil && worker.streamState == active + }, testTimeout, testTickInterval, "Initial stream should be established") + + // === Step 1: Send Batch 1 (5 entries) === + batch1StateChanges := []*statefulpb.Datum{ + createPatternDefine(1, "pattern1"), + createDictEntryDefine(1, "value1"), + createPatternDefine(2, "pattern2"), + createDictEntryDefine(2, 
"value2"), + } + batch1Payload := createPayloadWithState("log with p1/d1", batch1StateChanges) + fixture.inputChan <- batch1Payload + + // Wait for batch 1 to be sent + require.Eventually(t, func() bool { + return stream1.getSentBatchCount() == 1 + }, testTimeout, testTickInterval, "Batch 1 should be sent") + + // === Step 2: Ack Batch 1 === + stream1.sendAck(1) + + // Verify batch 1 appears in outputChan + select { + case output := <-fixture.outputChan: + require.Equal(t, batch1Payload, output) + case <-time.After(testTimeout): + t.Fatal("Batch 1 should appear in outputChan") + } + + // === Step 3: Send Batch 2 (6 entries) === + batch2StateChanges := []*statefulpb.Datum{ + createPatternDelete(1), + createDictEntryDelete(1), + createPatternDefine(3, "pattern3"), + createDictEntryDefine(3, "value3"), + } + batch2Payload := createPayloadWithState("log with p2/d2 and p3/d3", batch2StateChanges) + fixture.inputChan <- batch2Payload + + // Wait for batch 2 to be sent + require.Eventually(t, func() bool { + return stream1.getSentBatchCount() == 2 + }, testTimeout, testTickInterval, "Batch 2 should be sent") + + // === Step 4: Cut stream with recv failure (before acking batch 2) === + stream1.injectRecvError(io.EOF) + + // Wait for stream rotation + var stream2 *mockLogsStream + require.Eventually(t, func() bool { + stream2 = fixture.mockClient.getCurrentStream() + return stream2 != nil && stream2 != stream1 && worker.streamState == active + }, testTimeout, testTickInterval, "Stream should rotate after recv failure") + + // === Step 5: Verify snapshot on new stream === + // Snapshot should contain state BEFORE batch 2: {p1, p2, d1, d2} + require.Eventually(t, func() bool { + return stream2.getSentBatchCount() >= 1 + }, testTimeout, testTickInterval, "Snapshot should be sent on new stream") + + snapshotBatch := stream2.getSentBatch(0) + expectedPatterns1 := map[uint64]string{ + 1: "pattern1", + 2: "pattern2", + } + expectedDictEntries1 := map[uint64]string{ + 1: "value1", 
+ 2: "value2", + } + verifySnapshotContents(t, snapshotBatch, expectedPatterns1, expectedDictEntries1) + + // === Step 6: Ack snapshot (batch 0) === + stream2.sendAck(0) + + // === Step 7: Verify Batch 2 is retransmitted === + require.Eventually(t, func() bool { + return stream2.getSentBatchCount() == 2 + }, testTimeout, testTickInterval, "Batch 2 should be retransmitted") + + batch2Retransmitted := stream2.getSentBatch(1) + require.Equal(t, uint32(1), batch2Retransmitted.BatchId) + + // === Step 8: Ack Batch 2 === + stream2.sendAck(1) + + // Verify batch 2 appears in outputChan + select { + case output := <-fixture.outputChan: + require.Equal(t, batch2Payload, output) + case <-time.After(testTimeout): + t.Fatal("Batch 2 should appear in outputChan") + } + + // === Step 9: Send Batch 3 (3 entries) === + batch3StateChanges := []*statefulpb.Datum{ + createPatternDefine(4, "pattern4"), + createDictEntryDefine(4, "value4"), + } + batch3Payload := createPayloadWithState("log with p4/d4", batch3StateChanges) + fixture.inputChan <- batch3Payload + + // Wait for batch 3 to be sent + require.Eventually(t, func() bool { + return stream2.getSentBatchCount() == 3 + }, testTimeout, testTickInterval, "Batch 3 should be sent") + + // === Step 10: Stream timer expires === + fixture.mockClock.Add(time.Second) + + // Worker should enter draining state (batch 3 is still inflight) + require.Eventually(t, func() bool { + return worker.streamState == draining + }, testTimeout, testTickInterval, "Worker should enter draining state") + + // === Step 11: Drain timer expires (force rotation) === + fixture.mockClock.Add(5 * time.Second) // drainTimeout is 5 seconds + + // Wait for new stream to be created + var stream3 *mockLogsStream + require.Eventually(t, func() bool { + stream3 = fixture.mockClient.getCurrentStream() + return stream3 != nil && stream3 != stream2 && worker.streamState == active + }, testTimeout, testTickInterval, "Stream should rotate after drain timeout") + + // === Step 
12: Verify snapshot on new stream === + // Snapshot should contain state AFTER batch 2, BEFORE batch 3: {p2, p3, d2, d3} + // (p1/d1 were deleted in batch 2) + require.Eventually(t, func() bool { + return stream3.getSentBatchCount() >= 1 + }, testTimeout, testTickInterval, "Snapshot should be sent on new stream") + + snapshotBatch2 := stream3.getSentBatch(0) + expectedPatterns2 := map[uint64]string{ + 2: "pattern2", + 3: "pattern3", + } + expectedDictEntries2 := map[uint64]string{ + 2: "value2", + 3: "value3", + } + verifySnapshotContents(t, snapshotBatch2, expectedPatterns2, expectedDictEntries2) + + // Ack snapshot and batch 3 + stream3.sendAck(0) + stream3.sendAck(1) + + // Verify batch 3 appears in outputChan + select { + case output := <-fixture.outputChan: + require.Equal(t, batch3Payload, output) + case <-time.After(testTimeout): + t.Fatal("Batch 3 should appear in outputChan") + } +} diff --git a/pkg/logs/sender/grpc/streamstate_string.go b/pkg/logs/sender/grpc/streamstate_string.go new file mode 100644 index 000000000000..6081ed22284e --- /dev/null +++ b/pkg/logs/sender/grpc/streamstate_string.go @@ -0,0 +1,27 @@ +// Code generated by "stringer -type=streamState"; DO NOT EDIT. + +package grpc + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[disconnected-0] + _ = x[connecting-1] + _ = x[active-2] + _ = x[draining-3] +} + +const _streamState_name = "disconnectedconnectingactivedraining" + +var _streamState_index = [...]uint8{0, 12, 22, 28, 36} + +func (i streamState) String() string { + idx := int(i) - 0 + if i < 0 || idx >= len(_streamState_index)-1 { + return "streamState(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _streamState_name[_streamState_index[idx]:_streamState_index[idx+1]] +} diff --git a/pkg/logs/sender/message_buffer.go b/pkg/logs/sender/message_buffer.go index ad2112193eaa..b1185e16ade7 100644 --- a/pkg/logs/sender/message_buffer.go +++ b/pkg/logs/sender/message_buffer.go @@ -28,8 +28,15 @@ func NewMessageBuffer(batchSizeLimit int, contentSizeLimit int) *MessageBuffer { // returns true if the message was added. func (p *MessageBuffer) AddMessage(message *message.Message) bool { contentSize := len(message.GetContent()) + return p.AddMessageWithSize(&message.MessageMetadata, contentSize) +} + +// AddMessageWithSize adds a message to the buffer if there is still some free space, +// returns true if the message was added. +// As input it takes directly metadata and content size, instead of a message. 
+func (p *MessageBuffer) AddMessageWithSize(metadata *message.MessageMetadata, contentSize int) bool { if len(p.messageBuffer) < cap(p.messageBuffer) && p.contentSize+contentSize <= p.contentSizeLimit { - meta := message.MessageMetadata // Copy metadata instead of taking reference + meta := *metadata // Copy metadata instead of taking reference p.messageBuffer = append(p.messageBuffer, &meta) p.contentSize += contentSize return true diff --git a/pkg/proto/datadog/stateful/stateful_encoding.proto b/pkg/proto/datadog/stateful/stateful_encoding.proto new file mode 100644 index 000000000000..6696e971b163 --- /dev/null +++ b/pkg/proto/datadog/stateful/stateful_encoding.proto @@ -0,0 +1,122 @@ +syntax = "proto3"; + +package datadog.intake.stateful; + +option go_package = "pkg/proto/pbgo/statefulpb"; + +// --------------------------------------------------------------------------- +// Dictionary-encoded +// --------------------------------------------------------------------------- + +message DictEntryDefine { + uint64 id = 1; + string value = 2; +} + +message DictEntryDelete { + uint64 id = 1; +} + +// --------------------------------------------------------------------------- +// Pattern dictionary +// --------------------------------------------------------------------------- + +// pos_list is used to indicate where dynamic values should be inserted +// it's more accurate than a marker +message PatternDefine { + uint64 pattern_id = 1; + string template = 2; + uint32 param_count = 3; + repeated uint32 pos_list = 4; +} + +message PatternDelete { + uint64 pattern_id = 1; +} + +// --------------------------------------------------------------------------- +// Log payload +// --------------------------------------------------------------------------- + +message Tag { + DynamicValue key = 1; + DynamicValue value = 2; +} + +message Log { + uint64 timestamp = 1; + oneof content { + StructuredLog structured = 2; + string raw = 3; + } + // TODO: right now we are assuming logs 
are attached per tag - in the future we may have common tags in the stream + // state and auto-populate them downstream. + // Required tags: `service`, `hostname`, + // Other tags on agent payload: `status`, `source` + // All other tags are sent as `ddtags` + repeated Tag tags = 4; +} + +message StructuredLog { + uint64 pattern_id = 1; + repeated DynamicValue dynamic_values = 2; +} + +// TODO not sure we need numeric type +message DynamicValue { + oneof value { + int64 int_value = 1; + double float_value = 2; + string string_value = 3; + uint64 dict_index = 4; + } +} + +// --------------------------------------------------------------------------- +// Streaming envelope +// --------------------------------------------------------------------------- + +message Datum { + oneof data { + PatternDefine pattern_define = 1; + PatternDelete pattern_delete = 2; + DictEntryDefine dict_entry_define = 3; + DictEntryDelete dict_entry_delete = 4; + Log logs = 5; + } +} + +// DatumSequence wraps a sequence of Datum messages +// Used for serialization in application-level compression +message DatumSequence { + repeated Datum data = 1; +} + +// data is sequence of pattern/dictionary changes + logs +// the ordering is significant, must be processed in order +message StatefulBatch { + uint32 batch_id = 1; + + // Bytes of a serialized DatumSequence. Eventually this will also be compressed. + // This allows for Datums to be compressed while they are buffered in memory before being acked by the server. + bytes data = 2; +} + +message BatchStatus { + uint32 batch_id = 1; + + // TODO: only OK is used right now - should we just remove this enum? 
+ enum Status { + UNKNOWN = 0; + OK = 1; + } + Status status = 2; +} + +// --------------------------------------------------------------------------- +// gRPC service definition (bi-directional streaming) +// --------------------------------------------------------------------------- + +service StatefulLogsService { + rpc LogsStream(stream StatefulBatch) returns (stream BatchStatus); +} diff --git a/pkg/proto/pbgo/statefulpb/stateful_encoding.pb.go b/pkg/proto/pbgo/statefulpb/stateful_encoding.pb.go new file mode 100644 index 000000000000..c70bb84bea12 --- /dev/null +++ b/pkg/proto/pbgo/statefulpb/stateful_encoding.pb.go @@ -0,0 +1,1159 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.36.10 +// protoc v5.29.3 +// source: datadog/stateful/stateful_encoding.proto + +package statefulpb + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" + unsafe "unsafe" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// TODO: only OK is used right now - should we just remove this enum? +type BatchStatus_Status int32 + +const ( + BatchStatus_UNKNOWN BatchStatus_Status = 0 + BatchStatus_OK BatchStatus_Status = 1 +) + +// Enum value maps for BatchStatus_Status. 
+var ( + BatchStatus_Status_name = map[int32]string{ + 0: "UNKNOWN", + 1: "OK", + } + BatchStatus_Status_value = map[string]int32{ + "UNKNOWN": 0, + "OK": 1, + } +) + +func (x BatchStatus_Status) Enum() *BatchStatus_Status { + p := new(BatchStatus_Status) + *p = x + return p +} + +func (x BatchStatus_Status) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (BatchStatus_Status) Descriptor() protoreflect.EnumDescriptor { + return file_datadog_stateful_stateful_encoding_proto_enumTypes[0].Descriptor() +} + +func (BatchStatus_Status) Type() protoreflect.EnumType { + return &file_datadog_stateful_stateful_encoding_proto_enumTypes[0] +} + +func (x BatchStatus_Status) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use BatchStatus_Status.Descriptor instead. +func (BatchStatus_Status) EnumDescriptor() ([]byte, []int) { + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{11, 0} +} + +type DictEntryDefine struct { + state protoimpl.MessageState `protogen:"open.v1"` + Id uint64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"` + Value string `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DictEntryDefine) Reset() { + *x = DictEntryDefine{} + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DictEntryDefine) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DictEntryDefine) ProtoMessage() {} + +func (x *DictEntryDefine) ProtoReflect() protoreflect.Message { + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// 
Deprecated: Use DictEntryDefine.ProtoReflect.Descriptor instead. +func (*DictEntryDefine) Descriptor() ([]byte, []int) { + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{0} +} + +func (x *DictEntryDefine) GetId() uint64 { + if x != nil { + return x.Id + } + return 0 +} + +func (x *DictEntryDefine) GetValue() string { + if x != nil { + return x.Value + } + return "" +} + +type DictEntryDelete struct { + state protoimpl.MessageState `protogen:"open.v1"` + Id uint64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DictEntryDelete) Reset() { + *x = DictEntryDelete{} + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DictEntryDelete) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DictEntryDelete) ProtoMessage() {} + +func (x *DictEntryDelete) ProtoReflect() protoreflect.Message { + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[1] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DictEntryDelete.ProtoReflect.Descriptor instead. 
+func (*DictEntryDelete) Descriptor() ([]byte, []int) { + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{1} +} + +func (x *DictEntryDelete) GetId() uint64 { + if x != nil { + return x.Id + } + return 0 +} + +// pos_list is used to indicate where dynamic values should be inserted +// it's more accurate than a marker +type PatternDefine struct { + state protoimpl.MessageState `protogen:"open.v1"` + PatternId uint64 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"` + Template string `protobuf:"bytes,2,opt,name=template,proto3" json:"template,omitempty"` + ParamCount uint32 `protobuf:"varint,3,opt,name=param_count,json=paramCount,proto3" json:"param_count,omitempty"` + PosList []uint32 `protobuf:"varint,4,rep,packed,name=pos_list,json=posList,proto3" json:"pos_list,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *PatternDefine) Reset() { + *x = PatternDefine{} + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *PatternDefine) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PatternDefine) ProtoMessage() {} + +func (x *PatternDefine) ProtoReflect() protoreflect.Message { + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[2] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PatternDefine.ProtoReflect.Descriptor instead. 
+func (*PatternDefine) Descriptor() ([]byte, []int) { + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{2} +} + +func (x *PatternDefine) GetPatternId() uint64 { + if x != nil { + return x.PatternId + } + return 0 +} + +func (x *PatternDefine) GetTemplate() string { + if x != nil { + return x.Template + } + return "" +} + +func (x *PatternDefine) GetParamCount() uint32 { + if x != nil { + return x.ParamCount + } + return 0 +} + +func (x *PatternDefine) GetPosList() []uint32 { + if x != nil { + return x.PosList + } + return nil +} + +type PatternDelete struct { + state protoimpl.MessageState `protogen:"open.v1"` + PatternId uint64 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *PatternDelete) Reset() { + *x = PatternDelete{} + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *PatternDelete) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PatternDelete) ProtoMessage() {} + +func (x *PatternDelete) ProtoReflect() protoreflect.Message { + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[3] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PatternDelete.ProtoReflect.Descriptor instead. 
+func (*PatternDelete) Descriptor() ([]byte, []int) { + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{3} +} + +func (x *PatternDelete) GetPatternId() uint64 { + if x != nil { + return x.PatternId + } + return 0 +} + +type Tag struct { + state protoimpl.MessageState `protogen:"open.v1"` + Key *DynamicValue `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"` + Value *DynamicValue `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Tag) Reset() { + *x = Tag{} + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Tag) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Tag) ProtoMessage() {} + +func (x *Tag) ProtoReflect() protoreflect.Message { + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[4] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Tag.ProtoReflect.Descriptor instead. 
+func (*Tag) Descriptor() ([]byte, []int) { + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{4} +} + +func (x *Tag) GetKey() *DynamicValue { + if x != nil { + return x.Key + } + return nil +} + +func (x *Tag) GetValue() *DynamicValue { + if x != nil { + return x.Value + } + return nil +} + +type Log struct { + state protoimpl.MessageState `protogen:"open.v1"` + Timestamp uint64 `protobuf:"varint,1,opt,name=timestamp,proto3" json:"timestamp,omitempty"` + // Types that are valid to be assigned to Content: + // + // *Log_Structured + // *Log_Raw + Content isLog_Content `protobuf_oneof:"content"` + // TODO: right now we are assuming logs are attached per tag - in the future we may have common tags in the stream + // state and auto-populate them downstream. + // Required tags: `service`, `hostname`, + // Other tags on agent payload: `status`, `source` + // All other tags are sent as `ddtags` + Tags []*Tag `protobuf:"bytes,4,rep,name=tags,proto3" json:"tags,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Log) Reset() { + *x = Log{} + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Log) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Log) ProtoMessage() {} + +func (x *Log) ProtoReflect() protoreflect.Message { + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[5] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Log.ProtoReflect.Descriptor instead. 
+func (*Log) Descriptor() ([]byte, []int) { + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{5} +} + +func (x *Log) GetTimestamp() uint64 { + if x != nil { + return x.Timestamp + } + return 0 +} + +func (x *Log) GetContent() isLog_Content { + if x != nil { + return x.Content + } + return nil +} + +func (x *Log) GetStructured() *StructuredLog { + if x != nil { + if x, ok := x.Content.(*Log_Structured); ok { + return x.Structured + } + } + return nil +} + +func (x *Log) GetRaw() string { + if x != nil { + if x, ok := x.Content.(*Log_Raw); ok { + return x.Raw + } + } + return "" +} + +func (x *Log) GetTags() []*Tag { + if x != nil { + return x.Tags + } + return nil +} + +type isLog_Content interface { + isLog_Content() +} + +type Log_Structured struct { + Structured *StructuredLog `protobuf:"bytes,2,opt,name=structured,proto3,oneof"` +} + +type Log_Raw struct { + Raw string `protobuf:"bytes,3,opt,name=raw,proto3,oneof"` +} + +func (*Log_Structured) isLog_Content() {} + +func (*Log_Raw) isLog_Content() {} + +type StructuredLog struct { + state protoimpl.MessageState `protogen:"open.v1"` + PatternId uint64 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"` + DynamicValues []*DynamicValue `protobuf:"bytes,2,rep,name=dynamic_values,json=dynamicValues,proto3" json:"dynamic_values,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *StructuredLog) Reset() { + *x = StructuredLog{} + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *StructuredLog) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StructuredLog) ProtoMessage() {} + +func (x *StructuredLog) ProtoReflect() protoreflect.Message { + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[6] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + 
if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StructuredLog.ProtoReflect.Descriptor instead. +func (*StructuredLog) Descriptor() ([]byte, []int) { + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{6} +} + +func (x *StructuredLog) GetPatternId() uint64 { + if x != nil { + return x.PatternId + } + return 0 +} + +func (x *StructuredLog) GetDynamicValues() []*DynamicValue { + if x != nil { + return x.DynamicValues + } + return nil +} + +// TODO not sure we need numeric type +type DynamicValue struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Types that are valid to be assigned to Value: + // + // *DynamicValue_IntValue + // *DynamicValue_FloatValue + // *DynamicValue_StringValue + // *DynamicValue_DictIndex + Value isDynamicValue_Value `protobuf_oneof:"value"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DynamicValue) Reset() { + *x = DynamicValue{} + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DynamicValue) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DynamicValue) ProtoMessage() {} + +func (x *DynamicValue) ProtoReflect() protoreflect.Message { + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[7] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DynamicValue.ProtoReflect.Descriptor instead. 
+func (*DynamicValue) Descriptor() ([]byte, []int) { + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{7} +} + +func (x *DynamicValue) GetValue() isDynamicValue_Value { + if x != nil { + return x.Value + } + return nil +} + +func (x *DynamicValue) GetIntValue() int64 { + if x != nil { + if x, ok := x.Value.(*DynamicValue_IntValue); ok { + return x.IntValue + } + } + return 0 +} + +func (x *DynamicValue) GetFloatValue() float64 { + if x != nil { + if x, ok := x.Value.(*DynamicValue_FloatValue); ok { + return x.FloatValue + } + } + return 0 +} + +func (x *DynamicValue) GetStringValue() string { + if x != nil { + if x, ok := x.Value.(*DynamicValue_StringValue); ok { + return x.StringValue + } + } + return "" +} + +func (x *DynamicValue) GetDictIndex() uint64 { + if x != nil { + if x, ok := x.Value.(*DynamicValue_DictIndex); ok { + return x.DictIndex + } + } + return 0 +} + +type isDynamicValue_Value interface { + isDynamicValue_Value() +} + +type DynamicValue_IntValue struct { + IntValue int64 `protobuf:"varint,1,opt,name=int_value,json=intValue,proto3,oneof"` +} + +type DynamicValue_FloatValue struct { + FloatValue float64 `protobuf:"fixed64,2,opt,name=float_value,json=floatValue,proto3,oneof"` +} + +type DynamicValue_StringValue struct { + StringValue string `protobuf:"bytes,3,opt,name=string_value,json=stringValue,proto3,oneof"` +} + +type DynamicValue_DictIndex struct { + DictIndex uint64 `protobuf:"varint,4,opt,name=dict_index,json=dictIndex,proto3,oneof"` +} + +func (*DynamicValue_IntValue) isDynamicValue_Value() {} + +func (*DynamicValue_FloatValue) isDynamicValue_Value() {} + +func (*DynamicValue_StringValue) isDynamicValue_Value() {} + +func (*DynamicValue_DictIndex) isDynamicValue_Value() {} + +type Datum struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Types that are valid to be assigned to Data: + // + // *Datum_PatternDefine + // *Datum_PatternDelete + // *Datum_DictEntryDefine + // *Datum_DictEntryDelete + // 
*Datum_Logs + Data isDatum_Data `protobuf_oneof:"data"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Datum) Reset() { + *x = Datum{} + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Datum) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Datum) ProtoMessage() {} + +func (x *Datum) ProtoReflect() protoreflect.Message { + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[8] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Datum.ProtoReflect.Descriptor instead. +func (*Datum) Descriptor() ([]byte, []int) { + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{8} +} + +func (x *Datum) GetData() isDatum_Data { + if x != nil { + return x.Data + } + return nil +} + +func (x *Datum) GetPatternDefine() *PatternDefine { + if x != nil { + if x, ok := x.Data.(*Datum_PatternDefine); ok { + return x.PatternDefine + } + } + return nil +} + +func (x *Datum) GetPatternDelete() *PatternDelete { + if x != nil { + if x, ok := x.Data.(*Datum_PatternDelete); ok { + return x.PatternDelete + } + } + return nil +} + +func (x *Datum) GetDictEntryDefine() *DictEntryDefine { + if x != nil { + if x, ok := x.Data.(*Datum_DictEntryDefine); ok { + return x.DictEntryDefine + } + } + return nil +} + +func (x *Datum) GetDictEntryDelete() *DictEntryDelete { + if x != nil { + if x, ok := x.Data.(*Datum_DictEntryDelete); ok { + return x.DictEntryDelete + } + } + return nil +} + +func (x *Datum) GetLogs() *Log { + if x != nil { + if x, ok := x.Data.(*Datum_Logs); ok { + return x.Logs + } + } + return nil +} + +type isDatum_Data interface { + isDatum_Data() +} + +type Datum_PatternDefine struct { + PatternDefine *PatternDefine 
`protobuf:"bytes,1,opt,name=pattern_define,json=patternDefine,proto3,oneof"` +} + +type Datum_PatternDelete struct { + PatternDelete *PatternDelete `protobuf:"bytes,2,opt,name=pattern_delete,json=patternDelete,proto3,oneof"` +} + +type Datum_DictEntryDefine struct { + DictEntryDefine *DictEntryDefine `protobuf:"bytes,3,opt,name=dict_entry_define,json=dictEntryDefine,proto3,oneof"` +} + +type Datum_DictEntryDelete struct { + DictEntryDelete *DictEntryDelete `protobuf:"bytes,4,opt,name=dict_entry_delete,json=dictEntryDelete,proto3,oneof"` +} + +type Datum_Logs struct { + Logs *Log `protobuf:"bytes,5,opt,name=logs,proto3,oneof"` +} + +func (*Datum_PatternDefine) isDatum_Data() {} + +func (*Datum_PatternDelete) isDatum_Data() {} + +func (*Datum_DictEntryDefine) isDatum_Data() {} + +func (*Datum_DictEntryDelete) isDatum_Data() {} + +func (*Datum_Logs) isDatum_Data() {} + +// DatumSequence wraps a sequence of Datum messages +// Used for serialization in application-level compression +type DatumSequence struct { + state protoimpl.MessageState `protogen:"open.v1"` + Data []*Datum `protobuf:"bytes,1,rep,name=data,proto3" json:"data,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DatumSequence) Reset() { + *x = DatumSequence{} + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[9] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DatumSequence) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DatumSequence) ProtoMessage() {} + +func (x *DatumSequence) ProtoReflect() protoreflect.Message { + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[9] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DatumSequence.ProtoReflect.Descriptor instead. 
+func (*DatumSequence) Descriptor() ([]byte, []int) { + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{9} +} + +func (x *DatumSequence) GetData() []*Datum { + if x != nil { + return x.Data + } + return nil +} + +// data is sequence of pattern/dictionary changes + logs +// the ordering is significant, must be processed in order +type StatefulBatch struct { + state protoimpl.MessageState `protogen:"open.v1"` + BatchId uint32 `protobuf:"varint,1,opt,name=batch_id,json=batchId,proto3" json:"batch_id,omitempty"` + // Bytes of a serialized DatumSequence. Eventually this will also be compressed. + // This allows for Datums to be compressed while they are buffered in memory before being acked by the server. + Data []byte `protobuf:"bytes,2,opt,name=data,proto3" json:"data,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *StatefulBatch) Reset() { + *x = StatefulBatch{} + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[10] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *StatefulBatch) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StatefulBatch) ProtoMessage() {} + +func (x *StatefulBatch) ProtoReflect() protoreflect.Message { + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[10] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StatefulBatch.ProtoReflect.Descriptor instead. 
+func (*StatefulBatch) Descriptor() ([]byte, []int) { + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{10} +} + +func (x *StatefulBatch) GetBatchId() uint32 { + if x != nil { + return x.BatchId + } + return 0 +} + +func (x *StatefulBatch) GetData() []byte { + if x != nil { + return x.Data + } + return nil +} + +type BatchStatus struct { + state protoimpl.MessageState `protogen:"open.v1"` + BatchId uint32 `protobuf:"varint,1,opt,name=batch_id,json=batchId,proto3" json:"batch_id,omitempty"` + Status BatchStatus_Status `protobuf:"varint,2,opt,name=status,proto3,enum=datadog.intake.stateful.BatchStatus_Status" json:"status,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *BatchStatus) Reset() { + *x = BatchStatus{} + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[11] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *BatchStatus) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*BatchStatus) ProtoMessage() {} + +func (x *BatchStatus) ProtoReflect() protoreflect.Message { + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[11] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use BatchStatus.ProtoReflect.Descriptor instead. 
+func (*BatchStatus) Descriptor() ([]byte, []int) { + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{11} +} + +func (x *BatchStatus) GetBatchId() uint32 { + if x != nil { + return x.BatchId + } + return 0 +} + +func (x *BatchStatus) GetStatus() BatchStatus_Status { + if x != nil { + return x.Status + } + return BatchStatus_UNKNOWN +} + +var File_datadog_stateful_stateful_encoding_proto protoreflect.FileDescriptor + +const file_datadog_stateful_stateful_encoding_proto_rawDesc = "" + + "\n" + + "(datadog/stateful/stateful_encoding.proto\x12\x17datadog.intake.stateful\"7\n" + + "\x0fDictEntryDefine\x12\x0e\n" + + "\x02id\x18\x01 \x01(\x04R\x02id\x12\x14\n" + + "\x05value\x18\x02 \x01(\tR\x05value\"!\n" + + "\x0fDictEntryDelete\x12\x0e\n" + + "\x02id\x18\x01 \x01(\x04R\x02id\"\x86\x01\n" + + "\rPatternDefine\x12\x1d\n" + + "\n" + + "pattern_id\x18\x01 \x01(\x04R\tpatternId\x12\x1a\n" + + "\btemplate\x18\x02 \x01(\tR\btemplate\x12\x1f\n" + + "\vparam_count\x18\x03 \x01(\rR\n" + + "paramCount\x12\x19\n" + + "\bpos_list\x18\x04 \x03(\rR\aposList\".\n" + + "\rPatternDelete\x12\x1d\n" + + "\n" + + "pattern_id\x18\x01 \x01(\x04R\tpatternId\"{\n" + + "\x03Tag\x127\n" + + "\x03key\x18\x01 \x01(\v2%.datadog.intake.stateful.DynamicValueR\x03key\x12;\n" + + "\x05value\x18\x02 \x01(\v2%.datadog.intake.stateful.DynamicValueR\x05value\"\xbe\x01\n" + + "\x03Log\x12\x1c\n" + + "\ttimestamp\x18\x01 \x01(\x04R\ttimestamp\x12H\n" + + "\n" + + "structured\x18\x02 \x01(\v2&.datadog.intake.stateful.StructuredLogH\x00R\n" + + "structured\x12\x12\n" + + "\x03raw\x18\x03 \x01(\tH\x00R\x03raw\x120\n" + + "\x04tags\x18\x04 \x03(\v2\x1c.datadog.intake.stateful.TagR\x04tagsB\t\n" + + "\acontent\"|\n" + + "\rStructuredLog\x12\x1d\n" + + "\n" + + "pattern_id\x18\x01 \x01(\x04R\tpatternId\x12L\n" + + "\x0edynamic_values\x18\x02 \x03(\v2%.datadog.intake.stateful.DynamicValueR\rdynamicValues\"\x9f\x01\n" + + "\fDynamicValue\x12\x1d\n" + + "\tint_value\x18\x01 
\x01(\x03H\x00R\bintValue\x12!\n" + + "\vfloat_value\x18\x02 \x01(\x01H\x00R\n" + + "floatValue\x12#\n" + + "\fstring_value\x18\x03 \x01(\tH\x00R\vstringValue\x12\x1f\n" + + "\n" + + "dict_index\x18\x04 \x01(\x04H\x00R\tdictIndexB\a\n" + + "\x05value\"\x95\x03\n" + + "\x05Datum\x12O\n" + + "\x0epattern_define\x18\x01 \x01(\v2&.datadog.intake.stateful.PatternDefineH\x00R\rpatternDefine\x12O\n" + + "\x0epattern_delete\x18\x02 \x01(\v2&.datadog.intake.stateful.PatternDeleteH\x00R\rpatternDelete\x12V\n" + + "\x11dict_entry_define\x18\x03 \x01(\v2(.datadog.intake.stateful.DictEntryDefineH\x00R\x0fdictEntryDefine\x12V\n" + + "\x11dict_entry_delete\x18\x04 \x01(\v2(.datadog.intake.stateful.DictEntryDeleteH\x00R\x0fdictEntryDelete\x122\n" + + "\x04logs\x18\x05 \x01(\v2\x1c.datadog.intake.stateful.LogH\x00R\x04logsB\x06\n" + + "\x04data\"C\n" + + "\rDatumSequence\x122\n" + + "\x04data\x18\x01 \x03(\v2\x1e.datadog.intake.stateful.DatumR\x04data\">\n" + + "\rStatefulBatch\x12\x19\n" + + "\bbatch_id\x18\x01 \x01(\rR\abatchId\x12\x12\n" + + "\x04data\x18\x02 \x01(\fR\x04data\"\x8c\x01\n" + + "\vBatchStatus\x12\x19\n" + + "\bbatch_id\x18\x01 \x01(\rR\abatchId\x12C\n" + + "\x06status\x18\x02 \x01(\x0e2+.datadog.intake.stateful.BatchStatus.StatusR\x06status\"\x1d\n" + + "\x06Status\x12\v\n" + + "\aUNKNOWN\x10\x00\x12\x06\n" + + "\x02OK\x10\x012u\n" + + "\x13StatefulLogsService\x12^\n" + + "\n" + + "LogsStream\x12&.datadog.intake.stateful.StatefulBatch\x1a$.datadog.intake.stateful.BatchStatus(\x010\x01B\x1bZ\x19pkg/proto/pbgo/statefulpbb\x06proto3" + +var ( + file_datadog_stateful_stateful_encoding_proto_rawDescOnce sync.Once + file_datadog_stateful_stateful_encoding_proto_rawDescData []byte +) + +func file_datadog_stateful_stateful_encoding_proto_rawDescGZIP() []byte { + file_datadog_stateful_stateful_encoding_proto_rawDescOnce.Do(func() { + file_datadog_stateful_stateful_encoding_proto_rawDescData = 
protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_datadog_stateful_stateful_encoding_proto_rawDesc), len(file_datadog_stateful_stateful_encoding_proto_rawDesc))) + }) + return file_datadog_stateful_stateful_encoding_proto_rawDescData +} + +var file_datadog_stateful_stateful_encoding_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_datadog_stateful_stateful_encoding_proto_msgTypes = make([]protoimpl.MessageInfo, 12) +var file_datadog_stateful_stateful_encoding_proto_goTypes = []any{ + (BatchStatus_Status)(0), // 0: datadog.intake.stateful.BatchStatus.Status + (*DictEntryDefine)(nil), // 1: datadog.intake.stateful.DictEntryDefine + (*DictEntryDelete)(nil), // 2: datadog.intake.stateful.DictEntryDelete + (*PatternDefine)(nil), // 3: datadog.intake.stateful.PatternDefine + (*PatternDelete)(nil), // 4: datadog.intake.stateful.PatternDelete + (*Tag)(nil), // 5: datadog.intake.stateful.Tag + (*Log)(nil), // 6: datadog.intake.stateful.Log + (*StructuredLog)(nil), // 7: datadog.intake.stateful.StructuredLog + (*DynamicValue)(nil), // 8: datadog.intake.stateful.DynamicValue + (*Datum)(nil), // 9: datadog.intake.stateful.Datum + (*DatumSequence)(nil), // 10: datadog.intake.stateful.DatumSequence + (*StatefulBatch)(nil), // 11: datadog.intake.stateful.StatefulBatch + (*BatchStatus)(nil), // 12: datadog.intake.stateful.BatchStatus +} +var file_datadog_stateful_stateful_encoding_proto_depIdxs = []int32{ + 8, // 0: datadog.intake.stateful.Tag.key:type_name -> datadog.intake.stateful.DynamicValue + 8, // 1: datadog.intake.stateful.Tag.value:type_name -> datadog.intake.stateful.DynamicValue + 7, // 2: datadog.intake.stateful.Log.structured:type_name -> datadog.intake.stateful.StructuredLog + 5, // 3: datadog.intake.stateful.Log.tags:type_name -> datadog.intake.stateful.Tag + 8, // 4: datadog.intake.stateful.StructuredLog.dynamic_values:type_name -> datadog.intake.stateful.DynamicValue + 3, // 5: datadog.intake.stateful.Datum.pattern_define:type_name -> 
datadog.intake.stateful.PatternDefine + 4, // 6: datadog.intake.stateful.Datum.pattern_delete:type_name -> datadog.intake.stateful.PatternDelete + 1, // 7: datadog.intake.stateful.Datum.dict_entry_define:type_name -> datadog.intake.stateful.DictEntryDefine + 2, // 8: datadog.intake.stateful.Datum.dict_entry_delete:type_name -> datadog.intake.stateful.DictEntryDelete + 6, // 9: datadog.intake.stateful.Datum.logs:type_name -> datadog.intake.stateful.Log + 9, // 10: datadog.intake.stateful.DatumSequence.data:type_name -> datadog.intake.stateful.Datum + 0, // 11: datadog.intake.stateful.BatchStatus.status:type_name -> datadog.intake.stateful.BatchStatus.Status + 11, // 12: datadog.intake.stateful.StatefulLogsService.LogsStream:input_type -> datadog.intake.stateful.StatefulBatch + 12, // 13: datadog.intake.stateful.StatefulLogsService.LogsStream:output_type -> datadog.intake.stateful.BatchStatus + 13, // [13:14] is the sub-list for method output_type + 12, // [12:13] is the sub-list for method input_type + 12, // [12:12] is the sub-list for extension type_name + 12, // [12:12] is the sub-list for extension extendee + 0, // [0:12] is the sub-list for field type_name +} + +func init() { file_datadog_stateful_stateful_encoding_proto_init() } +func file_datadog_stateful_stateful_encoding_proto_init() { + if File_datadog_stateful_stateful_encoding_proto != nil { + return + } + file_datadog_stateful_stateful_encoding_proto_msgTypes[5].OneofWrappers = []any{ + (*Log_Structured)(nil), + (*Log_Raw)(nil), + } + file_datadog_stateful_stateful_encoding_proto_msgTypes[7].OneofWrappers = []any{ + (*DynamicValue_IntValue)(nil), + (*DynamicValue_FloatValue)(nil), + (*DynamicValue_StringValue)(nil), + (*DynamicValue_DictIndex)(nil), + } + file_datadog_stateful_stateful_encoding_proto_msgTypes[8].OneofWrappers = []any{ + (*Datum_PatternDefine)(nil), + (*Datum_PatternDelete)(nil), + (*Datum_DictEntryDefine)(nil), + (*Datum_DictEntryDelete)(nil), + (*Datum_Logs)(nil), + } + type x struct{} 
+ out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_datadog_stateful_stateful_encoding_proto_rawDesc), len(file_datadog_stateful_stateful_encoding_proto_rawDesc)), + NumEnums: 1, + NumMessages: 12, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_datadog_stateful_stateful_encoding_proto_goTypes, + DependencyIndexes: file_datadog_stateful_stateful_encoding_proto_depIdxs, + EnumInfos: file_datadog_stateful_stateful_encoding_proto_enumTypes, + MessageInfos: file_datadog_stateful_stateful_encoding_proto_msgTypes, + }.Build() + File_datadog_stateful_stateful_encoding_proto = out.File + file_datadog_stateful_stateful_encoding_proto_goTypes = nil + file_datadog_stateful_stateful_encoding_proto_depIdxs = nil +} + +// Reference imports to suppress errors if they are not otherwise used. +var _ context.Context +var _ grpc.ClientConnInterface + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +const _ = grpc.SupportPackageIsVersion6 + +// StatefulLogsServiceClient is the client API for StatefulLogsService service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream. 
+type StatefulLogsServiceClient interface { + LogsStream(ctx context.Context, opts ...grpc.CallOption) (StatefulLogsService_LogsStreamClient, error) +} + +type statefulLogsServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewStatefulLogsServiceClient(cc grpc.ClientConnInterface) StatefulLogsServiceClient { + return &statefulLogsServiceClient{cc} +} + +func (c *statefulLogsServiceClient) LogsStream(ctx context.Context, opts ...grpc.CallOption) (StatefulLogsService_LogsStreamClient, error) { + stream, err := c.cc.NewStream(ctx, &_StatefulLogsService_serviceDesc.Streams[0], "/datadog.intake.stateful.StatefulLogsService/LogsStream", opts...) + if err != nil { + return nil, err + } + x := &statefulLogsServiceLogsStreamClient{stream} + return x, nil +} + +type StatefulLogsService_LogsStreamClient interface { + Send(*StatefulBatch) error + Recv() (*BatchStatus, error) + grpc.ClientStream +} + +type statefulLogsServiceLogsStreamClient struct { + grpc.ClientStream +} + +func (x *statefulLogsServiceLogsStreamClient) Send(m *StatefulBatch) error { + return x.ClientStream.SendMsg(m) +} + +func (x *statefulLogsServiceLogsStreamClient) Recv() (*BatchStatus, error) { + m := new(BatchStatus) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +// StatefulLogsServiceServer is the server API for StatefulLogsService service. +type StatefulLogsServiceServer interface { + LogsStream(StatefulLogsService_LogsStreamServer) error +} + +// UnimplementedStatefulLogsServiceServer can be embedded to have forward compatible implementations. 
+type UnimplementedStatefulLogsServiceServer struct { +} + +func (*UnimplementedStatefulLogsServiceServer) LogsStream(StatefulLogsService_LogsStreamServer) error { + return status.Errorf(codes.Unimplemented, "method LogsStream not implemented") +} + +func RegisterStatefulLogsServiceServer(s *grpc.Server, srv StatefulLogsServiceServer) { + s.RegisterService(&_StatefulLogsService_serviceDesc, srv) +} + +func _StatefulLogsService_LogsStream_Handler(srv interface{}, stream grpc.ServerStream) error { + return srv.(StatefulLogsServiceServer).LogsStream(&statefulLogsServiceLogsStreamServer{stream}) +} + +type StatefulLogsService_LogsStreamServer interface { + Send(*BatchStatus) error + Recv() (*StatefulBatch, error) + grpc.ServerStream +} + +type statefulLogsServiceLogsStreamServer struct { + grpc.ServerStream +} + +func (x *statefulLogsServiceLogsStreamServer) Send(m *BatchStatus) error { + return x.ServerStream.SendMsg(m) +} + +func (x *statefulLogsServiceLogsStreamServer) Recv() (*StatefulBatch, error) { + m := new(StatefulBatch) + if err := x.ServerStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +var _StatefulLogsService_serviceDesc = grpc.ServiceDesc{ + ServiceName: "datadog.intake.stateful.StatefulLogsService", + HandlerType: (*StatefulLogsServiceServer)(nil), + Methods: []grpc.MethodDesc{}, + Streams: []grpc.StreamDesc{ + { + StreamName: "LogsStream", + Handler: _StatefulLogsService_LogsStream_Handler, + ServerStreams: true, + ClientStreams: true, + }, + }, + Metadata: "datadog/stateful/stateful_encoding.proto", +} diff --git a/tasks/protobuf.py b/tasks/protobuf.py index 78ef3e412f06..cf1e7a3fa320 100644 --- a/tasks/protobuf.py +++ b/tasks/protobuf.py @@ -21,6 +21,7 @@ 'remoteagent': False, 'autodiscovery': False, 'trace/idx': False, + 'stateful': False, } CLI_EXTRAS = {