From e98a194ee1cba0399880fa8aa576fdd75a2c1058 Mon Sep 17 00:00:00 2001 From: yoon nguyen Date: Mon, 20 Oct 2025 16:18:36 -0400 Subject: [PATCH 01/16] WIP, cherrypick + working on merging logic + functional e2e --- clustering_architecture.md | 279 +++++ comp/logs/agent/config/config.go | 17 +- comp/logs/agent/config/config_keys.go | 14 + comp/logs/agent/config/config_test.go | 6 +- comp/logs/agent/config/endpoints.go | 21 +- comp/logs/agent/config/endpoints_test.go | 32 + pkg/config/setup/config.go | 4 + pkg/logs/message/message.go | 2 + pkg/logs/patterns/automaton/rules.go | 568 +++++++++ pkg/logs/patterns/automaton/tokenizer.go | 465 ++++++++ pkg/logs/patterns/clustering/cluster.go | 292 +++++ .../patterns/clustering/merging/merging.go | 225 ++++ .../clustering/merging/merging_test.go | 345 ++++++ pkg/logs/patterns/comprehensive_demo.go | 146 +++ pkg/logs/patterns/merging.md | 626 ++++++++++ pkg/logs/patterns/token/signature.go | 87 ++ pkg/logs/patterns/token/signature_test.go | 229 ++++ pkg/logs/patterns/token/token.go | 248 ++++ pkg/logs/patterns/token/tokenlist.go | 69 ++ pkg/logs/patterns/token/tokenlist_test.go | 129 +++ pkg/logs/pipeline/pipeline.go | 29 +- pkg/logs/pipeline/provider.go | 5 +- pkg/logs/sender/dumb_strategy.go | 274 +++++ pkg/logs/sender/grpc/grpc_sender.go | 286 +++++ pkg/logs/sender/grpc/grpc_sender_test.go | 642 +++++++++++ pkg/logs/sender/grpc/stateful_encoding.pb.go | 1014 +++++++++++++++++ pkg/logs/sender/grpc/stateful_encoding.proto | 109 ++ .../sender/grpc/stateful_encoding_grpc.pb.go | 115 ++ pkg/logs/sender/grpc/stream_worker.go | 592 ++++++++++ 29 files changed, 6849 insertions(+), 21 deletions(-) create mode 100644 clustering_architecture.md create mode 100644 pkg/logs/patterns/automaton/rules.go create mode 100644 pkg/logs/patterns/automaton/tokenizer.go create mode 100644 pkg/logs/patterns/clustering/cluster.go create mode 100644 pkg/logs/patterns/clustering/merging/merging.go create mode 100644 
pkg/logs/patterns/clustering/merging/merging_test.go create mode 100644 pkg/logs/patterns/comprehensive_demo.go create mode 100644 pkg/logs/patterns/merging.md create mode 100644 pkg/logs/patterns/token/signature.go create mode 100644 pkg/logs/patterns/token/signature_test.go create mode 100644 pkg/logs/patterns/token/token.go create mode 100644 pkg/logs/patterns/token/tokenlist.go create mode 100644 pkg/logs/patterns/token/tokenlist_test.go create mode 100644 pkg/logs/sender/dumb_strategy.go create mode 100644 pkg/logs/sender/grpc/grpc_sender.go create mode 100644 pkg/logs/sender/grpc/grpc_sender_test.go create mode 100644 pkg/logs/sender/grpc/stateful_encoding.pb.go create mode 100644 pkg/logs/sender/grpc/stateful_encoding.proto create mode 100644 pkg/logs/sender/grpc/stateful_encoding_grpc.pb.go create mode 100644 pkg/logs/sender/grpc/stream_worker.go diff --git a/clustering_architecture.md b/clustering_architecture.md new file mode 100644 index 000000000000..032312911a69 --- /dev/null +++ b/clustering_architecture.md @@ -0,0 +1,279 @@ +# Log Pattern Clustering Architecture + +## Main Data Flow Pipeline + +```mermaid +flowchart TD + A[Raw Log Messages
GET /api/users 200
POST /api/users 201] --> B[Tokenization] + + B --> C["Token Classification
automaton.TokenizeString()"] + C --> D["Token List Creation
token.NewTokenList()"] + + D --> E["Signature Generation
.Signature()"] + E --> F["Hash Computation
computeHash()"] + + F --> G["Cluster Manager
clustering.Add()"] + G --> H{Hash Bucket
Lookup} + + H -->|Existing Cluster| I["Add to Cluster
cluster.Add()"] + H -->|New Signature| J["Create New Cluster
NewCluster()"] + + I --> K["Pattern Generation
cluster.GeneratePattern()"] + J --> K + + K --> L[Wildcard Patterns
* /api/users *
ERROR * failed] + + style A fill:#e1f5fe + style L fill:#c8e6c9 + style G fill:#fff3e0 +``` + +## Core Function Call Graph + +```mermaid +graph TD + A[automaton.TokenizeString] --> B[NewTokenizer] + A --> C[Tokenizer.Tokenize] + + C --> D[processNextToken] + C --> E[consumeWhitespace] + C --> F[extractWord] + C --> G[classifyToken] + + G --> H[globalTrie.Match] + G --> I[GetTerminalRules] + + A --> J[token.NewTokenList] + J --> K[TokenList.Signature] + + K --> L[PositionSignature] + K --> M[CountSignature] + K --> N[computeHash] + + O[clustering.NewClusterManager] --> P[ClusterManager.Add] + P --> Q[hashBuckets lookup] + P --> R[cluster.Signature.Equals] + P --> S[NewCluster] + + S --> T[Cluster.Add] + T --> U[Cluster.GeneratePattern] + + style A fill:#ffecb3 + style P fill:#f3e5f5 + style U fill:#e8f5e8 +``` + +## Hash Bucket Architecture + +```mermaid +graph TB + A[ClusterManager] --> B["hashBuckets: map[uint64][]*Cluster"] + + B --> C["Hash: 12345"] + B --> D["Hash: 67890"] + + C --> E["Cluster1
HTTP Requests"] + C --> F["Cluster2
Hash Collision"] + + E --> G["TokenLists:
GET /api 200
POST /api 201
PUT /api 200"] + E --> H["Pattern: * /api *
Wildcards: positions 0, 4"] + + D --> I["Cluster3
Error Messages"] + I --> J["TokenLists:
ERROR DB failed
ERROR Auth failed"] + I --> K["Pattern: ERROR * failed
Wildcards: position 2"] + + style A fill:#f9f,stroke:#333,stroke-width:2px + style E fill:#bbf,stroke:#333,stroke-width:2px + style I fill:#fbb,stroke:#333,stroke-width:2px +``` + +## Memory Layout and Data Structure + +```mermaid +classDiagram + class ClusterManager { + +map~uint64~[]Cluster hashBuckets + +int totalTokenLists + +int totalClusters + +Add(tokenList) Cluster + +GetCluster(signature) Cluster + } + + class Cluster { + +Signature signature + +[]TokenList tokenLists + +TokenList pattern + +map~int~bool wildcardMap + +Add(tokenList) bool + +GeneratePattern() TokenList + } + + class TokenList { + +[]Token tokens + +Signature() Signature + +PositionSignature() string + +CountSignature() string + } + + class Token { + +string Value + +TokenType Type + +bool IsWildcard + } + + ClusterManager --> Cluster : contains + Cluster --> TokenList : groups + TokenList --> Token : contains +``` + +## Performance Characteristics + +### Algorithm Complexity by Operation + +```mermaid +graph LR + subgraph "Tokenization Pipeline" + A["Raw Log
O(n) time
O(k) space"] --> B["Token Classification
O(1) per token
Trie + Rules"] + B --> C["TokenList
O(k) creation
O(k) memory"] + end + + subgraph "Clustering Pipeline" + C --> D["Signature Generation
O(k) time
O(1) space"] + D --> E["Hash Lookup
O(1) avg
O(m) worst"] + E --> F["Cluster Assignment
O(1) insertion
O(1) space"] + end + + subgraph "Pattern Pipeline" + F --> G["Pattern Generation
O(k × c) time
O(k) space"] + G --> H["Wildcard Detection
O(k × c) comparison
Lazy evaluation"] + end + + style A fill:#ffecb3 + style E fill:#f3e5f5 + style G fill:#e8f5e8 +``` + +### Performance Analysis + +```mermaid +graph TB + subgraph "Performance Characteristics" + A["🚀 Tokenization
O(n) always
Single-pass processing"] + B["📊 Signature
O(k) linear
Cached result"] + C["🔍 Hash Lookup
O(1) avg, O(m) worst
Rare collisions"] + D["🎯 Clustering
O(1) typical
Hit existing clusters"] + E["🎨 Pattern Gen
O(k) single, O(k×c) multiple
Lazy evaluation"] + end + + A --> B + B --> C + C --> D + D --> E + + style A fill:#e3f2fd + style B fill:#f3e5f5 + style C fill:#fff3e0 + style D fill:#e8f5e8 + style E fill:#fce4ec +``` + +### Test Results from Codebase + +From the actual test suite (`TestClusteringPerformance`): +- **Input**: 400 similar log messages +- **Output**: 3 clusters created +- **Demonstrates**: Effective pattern consolidation for similar structured logs + +### Algorithm Variables + +```mermaid +graph LR + subgraph "Input Variables" + A["n: String Length
Character count
Linear tokenization cost"] + B["k: Tokens per Message
After tokenization
Affects signature generation"] + end + + subgraph "System Variables" + C["m: Clusters per Bucket
Hash collisions
Usually 1 cluster"] + D["c: Messages per Cluster
Pattern generation cost
Compression vs speed trade-off"] + end + + style A fill:#e3f2fd + style B fill:#e3f2fd + style C fill:#fff3e0 + style D fill:#fff3e0 +``` + +### Key Optimizations + +```mermaid +graph TB + subgraph "Memory Optimizations" + A["String Interning
Common tokens cached
GET, POST, ERROR reused"] + B["Lazy Evaluation
Patterns generated on-demand
Reduces memory footprint"] + end + + subgraph "CPU Optimizations" + C["Hash Pre-computation
Signatures include cached hash
Avoids repeated calculations"] + D["Trie Lookup
O(1) for HTTP methods
O(1) for severity levels"] + end + + subgraph "Reliability Features" + E["Collision Handling
Graceful hash collision recovery
Exact signature fallback"] + F["Input Validation
UTF-8 safety checks
Defensive programming"] + end + + style A fill:#e8f5e8 + style B fill:#e8f5e8 + style C fill:#fff3e0 + style D fill:#fff3e0 + style E fill:#fce4ec + style F fill:#fce4ec +``` + +## Production Data Flow Example + +```mermaid +sequenceDiagram + participant L as Log Message + participant T as Tokenizer + participant TL as TokenList + participant CM as ClusterManager + participant C as Cluster + + L->>T: "GET /api/users 200" + T->>T: TokenizeString() + T->>TL: [HttpMethod(GET), Whitespace( ), AbsolutePath(/api/users), ...] + TL->>TL: Generate Signature() + TL->>TL: "HttpMethod,Whitespace,AbsolutePath,Whitespace,HttpStatus" + TL->>TL: Hash: 0x1a2b3c4d + + TL->>CM: ClusterManager.Add(tokenList) + CM->>CM: hashBuckets[0x1a2b3c4d] lookup + CM->>C: Found existing cluster + C->>C: cluster.Add(tokenList) + C->>C: GeneratePattern() + C-->>CM: Pattern: "* /api/users *" + CM-->>L: Clustered successfully + + Note over C: Wildcards at positions [0, 4]
for HTTP method and status code +``` + +## Key Production Functions + +### Core Pipeline +- `automaton.TokenizeString()` - Entry point +- `ClusterManager.Add()` - Main clustering logic +- `Cluster.GeneratePattern()` - Pattern extraction +- `TokenList.Signature()` - Clustering key generation + +### Support Functions +- `NewClusterManager()` - Initialization +- `NewCluster()` - Cluster creation +- `Cluster.Add()` - Add TokenList to existing cluster +- `ClusterManager.GetCluster()` - Retrieve by signature + +### Infrastructure +- `globalTrie.Match()` - Fast token classification +- `Signature.Equals()` - Hash collision resolution +- `computeHash()` - Signature hashing for buckets \ No newline at end of file diff --git a/comp/logs/agent/config/config.go b/comp/logs/agent/config/config.go index 44d9c98a4303..d34b6593c89c 100644 --- a/comp/logs/agent/config/config.go +++ b/comp/logs/agent/config/config.go @@ -124,7 +124,8 @@ func BuildEndpointsWithConfig(coreConfig pkgconfigmodel.Reader, logsConfig *Logs if logsDDURL, defined := logsConfig.logsDDURL(); defined { haveHTTPProxy = strings.HasPrefix(logsDDURL, "http://") || strings.HasPrefix(logsDDURL, "https://") } - if logsConfig.isForceHTTPUse() || haveHTTPProxy || logsConfig.obsPipelineWorkerEnabled() || (bool(httpConnectivity) && !(logsConfig.isForceTCPUse() || logsConfig.isSocks5ProxySet() || logsConfig.hasAdditionalEndpoints())) { + + if logsConfig.isGRPCUse() || logsConfig.isForceHTTPUse() || haveHTTPProxy || mrfEnabled || logsConfig.obsPipelineWorkerEnabled() || (bool(httpConnectivity) && !(logsConfig.isForceTCPUse() || logsConfig.isSocks5ProxySet() || logsConfig.hasAdditionalEndpoints())) { return BuildHTTPEndpointsWithConfig(coreConfig, logsConfig, endpointPrefix, intakeTrackType, intakeProtocol, intakeOrigin) } log.Warnf("You are currently sending Logs to Datadog through TCP (either because %s or %s is set or the HTTP connectivity test has failed) "+ @@ -373,7 +374,14 @@ func buildHTTPEndpoints(coreConfig 
pkgconfigmodel.Reader, logsConfig *LogsConfig batchMaxContentSize := logsConfig.batchMaxContentSize() inputChanSize := logsConfig.inputChanSize() - return NewEndpointsWithBatchSettings(main, additionals, false, true, batchWait, batchMaxConcurrentSend, batchMaxSize, batchMaxContentSize, inputChanSize), nil + // Detect if gRPC transport is requested + useGRPC := logsConfig.isGRPCUse() + useProto := logsConfig.devModeUseProto() // Enable proto/pattern extraction mode + if useGRPC { + return NewEndpointsWithBatchSettings(main, additionals, useProto, false, true, batchWait, batchMaxConcurrentSend, batchMaxSize, batchMaxContentSize, inputChanSize), nil + } else { + return NewEndpointsWithBatchSettings(main, additionals, false, true, false, batchWait, batchMaxConcurrentSend, batchMaxSize, batchMaxContentSize, inputChanSize), nil + } } type defaultParseAddressFunc func(string) (host string, port int, err error) @@ -447,6 +455,11 @@ func TaggerWarmupDuration(coreConfig pkgconfigmodel.Reader) time.Duration { return defaultLogsConfigKeys(coreConfig).taggerWarmupDuration() } +// StreamLifetime returns the duration for gRPC stream lifetime before rotation. 
+func StreamLifetime(coreConfig pkgconfigmodel.Reader) time.Duration { + return defaultLogsConfigKeys(coreConfig).streamLifetime() +} + // AggregationTimeout is used when performing aggregation operations func AggregationTimeout(coreConfig pkgconfigmodel.Reader) time.Duration { return defaultLogsConfigKeys(coreConfig).aggregationTimeout() diff --git a/comp/logs/agent/config/config_keys.go b/comp/logs/agent/config/config_keys.go index bf6f9313c28c..110243d41068 100644 --- a/comp/logs/agent/config/config_keys.go +++ b/comp/logs/agent/config/config_keys.go @@ -101,6 +101,10 @@ func (l *LogsConfigKeys) isForceHTTPUse() bool { l.getConfig().GetBool(l.getConfigKey("force_use_http")) } +func (l *LogsConfigKeys) isGRPCUse() bool { + return l.getConfig().GetBool(l.getConfigKey("use_grpc")) +} + func (l *LogsConfigKeys) logsNoSSL() bool { return l.getConfig().GetBool(l.getConfigKey("logs_no_ssl")) } @@ -292,6 +296,16 @@ func (l *LogsConfigKeys) senderRecoveryReset() bool { return l.getConfig().GetBool(l.getConfigKey("sender_recovery_reset")) } +func (l *LogsConfigKeys) streamLifetime() time.Duration { + key := l.getConfigKey("stream_lifetime") + streamLifetime := l.getConfig().GetInt(key) + if streamLifetime <= 0 { + log.Warnf("Invalid %s: %v should be > 0, fallback on %v", key, streamLifetime, pkgconfigsetup.DefaultLogsStreamLifetime) + return time.Duration(pkgconfigsetup.DefaultLogsStreamLifetime) * time.Second + } + return time.Duration(streamLifetime) * time.Second +} + // AggregationTimeout is used when performing aggregation operations func (l *LogsConfigKeys) aggregationTimeout() time.Duration { return l.getConfig().GetDuration(l.getConfigKey("aggregation_timeout")) * time.Millisecond diff --git a/comp/logs/agent/config/config_test.go b/comp/logs/agent/config/config_test.go index ee9049f48d9a..1de098e78668 100644 --- a/comp/logs/agent/config/config_test.go +++ b/comp/logs/agent/config/config_test.go @@ -287,7 +287,7 @@ func (suite *ConfigTestSuite) 
TestMultipleHttpEndpointsEnvVar() { isReliable: true, } - expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) + expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, false, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) endpoints, err := BuildHTTPEndpoints(suite.config, "test-track", "test-proto", "test-source") suite.Nil(err) @@ -414,7 +414,7 @@ func (suite *ConfigTestSuite) TestMultipleHttpEndpointsInConfig() { isReliable: true, } - expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) + expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, false, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) endpoints, err := BuildHTTPEndpoints(suite.config, "test-track", "test-proto", "test-source") suite.Nil(err) @@ -504,7 +504,7 @@ func (suite *ConfigTestSuite) TestMultipleHttpEndpointsInConfig2() { isReliable: true, } - expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, 
true, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) + expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, false, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) endpoints, err := BuildHTTPEndpoints(suite.config, "test-track", "test-proto", "test-source") suite.Nil(err) diff --git a/comp/logs/agent/config/endpoints.go b/comp/logs/agent/config/endpoints.go index 6771f20d6d25..238c222c38ff 100644 --- a/comp/logs/agent/config/endpoints.go +++ b/comp/logs/agent/config/endpoints.go @@ -343,6 +343,7 @@ type Endpoints struct { Endpoints []Endpoint UseProto bool UseHTTP bool + UseGRPC bool BatchWait time.Duration BatchMaxConcurrentSend int BatchMaxSize int @@ -369,6 +370,23 @@ func NewEndpoints(main Endpoint, additionalEndpoints []Endpoint, useProto bool, additionalEndpoints, useProto, useHTTP, + false, // useGRPC defaults to false for backward compatibility + pkgconfigsetup.DefaultBatchWait, + pkgconfigsetup.DefaultBatchMaxConcurrentSend, + pkgconfigsetup.DefaultBatchMaxSize, + pkgconfigsetup.DefaultBatchMaxContentSize, + pkgconfigsetup.DefaultInputChanSize, + ) +} + +// NewEndpointsWithGRPC returns a new endpoints composite with gRPC support +func NewEndpointsWithGRPC(main Endpoint, additionalEndpoints []Endpoint, useProto bool, useHTTP bool, useGRPC bool) *Endpoints { + return NewEndpointsWithBatchSettings( + main, + additionalEndpoints, + useProto, + useHTTP, + useGRPC, pkgconfigsetup.DefaultBatchWait, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, @@ -378,12 +396,13 @@ func NewEndpoints(main Endpoint, additionalEndpoints []Endpoint, useProto bool, } // 
NewEndpointsWithBatchSettings returns a new endpoints composite with non-default batching settings specified -func NewEndpointsWithBatchSettings(main Endpoint, additionalEndpoints []Endpoint, useProto bool, useHTTP bool, batchWait time.Duration, batchMaxConcurrentSend int, batchMaxSize int, batchMaxContentSize int, inputChanSize int) *Endpoints { +func NewEndpointsWithBatchSettings(main Endpoint, additionalEndpoints []Endpoint, useProto bool, useHTTP bool, useGRPC bool, batchWait time.Duration, batchMaxConcurrentSend int, batchMaxSize int, batchMaxContentSize int, inputChanSize int) *Endpoints { return &Endpoints{ Main: main, Endpoints: append([]Endpoint{main}, additionalEndpoints...), UseProto: useProto, UseHTTP: useHTTP, + UseGRPC: useGRPC, BatchWait: batchWait, BatchMaxConcurrentSend: batchMaxConcurrentSend, BatchMaxSize: batchMaxSize, diff --git a/comp/logs/agent/config/endpoints_test.go b/comp/logs/agent/config/endpoints_test.go index 831e7b52113c..cfdba0590321 100644 --- a/comp/logs/agent/config/endpoints_test.go +++ b/comp/logs/agent/config/endpoints_test.go @@ -135,6 +135,24 @@ func (suite *EndpointsTestSuite) TestBuildEndpointsShouldSucceedWithValidHTTPCon suite.Equal("agent-http-intake.logs.datadoghq.com.", endpoint.Host) } +func (suite *EndpointsTestSuite) TestBuildEndpointsShouldSucceedWithValidGRPCConfig() { + var endpoints *Endpoints + var endpoint Endpoint + var err error + + suite.config.SetWithoutSource("logs_config.use_grpc", true) + + endpoints, err = BuildEndpoints(suite.config, HTTPConnectivityFailure, "test-track", "test-proto", "test-source") + suite.Nil(err) + suite.True(endpoints.UseGRPC) + suite.False(endpoints.UseHTTP) + suite.Equal(endpoints.BatchWait, 5*time.Second) + + endpoint = endpoints.Main + suite.True(endpoint.UseSSL()) + suite.Equal("agent-http-intake.logs.datadoghq.com.", endpoint.Host) +} + func (suite *EndpointsTestSuite) TestBuildEndpointsShouldSucceedWithValidHTTPConfigAndCompression() { var endpoints *Endpoints var 
endpoint Endpoint @@ -259,6 +277,7 @@ func (suite *EndpointsTestSuite) TestBuildEndpointsShouldTakeIntoAccountHTTPConn suite.config.SetWithoutSource("logs_config.force_use_tcp", "false") suite.config.SetWithoutSource("logs_config.use_http", "false") suite.config.SetWithoutSource("logs_config.force_use_http", "false") + suite.config.SetWithoutSource("logs_config.use_grpc", "false") suite.config.SetWithoutSource("logs_config.socks5_proxy_address", "") suite.config.SetWithoutSource("logs_config.additional_endpoints", []map[string]interface{}{}) } @@ -329,6 +348,19 @@ func (suite *EndpointsTestSuite) TestBuildEndpointsShouldTakeIntoAccountHTTPConn suite.config.SetWithoutSource("logs_config.socks5_proxy_address", "") }) + suite.Run("When use_grpc is true always create gRPC endpoints", func() { + defer resetHTTPConfigValuesToFalse() + suite.config.SetWithoutSource("logs_config.use_grpc", "true") + endpoints, err := BuildEndpoints(suite.config, HTTPConnectivitySuccess, "test-track", "test-proto", "test-source") + suite.Nil(err) + suite.True(endpoints.UseGRPC) + suite.False(endpoints.UseHTTP) + endpoints, err = BuildEndpoints(suite.config, HTTPConnectivityFailure, "test-track", "test-proto", "test-source") + suite.Nil(err) + suite.True(endpoints.UseGRPC) + suite.False(endpoints.UseHTTP) + }) + suite.Run("When additional_endpoints is not empty always create TCP endpoints", func() { defer resetHTTPConfigValuesToFalse() suite.config.SetWithoutSource("logs_config.additional_endpoints", []map[string]interface{}{ diff --git a/pkg/config/setup/config.go b/pkg/config/setup/config.go index 35961c1c4d45..136da127be96 100644 --- a/pkg/config/setup/config.go +++ b/pkg/config/setup/config.go @@ -121,6 +121,9 @@ const ( // DefaultLogsSenderBackoffRecoveryInterval is the default logs sender backoff recovery interval DefaultLogsSenderBackoffRecoveryInterval = 2 + // DefaultLogsStreamLifetime is the default gRPC stream lifetime in seconds (15 minutes) + DefaultLogsStreamLifetime = 900 + // 
maxExternalMetricsProviderChunkSize ensures batch queries are limited in size. maxExternalMetricsProviderChunkSize = 35 @@ -2675,6 +2678,7 @@ func bindEnvAndSetLogsConfigKeys(config pkgconfigmodel.Setup, prefix string) { config.BindEnvAndSetDefault(prefix+"sender_backoff_max", DefaultLogsSenderBackoffMax) config.BindEnvAndSetDefault(prefix+"sender_recovery_interval", DefaultForwarderRecoveryInterval) config.BindEnvAndSetDefault(prefix+"sender_recovery_reset", false) + config.BindEnvAndSetDefault(prefix+"stream_lifetime", DefaultLogsStreamLifetime) config.BindEnvAndSetDefault(prefix+"use_v2_api", true) config.SetKnown(prefix + "dev_mode_no_ssl") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' } diff --git a/pkg/logs/message/message.go b/pkg/logs/message/message.go index 4f91ef10ffc4..5cb9f9050ed8 100644 --- a/pkg/logs/message/message.go +++ b/pkg/logs/message/message.go @@ -38,6 +38,8 @@ type Payload struct { Encoding string // The size of the unencoded payload UnencodedSize int + // Indicates if this payload is a snapshot for stream rotation + IsSnapshot bool } // NewPayload creates a new payload with the given message metadata, encoded content, encoding type and unencoded size diff --git a/pkg/logs/patterns/automaton/rules.go b/pkg/logs/patterns/automaton/rules.go new file mode 100644 index 000000000000..de799c13e3c1 --- /dev/null +++ b/pkg/logs/patterns/automaton/rules.go @@ -0,0 +1,568 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package automaton provides terminal rules for token classification. 
+package automaton + +import ( + "fmt" + "regexp" + "sort" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +// Priority constants for rule evaluation order +const ( + PriorityHigh = 3 // Very specific patterns like IPv4, IPv6, Email + PriorityMedium = 2 // Structured patterns like URI, Dates, HTTPStatus + PriorityLow = 1 // Generic fallback patterns like Numeric +) + +// TerminalRule represents a classification rule +type TerminalRule struct { + Name string + Pattern *regexp.Regexp + TokenType token.TokenType + Priority int // Use PriorityHigh/Medium/Low constants - higher values evaluated first + Category string + Description string + Examples []string +} + +// RuleCategory represents a grouping of rules +type RuleCategory struct { + Name string + Description string + Rules []*TerminalRule +} + +// RuleManager manages terminal rules +type RuleManager struct { + rules []*TerminalRule + categories map[string]*RuleCategory +} + +// NewRuleManager creates a new rule manager +func NewRuleManager() *RuleManager { + return &RuleManager{ + rules: make([]*TerminalRule, 0), + categories: make(map[string]*RuleCategory), + } +} + +// AddRule adds a new terminal rule +func (rm *RuleManager) AddRule(name, pattern, category, description string, tokenType token.TokenType, priority int, examples []string) error { + regex, err := regexp.Compile(pattern) + if err != nil { + return fmt.Errorf("invalid regex pattern '%s': %v", pattern, err) + } + + rule := &TerminalRule{ + Name: name, + Pattern: regex, + TokenType: tokenType, + Priority: priority, + Category: category, + Description: description, + Examples: examples, + } + + for _, example := range examples { + if !regex.MatchString(example) { + return fmt.Errorf("example '%s' does not match pattern '%s'", example, pattern) + } + } + + rm.insertRuleByPriority(rule) + rm.addToCategory(rule) + + return nil +} + +// RemoveRule removes a rule by name +func (rm *RuleManager) RemoveRule(name string) bool { + for i, rule 
:= range rm.rules { + if rule.Name == name { + // Remove from rules list + rm.rules = append(rm.rules[:i], rm.rules[i+1:]...) + + // Remove from category + rm.removeFromCategory(rule) + return true + } + } + return false +} + +// ApplyRules applies terminal rules in priority order to classify a token +func (rm *RuleManager) ApplyRules(value string) token.TokenType { + for _, rule := range rm.rules { + if rule.Pattern.MatchString(value) { + return rule.TokenType + } + } + return token.TokenWord +} + +// LoadPredefinedRules loads predefined rules +func (rm *RuleManager) LoadPredefinedRules() error { + predefined := GetPredefinedRules() + + for _, rule := range predefined { + err := rm.AddRule( + rule.Name, + rule.Pattern.String(), + rule.Category, + rule.Description, + rule.TokenType, + rule.Priority, + rule.Examples, + ) + if err != nil { + return fmt.Errorf("failed to load rule '%s': %v", rule.Name, err) + } + } + + return nil +} + +// Helper methods + +func (rm *RuleManager) insertRuleByPriority(rule *TerminalRule) { + // Insert in priority order (higher priority first) + inserted := false + for i, existing := range rm.rules { + if rule.Priority > existing.Priority { + // Insert at position i + rm.rules = append(rm.rules[:i], append([]*TerminalRule{rule}, rm.rules[i:]...)...) 
+ inserted = true + break + } + } + + if !inserted { + rm.rules = append(rm.rules, rule) + } +} + +func (rm *RuleManager) addToCategory(rule *TerminalRule) { + if rm.categories[rule.Category] == nil { + rm.categories[rule.Category] = &RuleCategory{ + Name: rule.Category, + Description: fmt.Sprintf("Rules for %s tokens", rule.Category), + Rules: make([]*TerminalRule, 0), + } + } + + rm.categories[rule.Category].Rules = append(rm.categories[rule.Category].Rules, rule) +} + +func (rm *RuleManager) removeFromCategory(rule *TerminalRule) { + if category, exists := rm.categories[rule.Category]; exists { + for i, r := range category.Rules { + if r.Name == rule.Name { + category.Rules = append(category.Rules[:i], category.Rules[i+1:]...) + break + } + } + + // Remove category if empty + if len(category.Rules) == 0 { + delete(rm.categories, rule.Category) + } + } +} + +// GetRule retrieves a rule by name +func (rm *RuleManager) GetRule(name string) *TerminalRule { + for _, rule := range rm.rules { + if rule.Name == name { + return rule + } + } + return nil +} + +// ListRules returns all rules sorted by priority +func (rm *RuleManager) ListRules() []*TerminalRule { + // Return a copy to prevent external modification + result := make([]*TerminalRule, len(rm.rules)) + copy(result, rm.rules) + return result +} + +// GetRulesByCategory returns rules in a specific category +func (rm *RuleManager) GetRulesByCategory(category string) []*TerminalRule { + if cat, exists := rm.categories[category]; exists { + result := make([]*TerminalRule, len(cat.Rules)) + copy(result, cat.Rules) + return result + } + return []*TerminalRule{} +} + +// GetCategories returns all rule categories +func (rm *RuleManager) GetCategories() []string { + categories := make([]string, 0, len(rm.categories)) + for name := range rm.categories { + categories = append(categories, name) + } + sort.Strings(categories) + return categories +} + +// ValidateRule checks if a rule would work correctly +func (rm 
*RuleManager) ValidateRule(name, pattern string, examples []string) error { + regex, err := regexp.Compile(pattern) + if err != nil { + return fmt.Errorf("invalid regex: %v", err) + } + + // Check for conflicts with existing rules + for _, existing := range rm.rules { + if existing.Name == name { + return fmt.Errorf("rule '%s' already exists", name) + } + } + + // Validate examples + for _, example := range examples { + if !regex.MatchString(example) { + return fmt.Errorf("example '%s' does not match pattern", example) + } + } + + return nil +} + +// GetRuleStats returns statistics about the rule system +func (rm *RuleManager) GetRuleStats() RuleStats { + stats := RuleStats{ + TotalRules: len(rm.rules), + Categories: len(rm.categories), + ByCategory: make(map[string]int), + ByTokenType: make(map[token.TokenType]int), + } + + for _, rule := range rm.rules { + stats.ByCategory[rule.Category]++ + stats.ByTokenType[rule.TokenType]++ + } + + return stats +} + +// RuleStats contains statistics about the rule system +type RuleStats struct { + TotalRules int + Categories int + ByCategory map[string]int + ByTokenType map[token.TokenType]int +} + +// GetPredefinedRules returns the standard set of terminal rules +func GetPredefinedRules() []*TerminalRule { + rules := []*TerminalRule{ + + // ============================================================================= + // DATE & TIME PATTERNS (Priority: High to Medium) + // Based on multiline aggregation patterns for comprehensive coverage + // ============================================================================= + + // High Priority - Modern Standards with Timezone Support + { + Name: "RFC3339DateTime", + Pattern: regexp.MustCompile(`^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(\.\d+)?(Z|[\+\-]\d{2}:?\d{2})?`), + TokenType: token.TokenDate, + Priority: PriorityHigh, + Category: "time", + Description: "Matches RFC3339 datetime format with timezone", + Examples: []string{"2024-01-15T10:30:45Z", 
"2024-01-15T10:30:45.123Z", "2024-01-15T10:30:45+02:00"}, + }, + { + Name: "RFC3339NanoDateTime", + Pattern: regexp.MustCompile(`^(\d+)-(\d+)-(\d+)([A-Za-z_]+)(\d+):(\d+):(\d+)\.(\d+)([A-Za-z_]+)(\d+):(\d+)`), + TokenType: token.TokenDate, + Priority: PriorityHigh, + Category: "time", + Description: "Matches RFC3339 datetime format with nanosecond precision", + Examples: []string{"2006-01-02T15:04:05.999999999Z07:00", "2024-12-25T14:30:00.123456789+02:00"}, + }, + { + Name: "StandardTimestamp", + Pattern: regexp.MustCompile(`^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})(,\d+)?`), + TokenType: token.TokenDate, + Priority: PriorityHigh, + Category: "time", + Description: "Matches standard timestamp format with optional milliseconds", + Examples: []string{"2024-01-15 10:30:45", "2024-01-15 10:30:45,123"}, + }, + + // Medium Priority - Legacy RFC Standards + { + Name: "RFC1123DateTime", + Pattern: regexp.MustCompile(`^([A-Za-z_]+), (\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+):(\d+) ([A-Za-z_]+)`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches RFC1123 datetime format", + Examples: []string{"Mon, 02 Jan 2006 15:04:05 MST", "Wed, 25 Dec 2024 14:30:00 UTC"}, + }, + { + Name: "RFC1123ZDateTime", + Pattern: regexp.MustCompile(`^([A-Za-z_]+), (\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+):(\d+) (-\d+)`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches RFC1123Z datetime format with numeric timezone", + Examples: []string{"Mon, 02 Jan 2006 15:04:05 -0700", "Wed, 25 Dec 2024 14:30:00 +0200"}, + }, + { + Name: "RFC850DateTime", + Pattern: regexp.MustCompile(`^([A-Za-z_]+), (\d+)-([A-Za-z_]+)-(\d+) (\d+):(\d+):(\d+) ([A-Za-z_]+)`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches RFC850 datetime format", + Examples: []string{"Monday, 02-Jan-06 15:04:05 MST", "Wednesday, 25-Dec-24 14:30:00 UTC"}, + }, + { + Name: "RFC822DateTime", + 
Pattern: regexp.MustCompile(`^(\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+) ([A-Za-z_]+)`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches RFC822 datetime format", + Examples: []string{"02 Jan 06 15:04 MST", "25 Dec 24 14:30 UTC"}, + }, + { + Name: "RFC822ZDateTime", + Pattern: regexp.MustCompile(`^(\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+) (-\d+)`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches RFC822Z datetime format with numeric timezone", + Examples: []string{"02 Jan 06 15:04 -0700", "25 Dec 24 14:30 +0200"}, + }, + + // Medium Priority - Unix/System Formats + { + Name: "ANSICDateTime", + Pattern: regexp.MustCompile(`^([A-Za-z_]+) ([A-Za-z_]+) +(\d+) (\d+):(\d+):(\d+) (\d+)`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches ANSIC datetime format", + Examples: []string{"Mon Jan 2 15:04:05 2006", "Wed Dec 25 14:30:00 2024"}, + }, + { + Name: "UnixDateTime", + Pattern: regexp.MustCompile(`^([A-Za-z_]+) ([A-Za-z_]+) +(\d+) (\d+):(\d+):(\d+)( [A-Za-z_]+ (\d+))?`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches Unix datetime format with optional timezone", + Examples: []string{"Mon Jan 2 15:04:05 2006", "Mon Jan 2 15:04:05 MST 2006"}, + }, + { + Name: "RubyDateTime", + Pattern: regexp.MustCompile(`^([A-Za-z_]+) ([A-Za-z_]+) (\d+) (\d+):(\d+):(\d+) ([\-\+]\d+) (\d+)`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches Ruby datetime format with timezone offset", + Examples: []string{"Mon Jan 02 15:04:05 -0700 2006", "Wed Dec 25 14:30:00 +0200 2024"}, + }, + + // Medium Priority - Application-Specific Formats + { + Name: "JavaSimpleFormatter", + Pattern: regexp.MustCompile(`^([A-Za-z_]+) (\d+), (\d{4}) (\d+):(\d+):(\d+) (AM|PM)`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + 
Description: "Matches Java SimpleFormatter date format", + Examples: []string{"January 15, 2024 2:30:45 PM", "December 31, 2023 11:59:59 AM"}, + }, + { + Name: "SlashDateTime", + Pattern: regexp.MustCompile(`^(\d{4})/(\d{2})/(\d{2}) (\d{2}):(\d{2}):(\d{2})`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches slash-separated datetime format", + Examples: []string{"2024/01/15 10:30:45", "2024/12/31 23:59:59"}, + }, + { + Name: "SimpleDate", + Pattern: regexp.MustCompile(`^(\d{4})-(1[012]|0?[1-9])-([12][0-9]|3[01]|0?[1-9])$`), + TokenType: token.TokenDate, + Priority: PriorityMedium, + Category: "time", + Description: "Matches YYYY-MM-DD date format with validation", + Examples: []string{"2024-01-15", "2024-12-31", "2024-02-29"}, + }, + + // ============================================================================= + // NETWORK PATTERNS (Priority: High) + // ============================================================================= + + { + Name: "IPv4Address", + Pattern: regexp.MustCompile(`^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`), + TokenType: token.TokenIPv4, + Priority: PriorityHigh, + Category: "network", + Description: "Matches IPv4 addresses in dotted decimal notation", + Examples: []string{"192.168.1.1", "10.0.0.1", "255.255.255.255", "0.0.0.0"}, + }, + { + Name: "IPv6Address", + Pattern: regexp.MustCompile(`^([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}$`), + TokenType: token.TokenIPv6, + Priority: PriorityHigh, + Category: "network", + Description: "Matches basic IPv6 addresses", + Examples: []string{"2001:0db8:85a3:0000:0000:8a2e:0370:7334", "fe80:0000:0000:0000:0000:0000:0000:0001"}, + }, + { + Name: "EmailAddress", + Pattern: regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`), + TokenType: token.TokenEmail, + Priority: PriorityHigh, + Category: "network", + Description: "Matches email addresses", + Examples: 
[]string{"user@example.com", "test.email+tag@domain.org", "admin@company.co.uk"}, + }, + { + Name: "URI", + Pattern: regexp.MustCompile(`^https?://[^\s]+$`), + TokenType: token.TokenURI, + Priority: PriorityMedium, + Category: "network", + Description: "Matches HTTP and HTTPS URIs", + Examples: []string{"http://example.com", "https://api.domain.com/v1/users", "https://cdn.example.org/assets/style.css"}, + }, + + // ============================================================================= + // HTTP PATTERNS (Priority: Medium) + // ============================================================================= + + { + Name: "HTTPStatus", + Pattern: regexp.MustCompile(`^[1-5][0-9][0-9]$`), + TokenType: token.TokenHttpStatus, + Priority: PriorityMedium, + Category: "http", + Description: "Matches HTTP status codes", + Examples: []string{"200", "404", "500", "301", "403"}, + }, + + // ============================================================================= + // FILESYSTEM PATTERNS (Priority: Medium) + // ============================================================================= + + { + Name: "AbsolutePath", + Pattern: regexp.MustCompile(`^/[^\s]+$`), + TokenType: token.TokenAbsolutePath, + Priority: PriorityMedium, + Category: "filesystem", + Description: "Matches absolute file/URL paths", + Examples: []string{"/api/users", "/var/log/app.log", "/home/user/documents"}, + }, + + // ============================================================================= + // NUMERIC PATTERNS (Priority: Low - Fallback) + // ============================================================================= + + { + Name: "Numeric", + Pattern: regexp.MustCompile(`^\d+$`), + TokenType: token.TokenNumeric, + Priority: PriorityLow, + Category: "numeric", + Description: "Matches pure numeric values", + Examples: []string{"123", "0", "999999", "42"}, + }, + } + + return rules +} + +// GetRuleByPriority returns rules with a specific priority +func (rm *RuleManager) 
GetRuleByPriority(priority int) []*TerminalRule { + result := make([]*TerminalRule, 0) + for _, rule := range rm.rules { + if rule.Priority == priority { + result = append(result, rule) + } + } + return result +} + +// GetHighestPriorityRules returns rules with the highest priority +func (rm *RuleManager) GetHighestPriorityRules() []*TerminalRule { + if len(rm.rules) == 0 { + return []*TerminalRule{} + } + + highestPriority := rm.rules[0].Priority + result := make([]*TerminalRule, 0) + + for _, rule := range rm.rules { + if rule.Priority == highestPriority { + result = append(result, rule) + } else { + break // Rules are sorted by priority + } + } + return result +} + +// UpdateRulePriority changes the priority of an existing rule +func (rm *RuleManager) UpdateRulePriority(name string, newPriority int) error { + rule := rm.GetRule(name) + if rule == nil { + return fmt.Errorf("rule '%s' not found", name) + } + + // Remove the rule and re-add with new priority + if !rm.RemoveRule(name) { + return fmt.Errorf("failed to remove rule '%s'", name) + } + + return rm.AddRule( + rule.Name, + rule.Pattern.String(), + rule.Category, + rule.Description, + rule.TokenType, + newPriority, + rule.Examples, + ) +} + +// GetCategoryDescription returns the description for a category +func (rm *RuleManager) GetCategoryDescription(category string) string { + if cat, exists := rm.categories[category]; exists { + return cat.Description + } + return "" +} + +// SetCategoryDescription updates the description for a category +func (rm *RuleManager) SetCategoryDescription(category, description string) { + if rm.categories[category] == nil { + rm.categories[category] = &RuleCategory{ + Name: category, + Description: description, + Rules: make([]*TerminalRule, 0), + } + } else { + rm.categories[category].Description = description + } +} diff --git a/pkg/logs/patterns/automaton/tokenizer.go b/pkg/logs/patterns/automaton/tokenizer.go new file mode 100644 index 000000000000..f99c56f19688 --- 
/dev/null +++ b/pkg/logs/patterns/automaton/tokenizer.go @@ -0,0 +1,465 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package automaton provides log message tokenization using finite state automaton +// and pattern matching for semantic token classification. +package automaton + +import ( + "regexp" + "unicode" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +// TokenizerState represents the current state of the FSA +type TokenizerState int + +const ( + StateStart TokenizerState = iota + StateWord // Letters, digits, and common separators for structured tokens + StateNumeric // Pure numbers + StateWhitespace // Spaces, tabs, newlines + StateSpecial // Operators, punctuation, symbols +) + +// Tokenizer implements a finite state automaton for log tokenization +type Tokenizer struct { + input string + pos int + length int + state TokenizerState + buffer []rune + tokens []token.Token +} + +// NewTokenizer creates a new tokenizer for the given input +func NewTokenizer(input string) *Tokenizer { + return &Tokenizer{ + input: input, + pos: 0, + length: len(input), + state: StateStart, + buffer: make([]rune, 0, 64), // Pre-allocate buffer + tokens: make([]token.Token, 0, 32), // Pre-allocate tokens slice + } +} + +// Tokenize processes the input string and returns a TokenList +func (t *Tokenizer) Tokenize() *token.TokenList { + for t.pos < t.length { + if !t.processNextToken() { + break + } + } + + t.flushBuffer() + t.classifyTokens() + + return token.NewTokenListWithTokens(t.tokens) +} + +// classifyTokens applies terminal rules for token classification +func (t *Tokenizer) classifyTokens() { + for i, tok := range t.tokens { + // Only classify word-like and numeric tokens that might be structured + if tok.Type != token.TokenWord && tok.Type != 
token.TokenNumeric { + continue + } + + classifiedType := t.classifyToken(tok.Value) + if classifiedType == token.TokenUnknown { + continue + } + + // Update token type + t.tokens[i].Type = classifiedType + + // Parse date components for date tokens + if classifiedType == token.TokenDate { + t.tokens[i].DateInfo = parseDateComponents(tok.Value) + } + } +} + +// processNextToken advances the automaton by one token +func (t *Tokenizer) processNextToken() bool { + if t.pos >= t.length { + return false + } + + char := rune(t.input[t.pos]) + + switch t.state { + case StateStart: + return t.handleStartState(char) + case StateWord: + return t.handleWordState(char) + case StateNumeric: + return t.handleNumericState(char) + case StateWhitespace: + return t.handleWhitespaceState(char) + case StateSpecial: + return t.handleSpecialState(char) + default: + return t.handleStartState(char) // Fallback + } +} + +// handleStartState determines initial state based on character type +func (t *Tokenizer) handleStartState(char rune) bool { + switch { + case unicode.IsSpace(char): + t.setState(StateWhitespace) + case unicode.IsDigit(char): + t.setState(StateNumeric) + case unicode.IsLetter(char) || char == '/': + t.setState(StateWord) + default: + t.setState(StateSpecial) + } + + t.addToBuffer(char) + t.pos++ + return true +} + +// handleWordState processes word tokens +func (t *Tokenizer) handleWordState(char rune) bool { + if unicode.IsLetter(char) || unicode.IsDigit(char) || char == '_' || char == '-' || + char == '.' 
|| char == '@' || char == '/' || + (char == ':' && t.isURLScheme()) { + t.addToBuffer(char) + t.pos++ + return true + } + + t.createWordToken() + t.setState(StateStart) + return true +} + +// handleNumericState processes numeric tokens +// Allows digits and special chars for dates (2024-01-15), times (10:30:45), IPs (192.168.1.1) +func (t *Tokenizer) handleNumericState(char rune) bool { + switch { + case unicode.IsDigit(char), char == '.', char == '-', char == '/', char == ':': + t.addToBuffer(char) + t.pos++ + return true + default: + t.createNumericToken() + t.setState(StateStart) + return true + } +} + +// handleWhitespaceState processes whitespace +func (t *Tokenizer) handleWhitespaceState(char rune) bool { + switch { + case unicode.IsSpace(char): + t.addToBuffer(char) + t.pos++ + return true + default: + t.createWhitespaceToken() + t.setState(StateStart) + return true + } +} + +// handleSpecialState processes special characters +func (t *Tokenizer) handleSpecialState(char rune) bool { + // Treat each special char as separate token + t.addToBuffer(char) + t.pos++ + t.createSpecialToken() + t.setState(StateStart) + return true +} + +// classifyToken attempts to classify a single token's type using terminal rules +// Takes a token value and returns a more specific type if a rule matches, or TokenUnknown +func (t *Tokenizer) classifyToken(value string) token.TokenType { + return globalTrie.Match(value) +} + +// parseDateComponents extracts structural information from date strings +// Uses the same comprehensive patterns as the multiline aggregation package +func parseDateComponents(dateStr string) *token.DateComponents { + // Comprehensive date patterns from multiline aggregation package + patterns := []struct { + regex *regexp.Regexp + format string + parser func([]string) *token.DateComponents + }{ + // RFC3339: 2006-01-02T15:04:05Z07:00 + { + regexp.MustCompile(`^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(\.\d+)?(Z|[\+\-]\d{2}:?\d{2})?`), + "RFC3339", + 
func(matches []string) *token.DateComponents { + return &token.DateComponents{ + Year: matches[1], Month: matches[2], Day: matches[3], + Hour: matches[4], Minute: matches[5], Second: matches[6], + Format: "RFC3339", + } + }, + }, + // Standard timestamp: 2021-07-08 05:08:19,214 + { + regexp.MustCompile(`^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})(,\d+)?`), + "YYYY-MM-DD HH:mm:ss", + func(matches []string) *token.DateComponents { + return &token.DateComponents{ + Year: matches[1], Month: matches[2], Day: matches[3], + Hour: matches[4], Minute: matches[5], Second: matches[6], + Format: "YYYY-MM-DD HH:mm:ss", + } + }, + }, + // Date only: 2021-01-31 (with strict month/day validation) + { + regexp.MustCompile(`^(\d{4})-(1[012]|0?[1-9])-([12][0-9]|3[01]|0?[1-9])`), + "YYYY-MM-DD", + func(matches []string) *token.DateComponents { + return &token.DateComponents{ + Year: matches[1], Month: matches[2], Day: matches[3], + Format: "YYYY-MM-DD", + } + }, + }, + // Slash format: 2023/02/20 14:33:24 + { + regexp.MustCompile(`^(\d{4})/(\d{2})/(\d{2}) (\d{2}):(\d{2}):(\d{2})`), + "YYYY/MM/DD HH:mm:ss", + func(matches []string) *token.DateComponents { + return &token.DateComponents{ + Year: matches[1], Month: matches[2], Day: matches[3], + Hour: matches[4], Minute: matches[5], Second: matches[6], + Format: "YYYY/MM/DD HH:mm:ss", + } + }, + }, + // Java SimpleFormatter: January 31, 2021 2:30:45 PM + { + regexp.MustCompile(`^([A-Za-z_]+) (\d+), (\d{4}) (\d+):(\d+):(\d+) (AM|PM)`), + "Month DD, YYYY HH:mm:ss AM/PM", + func(matches []string) *token.DateComponents { + return &token.DateComponents{ + Month: matches[1], Day: matches[2], Year: matches[3], + Hour: matches[4], Minute: matches[5], Second: matches[6], + Format: "Month DD, YYYY HH:mm:ss AM/PM", + } + }, + }, + // ANSIC: Mon Jan _2 15:04:05 2006 + { + regexp.MustCompile(`^([A-Za-z_]+) ([A-Za-z_]+) +(\d+) (\d+):(\d+):(\d+) (\d+)`), + "ANSIC", + func(matches []string) *token.DateComponents { + return &token.DateComponents{ + 
Month: matches[2], Day: matches[3], Year: matches[7], + Hour: matches[4], Minute: matches[5], Second: matches[6], + Format: "ANSIC", + } + }, + }, + // UnixDate: Mon Jan _2 15:04:05 MST 2006 + { + regexp.MustCompile(`^([A-Za-z_]+) ([A-Za-z_]+) +(\d+) (\d+):(\d+):(\d+)( [A-Za-z_]+ (\d+))?`), + "UnixDate", + func(matches []string) *token.DateComponents { + year := matches[7] + if year == "" && len(matches) > 8 { + year = matches[8] + } + return &token.DateComponents{ + Month: matches[2], Day: matches[3], Year: year, + Hour: matches[4], Minute: matches[5], Second: matches[6], + Format: "UnixDate", + } + }, + }, + // RubyDate: Mon Jan 02 15:04:05 -0700 2006 + { + regexp.MustCompile(`^([A-Za-z_]+) ([A-Za-z_]+) (\d+) (\d+):(\d+):(\d+) ([\-\+]\d+) (\d+)`), + "RubyDate", + func(matches []string) *token.DateComponents { + return &token.DateComponents{ + Month: matches[2], Day: matches[3], Year: matches[8], + Hour: matches[4], Minute: matches[5], Second: matches[6], + Format: "RubyDate", + } + }, + }, + // RFC822: 02 Jan 06 15:04 MST + { + regexp.MustCompile(`^(\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+) ([A-Za-z_]+)`), + "RFC822", + func(matches []string) *token.DateComponents { + return &token.DateComponents{ + Day: matches[1], Month: matches[2], Year: matches[3], + Hour: matches[4], Minute: matches[5], + Format: "RFC822", + } + }, + }, + // RFC822Z: 02 Jan 06 15:04 -0700 + { + regexp.MustCompile(`^(\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+) (-\d+)`), + "RFC822Z", + func(matches []string) *token.DateComponents { + return &token.DateComponents{ + Day: matches[1], Month: matches[2], Year: matches[3], + Hour: matches[4], Minute: matches[5], + Format: "RFC822Z", + } + }, + }, + // RFC850: Monday, 02-Jan-06 15:04:05 MST + { + regexp.MustCompile(`^([A-Za-z_]+), (\d+)-([A-Za-z_]+)-(\d+) (\d+):(\d+):(\d+) ([A-Za-z_]+)`), + "RFC850", + func(matches []string) *token.DateComponents { + return &token.DateComponents{ + Day: matches[2], Month: matches[3], Year: matches[4], + Hour: matches[5], Minute: 
// hasNumericPattern reports whether word contains at least one ASCII digit
// (0-9).
//
// This is a plain byte scan instead of compiling a `\d` regular expression on
// every call. Go's `\d` matches only ASCII digits, and the bytes of a
// multi-byte UTF-8 sequence are never in the ASCII digit range, so the result
// is the same.
func hasNumericPattern(word string) bool {
	for i := 0; i < len(word); i++ {
		if c := word[i]; c >= '0' && c <= '9' {
			return true
		}
	}
	return false
}
buffer looks like a URL scheme +func (t *Tokenizer) isURLScheme() bool { + buffer := string(t.buffer) + return buffer == "http" || buffer == "https" +} + +// State management helpers + +func (t *Tokenizer) setState(newState TokenizerState) { + t.state = newState +} + +func (t *Tokenizer) addToBuffer(char rune) { + t.buffer = append(t.buffer, char) +} + +func (t *Tokenizer) clearBuffer() { + t.buffer = t.buffer[:0] // Keep capacity, reset length +} + +func (t *Tokenizer) bufferToString() string { + return string(t.buffer) +} + +func (t *Tokenizer) flushBuffer() { + if len(t.buffer) > 0 { + // Create remaining content as word token + t.createWordToken() + } +} + +// Token creation methods + +func (t *Tokenizer) createWordToken() { + value := t.bufferToString() + tokenType := t.classifyToken(value) + + tok := token.NewTokenWithFlags(tokenType, value, false, shouldSetPossiblyWildcard(tokenType, value)) + t.tokens = append(t.tokens, tok) + t.clearBuffer() +} + +func (t *Tokenizer) createNumericToken() { + value := t.bufferToString() + t.tokens = append(t.tokens, token.Token{ + Type: token.TokenNumeric, + Value: value, + PossiblyWildcard: true, // Numeric tokens can be merged (25 vs 62 → *) + }) + t.clearBuffer() +} + +func (t *Tokenizer) createWhitespaceToken() { + value := t.bufferToString() + t.tokens = append(t.tokens, token.Token{ + Type: token.TokenWhitespace, + Value: value, + PossiblyWildcard: false, // Whitespace tokens are not mergeable + }) + t.clearBuffer() +} + +func (t *Tokenizer) createSpecialToken() { + value := t.bufferToString() + tokenType := t.classifyToken(value) + + t.tokens = append(t.tokens, token.Token{ + Type: tokenType, + Value: value, + PossiblyWildcard: shouldSetPossiblyWildcard(tokenType, value), + }) + t.clearBuffer() +} diff --git a/pkg/logs/patterns/clustering/cluster.go b/pkg/logs/patterns/clustering/cluster.go new file mode 100644 index 000000000000..f49ce1cbd7c3 --- /dev/null +++ b/pkg/logs/patterns/clustering/cluster.go @@ -0,0 +1,292 
@@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package clustering provides clustering functionality for grouping similar TokenLists +// and identifying wildcard positions for pattern extraction. +package clustering + +import ( + "strings" + "time" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering/merging" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +// Cluster represents a group of TokenLists with identical signatures. +type Cluster struct { + Signature token.Signature + TokenLists []*token.TokenList + Pattern *token.TokenList + WildcardMap map[int]bool + PatternID uint64 + + // Timestamp tracking for stateful encoding + CreatedAt time.Time // When pattern was first created + UpdatedAt time.Time // When pattern was last modified + LastSentAt time.Time // When we last sent this pattern to gRPC +} + +// NewCluster creates a new cluster. +func NewCluster(signature token.Signature, tokenList *token.TokenList) *Cluster { + now := time.Now() + return &Cluster{ + Signature: signature, + TokenLists: []*token.TokenList{tokenList}, + Pattern: nil, + WildcardMap: make(map[int]bool), + PatternID: 0, // Will be assigned when pattern is generated + CreatedAt: now, + UpdatedAt: now, + LastSentAt: time.Time{}, // Zero time - never sent + } +} + +// Add adds a TokenList to this cluster if it has a matching signature. +func (c *Cluster) Add(tokenList *token.TokenList) bool { + signature := token.NewSignature(tokenList) + + if !c.Signature.Equals(signature) { + return false + } + + c.TokenLists = append(c.TokenLists, tokenList) + + c.Pattern = nil + c.WildcardMap = make(map[int]bool) + c.UpdatedAt = time.Now() // Pattern will change when regenerated z + + return true +} + +// Size returns the number of TokenLists in this cluster. 
+func (c *Cluster) Size() int { + return len(c.TokenLists) +} + +// GeneratePattern analyzes all TokenLists in the cluster to identify wildcard positions. +// Uses intelligent mergeability logic to determine which positions can be wildcarded. +// If the cluster contains heterogeneous TokenLists that can't merge, uses the largest +// mergeable group for pattern generation. +func (c *Cluster) GeneratePattern() *token.TokenList { + if c.Pattern != nil { + return c.Pattern + } + + if len(c.TokenLists) == 0 { + return nil + } + + if len(c.TokenLists) == 1 { + c.Pattern = c.TokenLists[0] + return c.Pattern + } + + // Check if cluster is heterogeneous - contains unmergeable sub-groups + groups := merging.FindMergeableGroups(c.TokenLists) + + // If we have multiple groups, the cluster is heterogeneous + // Use the largest group for pattern generation + var primaryGroup []*token.TokenList + if len(groups) > 1 { + // Find the largest group + maxSize := 0 + for _, group := range groups { + if len(group) > maxSize { + maxSize = len(group) + primaryGroup = group + } + } + // TODO: Log warning that cluster is heterogeneous and we're only using primary group + } else { + primaryGroup = groups[0] + } + + // Now generate pattern from the primary group using merging logic + template := primaryGroup[0] + if template.Length() == 0 { + return nil + } + + // Start with the template + pattern := template + + // Progressively merge with each TokenList in the group + for i := 1; i < len(primaryGroup); i++ { + merged := merging.MergeTokenLists(pattern, primaryGroup[i]) + if merged != nil { + pattern = merged + } + // If merge fails (shouldn't happen since FindMergeableGroups verified it), keep current pattern + } + + // Build wildcard map and handle special path patterns + c.WildcardMap = make(map[int]bool) + patternTokens := make([]token.Token, pattern.Length()) + + for i := 0; i < pattern.Length(); i++ { + tok := pattern.Tokens[i] + + if tok.IsWildcard { + c.WildcardMap[i] = true + + // 
Special handling for path wildcards + if tok.Type == token.TokenAbsolutePath && len(primaryGroup) > 0 { + firstPath := primaryGroup[0].Tokens[i].Value + tok.Value = getPathPattern(firstPath) + } + } + + patternTokens[i] = tok + } + + c.Pattern = token.NewTokenListWithTokens(patternTokens) + return c.Pattern +} + +// GetWildcardPositions returns wildcard positions. +func (c *Cluster) GetWildcardPositions() []int { + if c.Pattern == nil { + c.GeneratePattern() + } + + var positions []int + for pos := range c.WildcardMap { + positions = append(positions, pos) + } + + return positions +} + +// HasWildcards returns true if this cluster contains wildcard positions. +func (c *Cluster) HasWildcards() bool { + if c.Pattern == nil { + c.GeneratePattern() + } + + return len(c.WildcardMap) > 0 +} + +// ExtractWildcardValues extracts the wildcard values from a specific TokenList +func (c *Cluster) ExtractWildcardValues(tokenList *token.TokenList) []string { + if c.Pattern == nil { + c.GeneratePattern() + } + + if len(c.WildcardMap) == 0 { + return []string{} + } + + var wildcardValues []string + for i := 0; i < tokenList.Length(); i++ { + if c.WildcardMap[i] { + wildcardValues = append(wildcardValues, tokenList.Tokens[i].Value) + } + } + + return wildcardValues +} + +// GetPatternString returns a string representation of the pattern +func (c *Cluster) GetPatternString() string { + if c.Pattern == nil { + c.GeneratePattern() + } + + if c.Pattern == nil { + return "" + } + + var parts []string + for _, tok := range c.Pattern.Tokens { + parts = append(parts, tok.Value) + } + return strings.Join(parts, "") +} + +// GetPatternID returns the pattern ID for this cluster +func (c *Cluster) GetPatternID() uint64 { + return c.PatternID +} + +// SetPatternID sets the pattern ID for this cluster +func (c *Cluster) SetPatternID(id uint64) { + c.PatternID = id +} + +// MarkAsSent updates the LastSentAt timestamp to indicate this pattern was sent to gRPC +func (c *Cluster) MarkAsSent() { + 
c.LastSentAt = time.Now() +} + +// NeedsSending returns true if this pattern has never been sent or has been updated since last sent +func (c *Cluster) NeedsSending() bool { + return c.LastSentAt.IsZero() || c.UpdatedAt.After(c.LastSentAt) +} + +// IsNewPattern returns true if this pattern has never been sent +func (c *Cluster) IsNewPattern() bool { + return c.LastSentAt.IsZero() +} + +// WasUpdatedSinceLastSent returns true if pattern was updated since last sent +func (c *Cluster) WasUpdatedSinceLastSent() bool { + return !c.LastSentAt.IsZero() && c.UpdatedAt.After(c.LastSentAt) +} + +// MergeTokensIfFits attempts to merge this cluster with another cluster. +// This is used for batch consolidation where clusters with the same signature +// might be further consolidated based on semantic mergeability. +func (c *Cluster) MergeTokensIfFits(other *Cluster) bool { + // Check if clusters have the same structure + if c.Signature.Position != other.Signature.Position || c.Signature.Length != other.Signature.Length { + return false + } + + // Check if tokens can be merged at each position + if len(c.TokenLists) == 0 || len(other.TokenLists) == 0 { + return false + } + + // Use the first TokenList from each cluster for comparison + tokenList1 := c.TokenLists[0] + tokenList2 := other.TokenLists[0] + + // Delegate to merging package for semantic mergeability check + if !merging.CanMergeTokenLists(tokenList1, tokenList2) { + return false + } + + // Merge is possible - add other cluster's TokenLists to this cluster + c.TokenLists = append(c.TokenLists, other.TokenLists...) 
// getPathPattern converts a path to a hierarchical wildcard pattern with one
// "/*" per path segment, e.g. "/var/log/app.log" becomes "/*/*/*".
//
// Leading and trailing slashes are ignored. A path with no segments ("", "/",
// "//", ...) yields "/". Empty segments inside the path ("a//b") still count
// as segments.
func getPathPattern(path string) string {
	// Remove leading/trailing slashes; nothing left means the root path.
	trimmed := strings.Trim(path, "/")
	if trimmed == "" {
		return "/"
	}

	// N separators delimit N+1 segments. Counting avoids allocating the split
	// slice, and Repeat avoids growing the result one "/*" at a time.
	segments := strings.Count(trimmed, "/") + 1
	return strings.Repeat("/*", segments)
}
+func ShouldProtectPosition(position int, tokenType token.TokenType) bool { + // Rule 1: Never wildcard the first word token + // The first word typically indicates the action/command and is semantically critical + // e.g., "Login successful" vs "Error occurred" should not merge to "* *" + if position == 0 && tokenType == token.TokenWord { + return true + } + + // Future: Add more protection rules + // - Never wildcard HTTP methods? + // - Never wildcard severity levels? + // - Protect first N tokens? + + return false +} + +// CanMergeTokenLists checks if two TokenLists can be merged into a unified pattern. +// Returns true only if all token positions are either identical or mergeable according +// to their mergeability levels and protection rules. +func CanMergeTokenLists(tl1, tl2 *token.TokenList) bool { + if tl1.Length() != tl2.Length() { + return false + } + + for i := 0; i < tl1.Length(); i++ { + tok1 := &tl1.Tokens[i] + tok2 := &tl2.Tokens[i] + + level := tok1.GetMergeabilityLevel(tok2) + + // If tokens match exactly, continue + if level == token.FitsAsItIs { + continue + } + + // If tokens can't merge at all, reject + if !level.IsMergeable() { + return false + } + + // Check protection rules - if position is protected and tokens differ, reject + if ShouldProtectPosition(i, tok1.Type) { + return false + } + } + + return true +} + +// MergeTokenLists performs the actual merge of two TokenLists, creating a new TokenList +// with wildcards at positions where tokens differ but are mergeable. +// Returns nil if the TokenLists cannot be merged. 
+func MergeTokenLists(tl1, tl2 *token.TokenList) *token.TokenList { + if !CanMergeTokenLists(tl1, tl2) { + return nil + } + + merged := token.NewTokenList() + + for i := 0; i < tl1.Length(); i++ { + tok1 := &tl1.Tokens[i] + tok2 := &tl2.Tokens[i] + + level := tok1.GetMergeabilityLevel(tok2) + + if level == token.FitsAsItIs { + // Tokens are identical, keep as-is + merged.Add(*tok1) + continue + } + + // Handle different merge types + switch level { + case token.MergeableWithWiderRange: + // Special handling for structured tokens (e.g., dates with partial wildcards) + if tok1.Type == token.TokenDate && tok1.DateInfo != nil && tok2.DateInfo != nil { + merged.Add(createPartialDateWildcard(tok1.DateInfo, tok2.DateInfo)) + } else { + // Fallback to full wildcard + merged.AddWildcardToken(tok1.Type) + } + case token.MergeableAsWildcard: + // Create a full wildcard for this position + merged.AddWildcardToken(tok1.Type) + default: + // Shouldn't reach here if CanMergeTokenLists passed, but be defensive + merged.Add(*tok1) + } + } + + return merged +} + +// createPartialDateWildcard creates a date token with wildcards in differing components. +// This allows for more precise patterns like "2024-01-* 10:30:45" instead of just "*". 
+func createPartialDateWildcard(d1, d2 *token.DateComponents) token.Token { + // Create a pattern where differing components become wildcards + var pattern strings.Builder + + switch d1.Format { + case "RFC3339", "ISO8601": + // Format: YYYY-MM-DDTHH:MM:SS + if d1.Year == d2.Year { + pattern.WriteString(d1.Year) + } else { + pattern.WriteString("*") + } + pattern.WriteString("-") + + if d1.Month == d2.Month { + pattern.WriteString(d1.Month) + } else { + pattern.WriteString("*") + } + pattern.WriteString("-") + + if d1.Day == d2.Day { + pattern.WriteString(d1.Day) + } else { + pattern.WriteString("*") + } + pattern.WriteString("T") + + if d1.Hour == d2.Hour { + pattern.WriteString(d1.Hour) + } else { + pattern.WriteString("*") + } + pattern.WriteString(":") + + if d1.Minute == d2.Minute { + pattern.WriteString(d1.Minute) + } else { + pattern.WriteString("*") + } + pattern.WriteString(":") + + if d1.Second == d2.Second { + pattern.WriteString(d1.Second) + } else { + pattern.WriteString("*") + } + + default: + // For other formats, just use a full wildcard + return token.NewWildcardToken(token.TokenDate) + } + + return token.Token{ + Type: token.TokenDate, + Value: pattern.String(), + IsWildcard: true, + DateInfo: d1, // Keep the first date's structure for reference + } +} + +// FindMergeableGroups analyzes a list of TokenLists and groups them by mergeability. +// This is used to detect heterogeneous clusters that should be split into multiple patterns. +// Returns a list of groups where each group contains mutually mergeable TokenLists. 
+func FindMergeableGroups(tokenLists []*token.TokenList) [][]*token.TokenList { + if len(tokenLists) == 0 { + return nil + } + + if len(tokenLists) == 1 { + return [][]*token.TokenList{tokenLists} + } + + var groups [][]*token.TokenList + processed := make(map[int]bool) + + for i := 0; i < len(tokenLists); i++ { + if processed[i] { + continue + } + + // Seed a new group with the first TokenList not yet assigned to any group + group := []*token.TokenList{tokenLists[i]} + processed[i] = true + + // Greedily scan the later TokenLists in input order for ones that can join this group + for j := i + 1; j < len(tokenLists); j++ { + if processed[j] { + continue + } + + // A candidate joins only if it can merge with every TokenList already in the group + canMergeWithGroup := true + for _, groupMember := range group { + if !CanMergeTokenLists(tokenLists[j], groupMember) { + canMergeWithGroup = false + break + } + } + + if canMergeWithGroup { + group = append(group, tokenLists[j]) + processed[j] = true + } + } + + groups = append(groups, group) + } + + return groups +} diff --git a/pkg/logs/patterns/clustering/merging/merging_test.go b/pkg/logs/patterns/clustering/merging/merging_test.go new file mode 100644 index 000000000000..305fc443733f --- /dev/null +++ b/pkg/logs/patterns/clustering/merging/merging_test.go @@ -0,0 +1,345 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +package merging + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +func TestShouldProtectPosition(t *testing.T) { + tests := []struct { + name string + position int + tokenType token.TokenType + expected bool + }{ + { + name: "First word should be protected", + position: 0, + tokenType: token.TokenWord, + expected: true, + }, + { + name: "First numeric should not be protected", + position: 0, + tokenType: token.TokenNumeric, + expected: false, + }, + { + name: "Second word should not be protected", + position: 1, + tokenType: token.TokenWord, + expected: false, + }, + { + name: "First whitespace should not be protected", + position: 0, + tokenType: token.TokenWhitespace, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ShouldProtectPosition(tt.position, tt.tokenType) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestCanMergeTokenLists_IdenticalLists(t *testing.T) { + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "hello"), + token.NewToken(token.TokenWhitespace, " "), + token.NewToken(token.TokenWord, "world"), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "hello"), + token.NewToken(token.TokenWhitespace, " "), + token.NewToken(token.TokenWord, "world"), + }) + + assert.True(t, CanMergeTokenLists(tl1, tl2)) +} + +func TestCanMergeTokenLists_PossiblyWildcardTokens(t *testing.T) { + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "logged"), + token.NewToken(token.TokenWhitespace, " "), + token.NewPossiblyWildcardToken(token.TokenWord, "user123"), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "logged"), + token.NewToken(token.TokenWhitespace, " "), + token.NewPossiblyWildcardToken(token.TokenWord, "admin456"), + }) + + assert.True(t, 
CanMergeTokenLists(tl1, tl2)) +} + +func TestCanMergeTokenLists_GenericWords(t *testing.T) { + // Generic words without possiblyWildcard flag should not merge + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "bob"), + token.NewToken(token.TokenWhitespace, " "), + token.NewToken(token.TokenWord, "likes"), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "cat"), + token.NewToken(token.TokenWhitespace, " "), + token.NewToken(token.TokenWord, "likes"), + }) + + assert.False(t, CanMergeTokenLists(tl1, tl2)) +} + +func TestCanMergeTokenLists_DifferentLengths(t *testing.T) { + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "hello"), + token.NewToken(token.TokenWhitespace, " "), + token.NewToken(token.TokenWord, "world"), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "hello"), + }) + + assert.False(t, CanMergeTokenLists(tl1, tl2)) +} + +func TestCanMergeTokenLists_FirstWordProtection(t *testing.T) { + // First word protection should prevent merge even with possiblyWildcard + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewPossiblyWildcardToken(token.TokenWord, "user123"), + token.NewToken(token.TokenWhitespace, " "), + token.NewToken(token.TokenWord, "logged"), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewPossiblyWildcardToken(token.TokenWord, "admin456"), + token.NewToken(token.TokenWhitespace, " "), + token.NewToken(token.TokenWord, "logged"), + }) + + assert.False(t, CanMergeTokenLists(tl1, tl2), "First word should be protected from wildcarding") +} + +func TestMergeTokenLists_CreateWildcard(t *testing.T) { + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "logged"), + token.NewToken(token.TokenWhitespace, " "), + token.NewPossiblyWildcardToken(token.TokenWord, "user123"), + }) + + tl2 := 
token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "logged"), + token.NewToken(token.TokenWhitespace, " "), + token.NewPossiblyWildcardToken(token.TokenWord, "admin456"), + }) + + merged := MergeTokenLists(tl1, tl2) + assert.NotNil(t, merged) + assert.Equal(t, 3, merged.Length()) + assert.Equal(t, "logged", merged.Tokens[0].Value) + assert.False(t, merged.Tokens[0].IsWildcard) + assert.Equal(t, " ", merged.Tokens[1].Value) + assert.Equal(t, "*", merged.Tokens[2].Value) + assert.True(t, merged.Tokens[2].IsWildcard) +} + +func TestMergeTokenLists_UnmergeableReturnsNil(t *testing.T) { + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "bob"), + token.NewToken(token.TokenWhitespace, " "), + token.NewToken(token.TokenWord, "likes"), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "cat"), + token.NewToken(token.TokenWhitespace, " "), + token.NewToken(token.TokenWord, "likes"), + }) + + merged := MergeTokenLists(tl1, tl2) + assert.Nil(t, merged, "Unmergeable TokenLists should return nil") +} + +func TestMergeTokenLists_DateMerging(t *testing.T) { + dateInfo1 := &token.DateComponents{ + Year: "2024", + Month: "01", + Day: "15", + Hour: "10", + Minute: "30", + Second: "45", + Format: "RFC3339", + } + + dateInfo2 := &token.DateComponents{ + Year: "2024", + Month: "01", + Day: "15", + Hour: "14", + Minute: "22", + Second: "30", + Format: "RFC3339", + } + + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "Log"), + token.NewToken(token.TokenWhitespace, " "), + token.NewDateToken("2024-01-15T10:30:45Z", dateInfo1), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "Log"), + token.NewToken(token.TokenWhitespace, " "), + token.NewDateToken("2024-01-15T14:22:30Z", dateInfo2), + }) + + merged := MergeTokenLists(tl1, tl2) + assert.NotNil(t, merged) + assert.Equal(t, 3, merged.Length()) + + // Date 
token should have partial wildcard for time components + dateToken := merged.Tokens[2] + assert.True(t, dateToken.IsWildcard) + assert.Equal(t, token.TokenDate, dateToken.Type) + // Should preserve date, wildcard time: 2024-01-15T*:*:* + assert.Contains(t, dateToken.Value, "2024-01-15") +} + +func TestFindMergeableGroups_SingleGroup(t *testing.T) { + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "logged"), + token.NewPossiblyWildcardToken(token.TokenWord, "user123"), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "logged"), + token.NewPossiblyWildcardToken(token.TokenWord, "admin456"), + }) + + tl3 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "logged"), + token.NewPossiblyWildcardToken(token.TokenWord, "guest789"), + }) + + groups := FindMergeableGroups([]*token.TokenList{tl1, tl2, tl3}) + assert.Equal(t, 1, len(groups), "All mergeable TokenLists should be in one group") + assert.Equal(t, 3, len(groups[0]), "Group should contain all three TokenLists") +} + +func TestFindMergeableGroups_MultipleGroups(t *testing.T) { + // Group 1: mergeable user logs + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "logged"), + token.NewPossiblyWildcardToken(token.TokenWord, "user123"), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "logged"), + token.NewPossiblyWildcardToken(token.TokenWord, "admin456"), + }) + + // Group 2: unmergeable generic words + tl3 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "logged"), + token.NewToken(token.TokenWord, "cat"), + }) + + tl4 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "logged"), + token.NewToken(token.TokenWord, "dog"), + }) + + groups := FindMergeableGroups([]*token.TokenList{tl1, tl2, tl3, tl4}) + assert.Equal(t, 3, len(groups), "Should have 3 groups: user group + 
2 separate generic word entries") + + // Find the largest group (should be the user group with 2 members) + maxSize := 0 + for _, group := range groups { + if len(group) > maxSize { + maxSize = len(group) + } + } + assert.Equal(t, 2, maxSize, "Largest group should have 2 TokenLists") +} + +func TestFindMergeableGroups_EmptyInput(t *testing.T) { + groups := FindMergeableGroups([]*token.TokenList{}) + assert.Nil(t, groups) +} + +func TestFindMergeableGroups_SingleTokenList(t *testing.T) { + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "hello"), + }) + + groups := FindMergeableGroups([]*token.TokenList{tl1}) + assert.Equal(t, 1, len(groups)) + assert.Equal(t, 1, len(groups[0])) +} + +func TestMergeTokenLists_ProtectionRulesEnforced(t *testing.T) { + // Try to merge when first token is a word but differs + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewPossiblyWildcardToken(token.TokenWord, "Login"), + token.NewToken(token.TokenWhitespace, " "), + token.NewToken(token.TokenWord, "successful"), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewPossiblyWildcardToken(token.TokenWord, "Logout"), + token.NewToken(token.TokenWhitespace, " "), + token.NewToken(token.TokenWord, "successful"), + }) + + // Should fail because first word is protected + merged := MergeTokenLists(tl1, tl2) + assert.Nil(t, merged, "Should not merge when first word differs (protected)") +} + +func TestMergeTokenLists_ProgressiveMerging(t *testing.T) { + // Test merging multiple TokenLists progressively + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "Request"), + token.NewToken(token.TokenWhitespace, " "), + token.NewPossiblyWildcardToken(token.TokenNumeric, "123"), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "Request"), + token.NewToken(token.TokenWhitespace, " "), + token.NewPossiblyWildcardToken(token.TokenNumeric, "456"), + }) + + tl3 
:= token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenWord, "Request"), + token.NewToken(token.TokenWhitespace, " "), + token.NewPossiblyWildcardToken(token.TokenNumeric, "789"), + }) + + // Merge first two + merged12 := MergeTokenLists(tl1, tl2) + assert.NotNil(t, merged12) + assert.True(t, merged12.Tokens[2].IsWildcard) + + // Merge result with third + merged123 := MergeTokenLists(merged12, tl3) + assert.NotNil(t, merged123) + assert.Equal(t, 3, merged123.Length()) + assert.Equal(t, "Request", merged123.Tokens[0].Value) + assert.Equal(t, "*", merged123.Tokens[2].Value) + assert.True(t, merged123.Tokens[2].IsWildcard) +} diff --git a/pkg/logs/patterns/comprehensive_demo.go b/pkg/logs/patterns/comprehensive_demo.go new file mode 100644 index 000000000000..0343ee2bdadc --- /dev/null +++ b/pkg/logs/patterns/comprehensive_demo.go @@ -0,0 +1,146 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package patterns provides a simple demo of pattern extraction +package main + +import ( + "fmt" + "strings" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/automaton" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +func main() { + fmt.Println("=== Log Pattern Extraction Demo ===") + + // Step 1: Setup + runBasicDemo() + + // Step 2: Advanced features + runAdvancedDemo() + + fmt.Println("=== Demo Complete ===") +} + +func runBasicDemo() { + fmt.Println("1. 
BASIC PATTERN EXTRACTION") + fmt.Println(" Processing HTTP requests to find patterns...") + + clusterManager := clustering.NewClusterManager() + + // Simple HTTP logs + httpLogs := []string{ + "GET /api/users 200", + "POST /api/users 201", + "PUT /api/users 200", + "GET /api/orders 200", + "DELETE /api/users 204", + } + + // Process logs and show tokenization + for i, logMsg := range httpLogs { + fmt.Printf(" Log %d: %s\n", i+1, logMsg) + + // Tokenize and show breakdown + tokenList := automaton.TokenizeString(logMsg) + fmt.Printf(" → Tokens: %s\n", formatTokens(tokenList)) + + // Add to clustering + cluster := clusterManager.Add(tokenList) + fmt.Printf(" → Cluster size: %d\n\n", cluster.Size()) + } + + // Show discovered patterns + showPatterns(clusterManager, "HTTP API Requests") +} + +func runAdvancedDemo() { + fmt.Println("2. ADVANCED TOKENIZATION") + fmt.Println(" Showing specialized token detection...") + + clusterManager := clustering.NewClusterManager() + + // Advanced logs with different data types + advancedLogs := []string{ + "ERROR Database connection to 192.168.1.100 failed", + "ERROR Database connection to 192.168.1.101 failed", + "ERROR Database connection to 192.168.1.102 failed", + "INFO User admin@company.com logged in at 2024-01-15", + "INFO User john@company.com logged in at 2024-01-16", + "INFO User jane@company.com logged in at 2024-01-17", + } + + for i, logMsg := range advancedLogs { + fmt.Printf(" Log %d: %s\n", i+1, logMsg) + + tokenList := automaton.TokenizeString(logMsg) + fmt.Printf(" → Specialized tokens: %s\n", formatSpecializedTokens(tokenList)) + + cluster := clusterManager.Add(tokenList) + fmt.Printf(" → Cluster size: %d\n\n", cluster.Size()) + } + + showPatterns(clusterManager, "Advanced Tokenization") +} + +func formatTokens(tokenList *token.TokenList) string { + if tokenList.IsEmpty() { + return "none" + } + + var parts []string + for _, tok := range tokenList.Tokens { + parts = append(parts, fmt.Sprintf("%s", tok.Value)) + } + 
return strings.Join(parts, " | ") +} + +func formatSpecializedTokens(tokenList *token.TokenList) string { + if tokenList.IsEmpty() { + return "none" + } + + var parts []string + for _, tok := range tokenList.Tokens { + if tok.Type.String() != "Word" && tok.Type.String() != "Whitespace" { + parts = append(parts, fmt.Sprintf("%s(%s)", tok.Type, tok.Value)) + } + } + + if len(parts) == 0 { + return "no specialized tokens" + } + return strings.Join(parts, ", ") +} + +func showPatterns(clusterManager *clustering.ClusterManager, title string) { + fmt.Printf(" PATTERNS DISCOVERED in %s:\n", title) + + allClusters := clusterManager.GetAllClusters() + patternCount := 0 + + for _, cluster := range allClusters { + if cluster.Size() >= 3 { // Lower threshold for demo + patternStr := cluster.GetPatternString() + if patternStr != "" { + patternCount++ + fmt.Printf(" → Pattern %d: %s (found %d times)\n", + patternCount, patternStr, cluster.Size()) + } + } + } + + if patternCount == 0 { + fmt.Printf(" → No patterns found (need at least 3 similar messages)\n") + } + + // Show stats + stats := clusterManager.GetStats() + fmt.Printf(" → Stats: %d messages processed, %d clusters created\n\n", + stats.TotalTokenLists, stats.TotalClusters) +} diff --git a/pkg/logs/patterns/merging.md b/pkg/logs/patterns/merging.md new file mode 100644 index 000000000000..4e68836310fc --- /dev/null +++ b/pkg/logs/patterns/merging.md @@ -0,0 +1,626 @@ +# Java Mergeability Implementation Guide + +## šŸŽÆ Overview + +This guide shows how to implement the Java approach to pattern merging in your Go library. The Java approach uses **token-level mergeability** with discrete levels instead of continuous similarity scoring. 
+ +## 🧠 Key Discovery: How Java Actually Works + +After thorough analysis of the Java codebase, the Java approach uses a **two-phase process**: + +### Phase 1: Real-time Document Processing +- **Tokenization**: Each log is tokenized using `DefaultLuceneTokenizingAutomatonBuilder` +- **Clustering Key**: Uses `PatternClusteringKey` which only considers: + - **Tags** (metadata) + - **Token count** (number of tokens) +- **Real-time Merging**: Documents with same clustering key go to same bucket, but **only merge if tokens are identical** + +### Phase 2: Batch Consolidation (The Magic!) +The key insight is in `MergeableRootNode.mergeClusters()` - this is where wildcards are actually created: + +```java +// Groups clusters by (tags, token_count) +clusters.stream() + .collect(Collectors.groupingBy( + cluster -> Pair.of(cluster.getTags(), cluster.getRootToken().size()))) + .values() + .forEach(similarClusters -> { + // For each group of similar clusters... + while (!similarClusters.isEmpty()) { + final MergeableNode cluster = similarClusters.remove(similarClusters.size() - 1); + final ListIterator<MergeableNode> iter = similarClusters.listIterator(); + while (iter.hasNext()) { + final MergeableNode candidate = iter.next(); + if (cluster.mergeTokensIfFits(candidate)) { + iter.remove(); // Merge successful! + } + } + } + }); +``` + +### The `possiblyWildcard` Flag +- **Only Word tokens with numeric patterns have `possiblyWildcard = true`** +- This means **only words like `user123`, `session456` can merge into wildcards** +- Generic words like `bob`, `cat` are **not mergeable** and stay separate +- The `WildcardableWord.mergeWith()` method handles the actual wildcard creation + +### Example: `user123 logged in successfully` vs `user456 logged in successfully` +1. **Tokenization**: Both become `[Word("user123"), Word("logged"), Word("in"), Word("successfully")]` +2. **Clustering Key**: Both get `(tags, 4)` → Same bucket +3. **Real-time**: Can't merge (different tokens) +4. 
**Batch Consolidation**: + - `Word("user123")` vs `Word("user456")` → `MERGEABLE_AS_WILDCARD` (both have numeric patterns) + - `Word("logged")` vs `Word("logged")` → `FITS_AS_IT_IS` (same text) + - `Word("in")` vs `Word("in")` → `FITS_AS_IT_IS` (same text) + - `Word("successfully")` vs `Word("successfully")` → `FITS_AS_IT_IS` (same text) +5. **Result**: Pattern becomes `[user* logged in successfully]` + +### Example: `bob loves eat 25` vs `cat loves eat 62` +1. **Tokenization**: Both become `[Word("bob"), Word("loves"), Word("eat"), NumericValue(25)]` +2. **Clustering Key**: Both get `(tags, 4)` → Same bucket +3. **Real-time**: Can't merge (different tokens) +4. **Batch Consolidation**: + - `Word("bob")` vs `Word("cat")` → `UNMERGEABLE` (generic words, no numeric patterns) + - **Result**: Separate patterns (no merge) ✅ + +## 🏗️ Project Structure + +``` +your-go-library/ +├── internal/ +│ ├── token/ +│ │ ├── token.go # Token interface and MergeabilityLevel +│ │ ├── word.go # Word token implementation +│ │ ├── numeric.go # NumericValue token implementation +│ │ ├── special.go # SpecialCharacter token implementation +│ │ └── token_list.go # TokenList implementation +│ ├── tokenization/ +│ │ ├── tokenizer.go # Tokenization engine +│ │ └── parser.go # Parser interface +│ ├── clustering/ +│ │ ├── clusterer.go # Clustering interfaces +│ │ ├── realtime.go # RealTimeClusterer +│ │ └── consolidation.go # Batch consolidation +│ └── patterns/ +│ ├── extractor.go # PatternExtractor +│ └── matcher.go # Pattern matching +├── pkg/ +│ └── patterns/ +│ └── patterns.go # Public API +└── go.mod +``` + +## 🚀 Implementation Steps + +### Step 1: Core Token System (45 minutes) + +**File: `internal/token/token.go`** + +```go +package token + +type Token interface { + IsWildcard() bool + GetPatternString() string + GetMergeabilityLevel(other Token) MergeabilityLevel + MergeWith(other Token) Token +} + 
+type MergeabilityLevel int + +const ( + UNMERGEABLE MergeabilityLevel = iota + MERGEABLE_AS_NEW_TYPE + MERGEABLE_AS_WILDCARD + MERGEABLE_WITH_WIDER_RANGE + FITS + FITS_AS_IT_IS +) + +func (m MergeabilityLevel) Compare(other MergeabilityLevel) int { + return int(m) - int(other) +} + +func (m MergeabilityLevel) IsMergeable() bool { + return m > UNMERGEABLE +} +``` + +### Step 2: Word Token Implementation (30 minutes) + +**File: `internal/token/word.go`** + +```go +package token + +type Word struct { + text string + hasDigits bool + possiblyWildcard bool + wildcardSummary WildcardSummary +} + +func NewWord(text string, possiblyWildcard, withSummaries bool) *Word { + return &Word{ + text: text, + hasDigits: containsDigits(text), + possiblyWildcard: possiblyWildcard, + wildcardSummary: createWildcardSummary(text, withSummaries), + } +} + +func (w *Word) GetMergeabilityLevel(other Token) MergeabilityLevel { + if otherWord, ok := other.(*Word); ok { + return w.getMergeabilityWithWord(otherWord) + } else if numericValue, ok := other.(*NumericValue); ok { + return w.getMergeabilityWithNumeric(numericValue) + } + return UNMERGEABLE +} + +func (w *Word) getMergeabilityWithWord(other *Word) MergeabilityLevel { + if w.text != "" && other.text != "" { + if w.text == other.text { + if w.possiblyWildcard && !other.possiblyWildcard { + return FITS + } + return FITS_AS_IT_IS + } else if w.possiblyWildcard && other.possiblyWildcard { + return MERGEABLE_AS_WILDCARD // Both have numeric patterns + } else { + return UNMERGEABLE // Generic words don't merge + } + } + + if w.possiblyWildcard { + return MERGEABLE_AS_WILDCARD + } + + return UNMERGEABLE // Generic words are not mergeable +} + +func (w *Word) MergeWith(other Token) Token { + if otherWord, ok := other.(*Word); ok { + return w.mergeWithWord(otherWord) + } else if numericValue, ok := other.(*NumericValue); ok { + return w.mergeWithNumeric(numericValue) + } + return w +} + +func (w *Word) mergeWithWord(other *Word) *Word { + 
merged := &Word{ + text: w.text, + hasDigits: w.hasDigits || other.hasDigits, + possiblyWildcard: w.possiblyWildcard, + wildcardSummary: w.wildcardSummary, + } + + // If both have text and they're different, make wildcard + if w.text != "" && other.text != "" && w.text != other.text { + merged.possiblyWildcard = true + merged.wildcardSummary = mergeWildcardSummaries(w.wildcardSummary, other.wildcardSummary) + } + + return merged +} +``` + +### Step 3: TokenList Implementation (20 minutes) + +**File: `internal/token/token_list.go`** + +```go +package token + +type TokenList struct { + tokens []Token +} + +func NewTokenList(tokens []Token) *TokenList { + return &TokenList{tokens: tokens} +} + +func (tl *TokenList) GetMergeabilityLevel(other Token) MergeabilityLevel { + otherList, ok := other.(*TokenList) + if !ok { + return UNMERGEABLE + } + + if len(tl.tokens) != len(otherList.tokens) { + return UNMERGEABLE + } + + minLevel := FITS_AS_IT_IS + for i := 0; i < len(tl.tokens); i++ { + level := tl.tokens[i].GetMergeabilityLevel(otherList.tokens[i]) + if level.Compare(minLevel) < 0 { + if level == UNMERGEABLE { + return UNMERGEABLE + } + minLevel = level + } + } + return minLevel +} + +func (tl *TokenList) MergeWith(other Token) Token { + otherList := other.(*TokenList) + mergedTokens := make([]Token, len(tl.tokens)) + for i := 0; i < len(tl.tokens); i++ { + mergedTokens[i] = tl.tokens[i].MergeWith(otherList.tokens[i]) + } + return NewTokenList(mergedTokens) +} +``` + +### Step 4: Two-Phase Clustering System (60 minutes) + +**File: `internal/clustering/realtime.go`** + +```go +package clustering + +import ( + "sync" + "github.com/your-library/internal/token" +) + +type ClusteringKey struct { + Tags map[string]interface{} + TokenCount int +} + +type RealTimeClusterer struct { + clusters map[ClusteringKey][]*MergeableNode + mutex sync.RWMutex +} + +type MergeableNode struct { + rootToken *token.TokenList + messages []string + count int + tags map[string]interface{} +} + 
+func NewRealTimeClusterer() *RealTimeClusterer { + return &RealTimeClusterer{ + clusters: make(map[ClusteringKey][]*MergeableNode), + } +} + +func (rtc *RealTimeClusterer) ProcessDocument(message string, rootToken *token.TokenList, tags map[string]interface{}) *MergeableNode { + key := ClusteringKey{ + Tags: tags, + TokenCount: len(rootToken.GetTokens()), + } + + rtc.mutex.Lock() + defer rtc.mutex.Unlock() + + // Try to find existing cluster that can accept this document + if clusters, exists := rtc.clusters[key]; exists { + for _, cluster := range clusters { + if cluster.ProcessIfMergeable(rootToken) { + cluster.AddMessage(message) + return cluster + } + } + } + + // Create new cluster + newCluster := &MergeableNode{ + rootToken: rootToken, + messages: []string{message}, + count: 1, + tags: tags, + } + rtc.clusters[key] = append(rtc.clusters[key], newCluster) + return newCluster +} + +func (mn *MergeableNode) ProcessIfMergeable(rootToken *token.TokenList) bool { + if mn.rootToken.GetMergeabilityLevel(rootToken).IsMergeable() { + mn.rootToken = mn.rootToken.MergeWith(rootToken).(*token.TokenList) + return true + } + return false +} + +func (mn *MergeableNode) AddMessage(message string) { + mn.messages = append(mn.messages, message) + mn.count++ +} +``` + +### Step 5: Batch Consolidation (45 minutes) + +**File: `internal/clustering/consolidation.go`** + +```go +package clustering + +func (rtc *RealTimeClusterer) ConsolidateClusters() []*MergeableNode { + rtc.mutex.Lock() + defer rtc.mutex.Unlock() + + var consolidatedClusters []*MergeableNode + + // Group clusters by (tags, token_count) - same as Java + for _, clusters := range rtc.clusters { + consolidatedClusters = append(consolidatedClusters, + rtc.mergeClusters(clusters)...) 
+ } + + return consolidatedClusters +} + +func (rtc *RealTimeClusterer) mergeClusters(clusters []*MergeableNode) []*MergeableNode { + var consolidatedClusters []*MergeableNode + + // Java-style consolidation algorithm + for len(clusters) > 0 { + cluster := clusters[len(clusters)-1] + clusters = clusters[:len(clusters)-1] + + var remainingClusters []*MergeableNode + for _, candidate := range clusters { + if cluster.MergeTokensIfFits(candidate) { + // Merge successful, candidate is absorbed + continue + } else if candidate.MergeTokensIfFits(cluster) { + // Candidate can absorb cluster, use candidate as base + cluster = candidate + continue + } else { + // No merge possible, keep candidate + remainingClusters = append(remainingClusters, candidate) + } + } + + consolidatedClusters = append(consolidatedClusters, cluster) + clusters = remainingClusters + } + + return consolidatedClusters +} + +func (mn *MergeableNode) MergeTokensIfFits(other *MergeableNode) bool { + if mn.rootToken.GetMergeabilityLevel(other.rootToken).IsMergeable() { + mn.rootToken = mn.rootToken.MergeWith(other.rootToken).(*token.TokenList) + mn.messages = append(mn.messages, other.messages...) 
+ mn.count += other.count + return true + } + return false +} +``` + +### Step 6: Pattern Extractor Integration (30 minutes) + +**File: `pkg/patterns/patterns.go`** + +```go +package patterns + +import ( + "github.com/your-library/internal/clustering" + "github.com/your-library/internal/tokenization" +) + +type PatternExtractor struct { + tokenizer tokenization.Tokenizer + clusterer *clustering.RealTimeClusterer +} + +func NewPatternExtractor() *PatternExtractor { + return &PatternExtractor{ + tokenizer: tokenization.NewDefaultTokenizer(), + clusterer: clustering.NewRealTimeClusterer(), + } +} + +func (pe *PatternExtractor) ExtractPatterns(messages []string) ([]*Pattern, error) { + // Phase 1: Real-time processing + for _, message := range messages { + tokens, err := pe.tokenizer.Tokenize(message) + if err != nil { + return nil, err + } + + tokenList := token.NewTokenList(tokens) + pe.clusterer.ProcessDocument(message, tokenList, make(map[string]interface{})) + } + + // Phase 2: Batch consolidation + clusters := pe.clusterer.ConsolidateClusters() + + // Convert to patterns + patterns := make([]*Pattern, len(clusters)) + for i, cluster := range clusters { + patterns[i] = &Pattern{ + Template: cluster.rootToken.GetPatternString(), + Count: cluster.count, + Messages: cluster.messages, + } + } + + return patterns, nil +} + +type Pattern struct { + Template string + Count int + Messages []string +} +``` + +### Step 7: Basic Tokenization (30 minutes) + +**File: `internal/tokenization/tokenizer.go`** + +```go +package tokenization + +import ( + "strconv" + "strings" + "github.com/your-library/internal/token" +) + +type Tokenizer interface { + Tokenize(input string) ([]token.Token, error) +} + +type DefaultTokenizer struct{} + +func NewDefaultTokenizer() *DefaultTokenizer { + return &DefaultTokenizer{} +} + +func (dt *DefaultTokenizer) Tokenize(input string) ([]token.Token, error) { + var tokens []token.Token + + // Simple word-based tokenization + words := 
strings.Fields(input) + for _, word := range words { + if isNumeric(word) { + tokens = append(tokens, token.NewNumericValue(word, false)) + } else if hasNumericPattern(word) { + // Only words with numeric patterns can be wildcards + tokens = append(tokens, token.NewWord(word, true, false)) // possiblyWildcard=true + } else { + // Generic words are not mergeable + tokens = append(tokens, token.NewWord(word, false, false)) // possiblyWildcard=false + } + } + + return tokens, nil +} + +func hasNumericPattern(word string) bool { + // Check if word contains numbers (user123, session456, etc.) + return regexp.MustCompile(`\d`).MatchString(word) +} + +func isNumeric(s string) bool { + _, err := strconv.ParseFloat(s, 64) + return err == nil +} +``` + +## 🧪 Testing Implementation (30 minutes) + +**File: `internal/token/word_test.go`** + +```go +package token + +import ( + "testing" + "github.com/stretchr/testify/assert" +) + +func TestWordMergeability(t *testing.T) { + tests := []struct { + name string + token1 *Word + token2 *Word + expected MergeabilityLevel + }{ + { + name: "Same text, both wildcard", + token1: NewWord("GET", true, false), + token2: NewWord("GET", true, false), + expected: FITS_AS_IT_IS, + }, + { + name: "Different text, both wildcard", + token1: NewWord("GET", true, false), + token2: NewWord("POST", true, false), + expected: MERGEABLE_AS_WILDCARD, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := tt.token1.GetMergeabilityLevel(tt.token2) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestPatternClustering(t *testing.T) { + extractor := NewPatternExtractor() + + logMessages := []string{ + "user123 logged in successfully", + "user456 logged in successfully", + "user789 logged in successfully", + } + + patterns, err := extractor.ExtractPatterns(logMessages) + require.NoError(t, err) + + // Should create one pattern with wildcards + assert.Len(t, patterns, 1) + assert.Equal(t, "user* logged in 
successfully", patterns[0].Template) + assert.Equal(t, 3, patterns[0].Count) +} +``` + +## 📦 Go Module Setup (5 minutes) + +**File: `go.mod`** + +```go +module github.com/your-org/your-patterns-library + +go 1.21 + +require ( + github.com/stretchr/testify v1.8.4 +) +``` + +## 🎯 Key Differences from Go Approach + +### Go Approach (Current) +- **Similarity-based**: Uses Jaccard similarity with 50% threshold +- **Single-phase**: All processing happens in real-time +- **Continuous scoring**: Similarity values between 0.0 and 1.0 +- **Constant word similarity**: Additional check prevents merging very different patterns + +### Java Approach (Proposed) +- **Mergeability-based**: Uses discrete mergeability levels +- **Two-phase**: Real-time processing + batch consolidation +- **Binary decisions**: Either mergeable or not mergeable +- **Token-level rules**: Each token type defines its own mergeability logic +- **`possiblyWildcard` flag**: Enables wildcard creation for different word tokens + +### Why Java Approach Works Better + +1. **No Similarity Thresholds**: The `possiblyWildcard` flag eliminates the need for similarity calculations +2. **Batch Optimization**: Consolidation happens after all documents are processed, allowing better pattern discovery +3. **Predictable Behavior**: Discrete levels make the system more debuggable +4. **Performance**: O(1) token-level checks vs O(n²) similarity calculations +5. **Semantic Awareness**: Different token types have different mergeability rules + +## ✅ Benefits of This Implementation + +1. **Performance**: O(1) token-level checks vs O(n²) similarity calculations +2. **Predictability**: Discrete mergeability levels make behavior more predictable +3. **Type Safety**: Each token type defines its own mergeability rules +4. **Extensibility**: Easy to add new token types with custom mergeability +5. **Semantic Awareness**: Can distinguish between different types of content +6. 
**Backward Compatibility**: Can fall back to Go approach if needed +7. **Wildcard Creation**: The `possiblyWildcard` flag enables automatic wildcard creation during batch consolidation + +## 🎯 Summary + +**Total Estimated Time: ~4 hours for complete implementation from scratch** + +This implementation provides a high-performance, predictable pattern merging system that scales well under load while maintaining semantic awareness of different token types. The discrete mergeability levels make the system more maintainable and debuggable compared to continuous similarity scoring. + +**The key insight is the `possiblyWildcard` flag that enables automatic wildcard creation during batch consolidation, eliminating the need for similarity thresholds.** \ No newline at end of file diff --git a/pkg/logs/patterns/token/signature.go b/pkg/logs/patterns/token/signature.go new file mode 100644 index 000000000000..d513de9f6d84 --- /dev/null +++ b/pkg/logs/patterns/token/signature.go @@ -0,0 +1,87 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package token provides data structures and utilities for tokenizing log messages. 
+package token + +import ( + "fmt" + "hash/fnv" + "strings" +) + +// Signature represents a structural signature of a TokenList +type Signature struct { + Position string + Length int + Hash uint64 +} + +// NewSignature creates a signature from a TokenList +func NewSignature(tl *TokenList) Signature { + if tl.IsEmpty() { + return Signature{ + Position: "", + Length: 0, + Hash: 0, + } + } + + position := positionSignature(tl) + hash := computeHash(position) + + return Signature{ + Position: position, + Length: len(tl.Tokens), + Hash: hash, + } +} + +// Equals checks if two signatures are identical +func (s *Signature) Equals(other Signature) bool { + return s.Position == other.Position && + s.Length == other.Length +} + +// computeHash generates a hash for the signature +func computeHash(input string) uint64 { + hash := fnv.New64a() + hash.Write([]byte(input)) + return hash.Sum64() +} + +// String returns a string representation of the signature +func (s *Signature) String() string { + return fmt.Sprintf("Sig{pos:%s, len:%d, hash:%x}", + s.Position, s.Length, s.Hash) +} + +// IsEmpty returns true if the signature represents an empty TokenList +func (s *Signature) IsEmpty() bool { + return s.Length == 0 +} + +// HasSameStructure checks if two signatures have the same positional structure +func (s *Signature) HasSameStructure(other Signature) bool { + return s.Position == other.Position && s.Length == other.Length +} + +// GetHashBucket returns the hash bucket for efficient clustering +func (s *Signature) GetHashBucket() uint64 { + return s.Hash +} + +// positionSignature generates position-based signature +func positionSignature(tl *TokenList) string { + if tl.IsEmpty() { + return "" + } + + var positionParts []string + for _, token := range tl.Tokens { + positionParts = append(positionParts, token.Type.String()) + } + return strings.Join(positionParts, "|") +} diff --git a/pkg/logs/patterns/token/signature_test.go b/pkg/logs/patterns/token/signature_test.go new file 
mode 100644 index 000000000000..cde682bdff73 --- /dev/null +++ b/pkg/logs/patterns/token/signature_test.go @@ -0,0 +1,229 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package token + +import ( + "testing" +) + +func TestNewSignature(t *testing.T) { + // Empty TokenList + emptyTL := NewTokenList() + emptySig := NewSignature(emptyTL) + if emptySig.Position != "" || emptySig.Length != 0 || emptySig.Hash != 0 { + t.Error("Empty TokenList should have empty signature") + } + + // Non-empty TokenList + tokens := []Token{ + {Type: TokenHttpMethod, Value: "GET"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenAbsolutePath, Value: "/api"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenHttpStatus, Value: "200"}, + } + tl := NewTokenListWithTokens(tokens) + sig := NewSignature(tl) + + expectedPosition := "HttpMethod|Whitespace|AbsolutePath|Whitespace|HttpStatus" + if sig.Position != expectedPosition { + t.Errorf("Expected position signature '%s', got '%s'", expectedPosition, sig.Position) + } + + if sig.Length != 5 { + t.Errorf("Expected length 5, got %d", sig.Length) + } + + if sig.Hash == 0 { + t.Error("Hash should not be 0 for non-empty TokenList") + } +} + +func TestSignature_Equals(t *testing.T) { + tokens1 := []Token{ + {Type: TokenWord, Value: "hello"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenWord, Value: "world"}, + } + tokens2 := []Token{ + {Type: TokenWord, Value: "goodbye"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenWord, Value: "world"}, + } + tokens3 := []Token{ + {Type: TokenWord, Value: "hello"}, + {Type: TokenNumeric, Value: "123"}, // Different type + } + + tl1 := NewTokenListWithTokens(tokens1) + tl2 := NewTokenListWithTokens(tokens2) + tl3 := NewTokenListWithTokens(tokens3) + + sig1 := NewSignature(tl1) + sig2 := 
NewSignature(tl2) + sig3 := NewSignature(tl3) + + // Same structure, different values - should be equal + if !sig1.Equals(sig2) { + t.Error("TokenLists with same structure should have equal signatures") + } + + // Different structure - should not be equal + if sig1.Equals(sig3) { + t.Error("TokenLists with different structure should not have equal signatures") + } + + // Test signature equality with itself + if !sig1.Equals(sig1) { + t.Error("Signature should equal itself") + } +} + +func TestSignature_String(t *testing.T) { + tokens := []Token{ + {Type: TokenWord, Value: "test"}, + } + tl := NewTokenListWithTokens(tokens) + sig := NewSignature(tl) + + str := sig.String() + if str == "" { + t.Error("Signature string should not be empty") + } + + // Should contain key components + if !containsAll(str, []string{"pos:", "len:", "hash:"}) { + t.Errorf("Signature string should contain all components, got: %s", str) + } +} + +func TestSignature_IsEmpty(t *testing.T) { + // Empty signature + emptyTL := NewTokenList() + emptySig := NewSignature(emptyTL) + if !emptySig.IsEmpty() { + t.Error("Empty signature should return true for IsEmpty()") + } + + // Non-empty signature + tokens := []Token{{Type: TokenWord, Value: "test"}} + tl := NewTokenListWithTokens(tokens) + sig := NewSignature(tl) + if sig.IsEmpty() { + t.Error("Non-empty signature should return false for IsEmpty()") + } +} + +func TestSignature_HasSameStructure(t *testing.T) { + // Same structure, different values + tokens1 := []Token{ + {Type: TokenHttpMethod, Value: "GET"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenAbsolutePath, Value: "/api"}, + } + tokens2 := []Token{ + {Type: TokenHttpMethod, Value: "POST"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenAbsolutePath, Value: "/users"}, + } + + tl1 := NewTokenListWithTokens(tokens1) + tl2 := NewTokenListWithTokens(tokens2) + sig1 := NewSignature(tl1) + sig2 := NewSignature(tl2) + + if !sig1.HasSameStructure(sig2) { + t.Error("Signatures with 
same structure should return true") + } + + // Different structure + tokens3 := []Token{ + {Type: TokenWord, Value: "different"}, + {Type: TokenNumeric, Value: "123"}, + } + tl3 := NewTokenListWithTokens(tokens3) + sig3 := NewSignature(tl3) + + if sig1.HasSameStructure(sig3) { + t.Error("Signatures with different structure should return false") + } +} + +func TestSignature_GetHashBucket(t *testing.T) { + tokens := []Token{ + {Type: TokenWord, Value: "test"}, + } + tl := NewTokenListWithTokens(tokens) + sig := NewSignature(tl) + + hashBucket := sig.GetHashBucket() + if hashBucket != sig.Hash { + t.Error("GetHashBucket should return the signature hash") + } + if hashBucket == 0 { + t.Error("Hash bucket should not be 0 for non-empty signature") + } +} + +func TestComputeHash(t *testing.T) { + // Test that same input produces same hash + input1 := "test input" + input2 := "test input" + input3 := "different input" + + hash1 := computeHash(input1) + hash2 := computeHash(input2) + hash3 := computeHash(input3) + + if hash1 != hash2 { + t.Error("Same input should produce same hash") + } + if hash1 == hash3 { + t.Error("Different input should produce different hash (very likely)") + } + if hash1 == 0 { + t.Error("Hash should not be 0") + } +} + +func TestSignature_ConsistentHashing(t *testing.T) { + // Test that identical TokenLists produce identical signatures with same hash + tokens := []Token{ + {Type: TokenHttpMethod, Value: "GET"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenAbsolutePath, Value: "/api"}, + } + + tl1 := NewTokenListWithTokens(tokens) + tl2 := NewTokenListWithTokens(tokens) + + sig1 := NewSignature(tl1) + sig2 := NewSignature(tl2) + + if sig1.Hash != sig2.Hash { + t.Error("Identical TokenLists should produce identical signature hashes") + } + if !sig1.Equals(sig2) { + t.Error("Identical TokenLists should produce equal signatures") + } +} + +// Helper function to check if string contains all substrings +func containsAll(str string, substrings 
// TokenType represents the type of a token.
type TokenType int

const (
	// Basic token types
	TokenUnknown TokenType = iota
	TokenWord
	TokenNumeric
	TokenWhitespace

	// Network-related tokens
	TokenIPv4
	TokenIPv6
	TokenEmail
	TokenURI
	TokenAbsolutePath

	// HTTP-related tokens
	TokenHttpMethod
	TokenHttpStatus

	// Log-related tokens
	TokenSeverityLevel
	TokenDate
)

// MergeabilityLevel represents how two tokens can be merged. The levels are
// ordered; any level above Unmergeable counts as mergeable.
type MergeabilityLevel int

const (
	// Unmergeable means the two tokens cannot be merged.
	Unmergeable MergeabilityLevel = iota
	// MergeableAsNewType is declared for completeness; nothing in this file
	// returns it.
	MergeableAsNewType
	// MergeableAsWildcard is returned for same-type tokens whose values differ.
	MergeableAsWildcard
	// MergeableWithWiderRange is returned for date tokens with the same
	// format and date but a different time of day.
	MergeableWithWiderRange
	// Fits is declared for completeness; nothing in this file returns it.
	Fits
	// FitsAsItIs is returned when the tokens already match.
	FitsAsItIs
)

// IsMergeable returns true if the mergeability level allows merging.
func (m MergeabilityLevel) IsMergeable() bool {
	return m > Unmergeable
}

// Compare returns a negative, zero or positive value when m is respectively
// lower than, equal to, or higher than other.
func (m MergeabilityLevel) Compare(other MergeabilityLevel) int {
	return int(m) - int(other)
}

// DateComponents represents parsed components of a date token.
type DateComponents struct {
	Year   string
	Month  string
	Day    string
	Hour   string
	Minute string
	Second string
	Format string // Original format pattern
}

// Token represents a single token in a log message.
type Token struct {
	Type             TokenType
	Value            string
	IsWildcard       bool
	PossiblyWildcard bool // Indicates if this token can merge into a wildcard during batch consolidation

	// Advanced token structure information
	DateInfo *DateComponents // For TokenDate - parsed date components
}

// NewToken creates a new token with the given type and value; both wildcard
// flags are false.
func NewToken(tokenType TokenType, value string) Token {
	return Token{
		Type:             tokenType,
		Value:            value,
		IsWildcard:       false,
		PossiblyWildcard: false,
	}
}

// NewTokenWithFlags creates a new token with explicit wildcard flags.
func NewTokenWithFlags(tokenType TokenType, value string, isWildcard, possiblyWildcard bool) Token {
	return Token{
		Type:             tokenType,
		Value:            value,
		IsWildcard:       isWildcard,
		PossiblyWildcard: possiblyWildcard,
	}
}

// NewWildcardToken creates a wildcard token of the given type. Its value is
// "*" and both wildcard flags are set.
func NewWildcardToken(tokenType TokenType) Token {
	return Token{
		Type:             tokenType,
		Value:            "*",
		IsWildcard:       true,
		PossiblyWildcard: true,
	}
}

// NewPossiblyWildcardToken creates a token that can potentially become a wildcard.
func NewPossiblyWildcardToken(tokenType TokenType, value string) Token {
	return Token{
		Type:             tokenType,
		Value:            value,
		IsWildcard:       false,
		PossiblyWildcard: true,
	}
}

// NewDateToken creates a date token with parsed components.
func NewDateToken(value string, dateInfo *DateComponents) Token {
	return Token{
		Type:             TokenDate,
		Value:            value,
		IsWildcard:       false,
		PossiblyWildcard: false,
		DateInfo:         dateInfo,
	}
}

// IsHTTP returns true if the token is HTTP-related.
func (t *Token) IsHTTP() bool {
	return t.Type == TokenHttpMethod || t.Type == TokenHttpStatus
}

// IsNetwork returns true if the token is an IPv4, IPv6, email or URI token.
func (t *Token) IsNetwork() bool {
	return t.Type == TokenIPv4 || t.Type == TokenIPv6 || t.Type == TokenEmail || t.Type == TokenURI
}

// String returns the string representation of a TokenType. Unknown values are
// rendered as "TokenType(<n>)".
func (tt TokenType) String() string {
	switch tt {
	case TokenUnknown:
		return "Unknown"
	case TokenWord:
		return "Word"
	case TokenNumeric:
		return "Numeric"
	case TokenWhitespace:
		return "Whitespace"
	case TokenIPv4:
		return "IPv4"
	case TokenIPv6:
		return "IPv6"
	case TokenEmail:
		return "Email"
	case TokenURI:
		return "URI"
	case TokenAbsolutePath:
		return "AbsolutePath"
	case TokenHttpMethod:
		return "HttpMethod"
	case TokenHttpStatus:
		return "HttpStatus"
	case TokenSeverityLevel:
		return "SeverityLevel"
	case TokenDate:
		return "Date"
	default:
		return fmt.Sprintf("TokenType(%d)", int(tt))
	}
}

// String returns a string representation of the token: "Type(value)", or
// "Type(*)" for a wildcard.
func (t *Token) String() string {
	if t.IsWildcard {
		return fmt.Sprintf("%s(*)", t.Type)
	}
	return fmt.Sprintf("%s(%s)", t.Type, t.Value)
}

// GetMergeabilityLevel determines how this token can merge with another token.
//
//   - same type and value: FitsAsItIs
//   - both TokenDate with parsed DateInfo: decided by getDateMergeabilityLevel
//   - both TokenWord: MergeableAsWildcard only if both are PossiblyWildcard
//   - any other same-type pair: MergeableAsWildcard
//   - different types, or a nil token on either side: Unmergeable
func (t *Token) GetMergeabilityLevel(other *Token) MergeabilityLevel {
	// A missing token can never be merged; report that instead of panicking.
	if t == nil || other == nil {
		return Unmergeable
	}

	// Different token types
	if t.Type != other.Type {
		return Unmergeable
	}

	// Same token type and value
	if t.Value == other.Value {
		return FitsAsItIs
	}

	// Special handling for structured date tokens
	if t.Type == TokenDate && t.DateInfo != nil && other.DateInfo != nil {
		return getDateMergeabilityLevel(t.DateInfo, other.DateInfo)
	}

	// For Word tokens, only merge if both have possiblyWildcard flag.
	// This prevents generic words like "bob" and "cat" from merging.
	if t.Type == TokenWord {
		if t.PossiblyWildcard && other.PossiblyWildcard {
			return MergeableAsWildcard
		}
		return Unmergeable
	}

	// Non-Word tokens (HttpMethod, HttpStatus, AbsolutePath, Numeric, etc.)
	// are mergeable by default since they represent structured data,
	// e.g. "GET" vs "POST", "/api" vs "/users", "200" vs "404".
	return MergeableAsWildcard
}

// getDateMergeabilityLevel determines how two date tokens can merge based on
// their parsed components.
func getDateMergeabilityLevel(d1, d2 *DateComponents) MergeabilityLevel {
	// Must have same format to be mergeable - different formats = different log sources
	if d1.Format != d2.Format {
		return Unmergeable
	}

	// Only merge if same date; a different date is treated as a different
	// log source/period.
	sameDate := d1.Year == d2.Year && d1.Month == d2.Month && d1.Day == d2.Day
	if !sameDate {
		return Unmergeable
	}

	sameTime := d1.Hour == d2.Hour && d1.Minute == d2.Minute && d1.Second == d2.Second
	if sameTime {
		return FitsAsItIs
	}

	// Same date, different time = same log source at different times
	return MergeableWithWiderRange
}

// NOTE: MergeWith() and createPartialDateWildcard() have been moved to the
// clustering/merging package. Token now only provides data comparison via
// GetMergeabilityLevel(), while merge execution is handled as business logic
// in the merging package.
+package token + +import ( + "strings" +) + +// TokenList represents a sequence of tokens +type TokenList struct { + Tokens []Token +} + +// NewTokenList creates a new empty TokenList +func NewTokenList() *TokenList { + return &TokenList{Tokens: make([]Token, 0)} +} + +// NewTokenListWithTokens creates a new TokenList with the provided tokens +func NewTokenListWithTokens(tokens []Token) *TokenList { + return &TokenList{Tokens: tokens} +} + +// Add appends one or more tokens to the list +func (tl *TokenList) Add(tokens ...Token) { + tl.Tokens = append(tl.Tokens, tokens...) +} + +// AddToken creates and adds a new token with the given type and value +func (tl *TokenList) AddToken(tokenType TokenType, value string) { + tl.Tokens = append(tl.Tokens, NewToken(tokenType, value)) +} + +// AddWildcardToken creates and adds a wildcard token of the given type +func (tl *TokenList) AddWildcardToken(tokenType TokenType) { + tl.Tokens = append(tl.Tokens, NewWildcardToken(tokenType)) +} + +// AddPossiblyWildcardToken creates and adds a token that can potentially become a wildcard +func (tl *TokenList) AddPossiblyWildcardToken(tokenType TokenType, value string) { + tl.Tokens = append(tl.Tokens, NewPossiblyWildcardToken(tokenType, value)) +} + +// Length returns the number of tokens +func (tl *TokenList) Length() int { + return len(tl.Tokens) +} + +// IsEmpty returns true if the list is empty +func (tl *TokenList) IsEmpty() bool { + return len(tl.Tokens) == 0 +} + +// String returns a string representation +func (tl *TokenList) String() string { + if tl.IsEmpty() { + return "[]" + } + + var parts []string + for _, token := range tl.Tokens { + parts = append(parts, token.String()) + } + return "[" + strings.Join(parts, ", ") + "]" +} diff --git a/pkg/logs/patterns/token/tokenlist_test.go b/pkg/logs/patterns/token/tokenlist_test.go new file mode 100644 index 000000000000..06063897a724 --- /dev/null +++ b/pkg/logs/patterns/token/tokenlist_test.go @@ -0,0 +1,129 @@ +// Unless 
explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package token + +import ( + "testing" +) + +func TestTokenList_NewTokenList(t *testing.T) { + // Empty token list + tl := NewTokenList() + if tl == nil { + t.Fatal("NewTokenList should not return nil") + } + if !tl.IsEmpty() { + t.Error("New TokenList should be empty") + } + if tl.Length() != 0 { + t.Errorf("New TokenList should have length 0, got %d", tl.Length()) + } + + // Token list with initial tokens + tokens := []Token{ + {Type: TokenWord, Value: "hello"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenWord, Value: "world"}, + } + tl2 := NewTokenListWithTokens(tokens) + if tl2.Length() != 3 { + t.Errorf("Expected length 3, got %d", tl2.Length()) + } + if tl2.IsEmpty() { + t.Error("TokenList with tokens should not be empty") + } +} + +func TestTokenList_Add(t *testing.T) { + tl := NewTokenList() + + token1 := Token{Type: TokenWord, Value: "hello"} + tl.Add(token1) + + if tl.Length() != 1 { + t.Errorf("Expected length 1, got %d", tl.Length()) + } + if tl.IsEmpty() { + t.Error("TokenList should not be empty after adding token") + } + if tl.Tokens[0].Value != "hello" { + t.Errorf("Expected token value 'hello', got '%s'", tl.Tokens[0].Value) + } +} + +func TestTokenList_String(t *testing.T) { + // Empty list + tl := NewTokenList() + if tl.String() != "[]" { + t.Errorf("Empty TokenList string should be '[]', got '%s'", tl.String()) + } + + // Non-empty list + tl.Add(Token{Type: TokenWord, Value: "hello"}) + tl.Add(Token{Type: TokenWhitespace, Value: " "}) + tl.Add(Token{Type: TokenWord, Value: "world"}) + + expected := "[Word(hello), Whitespace( ), Word(world)]" + if tl.String() != expected { + t.Errorf("Expected '%s', got '%s'", expected, tl.String()) + } +} + +func TestTokenList_PositionSignature(t *testing.T) { + // 
Empty token list + emptyTL := NewTokenList() + if positionSignature(emptyTL) != "" { + t.Error("Empty TokenList should have empty position signature") + } + + // Non-empty token list + tokens := []Token{ + {Type: TokenHttpMethod, Value: "GET"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenAbsolutePath, Value: "/api"}, + } + tl := NewTokenListWithTokens(tokens) + + expectedPosition := "HttpMethod|Whitespace|AbsolutePath" + if positionSignature(tl) != expectedPosition { + t.Errorf("Expected position signature '%s', got '%s'", expectedPosition, positionSignature(tl)) + } +} + +func TestTokenList_Signature(t *testing.T) { + // Test that TokenList.Signature() creates a proper signature + tokens := []Token{ + {Type: TokenHttpMethod, Value: "GET"}, + {Type: TokenWhitespace, Value: " "}, + {Type: TokenAbsolutePath, Value: "/api"}, + } + tl := NewTokenListWithTokens(tokens) + sig := NewSignature(tl) + + if sig.Length != 3 { + t.Errorf("Expected signature length 3, got %d", sig.Length) + } + if sig.Hash == 0 { + t.Error("Signature hash should not be 0") + } + if sig.Position == "" { + t.Error("Signature position should not be empty") + } +} + +// Helper function to check if string contains substring +func containsSubstring(str, substr string) bool { + return len(str) >= len(substr) && findSubstring(str, substr) >= 0 +} + +func findSubstring(str, substr string) int { + for i := 0; i <= len(str)-len(substr); i++ { + if str[i:i+len(substr)] == substr { + return i + } + } + return -1 +} diff --git a/pkg/logs/pipeline/pipeline.go b/pkg/logs/pipeline/pipeline.go index 499209b0d313..d0d4d53df093 100644 --- a/pkg/logs/pipeline/pipeline.go +++ b/pkg/logs/pipeline/pipeline.go @@ -21,6 +21,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/processor" "github.com/DataDog/datadog-agent/pkg/logs/sender" compressioncommon "github.com/DataDog/datadog-agent/pkg/util/compression" + "github.com/DataDog/datadog-agent/pkg/util/log" ) // Pipeline processes and sends messages to the 
backend @@ -105,25 +106,25 @@ func getStrategy( compressor logscompression.Component, instanceID string, ) sender.Strategy { - if endpoints.UseHTTP || serverlessMeta.IsEnabled() { + // Use DumbStrategy for pattern extraction when UseProto is enabled + if endpoints.UseProto { + log.Infof("Pipeline: Using DumbStrategy for pattern extraction (UseProto=true)") var encoder compressioncommon.Compressor encoder = compressor.NewCompressor(compressioncommon.NoneKind, 0) if endpoints.Main.UseCompression { encoder = compressor.NewCompressor(endpoints.Main.CompressionKind, endpoints.Main.CompressionLevel) } - - return sender.NewBatchStrategy( - inputChan, - outputChan, - flushChan, - serverlessMeta, - endpoints.BatchWait, - endpoints.BatchMaxSize, - endpoints.BatchMaxContentSize, - "logs", - encoder, - pipelineMonitor, - instanceID) + return sender.NewDumbStrategy(inputChan, outputChan, flushChan, sender.NewArraySerializer(), endpoints.BatchMaxContentSize, "logs", encoder) + } else if endpoints.UseHTTP || endpoints.UseGRPC || serverlessMeta.IsEnabled() { + log.Infof("Pipeline: Using BatchStrategy (UseHTTP=%v, UseGRPC=%v, Serverless=%v)", endpoints.UseHTTP, endpoints.UseGRPC, serverlessMeta.IsEnabled()) + var encoder compressioncommon.Compressor + encoder = compressor.NewCompressor(compressioncommon.NoneKind, 0) + if endpoints.Main.UseCompression { + encoder = compressor.NewCompressor(endpoints.Main.CompressionKind, endpoints.Main.CompressionLevel) + } + return sender.NewBatchStrategy(inputChan, outputChan, flushChan, serverlessMeta, sender.NewArraySerializer(), endpoints.BatchWait, endpoints.BatchMaxSize, endpoints.BatchMaxContentSize, "logs", encoder, pipelineMonitor, instanceID) } + + log.Infof("Pipeline: Using StreamStrategy (default)") return sender.NewStreamStrategy(inputChan, outputChan, compressor.NewCompressor(compressioncommon.NoneKind, 0)) } diff --git a/pkg/logs/pipeline/provider.go b/pkg/logs/pipeline/provider.go index 9737f8a5c007..353bc48d24b5 100644 --- 
a/pkg/logs/pipeline/provider.go +++ b/pkg/logs/pipeline/provider.go @@ -22,6 +22,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/message" "github.com/DataDog/datadog-agent/pkg/logs/metrics" "github.com/DataDog/datadog-agent/pkg/logs/sender" + grpcsender "github.com/DataDog/datadog-agent/pkg/logs/sender/grpc" httpsender "github.com/DataDog/datadog-agent/pkg/logs/sender/http" tcpsender "github.com/DataDog/datadog-agent/pkg/logs/sender/tcp" "github.com/DataDog/datadog-agent/pkg/logs/status/statusinterface" @@ -88,7 +89,9 @@ func NewProvider( var senderImpl sender.PipelineComponent serverlessMeta := sender.NewServerlessMeta(serverless) - if endpoints.UseHTTP { + if endpoints.UseGRPC { + senderImpl = grpcsender.NewGRPCSender(cfg, sink, endpoints, destinationsContext, metrics.NewTelemetryPipelineMonitor()) + } else if endpoints.UseHTTP { senderImpl = httpSender(numberOfPipelines, cfg, sink, endpoints, destinationsContext, serverlessMeta, legacyMode) } else { senderImpl = tcpSender(numberOfPipelines, cfg, sink, endpoints, destinationsContext, status, serverlessMeta, legacyMode) diff --git a/pkg/logs/sender/dumb_strategy.go b/pkg/logs/sender/dumb_strategy.go new file mode 100644 index 000000000000..5c3c89a9a36f --- /dev/null +++ b/pkg/logs/sender/dumb_strategy.go @@ -0,0 +1,274 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +// Package sender provides log message sending functionality +package sender + +import ( + "bytes" + "encoding/json" + "unsafe" + + "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/automaton" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering" + "github.com/DataDog/datadog-agent/pkg/util/compression" + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +// dumbStrategy is a minimal batching strategy that forwards one message per payload. +type dumbStrategy struct { + inputChan chan *message.Message + clusterManager *clustering.ClusterManager + outputChan chan *message.Payload + flushChan chan struct{} + serializer Serializer + compression compression.Compressor + pipelineName string + + maxContentSize int + + stopChan chan struct{} + buffer []*message.Message +} + +// Simple pattern payload for POC - just the essential fields +type PatternPayload struct { + PatternID uint64 `json:"pattern_id"` + Pattern string `json:"pattern"` + ParamCount int `json:"param_count"` + WildcardPos []int `json:"wildcard_positions"` + // OriginalMsg string `json:"original_message"` // For debugging and double checking if pattern is correct base on the original message. Might remove it after POC. Protobuf might not be happy with this. +} + +// NewDumbStrategy returns a strategy that sends one message per payload using the +// provided serializer and compressor. Messages larger than maxContentSize are +// dropped to mimic batch strategy behaviour. 
+func NewDumbStrategy( + inputChan chan *message.Message, + outputChan chan *message.Payload, + flushChan chan struct{}, + serializer Serializer, + maxContentSize int, + pipelineName string, + compression compression.Compressor, +) Strategy { + return &dumbStrategy{ + inputChan: inputChan, + outputChan: outputChan, + flushChan: flushChan, + serializer: serializer, + compression: compression, + pipelineName: pipelineName, + maxContentSize: maxContentSize, + clusterManager: clustering.NewClusterManager(), + stopChan: make(chan struct{}), + buffer: make([]*message.Message, 0, 1), + } +} + +// Start begins processing messages from the input channel. +func (s *dumbStrategy) Start() { + go func() { + defer close(s.stopChan) + for { + select { + case msg, ok := <-s.inputChan: + if !ok { + s.flushBuffer() + return + } + s.bufferMessage(msg) + s.flushBuffer() + case <-s.flushChan: + s.flushBuffer() + } + } + }() +} + +// Stop stops the strategy and waits for the processing goroutine to exit. +func (s *dumbStrategy) Stop() { + close(s.inputChan) + <-s.stopChan +} + +func (s *dumbStrategy) bufferMessage(m *message.Message) { + if m == nil { + return + } + + if s.maxContentSize > 0 && len(m.GetContent()) > s.maxContentSize { + log.Warnf("Dropped message in pipeline=%s reason=too-large ContentLength=%d ContentSizeLimit=%d", s.pipelineName, len(m.GetContent()), s.maxContentSize) + tlmDroppedTooLarge.Inc(s.pipelineName) + return + } + + s.buffer = append(s.buffer, m) +} + +func (s *dumbStrategy) flushBuffer() { + if len(s.buffer) > 0 { + s.processMessage(s.buffer[0]) + s.buffer = s.buffer[:0] + } +} + +func (s *dumbStrategy) processMessage(m *message.Message) { + content := m.GetContent() + if len(content) == 0 { + return + } + + // Simple pattern extraction for POC + tokenList := automaton.TokenizeString(bytesToString(content)) + if tokenList != nil && !tokenList.IsEmpty() { + if cluster := s.clusterManager.Add(tokenList); cluster != nil { + cluster.GeneratePattern() + + // 
Build simple pattern payload + payload, err := s.buildSimplePatternPayload(m, cluster) + if err != nil { + log.Warn("Failed to build payload", err) + return + } + + s.outputChan <- payload + } + } +} + +// Simple pattern payload builder for POC +func (s *dumbStrategy) buildSimplePatternPayload(m *message.Message, cluster *clustering.Cluster) (*message.Payload, error) { + patternPayload := PatternPayload{ + PatternID: cluster.GetPatternID(), + Pattern: cluster.GetPatternString(), + ParamCount: len(cluster.GetWildcardPositions()), + WildcardPos: cluster.GetWildcardPositions(), + // OriginalMsg: bytesToString(m.GetContent()), // Keep for POC debugging + } + + // Use existing serialization with compression - intake handles decompression + return s.serializePayload(patternPayload, m) +} + +// ========== COMMENTED OUT COMPLEX LOGIC FOR POC ========== +/* +func (s *dumbStrategy) buildPayload(m *message.Message) (*message.Payload, error) { + if s.cluster != nil && s.cluster.NeedsSending() { + // Pattern needs to be sent (new or updated) + if s.cluster.IsNewPattern() { + return s.buildPatternCreationPayload(m) + } else if s.cluster.WasUpdatedSinceLastSent() { + return s.buildPatternUpdatePayload(m) + } + } else if s.cluster != nil { + // Pattern already sent, just send wildcards + return s.buildWildcardPayload(m) + } + + // No pattern, send raw message (fallback) + return s.buildRawPayload(m) +} + +func (s *dumbStrategy) buildPatternCreationPayload(m *message.Message) (*message.Payload, error) { + patternPayload := PatternPayload{ + StateChange: "pattern_create", + PatternID: s.cluster.GetPatternID(), + Pattern: s.cluster.GetPatternString(), + ParamCount: len(s.cluster.GetWildcardPositions()), + WildcardPos: s.cluster.GetWildcardPositions(), + OriginalMsg: bytesToString(m.GetContent()), + } + + s.cluster.MarkAsSent() + return s.serializePayload(patternPayload, m) +} + +func (s *dumbStrategy) buildPatternUpdatePayload(m *message.Message) (*message.Payload, error) { + 
patternPayload := PatternPayload{ + StateChange: "pattern_update", + PatternID: s.cluster.GetPatternID(), + Pattern: s.cluster.GetPatternString(), + ParamCount: len(s.cluster.GetWildcardPositions()), + WildcardPos: s.cluster.GetWildcardPositions(), + } + + s.cluster.MarkAsSent() + return s.serializePayload(patternPayload, m) +} + +func (s *dumbStrategy) buildWildcardPayload(m *message.Message) (*message.Payload, error) { + // Extract wildcard values from the current message + tokenList := automaton.TokenizeString(bytesToString(m.GetContent())) + var wildcardValues []string + if tokenList != nil { + wildcardValues = s.cluster.ExtractWildcardValues(tokenList) + } + + patternPayload := struct { + PatternID uint64 `json:"pattern_id"` + DynamicValues []string `json:"dynamic_values"` + }{ + PatternID: s.cluster.GetPatternID(), + DynamicValues: wildcardValues, + } + + return s.serializePayload(patternPayload, m) +} + +func (s *dumbStrategy) buildRawPayload(m *message.Message) (*message.Payload, error) { + rawPayload := struct { + Message string `json:"raw_message"` + }{ + Message: bytesToString(m.GetContent()), + } + + return s.serializePayload(rawPayload, m) +} +*/ + +// serializePayload JSON-encodes payload, compresses the JSON with the strategy's compressor and +// wraps the result in a message.Payload that carries a copy of m's metadata tagged with +// "data_type:pattern". It returns an error if marshalling or compression fails. +func (s *dumbStrategy) serializePayload(payload interface{}, m *message.Message) (*message.Payload, error) { + s.serializer.Reset() + + patternBytes, err := json.Marshal(payload) + if err != nil { + return nil, err + } + + // Compress the JSON data directly + var encodedPayload bytes.Buffer + compressor := s.compression.NewStreamCompressor(&encodedPayload) + + if _, err := compressor.Write(patternBytes); err != nil { + compressor.Close() + return nil, err + } + + if err := compressor.Close(); err != nil { + return nil, err + } + + // Potentially seed some log payload instead here + + // Create payload with original message metadata + metaCopy := m.MessageMetadata + // Add pattern indicator to processing tags instead of encoding. metaCopy is a shallow copy, so its + // ProcessingTags still shares a backing array with the original message; capping the capacity at + // the length forces append to allocate instead of writing into that shared array. + tagCount := len(metaCopy.ProcessingTags) + metaCopy.ProcessingTags = append(metaCopy.ProcessingTags[:tagCount:tagCount], "data_type:pattern") 
+ + return message.NewPayload( + []*message.MessageMetadata{&metaCopy}, // original message metadata with pattern tag + encodedPayload.Bytes(), // compressed pattern payload (sent as-is like HTTP/TCP) + s.compression.ContentEncoding(), // regular "gzip" or "zstd" + len(patternBytes), // uncompressed pattern JSON size + ), nil +} + +func bytesToString(b []byte) string { + if len(b) == 0 { + return "" + } + return unsafe.String(&b[0], len(b)) +} diff --git a/pkg/logs/sender/grpc/grpc_sender.go b/pkg/logs/sender/grpc/grpc_sender.go new file mode 100644 index 000000000000..e2d1dd7400e8 --- /dev/null +++ b/pkg/logs/sender/grpc/grpc_sender.go @@ -0,0 +1,286 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package grpc implements gRPC-based log sender +package grpc + +import ( + "context" + "crypto/tls" + "fmt" + "time" + "unsafe" + + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/keepalive" + + "github.com/DataDog/datadog-agent/comp/logs/agent/config" + pkgconfigmodel "github.com/DataDog/datadog-agent/pkg/config/model" + "github.com/DataDog/datadog-agent/pkg/logs/client" + "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/logs/metrics" + "github.com/DataDog/datadog-agent/pkg/logs/sender" + "github.com/DataDog/datadog-agent/pkg/util/log" + "github.com/DataDog/datadog-agent/pkg/version" + + "go.uber.org/atomic" +) + +// headerCredentials implements credentials.PerRPCCredentials to add headers to RPC calls +type headerCredentials struct { + endpoint config.Endpoint +} + +// GetRequestMetadata adds required headers to each RPC call +func (h *headerCredentials) GetRequestMetadata(ctx context.Context, uri ...string) (map[string]string, 
error) { + headers := map[string]string{ + "dd-api-key": h.endpoint.GetAPIKey(), + } + + // Add protocol header if specified + if h.endpoint.Protocol != "" { + headers["dd-protocol"] = string(h.endpoint.Protocol) + } + + // Add origin headers if specified + if h.endpoint.Origin != "" { + headers["dd-evp-origin"] = string(h.endpoint.Origin) + headers["dd-evp-origin-version"] = version.AgentVersion + } + + return headers, nil +} + +// RequireTransportSecurity indicates whether the credentials require transport security +func (h *headerCredentials) RequireTransportSecurity() bool { + return false // We handle TLS separately via WithTransportCredentials +} + +// GRPCSender implements PipelineComponent interface for gRPC log transmission. +// It manages multiple StreamWorker instances (one per pipeline) using round-robin distribution. +// It is similar to Sender/Worker architecture +type GRPCSender struct { + // Configuration + endpoint config.Endpoint + destinationsContext *client.DestinationsContext + cfg pkgconfigmodel.Reader + numberOfWorkers int + + // Pipeline integration + pipelineMonitor metrics.PipelineMonitor + + // Stream management (similar to Sender's workers and queues) + workers []*StreamWorker + queues []chan *message.Payload + idx *atomic.Uint32 + + // Auditor integration + sink sender.Sink + + // Stream rotation signaling - maps input channels to StreamWorker instances for 1:1 worker mapping + channelToWorkerMap map[chan *message.Payload]*StreamWorker + + // Global batch ID counter shared by all workers to ensure uniqueness + globalBatchIDCounter *atomic.Uint32 + + // gRPC connection management (shared across all streams) + conn *grpc.ClientConn + client StatefulLogsServiceClient +} + +// NewGRPCSender creates a new gRPC sender that implements PipelineComponent +// numberOfPipelines determines how many StreamWorker to create (same as number of pipelines) +func NewGRPCSender( + cfg pkgconfigmodel.Reader, + sink sender.Sink, + endpoints 
*config.Endpoints, + destinationsCtx *client.DestinationsContext, + pipelineMonitor metrics.PipelineMonitor, +) *GRPCSender { + + // For now, use the first reliable endpoint + // TODO: Support multiple endpoints with failover + var endpoint config.Endpoint + if len(endpoints.GetReliableEndpoints()) > 0 { + endpoint = endpoints.GetReliableEndpoints()[0] + } else { + log.Error("No reliable gRPC endpoints configured") + return nil + } + + // Get number of pipelines from config (same pattern as other senders) + numberOfWorkers := cfg.GetInt("logs_config.pipelines") + if numberOfWorkers <= 0 { + numberOfWorkers = 1 // Default to 1 // TODO: probably not good + } + + // Get stream lifetime from config + streamLifetime := config.StreamLifetime(cfg) + + sender := &GRPCSender{ + endpoint: endpoint, + destinationsContext: destinationsCtx, + cfg: cfg, + numberOfWorkers: numberOfWorkers, + pipelineMonitor: pipelineMonitor, + workers: make([]*StreamWorker, 0, numberOfWorkers), + queues: make([]chan *message.Payload, numberOfWorkers), + idx: &atomic.Uint32{}, + sink: sink, + channelToWorkerMap: make(map[chan *message.Payload]*StreamWorker), + globalBatchIDCounter: &atomic.Uint32{}, + } + + // Note: outputChan will be set in each StreamWorker's Start() method when sink.Channel() is available + + // Create gRPC connection (shared by all streams inside StreamWorkers) + if err := sender.createConnection(); err != nil { + log.Errorf("Failed to create gRPC connection: %v", err) + return nil + } + + // Create multiple StreamWorker instances (like Sender creates Workers) + for i := 0; i < numberOfWorkers; i++ { + workerID := fmt.Sprintf("worker-%d", i) + + // Create input queue for this worker (like Sender creates queues) + sender.queues[i] = make(chan *message.Payload, 100) + + // Create StreamWorker instance + worker := NewStreamWorker( + workerID, + destinationsCtx, + sender.client, + sender.sink, // Pass sink, outputChan will be set in Start() + streamLifetime, + 
sender.globalBatchIDCounter, // Pass shared counter for globally unique batch IDs + ) + + // Override the worker's input channel to use our queue + worker.inputChan = sender.queues[i] + + // Map input channel to worker for 1:1 worker-processor connection + sender.channelToWorkerMap[sender.queues[i]] = worker + + sender.workers = append(sender.workers, worker) + } + + log.Infof("Created gRPC sender with %d streams for endpoint %s:%d", + numberOfWorkers, endpoint.Host, endpoint.Port) + return sender +} + +// createConnection establishes the shared gRPC connection +func (s *GRPCSender) createConnection() error { + log.Infof("Creating gRPC connection to %s:%d", s.endpoint.Host, s.endpoint.Port) + + // Build connection options + var opts []grpc.DialOption + + // Configure TLS + if s.endpoint.UseSSL() { + tlsConfig := &tls.Config{ + ServerName: s.endpoint.Host, + } + opts = append(opts, grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))) + } else { + opts = append(opts, grpc.WithTransportCredentials(insecure.NewCredentials())) + } + + // Configure keepalive + keepaliveParams := keepalive.ClientParameters{ + Time: 30 * time.Second, + Timeout: 5 * time.Second, + PermitWithoutStream: true, + } + opts = append(opts, grpc.WithKeepaliveParams(keepaliveParams)) + + // Add user agent + userAgent := fmt.Sprintf("datadog-agent/%s", version.AgentVersion) + opts = append(opts, grpc.WithUserAgent(userAgent)) + + // Add headers via per-RPC credentials + headerCreds := &headerCredentials{endpoint: s.endpoint} + opts = append(opts, grpc.WithPerRPCCredentials(headerCreds)) + + // Create connection + address := fmt.Sprintf("%s:%d", s.endpoint.Host, s.endpoint.Port) + conn, err := grpc.NewClient(address, opts...) 
+ if err != nil { + return fmt.Errorf("failed to create gRPC connection: %w", err) + } + + s.conn = conn + s.client = NewStatefulLogsServiceClient(conn) + + log.Infof("Successfully created gRPC connection to %s", address) + return nil +} + +// PipelineComponent interface implementation + +// In returns the input channel using round-robin distribution (same as Sender.In()) +func (s *GRPCSender) In() chan *message.Payload { + idx := s.idx.Inc() % uint32(len(s.queues)) + return s.queues[idx] +} + +// PipelineMonitor returns the pipeline monitor +func (s *GRPCSender) PipelineMonitor() metrics.PipelineMonitor { + return s.pipelineMonitor +} + +// GetSignalChannelForInputChannel returns the stream rotation signal channel for the worker +// that owns the given input channel, or nil if no worker is mapped to it. This enables 1:1 +// mapping between processors and workers. +// This is ugly and temporary, until we have a proper way to link worker's signal channel to +// the processor. +func (s *GRPCSender) GetSignalChannelForInputChannel(inputChan chan *message.Payload) chan any { + // Find the worker that owns this input channel + worker := s.channelToWorkerMap[inputChan] + if worker == nil { + return nil + } + + // Reinterpret the worker's typed channel as chan any through unsafe.Pointer. + // NOTE(review): chan any and the worker's channel type are distinct types, and this cast is not + // one of the conversions the unsafe package documents as valid. Values sent or received through + // the returned channel are only meaningful if the element layouts happen to agree - confirm + // against the declaration of signalStreamRotate, or declare that field as chan any instead. + return *(*chan any)(unsafe.Pointer(&worker.signalStreamRotate)) +} + +// Start starts all StreamWorker instances (same pattern as Sender.Start()) +func (s *GRPCSender) Start() { + log.Infof("Starting gRPC sender with %d workers", len(s.workers)) + + for _, worker := range s.workers { + worker.Start() + } + + log.Info("All StreamWorkers started") +} + +// Stop stops all StreamWorker instances and closes the connection +func (s *GRPCSender) Stop() { + log.Info("Stopping gRPC sender") + + // Stop all workers (same pattern as Sender.Stop()) + for _, worker := range s.workers { + worker.Stop() + } + + // Close all queues + for _, queue := range s.queues { + close(queue) + } + 
+ // Close the shared connection + if s.conn != nil { + if err := s.conn.Close(); err != nil { + log.Warnf("Error closing gRPC connection: %v", err) + } + } + + log.Info("gRPC sender stopped") +} diff --git a/pkg/logs/sender/grpc/grpc_sender_test.go b/pkg/logs/sender/grpc/grpc_sender_test.go new file mode 100644 index 000000000000..afd044febef1 --- /dev/null +++ b/pkg/logs/sender/grpc/grpc_sender_test.go @@ -0,0 +1,642 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2025-present Datadog, Inc. + +package grpc + +import ( + "fmt" + "net" + "strconv" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + + "github.com/DataDog/datadog-agent/comp/logs/agent/config" + configmock "github.com/DataDog/datadog-agent/pkg/config/mock" + "github.com/DataDog/datadog-agent/pkg/logs/client" + "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/logs/metrics" +) + +// MockGRPCServer that implements StatefulLogsServiceServer +type MockGRPCServer struct { + UnimplementedStatefulLogsServiceServer + + // Control behavior + shouldFailSend bool + shouldFailRecv bool + shouldDisconnect bool + responseDelay time.Duration + batchResponses map[int32]BatchStatus_Status + mu sync.RWMutex + + // Track what was received + receivedBatches []*StatefulBatch + activeStreams []StatefulLogsService_LogsStreamServer + streamsMu sync.RWMutex +} + +func NewMockGRPCServer() *MockGRPCServer { + return &MockGRPCServer{ + batchResponses: make(map[int32]BatchStatus_Status), + receivedBatches: make([]*StatefulBatch, 0), + activeStreams: make([]StatefulLogsService_LogsStreamServer, 0), + } +} + +func (s *MockGRPCServer) LogsStream(stream 
StatefulLogsService_LogsStreamServer) error { + // Register the stream so it is tracked while this handler is running. + s.streamsMu.Lock() + s.activeStreams = append(s.activeStreams, stream) + s.streamsMu.Unlock() + + // Unregister by identity rather than by the index captured at registration: that index goes + // stale as soon as an earlier stream is removed, which would drop a different stream or skip + // the removal entirely. + defer func() { + s.streamsMu.Lock() + defer s.streamsMu.Unlock() + for i, active := range s.activeStreams { + if active == stream { + s.activeStreams = append(s.activeStreams[:i], s.activeStreams[i+1:]...) + break + } + } + }() + + for { + // Receive batch from client first + batch, err := stream.Recv() + if err != nil { + return err + } + + s.mu.RLock() + shouldFail := s.shouldFailRecv + shouldDisconnect := s.shouldDisconnect + delay := s.responseDelay + s.mu.RUnlock() + + // Store the received batch (so tests can verify it was received) + s.mu.Lock() + s.receivedBatches = append(s.receivedBatches, batch) + + // Determine response status + responseStatus := BatchStatus_OK + if status, exists := s.batchResponses[int32(batch.BatchId)]; exists { + responseStatus = status + } + s.mu.Unlock() + + // Check for failures AFTER receiving but BEFORE responding + if shouldDisconnect { + // Disconnect after receiving batch but before sending response + // This simulates server dying mid-processing + return status.Error(codes.Unavailable, "server disconnected") + } + + if shouldFail { + // Fail after receiving batch but before sending response + return status.Error(codes.Internal, "simulated recv failure") + } + + // Add delay if configured + if delay > 0 { + time.Sleep(delay) + } + + // Send response back + response := &BatchStatus{ + BatchId: int32(batch.BatchId), + Status: responseStatus, + } + + if err := stream.Send(response); err != nil { + return err + } + } +} + +// Control methods for testing +func (s *MockGRPCServer) SetShouldFailSend(fail bool) { + s.mu.Lock() + defer s.mu.Unlock() + s.shouldFailSend = fail +} + +func (s *MockGRPCServer) SetShouldFailRecv(fail bool) { + s.mu.Lock() + defer s.mu.Unlock() + s.shouldFailRecv = fail +} + +func (s *MockGRPCServer) SetShouldDisconnect(disconnect bool) { + 
s.mu.Lock() + defer s.mu.Unlock() + s.shouldDisconnect = disconnect +} + +func (s *MockGRPCServer) SetResponseDelay(delay time.Duration) { + s.mu.Lock() + defer s.mu.Unlock() + s.responseDelay = delay +} + +func (s *MockGRPCServer) SetBatchResponse(batchID int32, status BatchStatus_Status) { + s.mu.Lock() + defer s.mu.Unlock() + s.batchResponses[batchID] = status +} + +func (s *MockGRPCServer) GetReceivedBatches() []*StatefulBatch { + s.mu.RLock() + defer s.mu.RUnlock() + result := make([]*StatefulBatch, len(s.receivedBatches)) + copy(result, s.receivedBatches) + return result +} + +func (s *MockGRPCServer) ClearReceivedBatches() { + s.mu.Lock() + defer s.mu.Unlock() + s.receivedBatches = s.receivedBatches[:0] +} + +// DisconnectAllStreams makes every stream handler return codes.Unavailable after the next batch +// it receives. shouldDisconnect is read by LogsStream under s.mu, so the write must hold s.mu as +// well; streamsMu only guards activeStreams and would leave this write racing with those reads. +func (s *MockGRPCServer) DisconnectAllStreams() { + s.mu.Lock() + defer s.mu.Unlock() + s.shouldDisconnect = true +} + +// Test helper to start mock gRPC server +func startMockGRPCServer(t *testing.T) (*MockGRPCServer, string, func()) { + listener, err := net.Listen("tcp", "localhost:0") + require.NoError(t, err) + + mockServer := NewMockGRPCServer() + grpcServer := grpc.NewServer() + RegisterStatefulLogsServiceServer(grpcServer, mockServer) + + go func() { + if err := grpcServer.Serve(listener); err != nil { + t.Logf("gRPC server error: %v", err) + } + }() + + address := listener.Addr().String() + + cleanup := func() { + grpcServer.Stop() + listener.Close() + } + + // Server is ready immediately after starting + + return mockServer, address, cleanup +} + +// MockSink for testing +type MockSink struct { + outputChan chan *message.Payload +} + +func (s *MockSink) Channel() chan *message.Payload { + return s.outputChan +} + +// Helper to create GRPCSender with mock server +func createTestGRPCSender(t *testing.T, address string) (*GRPCSender, *MockSink) { + cfg := configmock.New(t) + cfg.SetWithoutSource("logs_config.batch_wait", 100) // Short batch wait for testing + cfg.SetWithoutSource("logs_config.pipelines", 1) // Single pipeline + + // 
Parse host and port from address (e.g., "127.0.0.1:53662") + host, portStr, err := net.SplitHostPort(address) + require.NoError(t, err) + port, err := strconv.Atoi(portStr) + require.NoError(t, err) + + // Create endpoint using the constructor pattern from existing tests + endpoint := config.NewMockEndpointWithOptions(map[string]interface{}{ + "host": host, + "port": port, + "is_reliable": true, + "use_grpc": true, + "use_ssl": false, + }) + + endpoints := &config.Endpoints{ + UseGRPC: true, + Main: endpoint, + Endpoints: []config.Endpoint{endpoint}, + } + + sink := &MockSink{outputChan: make(chan *message.Payload, 100)} + destinationsCtx := client.NewDestinationsContext() + destinationsCtx.Start() + t.Cleanup(func() { destinationsCtx.Stop() }) + + pipelineMonitor := metrics.NewNoopPipelineMonitor("test") + + sender := NewGRPCSender(cfg, sink, endpoints, destinationsCtx, pipelineMonitor) + require.NotNil(t, sender) + + return sender, sink +} + +// Test end-to-end payload flow through GRPCSender +func TestGRPCSenderEndToEndFlow(t *testing.T) { + mockServer, address, cleanup := startMockGRPCServer(t) + defer cleanup() + + sender, sink := createTestGRPCSender(t, address) + + sender.Start() + defer sender.Stop() + + // Create test payload + msg := message.NewMessage([]byte("test message"), nil, "", 0) + payload := &message.Payload{ + MessageMetas: []*message.MessageMetadata{&msg.MessageMetadata}, + Encoded: []byte("test message"), + Encoding: "identity", + UnencodedSize: 12, + IsSnapshot: false, + } + + // Send payload through GRPCSender input channel + inputChan := sender.In() + select { + case inputChan <- payload: + case <-time.After(1 * time.Second): + t.Fatal("Failed to send payload to GRPCSender") + } + + // Wait for server to actually receive the batch (event-driven, not time-based) + require.Eventually(t, func() bool { + batches := mockServer.GetReceivedBatches() + return len(batches) >= 1 + }, 3*time.Second, 50*time.Millisecond, "Server should receive batch") 
+ + // Verify server received the batch + batches := mockServer.GetReceivedBatches() + require.Len(t, batches, 1, "Server should have received one batch") + + batch := batches[0] + assert.Equal(t, uint32(1), batch.BatchId) + require.Len(t, batch.Data, 1) + assert.Equal(t, "test message", batch.Data[0].GetLogs().GetRaw()) + + // Verify payload was acknowledged to auditor + select { + case ackPayload := <-sink.outputChan: + assert.Equal(t, payload, ackPayload) + case <-time.After(1 * time.Second): + t.Fatal("Expected payload acknowledgment from auditor") + } +} + +// Test GRPCSender stream failure and recovery +func TestGRPCSenderFailureRecovery(t *testing.T) { + mockServer, address, cleanup := startMockGRPCServer(t) + defer cleanup() + + sender, sink := createTestGRPCSender(t, address) + + sender.Start() + defer sender.Stop() + + // Connection will be established on first send + + // Send first payload (should succeed) + msg1 := message.NewMessage([]byte("message 1"), nil, "", 0) + payload1 := &message.Payload{ + MessageMetas: []*message.MessageMetadata{&msg1.MessageMetadata}, + Encoded: []byte("message 1"), + Encoding: "identity", + UnencodedSize: 9, + IsSnapshot: false, + } + + inputChan := sender.In() + select { + case inputChan <- payload1: + case <-time.After(1 * time.Second): + t.Fatal("Failed to send first payload") + } + + // Wait for server to receive the batch + require.Eventually(t, func() bool { + return len(mockServer.GetReceivedBatches()) >= 1 + }, 2*time.Second, 50*time.Millisecond) + + // Verify first payload succeeded + batches := mockServer.GetReceivedBatches() + require.Len(t, batches, 1) + + select { + case ackPayload := <-sink.outputChan: + assert.Equal(t, payload1, ackPayload) + case <-time.After(1 * time.Second): + t.Fatal("Expected first payload acknowledgment") + } + + // Get initial generation from the single worker (since we have 1 pipeline) + require.Len(t, sender.workers, 1, "Should have exactly 1 worker for single pipeline") + 
initialGeneration := sender.workers[0].generationID + + // Simulate server failure + mockServer.SetShouldDisconnect(true) + + // Send second payload (should trigger failure and rotation) + msg2 := message.NewMessage([]byte("message 2"), nil, "", 0) + payload2 := &message.Payload{ + MessageMetas: []*message.MessageMetadata{&msg2.MessageMetadata}, + Encoded: []byte("message 2"), + Encoding: "identity", + UnencodedSize: 9, + IsSnapshot: false, + } + + select { + case inputChan <- payload2: + case <-time.After(1 * time.Second): + t.Fatal("Failed to send second payload") + } + + // Wait for failure to be detected and rotation to begin + require.Eventually(t, func() bool { + return sender.workers[0].generationID > initialGeneration + }, 3*time.Second, 100*time.Millisecond) + + // Verify generation incremented due to failure + currentGeneration := sender.workers[0].generationID + assert.Greater(t, currentGeneration, initialGeneration, "Generation should increment after failure") + + // Re-enable server (simulate recovery) + mockServer.SetShouldDisconnect(false) + mockServer.ClearReceivedBatches() + + // Server is now available for new connections + + // Send snapshot to complete rotation + msgSnapshot := message.NewMessage([]byte("snapshot"), nil, "", 0) + payloadSnapshot := &message.Payload{ + MessageMetas: []*message.MessageMetadata{&msgSnapshot.MessageMetadata}, + Encoded: []byte("snapshot"), + Encoding: "identity", + UnencodedSize: 8, + IsSnapshot: true, + } + + select { + case inputChan <- payloadSnapshot: + case <-time.After(1 * time.Second): + t.Fatal("Failed to send snapshot payload") + } + + // Wait for snapshot to be received on new stream + require.Eventually(t, func() bool { + return len(mockServer.GetReceivedBatches()) >= 1 + }, 3*time.Second, 100*time.Millisecond) + + // Verify snapshot was sent on new stream + newBatches := mockServer.GetReceivedBatches() + require.GreaterOrEqual(t, len(newBatches), 1, "Should have received snapshot on new stream") + + // 
Find snapshot batch + var snapshotBatch *StatefulBatch + for _, batch := range newBatches { + if len(batch.Data) > 0 && batch.Data[0].GetLogs().GetRaw() == "snapshot" { + snapshotBatch = batch + break + } + } + require.NotNil(t, snapshotBatch, "Should have received snapshot batch") + + // Send another payload to verify traffic continues + msg3 := message.NewMessage([]byte("message 3"), nil, "", 0) + payload3 := &message.Payload{ + MessageMetas: []*message.MessageMetadata{&msg3.MessageMetadata}, + Encoded: []byte("message 3"), + Encoding: "identity", + UnencodedSize: 9, + IsSnapshot: false, + } + + select { + case inputChan <- payload3: + case <-time.After(1 * time.Second): + t.Fatal("Failed to send third payload") + } + + // Payload sent, acknowledgments will be collected below + + // Collect all acknowledgments we receive (may include message 2, snapshot, message 3) + var receivedPayloads []*message.Payload + timeout := time.After(3 * time.Second) + + // Collect acknowledgments for up to 3 seconds + for { + select { + case ackPayload := <-sink.outputChan: + receivedPayloads = append(receivedPayloads, ackPayload) + case <-timeout: + goto done + } + } + +done: + require.GreaterOrEqual(t, len(receivedPayloads), 2, "Should have received at least 2 acknowledgments") + + // Verify we got the expected payloads (snapshot and message 3 at minimum) + payloadContents := make([]string, len(receivedPayloads)) + for i, p := range receivedPayloads { + payloadContents[i] = string(p.Encoded) + } + assert.Contains(t, payloadContents, "snapshot", "Should have received snapshot") + assert.Contains(t, payloadContents, "message 3", "Should have received message 3") +} + +// Test multiple consecutive failures with GRPCSender +func TestGRPCSenderMultipleFailures(t *testing.T) { + mockServer, address, cleanup := startMockGRPCServer(t) + defer cleanup() + + sender, sink := createTestGRPCSender(t, address) + + sender.Start() + defer sender.Stop() + + // Get initial generation from the 
single worker + require.Len(t, sender.workers, 1, "Should have exactly 1 worker for single pipeline") + initialGeneration := sender.workers[0].generationID + + inputChan := sender.In() + + mockServer.ClearReceivedBatches() + + // Cause failure + mockServer.SetShouldDisconnect(true) + + // Send payload to trigger failure + msg := message.NewMessage([]byte("trigger"), nil, "", 0) + payload := &message.Payload{ + MessageMetas: []*message.MessageMetadata{&msg.MessageMetadata}, + Encoded: []byte("trigger"), + Encoding: "identity", + UnencodedSize: len("trigger"), + IsSnapshot: false, + } + + select { + case inputChan <- payload: + case <-time.After(1 * time.Second): + t.Fatal("Failed to send trigger payload") + } + + // Wait for failure detection (generation increment) + require.Eventually(t, func() bool { + return sender.workers[0].generationID == initialGeneration+1 + }, 2*time.Second, 100*time.Millisecond) + + // Send snapshot to complete rotation + // but this message should trigger another rotation + msgSnapshot := message.NewMessage([]byte("snapshot"), nil, "", 0) + payloadSnapshot := &message.Payload{ + MessageMetas: []*message.MessageMetadata{&msgSnapshot.MessageMetadata}, + Encoded: []byte("snapshot"), + Encoding: "identity", + UnencodedSize: len("snapshot"), + IsSnapshot: true, + } + + select { + case inputChan <- payloadSnapshot: + case <-time.After(1 * time.Second): + t.Fatal("Failed to send snapshot") + } + + // Verify generation incremented (at least 2 times) + require.Eventually(t, func() bool { + return sender.workers[0].generationID == initialGeneration+2 + }, 2*time.Second, 100*time.Millisecond) + + mockServer.SetShouldDisconnect(false) + + // Send final payload to verify system is still working + msgFinal := message.NewMessage([]byte("final test"), nil, "", 0) + payloadFinal := &message.Payload{ + MessageMetas: []*message.MessageMetadata{&msgFinal.MessageMetadata}, + Encoded: []byte("final test"), + Encoding: "identity", + UnencodedSize: 10, + 
IsSnapshot: true,
+	}
+
+	select {
+	case inputChan <- payloadFinal:
+	case <-time.After(1 * time.Second):
+		t.Fatal("Failed to send final payload")
+	}
+
+	// Payload sent, wait for acknowledgment
+
+	// Verify we get at least one acknowledgment (system is working)
+	// Due to async nature and multiple failures, we may have many pending acks
+	timeout := time.After(2 * time.Second)
+	var gotAck bool
+	for !gotAck {
+		select {
+		case <-sink.outputChan:
+			gotAck = true
+		case <-timeout:
+			t.Fatal("Expected at least one payload acknowledgment")
+		}
+	}
+
+	// Verify system is still functional by checking no more failures
+	assert.True(t, gotAck, "System should still be processing payloads")
+}
+
+// TestGRPCSenderSignalChannelMapping verifies that an input channel resolves to its worker's rotate-signal channel.
+func TestGRPCSenderSignalChannelMapping(t *testing.T) {
+	_, address, cleanup := startMockGRPCServer(t)
+	defer cleanup()
+
+	sender, _ := createTestGRPCSender(t, address)
+
+	sender.Start()
+	defer sender.Stop()
+
+	// Test GetSignalChannelForInputChannel functionality
+	inputChan := sender.In()
+	signalChan := sender.GetSignalChannelForInputChannel(inputChan)
+
+	require.NotNil(t, signalChan, "Should have signal channel for input channel")
+
+	// Verify the mapping is correct
+	worker, exists := sender.channelToWorkerMap[inputChan]
+	require.True(t, exists, "Input channel should be mapped to a worker")
+
+	// The signal channel should be the same underlying channel, even though types differ:
+	// GetSignalChannelForInputChannel returns chan any (via unsafe conversion)
+	// while worker.signalStreamRotate is chan StreamRotateSignal.
+	// %p prints a channel's address, so equal strings mean both values refer to one channel.
+	assert.NotNil(t, worker.signalStreamRotate, "Worker signal channel should not be nil")
+	assert.Equal(t, fmt.Sprintf("%p", worker.signalStreamRotate), fmt.Sprintf("%p", signalChan), "Signal channel should be the worker's rotate-signal channel")
+}
+
+// TestGRPCSenderGracefulShutdown checks that Stop returns with payloads queued and that acks never exceed what was sent.
+func TestGRPCSenderGracefulShutdown(t *testing.T) {
+	_, address, cleanup := startMockGRPCServer(t)
+	defer cleanup()
+
+	sender, sink := createTestGRPCSender(t, address)
+
+	sender.Start()
+
+	// Send some payloads
+	inputChan := sender.In()
+	for i := 0; i < 3; i++ {
+		msg := message.NewMessage([]byte(fmt.Sprintf("message %d", i)), nil, "", 0)
+		payload := &message.Payload{
+			MessageMetas:  []*message.MessageMetadata{&msg.MessageMetadata},
+			Encoded:       []byte(fmt.Sprintf("message %d", i)),
+			Encoding:      "identity",
+			UnencodedSize: len(fmt.Sprintf("message %d", i)),
+			IsSnapshot:    false,
+		}
+
+		select {
+		case inputChan <- payload:
+		case <-time.After(1 * time.Second):
+			t.Fatalf("Failed to send payload %d", i)
+		}
+	}
+
+	// Processing will start immediately
+
+	// Stop sender gracefully
+	sender.Stop()
+
+	// Shutdown is synchronous
+
+	// Drain the acknowledgments emitted so far; how many arrive is timing dependent
+	var ackCount int
+	timeout := time.After(1 * time.Second)
+	for {
+		select {
+		case <-sink.outputChan:
+			ackCount++
+		case <-timeout:
+			goto done
+		}
+	}
+
+done:
+	// A lower bound of zero can never fail, so assert the bound that can: no more acks than the 3 payloads sent
+	assert.LessOrEqual(t, ackCount, 3, "Should not acknowledge more payloads than were sent")
+}
diff --git a/pkg/logs/sender/grpc/stateful_encoding.pb.go b/pkg/logs/sender/grpc/stateful_encoding.pb.go
new file mode 100644
index 000000000000..860c5bc6f01e
--- /dev/null
+++ b/pkg/logs/sender/grpc/stateful_encoding.pb.go
@@ -0,0 +1,1014 @@
+// Code generated by protoc-gen-go. DO NOT EDIT.
+// versions:
+// protoc-gen-go v1.36.6
+// protoc v4.24.3
+// source: pkg/logs/sender/grpc/stateful_encoding.proto
+
+package grpc
+
+import (
+	protoreflect "google.golang.org/protobuf/reflect/protoreflect"
+	protoimpl "google.golang.org/protobuf/runtime/protoimpl"
+	reflect "reflect"
+	sync "sync"
+	unsafe "unsafe"
+)
+
+const (
+	// Verify that this generated code is sufficiently up-to-date.
+	_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
+	// Verify that runtime/protoimpl is sufficiently up-to-date.
+ _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// See Status Code Mappings section below for more details +type BatchStatus_Status int32 + +const ( + BatchStatus_UNKNOWN BatchStatus_Status = 0 + BatchStatus_OK BatchStatus_Status = 1 +) + +// Enum value maps for BatchStatus_Status. +var ( + BatchStatus_Status_name = map[int32]string{ + 0: "UNKNOWN", + 1: "OK", + } + BatchStatus_Status_value = map[string]int32{ + "UNKNOWN": 0, + "OK": 1, + } +) + +func (x BatchStatus_Status) Enum() *BatchStatus_Status { + p := new(BatchStatus_Status) + *p = x + return p +} + +func (x BatchStatus_Status) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (BatchStatus_Status) Descriptor() protoreflect.EnumDescriptor { + return file_pkg_logs_sender_grpc_stateful_encoding_proto_enumTypes[0].Descriptor() +} + +func (BatchStatus_Status) Type() protoreflect.EnumType { + return &file_pkg_logs_sender_grpc_stateful_encoding_proto_enumTypes[0] +} + +func (x BatchStatus_Status) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use BatchStatus_Status.Descriptor instead. 
+func (BatchStatus_Status) EnumDescriptor() ([]byte, []int) { + return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{10, 0} +} + +type DictEntryDefine struct { + state protoimpl.MessageState `protogen:"open.v1"` + Id uint64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"` + Value string `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DictEntryDefine) Reset() { + *x = DictEntryDefine{} + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DictEntryDefine) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DictEntryDefine) ProtoMessage() {} + +func (x *DictEntryDefine) ProtoReflect() protoreflect.Message { + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DictEntryDefine.ProtoReflect.Descriptor instead. 
+func (*DictEntryDefine) Descriptor() ([]byte, []int) { + return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{0} +} + +func (x *DictEntryDefine) GetId() uint64 { + if x != nil { + return x.Id + } + return 0 +} + +func (x *DictEntryDefine) GetValue() string { + if x != nil { + return x.Value + } + return "" +} + +type DictEntryDelete struct { + state protoimpl.MessageState `protogen:"open.v1"` + Id uint64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DictEntryDelete) Reset() { + *x = DictEntryDelete{} + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DictEntryDelete) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DictEntryDelete) ProtoMessage() {} + +func (x *DictEntryDelete) ProtoReflect() protoreflect.Message { + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[1] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DictEntryDelete.ProtoReflect.Descriptor instead. 
+func (*DictEntryDelete) Descriptor() ([]byte, []int) { + return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{1} +} + +func (x *DictEntryDelete) GetId() uint64 { + if x != nil { + return x.Id + } + return 0 +} + +// pos_list is used to indicate where dynamic values should be inserted +// it's more accurate than a marker +type PatternDefine struct { + state protoimpl.MessageState `protogen:"open.v1"` + PatternId uint64 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"` + Template string `protobuf:"bytes,2,opt,name=template,proto3" json:"template,omitempty"` + ParamCount uint32 `protobuf:"varint,3,opt,name=param_count,json=paramCount,proto3" json:"param_count,omitempty"` + PosList []uint32 `protobuf:"varint,4,rep,packed,name=pos_list,json=posList,proto3" json:"pos_list,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *PatternDefine) Reset() { + *x = PatternDefine{} + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *PatternDefine) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PatternDefine) ProtoMessage() {} + +func (x *PatternDefine) ProtoReflect() protoreflect.Message { + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[2] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PatternDefine.ProtoReflect.Descriptor instead. 
+func (*PatternDefine) Descriptor() ([]byte, []int) { + return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{2} +} + +func (x *PatternDefine) GetPatternId() uint64 { + if x != nil { + return x.PatternId + } + return 0 +} + +func (x *PatternDefine) GetTemplate() string { + if x != nil { + return x.Template + } + return "" +} + +func (x *PatternDefine) GetParamCount() uint32 { + if x != nil { + return x.ParamCount + } + return 0 +} + +func (x *PatternDefine) GetPosList() []uint32 { + if x != nil { + return x.PosList + } + return nil +} + +type PatternUpdate struct { + state protoimpl.MessageState `protogen:"open.v1"` + PatternId uint64 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"` + NewTemplate string `protobuf:"bytes,2,opt,name=new_template,json=newTemplate,proto3" json:"new_template,omitempty"` + ParamCount uint32 `protobuf:"varint,3,opt,name=param_count,json=paramCount,proto3" json:"param_count,omitempty"` + PosList []uint32 `protobuf:"varint,4,rep,packed,name=pos_list,json=posList,proto3" json:"pos_list,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *PatternUpdate) Reset() { + *x = PatternUpdate{} + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *PatternUpdate) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PatternUpdate) ProtoMessage() {} + +func (x *PatternUpdate) ProtoReflect() protoreflect.Message { + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[3] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PatternUpdate.ProtoReflect.Descriptor instead. 
+func (*PatternUpdate) Descriptor() ([]byte, []int) { + return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{3} +} + +func (x *PatternUpdate) GetPatternId() uint64 { + if x != nil { + return x.PatternId + } + return 0 +} + +func (x *PatternUpdate) GetNewTemplate() string { + if x != nil { + return x.NewTemplate + } + return "" +} + +func (x *PatternUpdate) GetParamCount() uint32 { + if x != nil { + return x.ParamCount + } + return 0 +} + +func (x *PatternUpdate) GetPosList() []uint32 { + if x != nil { + return x.PosList + } + return nil +} + +type PatternDelete struct { + state protoimpl.MessageState `protogen:"open.v1"` + PatternId uint64 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *PatternDelete) Reset() { + *x = PatternDelete{} + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *PatternDelete) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PatternDelete) ProtoMessage() {} + +func (x *PatternDelete) ProtoReflect() protoreflect.Message { + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[4] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PatternDelete.ProtoReflect.Descriptor instead. 
+func (*PatternDelete) Descriptor() ([]byte, []int) { + return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{4} +} + +func (x *PatternDelete) GetPatternId() uint64 { + if x != nil { + return x.PatternId + } + return 0 +} + +type Log struct { + state protoimpl.MessageState `protogen:"open.v1"` + Timestamp uint64 `protobuf:"varint,1,opt,name=timestamp,proto3" json:"timestamp,omitempty"` + // Types that are valid to be assigned to Content: + // + // *Log_Structured + // *Log_Raw + Content isLog_Content `protobuf_oneof:"content"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Log) Reset() { + *x = Log{} + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Log) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Log) ProtoMessage() {} + +func (x *Log) ProtoReflect() protoreflect.Message { + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[5] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Log.ProtoReflect.Descriptor instead. 
+func (*Log) Descriptor() ([]byte, []int) { + return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{5} +} + +func (x *Log) GetTimestamp() uint64 { + if x != nil { + return x.Timestamp + } + return 0 +} + +func (x *Log) GetContent() isLog_Content { + if x != nil { + return x.Content + } + return nil +} + +func (x *Log) GetStructured() *StructuredLog { + if x != nil { + if x, ok := x.Content.(*Log_Structured); ok { + return x.Structured + } + } + return nil +} + +func (x *Log) GetRaw() string { + if x != nil { + if x, ok := x.Content.(*Log_Raw); ok { + return x.Raw + } + } + return "" +} + +type isLog_Content interface { + isLog_Content() +} + +type Log_Structured struct { + Structured *StructuredLog `protobuf:"bytes,2,opt,name=structured,proto3,oneof"` +} + +type Log_Raw struct { + Raw string `protobuf:"bytes,3,opt,name=raw,proto3,oneof"` +} + +func (*Log_Structured) isLog_Content() {} + +func (*Log_Raw) isLog_Content() {} + +type StructuredLog struct { + state protoimpl.MessageState `protogen:"open.v1"` + PatternId uint64 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"` + DynamicValues []*DynamicValue `protobuf:"bytes,2,rep,name=dynamic_values,json=dynamicValues,proto3" json:"dynamic_values,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *StructuredLog) Reset() { + *x = StructuredLog{} + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *StructuredLog) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StructuredLog) ProtoMessage() {} + +func (x *StructuredLog) ProtoReflect() protoreflect.Message { + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[6] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + 
} + return mi.MessageOf(x) +} + +// Deprecated: Use StructuredLog.ProtoReflect.Descriptor instead. +func (*StructuredLog) Descriptor() ([]byte, []int) { + return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{6} +} + +func (x *StructuredLog) GetPatternId() uint64 { + if x != nil { + return x.PatternId + } + return 0 +} + +func (x *StructuredLog) GetDynamicValues() []*DynamicValue { + if x != nil { + return x.DynamicValues + } + return nil +} + +// TODO not sure we need numeric type +type DynamicValue struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Types that are valid to be assigned to Value: + // + // *DynamicValue_IntValue + // *DynamicValue_FloatValue + // *DynamicValue_StringValue + // *DynamicValue_DictIndex + Value isDynamicValue_Value `protobuf_oneof:"value"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DynamicValue) Reset() { + *x = DynamicValue{} + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DynamicValue) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DynamicValue) ProtoMessage() {} + +func (x *DynamicValue) ProtoReflect() protoreflect.Message { + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[7] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DynamicValue.ProtoReflect.Descriptor instead. 
+func (*DynamicValue) Descriptor() ([]byte, []int) { + return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{7} +} + +func (x *DynamicValue) GetValue() isDynamicValue_Value { + if x != nil { + return x.Value + } + return nil +} + +func (x *DynamicValue) GetIntValue() int64 { + if x != nil { + if x, ok := x.Value.(*DynamicValue_IntValue); ok { + return x.IntValue + } + } + return 0 +} + +func (x *DynamicValue) GetFloatValue() float64 { + if x != nil { + if x, ok := x.Value.(*DynamicValue_FloatValue); ok { + return x.FloatValue + } + } + return 0 +} + +func (x *DynamicValue) GetStringValue() string { + if x != nil { + if x, ok := x.Value.(*DynamicValue_StringValue); ok { + return x.StringValue + } + } + return "" +} + +func (x *DynamicValue) GetDictIndex() uint64 { + if x != nil { + if x, ok := x.Value.(*DynamicValue_DictIndex); ok { + return x.DictIndex + } + } + return 0 +} + +type isDynamicValue_Value interface { + isDynamicValue_Value() +} + +type DynamicValue_IntValue struct { + IntValue int64 `protobuf:"varint,1,opt,name=int_value,json=intValue,proto3,oneof"` +} + +type DynamicValue_FloatValue struct { + FloatValue float64 `protobuf:"fixed64,2,opt,name=float_value,json=floatValue,proto3,oneof"` +} + +type DynamicValue_StringValue struct { + StringValue string `protobuf:"bytes,3,opt,name=string_value,json=stringValue,proto3,oneof"` +} + +type DynamicValue_DictIndex struct { + DictIndex uint64 `protobuf:"varint,4,opt,name=dict_index,json=dictIndex,proto3,oneof"` +} + +func (*DynamicValue_IntValue) isDynamicValue_Value() {} + +func (*DynamicValue_FloatValue) isDynamicValue_Value() {} + +func (*DynamicValue_StringValue) isDynamicValue_Value() {} + +func (*DynamicValue_DictIndex) isDynamicValue_Value() {} + +type Datum struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Types that are valid to be assigned to Data: + // + // *Datum_PatternDefine + // *Datum_PatternUpdate + // *Datum_PatternDelete + // *Datum_DictEntryDefine + // 
*Datum_DictEntryDelete + // *Datum_Logs + Data isDatum_Data `protobuf_oneof:"data"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Datum) Reset() { + *x = Datum{} + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Datum) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Datum) ProtoMessage() {} + +func (x *Datum) ProtoReflect() protoreflect.Message { + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[8] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Datum.ProtoReflect.Descriptor instead. +func (*Datum) Descriptor() ([]byte, []int) { + return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{8} +} + +func (x *Datum) GetData() isDatum_Data { + if x != nil { + return x.Data + } + return nil +} + +func (x *Datum) GetPatternDefine() *PatternDefine { + if x != nil { + if x, ok := x.Data.(*Datum_PatternDefine); ok { + return x.PatternDefine + } + } + return nil +} + +func (x *Datum) GetPatternUpdate() *PatternUpdate { + if x != nil { + if x, ok := x.Data.(*Datum_PatternUpdate); ok { + return x.PatternUpdate + } + } + return nil +} + +func (x *Datum) GetPatternDelete() *PatternDelete { + if x != nil { + if x, ok := x.Data.(*Datum_PatternDelete); ok { + return x.PatternDelete + } + } + return nil +} + +func (x *Datum) GetDictEntryDefine() *DictEntryDefine { + if x != nil { + if x, ok := x.Data.(*Datum_DictEntryDefine); ok { + return x.DictEntryDefine + } + } + return nil +} + +func (x *Datum) GetDictEntryDelete() *DictEntryDelete { + if x != nil { + if x, ok := x.Data.(*Datum_DictEntryDelete); ok { + return x.DictEntryDelete + } + } + return nil +} + +func (x *Datum) GetLogs() *Log { + if x != nil { + if x, 
ok := x.Data.(*Datum_Logs); ok { + return x.Logs + } + } + return nil +} + +type isDatum_Data interface { + isDatum_Data() +} + +type Datum_PatternDefine struct { + PatternDefine *PatternDefine `protobuf:"bytes,1,opt,name=pattern_define,json=patternDefine,proto3,oneof"` +} + +type Datum_PatternUpdate struct { + PatternUpdate *PatternUpdate `protobuf:"bytes,2,opt,name=pattern_update,json=patternUpdate,proto3,oneof"` +} + +type Datum_PatternDelete struct { + PatternDelete *PatternDelete `protobuf:"bytes,3,opt,name=pattern_delete,json=patternDelete,proto3,oneof"` +} + +type Datum_DictEntryDefine struct { + DictEntryDefine *DictEntryDefine `protobuf:"bytes,4,opt,name=dict_entry_define,json=dictEntryDefine,proto3,oneof"` +} + +type Datum_DictEntryDelete struct { + DictEntryDelete *DictEntryDelete `protobuf:"bytes,5,opt,name=dict_entry_delete,json=dictEntryDelete,proto3,oneof"` +} + +type Datum_Logs struct { + Logs *Log `protobuf:"bytes,6,opt,name=logs,proto3,oneof"` +} + +func (*Datum_PatternDefine) isDatum_Data() {} + +func (*Datum_PatternUpdate) isDatum_Data() {} + +func (*Datum_PatternDelete) isDatum_Data() {} + +func (*Datum_DictEntryDefine) isDatum_Data() {} + +func (*Datum_DictEntryDelete) isDatum_Data() {} + +func (*Datum_Logs) isDatum_Data() {} + +// data is sequence of pattern/dictionary changes + logs +// the ordering is significant, must be processed in order +type StatefulBatch struct { + state protoimpl.MessageState `protogen:"open.v1"` + BatchId uint32 `protobuf:"varint,1,opt,name=batch_id,json=batchId,proto3" json:"batch_id,omitempty"` + Data []*Datum `protobuf:"bytes,2,rep,name=data,proto3" json:"data,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *StatefulBatch) Reset() { + *x = StatefulBatch{} + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[9] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *StatefulBatch) String() string { + return 
protoimpl.X.MessageStringOf(x) +} + +func (*StatefulBatch) ProtoMessage() {} + +func (x *StatefulBatch) ProtoReflect() protoreflect.Message { + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[9] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StatefulBatch.ProtoReflect.Descriptor instead. +func (*StatefulBatch) Descriptor() ([]byte, []int) { + return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{9} +} + +func (x *StatefulBatch) GetBatchId() uint32 { + if x != nil { + return x.BatchId + } + return 0 +} + +func (x *StatefulBatch) GetData() []*Datum { + if x != nil { + return x.Data + } + return nil +} + +type BatchStatus struct { + state protoimpl.MessageState `protogen:"open.v1"` + BatchId int32 `protobuf:"varint,1,opt,name=batch_id,json=batchId,proto3" json:"batch_id,omitempty"` + Status BatchStatus_Status `protobuf:"varint,2,opt,name=status,proto3,enum=intake.BatchStatus_Status" json:"status,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *BatchStatus) Reset() { + *x = BatchStatus{} + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[10] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *BatchStatus) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*BatchStatus) ProtoMessage() {} + +func (x *BatchStatus) ProtoReflect() protoreflect.Message { + mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[10] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use BatchStatus.ProtoReflect.Descriptor instead. 
+func (*BatchStatus) Descriptor() ([]byte, []int) { + return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{10} +} + +func (x *BatchStatus) GetBatchId() int32 { + if x != nil { + return x.BatchId + } + return 0 +} + +func (x *BatchStatus) GetStatus() BatchStatus_Status { + if x != nil { + return x.Status + } + return BatchStatus_UNKNOWN +} + +var File_pkg_logs_sender_grpc_stateful_encoding_proto protoreflect.FileDescriptor + +const file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDesc = "" + + "\n" + + ",pkg/logs/sender/grpc/stateful_encoding.proto\x12\x06intake\"7\n" + + "\x0fDictEntryDefine\x12\x0e\n" + + "\x02id\x18\x01 \x01(\x04R\x02id\x12\x14\n" + + "\x05value\x18\x02 \x01(\tR\x05value\"!\n" + + "\x0fDictEntryDelete\x12\x0e\n" + + "\x02id\x18\x01 \x01(\x04R\x02id\"\x86\x01\n" + + "\rPatternDefine\x12\x1d\n" + + "\n" + + "pattern_id\x18\x01 \x01(\x04R\tpatternId\x12\x1a\n" + + "\btemplate\x18\x02 \x01(\tR\btemplate\x12\x1f\n" + + "\vparam_count\x18\x03 \x01(\rR\n" + + "paramCount\x12\x19\n" + + "\bpos_list\x18\x04 \x03(\rR\aposList\"\x8d\x01\n" + + "\rPatternUpdate\x12\x1d\n" + + "\n" + + "pattern_id\x18\x01 \x01(\x04R\tpatternId\x12!\n" + + "\fnew_template\x18\x02 \x01(\tR\vnewTemplate\x12\x1f\n" + + "\vparam_count\x18\x03 \x01(\rR\n" + + "paramCount\x12\x19\n" + + "\bpos_list\x18\x04 \x03(\rR\aposList\".\n" + + "\rPatternDelete\x12\x1d\n" + + "\n" + + "pattern_id\x18\x01 \x01(\x04R\tpatternId\"{\n" + + "\x03Log\x12\x1c\n" + + "\ttimestamp\x18\x01 \x01(\x04R\ttimestamp\x127\n" + + "\n" + + "structured\x18\x02 \x01(\v2\x15.intake.StructuredLogH\x00R\n" + + "structured\x12\x12\n" + + "\x03raw\x18\x03 \x01(\tH\x00R\x03rawB\t\n" + + "\acontent\"k\n" + + "\rStructuredLog\x12\x1d\n" + + "\n" + + "pattern_id\x18\x01 \x01(\x04R\tpatternId\x12;\n" + + "\x0edynamic_values\x18\x02 \x03(\v2\x14.intake.DynamicValueR\rdynamicValues\"\x9f\x01\n" + + "\fDynamicValue\x12\x1d\n" + + "\tint_value\x18\x01 \x01(\x03H\x00R\bintValue\x12!\n" + + 
"\vfloat_value\x18\x02 \x01(\x01H\x00R\n" + + "floatValue\x12#\n" + + "\fstring_value\x18\x03 \x01(\tH\x00R\vstringValue\x12\x1f\n" + + "\n" + + "dict_index\x18\x04 \x01(\x04H\x00R\tdictIndexB\a\n" + + "\x05value\"\x80\x03\n" + + "\x05Datum\x12>\n" + + "\x0epattern_define\x18\x01 \x01(\v2\x15.intake.PatternDefineH\x00R\rpatternDefine\x12>\n" + + "\x0epattern_update\x18\x02 \x01(\v2\x15.intake.PatternUpdateH\x00R\rpatternUpdate\x12>\n" + + "\x0epattern_delete\x18\x03 \x01(\v2\x15.intake.PatternDeleteH\x00R\rpatternDelete\x12E\n" + + "\x11dict_entry_define\x18\x04 \x01(\v2\x17.intake.DictEntryDefineH\x00R\x0fdictEntryDefine\x12E\n" + + "\x11dict_entry_delete\x18\x05 \x01(\v2\x17.intake.DictEntryDeleteH\x00R\x0fdictEntryDelete\x12!\n" + + "\x04logs\x18\x06 \x01(\v2\v.intake.LogH\x00R\x04logsB\x06\n" + + "\x04data\"M\n" + + "\rStatefulBatch\x12\x19\n" + + "\bbatch_id\x18\x01 \x01(\rR\abatchId\x12!\n" + + "\x04data\x18\x02 \x03(\v2\r.intake.DatumR\x04data\"{\n" + + "\vBatchStatus\x12\x19\n" + + "\bbatch_id\x18\x01 \x01(\x05R\abatchId\x122\n" + + "\x06status\x18\x02 \x01(\x0e2\x1a.intake.BatchStatus.StatusR\x06status\"\x1d\n" + + "\x06Status\x12\v\n" + + "\aUNKNOWN\x10\x00\x12\x06\n" + + "\x02OK\x10\x012S\n" + + "\x13StatefulLogsService\x12<\n" + + "\n" + + "LogsStream\x12\x15.intake.StatefulBatch\x1a\x13.intake.BatchStatus(\x010\x01B7Z5github.com/DataDog/datadog-agent/pkg/logs/sender/grpcb\x06proto3" + +var ( + file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescOnce sync.Once + file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescData []byte +) + +func file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP() []byte { + file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescOnce.Do(func() { + file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDesc), len(file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDesc))) + }) + return 
file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescData +} + +var file_pkg_logs_sender_grpc_stateful_encoding_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes = make([]protoimpl.MessageInfo, 11) +var file_pkg_logs_sender_grpc_stateful_encoding_proto_goTypes = []any{ + (BatchStatus_Status)(0), // 0: intake.BatchStatus.Status + (*DictEntryDefine)(nil), // 1: intake.DictEntryDefine + (*DictEntryDelete)(nil), // 2: intake.DictEntryDelete + (*PatternDefine)(nil), // 3: intake.PatternDefine + (*PatternUpdate)(nil), // 4: intake.PatternUpdate + (*PatternDelete)(nil), // 5: intake.PatternDelete + (*Log)(nil), // 6: intake.Log + (*StructuredLog)(nil), // 7: intake.StructuredLog + (*DynamicValue)(nil), // 8: intake.DynamicValue + (*Datum)(nil), // 9: intake.Datum + (*StatefulBatch)(nil), // 10: intake.StatefulBatch + (*BatchStatus)(nil), // 11: intake.BatchStatus +} +var file_pkg_logs_sender_grpc_stateful_encoding_proto_depIdxs = []int32{ + 7, // 0: intake.Log.structured:type_name -> intake.StructuredLog + 8, // 1: intake.StructuredLog.dynamic_values:type_name -> intake.DynamicValue + 3, // 2: intake.Datum.pattern_define:type_name -> intake.PatternDefine + 4, // 3: intake.Datum.pattern_update:type_name -> intake.PatternUpdate + 5, // 4: intake.Datum.pattern_delete:type_name -> intake.PatternDelete + 1, // 5: intake.Datum.dict_entry_define:type_name -> intake.DictEntryDefine + 2, // 6: intake.Datum.dict_entry_delete:type_name -> intake.DictEntryDelete + 6, // 7: intake.Datum.logs:type_name -> intake.Log + 9, // 8: intake.StatefulBatch.data:type_name -> intake.Datum + 0, // 9: intake.BatchStatus.status:type_name -> intake.BatchStatus.Status + 10, // 10: intake.StatefulLogsService.LogsStream:input_type -> intake.StatefulBatch + 11, // 11: intake.StatefulLogsService.LogsStream:output_type -> intake.BatchStatus + 11, // [11:12] is the sub-list for method output_type + 10, // [10:11] is the sub-list for method 
input_type + 10, // [10:10] is the sub-list for extension type_name + 10, // [10:10] is the sub-list for extension extendee + 0, // [0:10] is the sub-list for field type_name +} + +func init() { file_pkg_logs_sender_grpc_stateful_encoding_proto_init() } +func file_pkg_logs_sender_grpc_stateful_encoding_proto_init() { + if File_pkg_logs_sender_grpc_stateful_encoding_proto != nil { + return + } + file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[5].OneofWrappers = []any{ + (*Log_Structured)(nil), + (*Log_Raw)(nil), + } + file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[7].OneofWrappers = []any{ + (*DynamicValue_IntValue)(nil), + (*DynamicValue_FloatValue)(nil), + (*DynamicValue_StringValue)(nil), + (*DynamicValue_DictIndex)(nil), + } + file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[8].OneofWrappers = []any{ + (*Datum_PatternDefine)(nil), + (*Datum_PatternUpdate)(nil), + (*Datum_PatternDelete)(nil), + (*Datum_DictEntryDefine)(nil), + (*Datum_DictEntryDelete)(nil), + (*Datum_Logs)(nil), + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDesc), len(file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDesc)), + NumEnums: 1, + NumMessages: 11, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_pkg_logs_sender_grpc_stateful_encoding_proto_goTypes, + DependencyIndexes: file_pkg_logs_sender_grpc_stateful_encoding_proto_depIdxs, + EnumInfos: file_pkg_logs_sender_grpc_stateful_encoding_proto_enumTypes, + MessageInfos: file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes, + }.Build() + File_pkg_logs_sender_grpc_stateful_encoding_proto = out.File + file_pkg_logs_sender_grpc_stateful_encoding_proto_goTypes = nil + file_pkg_logs_sender_grpc_stateful_encoding_proto_depIdxs = nil +} diff --git a/pkg/logs/sender/grpc/stateful_encoding.proto 
b/pkg/logs/sender/grpc/stateful_encoding.proto
new file mode 100644
index 000000000000..e555c152ed95
--- /dev/null
+++ b/pkg/logs/sender/grpc/stateful_encoding.proto
@@ -0,0 +1,109 @@
+syntax = "proto3";
+
+package intake;
+option go_package = "github.com/DataDog/datadog-agent/pkg/logs/sender/grpc";
+
+// ---------------------------------------------------------------------------
+// Dictionary-encoded
+// ---------------------------------------------------------------------------
+
+message DictEntryDefine {
+  uint64 id = 1;
+  string value = 2;
+}
+
+message DictEntryDelete {
+  uint64 id = 1;
+}
+
+// ---------------------------------------------------------------------------
+// Pattern dictionary
+// ---------------------------------------------------------------------------
+
+// pos_list is used to indicate where dynamic values should be inserted
+// it's more accurate than a marker
+message PatternDefine {
+  uint64 pattern_id = 1;
+  string template = 2;
+  uint32 param_count = 3;
+  repeated uint32 pos_list = 4;
+}
+
+message PatternUpdate {
+  uint64 pattern_id = 1;
+  string new_template = 2;
+  uint32 param_count = 3;
+  repeated uint32 pos_list = 4;
+}
+
+message PatternDelete {
+  uint64 pattern_id = 1;
+}
+
+// ---------------------------------------------------------------------------
+// Log payload
+// ---------------------------------------------------------------------------
+
+message Log {
+  uint64 timestamp = 1;
+  oneof content {
+    StructuredLog structured = 2;
+    string raw = 3;
+  }
+}
+
+message StructuredLog {
+  uint64 pattern_id = 1;
+  repeated DynamicValue dynamic_values = 2;
+}
+
+// TODO not sure we need numeric type
+message DynamicValue {
+  oneof value {
+    int64 int_value = 1;
+    double float_value = 2;
+    string string_value = 3;
+    uint64 dict_index = 4;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Streaming envelope
+// ---------------------------------------------------------------------------
+
+message Datum {
+  oneof data {
+    PatternDefine pattern_define = 1;
+    PatternUpdate pattern_update = 2;
+    PatternDelete pattern_delete = 3;
+    DictEntryDefine dict_entry_define = 4;
+    DictEntryDelete dict_entry_delete = 5;
+    Log logs = 6;
+  }
+}
+
+// data is sequence of pattern/dictionary changes + logs
+// the ordering is significant, must be processed in order
+message StatefulBatch {
+  uint32 batch_id = 1;
+  repeated Datum data = 2;
+}
+
+message BatchStatus {
+  int32 batch_id = 1; // NOTE(review): StatefulBatch.batch_id is uint32 but this field is int32 - confirm the mismatch is intended
+
+  // Status reported for a batch; UNKNOWN is the zero value, so it is what an unset status decodes to
+  enum Status {
+    UNKNOWN=0;
+    OK=1;
+  }
+  Status status = 2;
+}
+
+// ---------------------------------------------------------------------------
+// gRPC service definition (bi-directional streaming)
+// ---------------------------------------------------------------------------
+
+service StatefulLogsService {
+  rpc LogsStream(stream StatefulBatch) returns (stream BatchStatus);
+}
diff --git a/pkg/logs/sender/grpc/stateful_encoding_grpc.pb.go b/pkg/logs/sender/grpc/stateful_encoding_grpc.pb.go
new file mode 100644
index 000000000000..36b0dde717f1
--- /dev/null
+++ b/pkg/logs/sender/grpc/stateful_encoding_grpc.pb.go
@@ -0,0 +1,115 @@
+// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
+// versions:
+// - protoc-gen-go-grpc v1.5.1
+// - protoc v4.24.3
+// source: pkg/logs/sender/grpc/stateful_encoding.proto
+
+package grpc
+
+import (
+	context "context"
+	grpc "google.golang.org/grpc"
+	codes "google.golang.org/grpc/codes"
+	status "google.golang.org/grpc/status"
+)
+
+// This is a compile-time assertion to ensure that this generated file
+// is compatible with the grpc package it is being compiled against.
+// Requires gRPC-Go v1.64.0 or later.
+const _ = grpc.SupportPackageIsVersion9
+
+const (
+	StatefulLogsService_LogsStream_FullMethodName = "/intake.StatefulLogsService/LogsStream"
+)
+
+// StatefulLogsServiceClient is the client API for StatefulLogsService service.
+// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type StatefulLogsServiceClient interface { + LogsStream(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[StatefulBatch, BatchStatus], error) +} + +type statefulLogsServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewStatefulLogsServiceClient(cc grpc.ClientConnInterface) StatefulLogsServiceClient { + return &statefulLogsServiceClient{cc} +} + +func (c *statefulLogsServiceClient) LogsStream(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[StatefulBatch, BatchStatus], error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + stream, err := c.cc.NewStream(ctx, &StatefulLogsService_ServiceDesc.Streams[0], StatefulLogsService_LogsStream_FullMethodName, cOpts...) + if err != nil { + return nil, err + } + x := &grpc.GenericClientStream[StatefulBatch, BatchStatus]{ClientStream: stream} + return x, nil +} + +// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. +type StatefulLogsService_LogsStreamClient = grpc.BidiStreamingClient[StatefulBatch, BatchStatus] + +// StatefulLogsServiceServer is the server API for StatefulLogsService service. +// All implementations must embed UnimplementedStatefulLogsServiceServer +// for forward compatibility. +type StatefulLogsServiceServer interface { + LogsStream(grpc.BidiStreamingServer[StatefulBatch, BatchStatus]) error + mustEmbedUnimplementedStatefulLogsServiceServer() +} + +// UnimplementedStatefulLogsServiceServer must be embedded to have +// forward compatible implementations. +// +// NOTE: this should be embedded by value instead of pointer to avoid a nil +// pointer dereference when methods are called. 
+type UnimplementedStatefulLogsServiceServer struct{} + +func (UnimplementedStatefulLogsServiceServer) LogsStream(grpc.BidiStreamingServer[StatefulBatch, BatchStatus]) error { + return status.Errorf(codes.Unimplemented, "method LogsStream not implemented") +} +func (UnimplementedStatefulLogsServiceServer) mustEmbedUnimplementedStatefulLogsServiceServer() {} +func (UnimplementedStatefulLogsServiceServer) testEmbeddedByValue() {} + +// UnsafeStatefulLogsServiceServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to StatefulLogsServiceServer will +// result in compilation errors. +type UnsafeStatefulLogsServiceServer interface { + mustEmbedUnimplementedStatefulLogsServiceServer() +} + +func RegisterStatefulLogsServiceServer(s grpc.ServiceRegistrar, srv StatefulLogsServiceServer) { + // If the following call pancis, it indicates UnimplementedStatefulLogsServiceServer was + // embedded by pointer and is nil. This will cause panics if an + // unimplemented method is ever invoked, so we test this at initialization + // time to prevent it from happening at runtime later due to I/O. + if t, ok := srv.(interface{ testEmbeddedByValue() }); ok { + t.testEmbeddedByValue() + } + s.RegisterService(&StatefulLogsService_ServiceDesc, srv) +} + +func _StatefulLogsService_LogsStream_Handler(srv interface{}, stream grpc.ServerStream) error { + return srv.(StatefulLogsServiceServer).LogsStream(&grpc.GenericServerStream[StatefulBatch, BatchStatus]{ServerStream: stream}) +} + +// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. +type StatefulLogsService_LogsStreamServer = grpc.BidiStreamingServer[StatefulBatch, BatchStatus] + +// StatefulLogsService_ServiceDesc is the grpc.ServiceDesc for StatefulLogsService service. 
+// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var StatefulLogsService_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "intake.StatefulLogsService", + HandlerType: (*StatefulLogsServiceServer)(nil), + Methods: []grpc.MethodDesc{}, + Streams: []grpc.StreamDesc{ + { + StreamName: "LogsStream", + Handler: _StatefulLogsService_LogsStream_Handler, + ServerStreams: true, + ClientStreams: true, + }, + }, + Metadata: "pkg/logs/sender/grpc/stateful_encoding.proto", +} diff --git a/pkg/logs/sender/grpc/stream_worker.go b/pkg/logs/sender/grpc/stream_worker.go new file mode 100644 index 000000000000..b4b729b3f486 --- /dev/null +++ b/pkg/logs/sender/grpc/stream_worker.go @@ -0,0 +1,592 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +package grpc + +import ( + "context" + "errors" + "fmt" + "io" + "sync" + "time" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + + "go.uber.org/atomic" + + "github.com/DataDog/datadog-agent/pkg/logs/client" + "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/logs/metrics" + "github.com/DataDog/datadog-agent/pkg/logs/sender" + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +// TODO For PoC Stage 1 +// - handle unrecoverable errors - auth/perm, protocol, stream-level gRPC status +// - check snapshot's generationID, only act on the current generation +// - implement back-off for stream creation on successive failures +// - handle createNewStream failures +// - telemetry (send/recv, failure, rotations) + +// TODO for PoC Stage 2 +// - implement backpressure + +// RotationType represents the type of stream rotation +type RotationType int + +const ( + RotationTypeNone RotationType = iota + RotationTypeHard + RotationTypeGraceful +) + +// StreamRotateSignal represents a signal to upstream about stream rotation +type StreamRotateSignal struct { + Type RotationType + GenerationID uint64 +} + +// ReceiverSignal represents a signal from receiver to supervisor +type ReceiverSignal struct { + GenerationID uint64 + Error error +} + +// StreamInfo holds all stream-related information +type StreamInfo struct { + Stream StatefulLogsService_LogsStreamClient + Ctx context.Context + Cancel context.CancelFunc +} + +// StreamWorker manages a single gRPC bidirectional stream with Master-Slave threading model +// Architecture: One supervisor/sender goroutine + one persistent receiver goroutine per worker +type StreamWorker struct { + // Configuration + workerID string + destinationsContext *client.DestinationsContext + + // Pipeline integration + inputChan chan *message.Payload + outputChan chan *message.Payload // For auditor acknowledgments + sink sender.Sink // For getting auditor channel + + // gRPC connection
management (shared with other streams) + client StatefulLogsServiceClient + + // Stream management + currentStream *StreamInfo + generationID uint64 + recvFailureCh chan ReceiverSignal // Signal receiver failure with generationID + streamLifetime time.Duration + batchIDCounter *atomic.Uint32 // Shared across all workers for global uniqueness + + // Rotation management + inRotation bool + rotationType RotationType + drainedStream *StreamInfo // Old stream being drained after graceful rotation + + // Upstream signaling + signalStreamRotate chan StreamRotateSignal + + // Auditor acknowledgment tracking + pendingPayloads map[uint32]*message.Payload // batchID -> payload + pendingPayloadsMu sync.Mutex // Protects pendingPayloads map + + // Control + stopChan chan struct{} + done chan struct{} +} + +// NewStreamWorker creates a new gRPC stream worker +func NewStreamWorker( + workerID string, + destinationsCtx *client.DestinationsContext, + client StatefulLogsServiceClient, + sink sender.Sink, + streamLifetime time.Duration, + batchIDCounter *atomic.Uint32, +) *StreamWorker { + worker := &StreamWorker{ + workerID: workerID, + destinationsContext: destinationsCtx, + inputChan: make(chan *message.Payload, 100), // Buffered input + outputChan: nil, // Will be set in Start() + sink: sink, // For getting auditor channel + client: client, + recvFailureCh: make(chan ReceiverSignal), // Unbuffered receiver failure signal + streamLifetime: streamLifetime, // Stream recreation interval + batchIDCounter: batchIDCounter, // Shared counter for globally unique batch IDs + inRotation: false, + rotationType: RotationTypeNone, + signalStreamRotate: make(chan StreamRotateSignal, 1), // Size-1 buffer for drop-old semantics + pendingPayloads: make(map[uint32]*message.Payload), // Initialize batch tracking map + stopChan: make(chan struct{}), + done: make(chan struct{}), + } + + return worker +} + +// Start begins the supervisor goroutine +func (s *StreamWorker) Start() { + log.Infof("šŸš€ 
========== Starting gRPC stream worker %s ==========", s.workerID) + s.outputChan = s.sink.Channel() + log.Infof("šŸ”Œ Worker %s: outputChan configured: %v", s.workerID, s.outputChan != nil) + + // Create initial stream + log.Infof("šŸ”§ Worker %s: Creating initial gRPC stream...", s.workerID) + if stream, err := s.createNewStream(); err == nil { + s.currentStream = stream + log.Infof("āœ… Worker %s: Created initial stream (generation %d)", s.workerID, s.generationID) + log.Infof("šŸ”„ Worker %s: Starting receiverLoop goroutine (generation %d)", s.workerID, s.generationID) + go s.receiverLoop(stream, s.generationID) + } else { + log.Errorf("āŒ Worker %s: Failed to create initial stream: %v", s.workerID, err) + } + + // Start supervisor/sender goroutine (master) + log.Infof("šŸ‘· Worker %s: Starting supervisorLoop goroutine", s.workerID) + go s.supervisorLoop() + log.Infof("šŸš€ Worker %s: Start() complete", s.workerID) +} + +// Stop gracefully shuts down the stream +func (s *StreamWorker) Stop() { + log.Infof("Stopping gRPC stream worker %s", s.workerID) + close(s.stopChan) + <-s.done + log.Infof("Worker %s: Stopped", s.workerID) +} + +// supervisorLoop is the master goroutine that handles sending and stream lifecycle +func (s *StreamWorker) supervisorLoop() { + defer close(s.done) + + streamTimer := time.NewTimer(s.streamLifetime) + defer streamTimer.Stop() + + for { + select { + case payload := <-s.inputChan: + if s.inRotation && s.rotationType == RotationTypeHard { + // In hard rotation state + if payload.IsSnapshot { + // Received full state snapshot from upstream, ready to + // start using the new stream now, with the snapshot as + // the first message + s.finishHardRotate(streamTimer) + } else { + // These are the messages that's written into channel + // buffer but encoded with previous state, we can't send them + // as-is into the new stream. 
Drop them and rely on + // upstream to re-encode and resend them + continue + } + } else if s.inRotation && s.rotationType == RotationTypeGraceful { + // In graceful rotation state + if payload.IsSnapshot { + // Received full state snapshot from upstream, ready to + // switch to the new stream + s.finishGracefulRotate(streamTimer) + } + // If payload is not a snapshot, we continue sending it to + // the old stream. + } + + // Send payload + if err := s.sendPayload(payload); err != nil { + // Send failed, hard rotate stream + log.Warnf("Worker %s: Send failed, initiating hard rotation: %v", s.workerID, err) + s.beginHardRotate() + } + + case signal := <-s.recvFailureCh: + if signal.GenerationID != s.generationID { + // Signal from old stream generation, we must have rotated. + // - In case of hard rotation, this is a timing issue: the old receiver is reporting + // the same transport failure as previously detected by the supervisor. Since + // we have hard rotated, we can ignore the signal. + // - In case of graceful rotation, this is the drained stream reporting the failure. + // Since we've already switched to a functioning new stream, we will ignore the signal.
+ // If there really were acks that we missed because drained stream died, we rely on + // the upstream to detect and resend them + log.Infof("Worker %s: Ignoring signal from old generation %d (current: %d)", + s.workerID, signal.GenerationID, s.generationID) + continue + } + + // Receiver reported failure, hard rotate stream + log.Warnf("Worker %s: Receiver reported failure, initiating hard rotation: %v", s.workerID, signal.Error) + s.beginHardRotate() + + case <-streamTimer.C: + // Life time expired, graceful rotate stream + if !s.inRotation { + log.Infof("ā° ========== Worker %s: STREAM LIFETIME EXPIRED - GRACEFUL ROTATION ==========", s.workerID) + s.beginGracefulRotate() + } + + case <-s.stopChan: + // Graceful shutdown + if s.currentStream != nil { + s.closeStream(s.currentStream) + } + s.closeStream(s.drainedStream) + return + } + } +} + +// sendStreamRotateSignal sends a rotation signal to upstream with size-1 drop-old semantics +// This ensures the supervisor never blocks and the upstream always gets the latest signal +func (s *StreamWorker) sendStreamRotateSignal(rt RotationType) { + v := StreamRotateSignal{ + Type: rt, + GenerationID: s.generationID, + } + select { + case s.signalStreamRotate <- v: + return // queued immediately + default: + // drop one old value if present + select { + case <-s.signalStreamRotate: + // dropped old + default: + // nothing to drop (likely consumer grabbed it); try send again + } + s.signalStreamRotate <- v + } +} + +// beginHardRotate immediately closes and recreates the stream +func (s *StreamWorker) beginHardRotate() { + log.Infof("Worker %s: Beginning hard rotation (generation %d)", s.workerID, s.generationID) + + // Signal "hard rotate" to upstream + s.sendStreamRotateSignal(RotationTypeHard) + + // Close current stream + s.closeStream(s.currentStream) + s.currentStream = nil + + // Create new stream + if streamInfo, err := s.createNewStream(); err == nil { + s.currentStream = streamInfo + // Start new receiver 
goroutine with new stream + go s.receiverLoop(streamInfo, s.generationID) + } else { + log.Errorf("Worker %s: Failed to create new stream during hard rotation: %v", s.workerID, err) + } + + // Set rotation state + s.inRotation = true + s.rotationType = RotationTypeHard +} + +// finishHardRotate completes the hard rotation process +func (s *StreamWorker) finishHardRotate(streamTimer *time.Timer) { + log.Infof("Worker %s: Hard rotation finished, resuming normal operation", s.workerID) + s.inRotation = false + s.rotationType = RotationTypeNone + // Reset timer after successful rotation + streamTimer.Reset(s.streamLifetime) +} + +// beginGracefulRotate starts graceful rotation by signaling upstream +func (s *StreamWorker) beginGracefulRotate() { + log.Infof("šŸ”„ Worker %s: BEGIN GRACEFUL ROTATION (generation %d)", s.workerID, s.generationID) + + // Signal "graceful rotate" to upstream + s.sendStreamRotateSignal(RotationTypeGraceful) + log.Infof("šŸ“” Worker %s: Sent graceful rotation signal to upstream", s.workerID) + + // Set rotation state + s.inRotation = true + s.rotationType = RotationTypeGraceful + log.Infof("šŸ”„ Worker %s: In graceful rotation mode, waiting for snapshot...", s.workerID) +} + +// finishGracefulRotate completes graceful rotation by switching to new stream +func (s *StreamWorker) finishGracefulRotate(streamTimer *time.Timer) { + log.Infof("Worker %s: Finishing graceful rotation", s.workerID) + + // Move current stream to drained + s.drainedStream = s.currentStream + s.currentStream = nil + + // Create new stream + if streamInfo, err := s.createNewStream(); err == nil { + s.currentStream = streamInfo + log.Infof("Worker %s: Graceful rotation completed, new stream created (generation %d)", s.workerID, s.generationID) + // Start new receiver goroutine with new stream + go s.receiverLoop(streamInfo, s.generationID) + } else { + log.Errorf("Worker %s: Failed to create new stream during graceful rotation: %v", s.workerID, err) + } + + // Start drain 
timer (10 seconds) - automatically closes drained stream when it expires + drainedStreamToClose := s.drainedStream + time.AfterFunc(10*time.Second, func() { + log.Infof("Worker %s: Closing drained stream after 10 second grace period", s.workerID) + s.closeStream(drainedStreamToClose) + }) + + // Reset rotation state + s.inRotation = false + s.rotationType = RotationTypeNone + + // Reset timer after successful rotation + streamTimer.Reset(s.streamLifetime) +} + +// createNewStream creates a new gRPC stream and returns StreamInfo +func (s *StreamWorker) createNewStream() (*StreamInfo, error) { + // Increment generation for new stream + s.generationID++ + log.Infof("Worker %s: Creating new stream (generation %d)", s.workerID, s.generationID) + + // Create per-stream context derived from destinations context + ctx, cancel := context.WithCancel(s.destinationsContext.Context()) + + // Create the stream (headers are added automatically via PerRPCCredentials) + stream, err := s.client.LogsStream(ctx) + if err != nil { + cancel() // Clean up context on error + log.Errorf("Worker %s: Failed to create gRPC stream (generation %d): %v", s.workerID, s.generationID, err) + return nil, fmt.Errorf("failed to create stream: %w", err) + } + + log.Infof("Worker %s: Successfully created gRPC stream (generation %d)", s.workerID, s.generationID) + return &StreamInfo{ + Stream: stream, + Ctx: ctx, + Cancel: cancel, + }, nil +} + +// closeStream safely closes a stream and cancels its context +func (s *StreamWorker) closeStream(streamInfo *StreamInfo) { + if streamInfo != nil { + streamInfo.Stream.CloseSend() + streamInfo.Cancel() + } +} + +// sendPayload sends a payload through the current stream +func (s *StreamWorker) sendPayload(payload *message.Payload) error { + if s.currentStream == nil { + return fmt.Errorf("no active stream") + } + + batch := s.payloadToBatch(payload) + + // Send the batch (headers were sent at stream creation time) + if err := s.currentStream.Stream.Send(batch); 
err != nil { + return fmt.Errorf("failed to send batch: %w", err) + } + + // Track payload by batch ID for auditor acknowledgment when we receive BatchStatus + s.pendingPayloadsMu.Lock() + s.pendingPayloads[batch.BatchId] = payload + // Removed debug log to reduce noise + s.pendingPayloadsMu.Unlock() + + return nil +} + +// receiverLoop runs in the receiver goroutine to process server responses for a specific stream +// This goroutine exits when the stream fails and signals the supervisor +func (s *StreamWorker) receiverLoop(streamInfo *StreamInfo, generationID uint64) { + stream := streamInfo.Stream + log.Infof("šŸ”„ Worker %s: receiverLoop STARTED (generation %d)", s.workerID, generationID) + recvCount := 0 + for { + msg, err := stream.Recv() + if err == nil { + // Normal message (e.g., BatchStatus) + recvCount++ + if recvCount%100 == 1 { + log.Infof("āœ… Worker %s: Received %d BatchStatus messages so far (latest: batch_id=%d)", s.workerID, recvCount, msg.BatchId) + } + s.handleBatchStatus(msg) + continue + } + + // Clean inbound close (server OK in trailers): policy = signal receiver failure + if errors.Is(err, io.EOF) { + log.Warnf("Worker %s: Stream closed by server (generation %d)", s.workerID, generationID) + s.signalRecvFailure(generationID, err) + return // Exit this receiver goroutine + } + + // Local cancel/deadline (supervisor rotated, worker shutdown): just exit + if errors.Is(streamInfo.Ctx.Err(), context.Canceled) || errors.Is(streamInfo.Ctx.Err(), context.DeadlineExceeded) { + log.Infof("Worker %s: Stream context cancelled, receiver exiting (generation %d)", s.workerID, generationID) + return // Exit this receiver goroutine + } + + // Stream-level gRPC status (non-OK): RPC is over → signal receiver failure or block terminal + if st, ok := status.FromError(err); ok { + switch st.Code() { + case codes.Unauthenticated, codes.PermissionDenied: + // Terminal until fixed; do not signal receiver failure here + s.handleIrrecoverableError("auth/perm: " + 
st.Message()) + return // Exit this receiver goroutine + case codes.InvalidArgument, codes.FailedPrecondition, codes.OutOfRange, codes.Unimplemented: + // Terminal protocol/semantic issue; do not signal receiver failure + s.handleIrrecoverableError("protocol: " + st.Message()) + return // Exit this receiver goroutine + default: + // All other non-OK statuses: signal receiver failure + s.signalRecvFailure(generationID, err) + return // Exit this receiver goroutine + } + } + + // Transport error without status (RST/GOAWAY/TLS, socket close): signal receiver failure + log.Warnf("Worker %s: Transport error (generation %d): %v", s.workerID, generationID, err) + s.signalRecvFailure(generationID, err) + return // Exit this receiver goroutine + } +} + +// signalRecvFailure signals the supervisor to rotate the stream +func (s *StreamWorker) signalRecvFailure(generationID uint64, err error) { + // Always signal with generation ID - supervisor will decide whether to act + signal := ReceiverSignal{ + GenerationID: generationID, + Error: err, + } + + // This signaling is blocking by design; it's okay to block the receiver, + // since it only gets here after its stream has failed and it is about to exit. + // The stopChan is used to unblock the receiver when the worker is shutting down.
+ select { + case s.recvFailureCh <- signal: + case <-s.stopChan: + } +} + +// handleIrrecoverableError blocks the receiver when encountering terminal errors +func (s *StreamWorker) handleIrrecoverableError(reason string) { + // TODO: Implement proper blocking logic with exponential backoff and cancellable sleep +} + +// handleBatchStatus processes a normal BatchStatus response +func (s *StreamWorker) handleBatchStatus(response *BatchStatus) { + batchID := uint32(response.BatchId) + + // Debug: Print the full server response + log.Debugf("šŸ”µ Worker %s: SERVER RESPONSE: batch_id=%d, status=%v (enum=%d), full_response=%+v", + s.workerID, response.BatchId, response.Status, int32(response.Status), response) + + // Find the specific payload for this batch ID + s.pendingPayloadsMu.Lock() + payload, exists := s.pendingPayloads[batchID] + if exists { + delete(s.pendingPayloads, batchID) // Clean up immediately while holding lock + } else { + log.Warnf("āŒ Worker %s: Payload for batch_id=%d NOT FOUND in pendingPayloads (total pending: %d)", s.workerID, batchID, len(s.pendingPayloads)) + } + s.pendingPayloadsMu.Unlock() + + if exists { + if response.Status == BatchStatus_OK { + // Update metrics for successful send + metrics.LogsSent.Add(int64(payload.Count())) + metrics.TlmLogsSent.Add(float64(payload.Count())) + + // Handle acknowledgments - send successful payloads to auditor + if s.outputChan != nil { + select { + case s.outputChan <- payload: + // Success - no log to reduce noise + default: + log.Warnf("Worker %s: Auditor channel full, dropping ack for batch %d", s.workerID, batchID) + } + } else { + log.Warnf("āŒ Worker %s: outputChan is nil, cannot send ack for batch_id=%d", s.workerID, batchID) + } + } else { + log.Warnf("Worker %s: Received non-OK status for batch %d: %v", s.workerID, batchID, response.Status) + } + } +} + +// payloadToBatch converts a message payload to a StatefulBatch +func (s *StreamWorker) payloadToBatch(payload *message.Payload) 
*StatefulBatch { + batchID := s.batchIDCounter.Inc() + + batch := &StatefulBatch{ + BatchId: batchID, + Data: make([]*Datum, 0, payload.Count()), + } + + // Check if this is a pattern payload by looking at metadata tags + isPattern := false + for _, meta := range payload.MessageMetas { + for _, tag := range meta.ProcessingTags { + if tag == "data_type:pattern" { + isPattern = true + break + } + } + if isPattern { + break + } + } + + if isPattern { + // Handle pattern payload - hardcode for POC testing + datum := s.createHardcodedPatternDatum() + if datum != nil { + batch.Data = append(batch.Data, datum) + } + // Commented out to reduce log noise + // log.Infof("šŸ“¤ PATTERN BATCH SENT: %v", batch) + } else { + // Handle regular log payload + datum := &Datum{ + Data: &Datum_Logs{ + Logs: &Log{ + Content: &Log_Raw{ + Raw: string(payload.Encoded), // Send compressed data as-is + }, + }, + }, + } + batch.Data = append(batch.Data, datum) + } + + return batch +} + +// PatternPayload represents the JSON structure from dumb_strategy +type PatternPayload struct { + PatternID uint64 `json:"pattern_id"` + Pattern string `json:"pattern"` + ParamCount int `json:"param_count"` + WildcardPos []int `json:"wildcard_positions"` +} + +// createHardcodedPatternDatum creates a hardcoded pattern for POC testing +func (s *StreamWorker) createHardcodedPatternDatum() *Datum { + // log.Infof("Worker %s: Sending hardcoded pattern for POC testing", s.workerID) + + return &Datum{ + Data: &Datum_PatternDefine{ + PatternDefine: &PatternDefine{ + PatternId: 12345, + Template: "User * logged in from *", + ParamCount: 2, + PosList: []uint32{1, 4}, + }, + }, + } +} From 26b5c822c2977bb31696ca62ce111e43c571dc30 Mon Sep 17 00:00:00 2001 From: yoon nguyen Date: Thu, 30 Oct 2025 10:02:57 -0400 Subject: [PATCH 02/16] updated merging --- AGENTS.md | 229 ++------ CLAUDE.md | 1 + pkg/logs/patterns/automaton/rules.go | 13 +- pkg/logs/patterns/automaton/rules_test.go | 504 ++++++++++++++++++ 
pkg/logs/patterns/automaton/tokenizer.go | 282 ++-------- pkg/logs/patterns/automaton/trie_test.go | 249 +++++++++ pkg/logs/patterns/clustering/cluster.go | 9 +- pkg/logs/patterns/clustering/cluster_test.go | 256 +++++++++ .../patterns/clustering/merging/merging.go | 121 +---- .../clustering/merging/merging_test.go | 199 +++---- pkg/logs/patterns/token/token.go | 195 +++---- pkg/logs/patterns/token/token_test.go | 88 +++ pkg/logs/patterns/token/tokenlist.go | 14 +- 13 files changed, 1362 insertions(+), 798 deletions(-) create mode 100644 pkg/logs/patterns/automaton/rules_test.go create mode 100644 pkg/logs/patterns/automaton/trie_test.go create mode 100644 pkg/logs/patterns/clustering/cluster_test.go create mode 100644 pkg/logs/patterns/token/token_test.go diff --git a/AGENTS.md b/AGENTS.md index 9f6fce30a3cb..5ae851e43fe3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,196 +1,33 @@ -# Datadog Agent - Project Overview for AI coding assistant - -## Project Summary -The Datadog Agent is a comprehensive monitoring and observability agent written primarily in Go. It collects metrics, traces, logs, and security events from systems and applications, forwarding them to the Datadog platform. This is the main repository for Agent versions 6 and 7. 
- -## Project Structure - -### Core Directories -- `/cmd/` - Entry points for various agent components - - `agent/` - Main agent binary - - `cluster-agent/` - Kubernetes cluster agent - - `dogstatsd/` - StatsD metrics daemon - - `trace-agent/` - APM trace collection agent - - `system-probe/` - System-level monitoring (eBPF) - - `security-agent/` - Security monitoring - - `process-agent/` - Process monitoring - -- `/pkg/` - Core Go packages and libraries - - `aggregator/` - Metrics aggregation - - `collector/` - Check scheduling and execution - - `config/` - Configuration management - - `logs/` - Log collection and processing - - `metrics/` - Metrics types and handling - - `network/` - Network monitoring - - `security/` - Security monitoring components - - `trace/` - APM tracing components - -- `/comp/` - Component-based architecture modules - - `core/` - Core components - - `metadata/` - Metadata collection - - `logs/` - Log components - - `trace/` - Trace components - -- `/tasks/` - Python invoke tasks for development - - Build, test, lint, and deployment automation - -- `/rtloader/` - Runtime loader for Python checks - -## Development Workflow - -### Common Commands - -#### Building - -```bash -# install dda on mac OS -brew install --cask dda - -# Install development tools -dda inv install-tools - -# Build the main agent -dda inv agent.build --build-exclude=systemd - -# Build specific components -dda inv dogstatsd.build -dda inv trace-agent.build -dda inv system-probe.build -``` - -#### Testing -```bash -# Run all tests -dda inv test - -# Test specific package -dda inv test --targets=./pkg/aggregator - -# Run Go linters -dda inv linter.go - -# Run all linters -dda inv linter.all -``` - -#### Running Locally -```bash -# Create dev config with testing API key -echo "api_key: 0000001" > dev/dist/datadog.yaml - -# Run the agent -./bin/agent/agent run -c bin/agent/dist/datadog.yaml -``` - -### Development Configuration -The development configuration file should be 
placed at `dev/dist/datadog.yaml`. After building, it gets copied to `bin/agent/dist/datadog.yaml`. - -## Key Components - -### Check System -- Checks are Python or Go modules that collect metrics -- Located in `cmd/agent/dist/checks/` -- Can be autodiscovered via Kubernetes annotations/labels - -### Configuration -- Main config: `datadog.yaml` -- Check configs: `conf.d/.d/conf.yaml` -- Supports environment variable overrides with `DD_` prefix - -## Testing Strategy - -### Unit Tests -- Go tests using standard `go test` -- Python tests using pytest -- Run with `dda inv test --targets=` - -### End-to-End Tests -- E2E framework in `test/new-e2e/` - -### Linting -- Go: golangci-lint via `dda inv linter.go` -- Python: various linters via `dda inv linter.python` -- YAML: yamllint -- Shell: shellcheck - -## Build System - -### Invoke Tasks -The project uses Python's Invoke framework with custom tasks. Main task categories: -- `agent.*` - Core agent tasks -- `test` - Testing tasks -- `linter.*` - Linting tasks -- `docker.*` - Docker image tasks -- `release.*` - Release management - -### Build Tags -Go build tags control feature inclusion, some examples are: -- `kubeapiserver` - Kubernetes API server support -- `containerd` - containerd support -- `docker` - Docker support -- `ebpf` - eBPF support -- `python` - Python check support -- and MANY more, refer to ./tasks/build_tags.py for a full reference. 
- -## Important Files - -### Configuration -- `datadog.yaml` - Main agent configuration -- `modules.yml` - Go module definitions -- `release.json` - Release version information -- `.gitlab-ci.yml` - CI/CD pipeline configuration - -### Documentation -- `/docs/` - Internal documentation -- `/docs/dev/` - Developer guides -- `README.md` - Project overview -- `CONTRIBUTING.md` - Contribution guidelines - -## CI/CD Pipeline - -### GitLab CI -- Primary CI system -- Defined in `.gitlab-ci.yml` and `.gitlab/` directory -- Runs tests, builds, and deployments - -### GitHub Actions -- Secondary CI for specific workflows -- Tests about the pull-request settings or repository configuration -- Release automation workflows - -## Security Considerations - -### Sensitive Data -- Never commit API keys or secrets -- Use secret backend for credentials - -## Module System -The project uses Go modules with multiple sub-modules. -TODO: Describe specific strategies for managing modules, including any invoke -tasks. - -## Platform Support -- **Linux**: Full support (amd64, arm64) -- **Windows**: Full support (Server 2016+, Windows 10+) -- **macOS**: Supported -- **AIX**: No support in this codebase -- **Container**: Docker, Kubernetes, ECS, containerd, and more - -## Best Practices - -1. **Always run linters before committing**: `dda inv linter.go` -2. **Always test your changes**: `dda inv test --targets=` -3. **Follow Go conventions**: Use gofmt, follow project structure -4. **Update documentation**: Keep docs in sync with code changes -6. 
**Check for security implications**: Review security-sensitive changes carefully - -## Troubleshooting Development Issues - -### Common Build Issues -- **Missing tools**: Run `dda inv install-tools` -- **CMake errors**: Remove `dda inv rtloader.clean` - -### Testing Issues -- **Flaky tests**: Check `flakes.yaml` for known issues -- **Coverage issues**: Use `--coverage` flag - +# Repository Guidelines + +## Project Structure & Module Organization +- `cmd/` hosts the binaries: `cmd/agent` for the core Agent, `cmd/cluster-agent`, `cmd/dogstatsd`, `cmd/trace-agent`, and eBPF tooling under `cmd/system-probe`. +- Shared Go packages live in `pkg/` (e.g., `pkg/aggregator`, `pkg/collector`, `pkg/config`), while componentized logic is in `comp/` for incremental adoption. +- Python invoke tasks reside in `tasks/`; docs and contributor references are under `docs/` and `docs/dev/`. +- Development configs live in `dev/dist/`; the main runtime config copies to `bin/agent/dist/datadog.yaml` after builds. + +## Build, Test, and Development Commands +- `dda inv install-tools` installs the Go, Python, and system tools required for local builds. +- `dda inv agent.build --build-exclude=systemd` produces the primary agent binary without systemd assets; swap in component-specific targets such as `dda inv dogstatsd.build` or `dda inv trace-agent.build` when iterating on those services. +- `dda inv test --targets=./pkg/aggregator` scopes unit tests to a package; omit `--targets` to exercise the full suite. +- `dda inv linter.go` runs `golangci-lint`; prefer `dda inv linter.all` before large merges to surface cross-language issues early. + +## Coding Style & Naming Conventions +- Format Go sources with `gofmt` (tabs for indentation, camelCase for identifiers) and rely on `golangci-lint` to enforce project rules. +- Python tooling in `tasks/` follows PEP 8; run `dda inv linter.python` if you touch those scripts. 
+- Favor descriptive package paths (`pkg/network/`, `comp/core/telemetry`) and snake_case filenames for YAML configs. + +## Testing Guidelines +- Go tests use the standard framework; function names must follow `TestXxx`. Table-driven subtests are preferred for coverage clarity. +- Python checks leverage `pytest`; mirror module names with `test_*.py` files. +- Investigate coverage gaps with `dda inv test --targets=<package> --coverage` and document notable exclusions in the PR. + +## Commit & Pull Request Guidelines +- Recent history shows conventional prefixes (`feat:`, `fix:`, `docs:`) and ticket tags (`[CXP-####]`); follow that pattern and keep subjects under 72 characters. +- Reference issues in the body, outline testing performed, and attach logs or screenshots when UI or observability output changes. +- Pull requests should describe scope, risks, and rollout considerations; note configuration updates so reviewers can flag downstream impacts. + +## Security & Configuration Tips +- Never commit secrets; use the secret backend or redacted fixtures for tests. +- Store experimental configuration under `dev/` and guard runtime features with the appropriate Go build tags (see `tasks/build_tags.py`). +- Review changes touching `system-probe` or `security-agent` with dedicated owners—these components ship kernel-space code and warrant extra scrutiny.
diff --git a/CLAUDE.md b/CLAUDE.md index 4e006f51f5fb..a41daf630e23 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,2 +1,3 @@ @AGENTS.md @CLAUDE_PERSONAL.md +@stateful_encoding_design.md \ No newline at end of file diff --git a/pkg/logs/patterns/automaton/rules.go b/pkg/logs/patterns/automaton/rules.go index de799c13e3c1..1183cd7e797c 100644 --- a/pkg/logs/patterns/automaton/rules.go +++ b/pkg/logs/patterns/automaton/rules.go @@ -98,6 +98,7 @@ func (rm *RuleManager) RemoveRule(name string) bool { } // ApplyRules applies terminal rules in priority order to classify a token +// Returns TokenWord if no rule matches (generic word fallback) func (rm *RuleManager) ApplyRules(value string) token.TokenType { for _, rule := range rm.rules { if rule.Pattern.MatchString(value) { @@ -288,8 +289,8 @@ func GetPredefinedRules() []*TerminalRule { TokenType: token.TokenDate, Priority: PriorityHigh, Category: "time", - Description: "Matches RFC3339 datetime format with nanosecond precision", - Examples: []string{"2006-01-02T15:04:05.999999999Z07:00", "2024-12-25T14:30:00.123456789+02:00"}, + Description: "Supplementary pattern from multiline handler for edge-case RFC3339 formats", + Examples: []string{"2024-12-25T14:30:00.123456789Z07:00"}, }, { Name: "StandardTimestamp", @@ -317,8 +318,8 @@ func GetPredefinedRules() []*TerminalRule { TokenType: token.TokenDate, Priority: PriorityMedium, Category: "time", - Description: "Matches RFC1123Z datetime format with numeric timezone", - Examples: []string{"Mon, 02 Jan 2006 15:04:05 -0700", "Wed, 25 Dec 2024 14:30:00 +0200"}, + Description: "Supplementary pattern from multiline handler for RFC1123Z edge cases", + Examples: []string{"Mon, 02 Jan 2006 15:04:05 -0700", "Wed, 25 Dec 2024 14:30:00 -0800"}, }, { Name: "RFC850DateTime", @@ -344,8 +345,8 @@ func GetPredefinedRules() []*TerminalRule { TokenType: token.TokenDate, Priority: PriorityMedium, Category: "time", - Description: "Matches RFC822Z datetime format with numeric timezone", - 
Examples: []string{"02 Jan 06 15:04 -0700", "25 Dec 24 14:30 +0200"}, + Description: "Supplementary pattern from multiline handler for RFC822Z edge cases", + Examples: []string{"02 Jan 06 15:04 -0700", "25 Dec 24 14:30 -0800"}, }, // Medium Priority - Unix/System Formats diff --git a/pkg/logs/patterns/automaton/rules_test.go b/pkg/logs/patterns/automaton/rules_test.go new file mode 100644 index 000000000000..9c318d424100 --- /dev/null +++ b/pkg/logs/patterns/automaton/rules_test.go @@ -0,0 +1,504 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package automaton + +import ( + "testing" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +func TestNewRuleManager(t *testing.T) { + rm := NewRuleManager() + + if rm.rules == nil { + t.Error("Expected rules slice to be initialized") + } + if rm.categories == nil { + t.Error("Expected categories map to be initialized") + } + if len(rm.rules) != 0 { + t.Errorf("Expected empty rules slice, got %d rules", len(rm.rules)) + } +} + +func TestRuleManager_AddRule(t *testing.T) { + rm := NewRuleManager() + + err := rm.AddRule( + "TestIPv4", + `^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`, + "network", + "Test IPv4 pattern", + token.TokenIPv4, + 100, + []string{"192.168.1.1", "10.0.0.1"}, + ) + + if err != nil { + t.Fatalf("Failed to add rule: %v", err) + } + + if len(rm.rules) != 1 { + t.Errorf("Expected 1 rule, got %d", len(rm.rules)) + } + + rule := rm.rules[0] + if rule.Name != "TestIPv4" { + t.Errorf("Expected rule name 'TestIPv4', got '%s'", rule.Name) + } + if rule.TokenType != token.TokenIPv4 { + t.Errorf("Expected token type TokenIPv4, got %v", rule.TokenType) + } + if rule.Priority != 100 { + t.Errorf("Expected priority 100, got %d", rule.Priority) + } + 
if rule.Category != "network" { + t.Errorf("Expected category 'network', got '%s'", rule.Category) + } +} + +func TestRuleManager_AddRule_InvalidPattern(t *testing.T) { + rm := NewRuleManager() + + err := rm.AddRule( + "BadRule", + `[invalid(regex`, + "test", + "Invalid regex", + token.TokenWord, + 50, + []string{}, + ) + + if err == nil { + t.Error("Expected error for invalid regex pattern") + } +} + +func TestRuleManager_AddRule_InvalidExample(t *testing.T) { + rm := NewRuleManager() + + err := rm.AddRule( + "TestRule", + `^\d+$`, + "test", + "Numeric pattern", + token.TokenNumeric, + 50, + []string{"123", "abc"}, // "abc" doesn't match ^\d+$ + ) + + if err == nil { + t.Error("Expected error for example that doesn't match pattern") + } +} + +func TestRuleManager_RemoveRule(t *testing.T) { + rm := NewRuleManager() + + // Add a rule first + rm.AddRule("TestRule", `^\d+$`, "test", "Test", token.TokenNumeric, 50, []string{"123"}) + + if len(rm.rules) != 1 { + t.Fatalf("Expected 1 rule before removal") + } + + // Remove the rule + removed := rm.RemoveRule("TestRule") + if !removed { + t.Error("Expected RemoveRule to return true") + } + + if len(rm.rules) != 0 { + t.Errorf("Expected 0 rules after removal, got %d", len(rm.rules)) + } + + // Try to remove non-existent rule + removed = rm.RemoveRule("NonExistent") + if removed { + t.Error("Expected RemoveRule to return false for non-existent rule") + } +} + +func TestRuleManager_GetRule(t *testing.T) { + rm := NewRuleManager() + + rm.AddRule("TestRule", `^\d+$`, "test", "Test", token.TokenNumeric, 50, []string{"123"}) + + rule := rm.GetRule("TestRule") + if rule == nil { + t.Fatal("Expected to find rule 'TestRule'") + } + if rule.Name != "TestRule" { + t.Errorf("Expected rule name 'TestRule', got '%s'", rule.Name) + } + + notFound := rm.GetRule("NonExistent") + if notFound != nil { + t.Error("Expected nil for non-existent rule") + } +} + +func TestRuleManager_PriorityOrdering(t *testing.T) { + rm := NewRuleManager() + + 
// Add rules in different priority order + rm.AddRule("Low", `low`, "test", "Low priority", token.TokenWord, 10, []string{"low"}) + rm.AddRule("High", `high`, "test", "High priority", token.TokenWord, 100, []string{"high"}) + rm.AddRule("Medium", `medium`, "test", "Medium priority", token.TokenWord, 50, []string{"medium"}) + + rules := rm.ListRules() + if len(rules) != 3 { + t.Fatalf("Expected 3 rules, got %d", len(rules)) + } + + // Should be ordered by priority (highest first) + expectedOrder := []string{"High", "Medium", "Low"} + expectedPriorities := []int{100, 50, 10} + + for i, rule := range rules { + if rule.Name != expectedOrder[i] { + t.Errorf("Rule %d: expected name '%s', got '%s'", i, expectedOrder[i], rule.Name) + } + if rule.Priority != expectedPriorities[i] { + t.Errorf("Rule %d: expected priority %d, got %d", i, expectedPriorities[i], rule.Priority) + } + } +} + +func TestRuleManager_ApplyRules(t *testing.T) { + rm := NewRuleManager() + + // Add rules with different priorities + rm.AddRule("IPv4", `^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`, + "network", "IPv4", token.TokenIPv4, 100, []string{"192.168.1.1"}) + rm.AddRule("Numeric", `^\d+$`, "numeric", "Numbers", token.TokenNumeric, 30, []string{"123"}) + + tests := []struct { + input string + expected token.TokenType + }{ + {"192.168.1.1", token.TokenIPv4}, // Higher priority rule should match + {"123", token.TokenNumeric}, + {"999.999.999.999", token.TokenWord}, // Invalid IPv4, no rule matches - generic word + {"abc", token.TokenWord}, // No rule matches - generic word + } + + for _, test := range tests { + result := rm.ApplyRules(test.input) + if result != test.expected { + t.Errorf("ApplyRules('%s'): expected %v, got %v", test.input, test.expected, result) + } + } +} + +func TestRuleManager_GetRulesByCategory(t *testing.T) { + rm := NewRuleManager() + + rm.AddRule("IPv4", `ipv4`, "network", "IPv4", token.TokenIPv4, 100, []string{"ipv4"}) + 
rm.AddRule("Email", `email`, "network", "Email", token.TokenEmail, 90, []string{"email"}) + rm.AddRule("Numeric", `num`, "numeric", "Number", token.TokenNumeric, 50, []string{"num"}) + + networkRules := rm.GetRulesByCategory("network") + if len(networkRules) != 2 { + t.Errorf("Expected 2 network rules, got %d", len(networkRules)) + } + + numericRules := rm.GetRulesByCategory("numeric") + if len(numericRules) != 1 { + t.Errorf("Expected 1 numeric rule, got %d", len(numericRules)) + } + + emptyRules := rm.GetRulesByCategory("nonexistent") + if len(emptyRules) != 0 { + t.Errorf("Expected 0 rules for nonexistent category, got %d", len(emptyRules)) + } +} + +func TestRuleManager_GetCategories(t *testing.T) { + rm := NewRuleManager() + + rm.AddRule("Rule1", `r1`, "network", "Rule 1", token.TokenWord, 50, []string{"r1"}) + rm.AddRule("Rule2", `r2`, "time", "Rule 2", token.TokenWord, 50, []string{"r2"}) + rm.AddRule("Rule3", `r3`, "network", "Rule 3", token.TokenWord, 50, []string{"r3"}) + + categories := rm.GetCategories() + if len(categories) != 2 { + t.Errorf("Expected 2 categories, got %d", len(categories)) + } + + // Categories should be sorted + expectedCategories := []string{"network", "time"} + for i, expected := range expectedCategories { + if i >= len(categories) || categories[i] != expected { + t.Errorf("Expected category %d to be '%s', got '%s'", i, expected, categories[i]) + } + } +} + +func TestRuleManager_GetRuleStats(t *testing.T) { + rm := NewRuleManager() + + rm.AddRule("IPv4", `ipv4`, "network", "IPv4", token.TokenIPv4, 100, []string{"ipv4"}) + rm.AddRule("Email", `email`, "network", "Email", token.TokenEmail, 90, []string{"email"}) + rm.AddRule("Numeric", `num`, "numeric", "Number", token.TokenNumeric, 50, []string{"num"}) + + stats := rm.GetRuleStats() + + if stats.TotalRules != 3 { + t.Errorf("Expected TotalRules=3, got %d", stats.TotalRules) + } + if stats.Categories != 2 { + t.Errorf("Expected Categories=2, got %d", stats.Categories) + } + if 
stats.ByCategory["network"] != 2 { + t.Errorf("Expected 2 network rules, got %d", stats.ByCategory["network"]) + } + if stats.ByCategory["numeric"] != 1 { + t.Errorf("Expected 1 numeric rule, got %d", stats.ByCategory["numeric"]) + } + if stats.ByTokenType[token.TokenIPv4] != 1 { + t.Errorf("Expected 1 IPv4 token rule, got %d", stats.ByTokenType[token.TokenIPv4]) + } +} + +func TestGetPredefinedRules(t *testing.T) { + rules := GetPredefinedRules() + + if len(rules) == 0 { + t.Error("Expected predefined rules to be non-empty") + } + + // Check that we have the expected rule types + foundRules := make(map[string]bool) + for _, rule := range rules { + foundRules[rule.Name] = true + + // Validate rule structure + if rule.Pattern == nil { + t.Errorf("Rule '%s' has nil pattern", rule.Name) + } + if rule.Name == "" { + t.Error("Found rule with empty name") + } + if rule.Category == "" { + t.Errorf("Rule '%s' has empty category", rule.Name) + } + if len(rule.Examples) == 0 { + t.Errorf("Rule '%s' has no examples", rule.Name) + } + + // Test examples against pattern + for _, example := range rule.Examples { + if !rule.Pattern.MatchString(example) { + t.Errorf("Rule '%s': example '%s' doesn't match pattern", rule.Name, example) + } + } + } + + expectedRules := []string{"IPv4Address", "EmailAddress", "URI", "HTTPStatus", "Numeric"} + for _, expected := range expectedRules { + if !foundRules[expected] { + t.Errorf("Expected predefined rule '%s' not found", expected) + } + } +} + +func TestRuleManager_LoadPredefinedRules(t *testing.T) { + rm := NewRuleManager() + + err := rm.LoadPredefinedRules() + if err != nil { + t.Fatalf("Failed to load predefined rules: %v", err) + } + + rules := rm.ListRules() + if len(rules) == 0 { + t.Error("Expected predefined rules to be loaded") + } + + // Verify some key rules exist + ipv4Rule := rm.GetRule("IPv4Address") + if ipv4Rule == nil { + t.Error("Expected IPv4Address rule to be loaded") + } + + emailRule := rm.GetRule("EmailAddress") + if 
emailRule == nil { + t.Error("Expected EmailAddress rule to be loaded") + } + + // Test that rules are working + result := rm.ApplyRules("192.168.1.1") + if result != token.TokenIPv4 { + t.Errorf("Expected IPv4 token for '192.168.1.1', got %v", result) + } + + result = rm.ApplyRules("test@example.com") + if result != token.TokenEmail { + t.Errorf("Expected Email token for 'test@example.com', got %v", result) + } +} + +// Test the priority management functions +func TestRuleManager_GetRuleByPriority(t *testing.T) { + rm := NewRuleManager() + + rm.AddRule("High1", `high1`, "test", "High 1", token.TokenWord, 100, []string{"high1"}) + rm.AddRule("High2", `high2`, "test", "High 2", token.TokenWord, 100, []string{"high2"}) + rm.AddRule("Medium", `medium`, "test", "Medium", token.TokenWord, 50, []string{"medium"}) + + highRules := rm.GetRuleByPriority(100) + if len(highRules) != 2 { + t.Errorf("Expected 2 rules with priority 100, got %d", len(highRules)) + } + + mediumRules := rm.GetRuleByPriority(50) + if len(mediumRules) != 1 { + t.Errorf("Expected 1 rule with priority 50, got %d", len(mediumRules)) + } + + noRules := rm.GetRuleByPriority(999) + if len(noRules) != 0 { + t.Errorf("Expected 0 rules with priority 999, got %d", len(noRules)) + } +} + +func TestRuleManager_GetHighestPriorityRules(t *testing.T) { + rm := NewRuleManager() + + // Empty rule manager + highRules := rm.GetHighestPriorityRules() + if len(highRules) != 0 { + t.Errorf("Expected 0 highest priority rules for empty manager, got %d", len(highRules)) + } + + rm.AddRule("High1", `high1`, "test", "High 1", token.TokenWord, 100, []string{"high1"}) + rm.AddRule("High2", `high2`, "test", "High 2", token.TokenWord, 100, []string{"high2"}) + rm.AddRule("Medium", `medium`, "test", "Medium", token.TokenWord, 50, []string{"medium"}) + + highRules = rm.GetHighestPriorityRules() + if len(highRules) != 2 { + t.Errorf("Expected 2 highest priority rules, got %d", len(highRules)) + } + + for _, rule := range highRules { 
+ if rule.Priority != 100 { + t.Errorf("Expected priority 100, got %d", rule.Priority) + } + } +} + +func TestRuleManager_UpdateRulePriority(t *testing.T) { + rm := NewRuleManager() + + rm.AddRule("TestRule", `test`, "test", "Test", token.TokenWord, 50, []string{"test"}) + + err := rm.UpdateRulePriority("TestRule", 100) + if err != nil { + t.Fatalf("Failed to update rule priority: %v", err) + } + + rule := rm.GetRule("TestRule") + if rule == nil { + t.Fatal("Rule not found after priority update") + } + if rule.Priority != 100 { + t.Errorf("Expected priority 100, got %d", rule.Priority) + } + + // Test updating non-existent rule + err = rm.UpdateRulePriority("NonExistent", 200) + if err == nil { + t.Error("Expected error when updating non-existent rule") + } +} + +func TestRuleManager_CategoryDescription(t *testing.T) { + rm := NewRuleManager() + + // Test empty description + desc := rm.GetCategoryDescription("network") + if desc != "" { + t.Errorf("Expected empty description for non-existent category, got '%s'", desc) + } + + // Set category description + rm.SetCategoryDescription("network", "Network-related rules") + desc = rm.GetCategoryDescription("network") + if desc != "Network-related rules" { + t.Errorf("Expected 'Network-related rules', got '%s'", desc) + } + + // Add a rule to existing category and check description is preserved + rm.AddRule("IPv4", `ipv4`, "network", "IPv4", token.TokenIPv4, 100, []string{"ipv4"}) + desc = rm.GetCategoryDescription("network") + if desc != "Network-related rules" { + t.Errorf("Expected description to be preserved, got '%s'", desc) + } + + // Update existing category description + rm.SetCategoryDescription("network", "Updated network description") + desc = rm.GetCategoryDescription("network") + if desc != "Updated network description" { + t.Errorf("Expected 'Updated network description', got '%s'", desc) + } +} + +// Test global functions that provide external access to terminal rules +func TestGlobalTerminalRuleFunctions(t 
*testing.T) { + // Test GetTerminalRules + rules := GetTerminalRules() + if len(rules) == 0 { + t.Error("Expected GetTerminalRules to return non-empty list") + } + + // Test GetRulesByCategory + networkRules := GetRulesByCategory("network") + if len(networkRules) == 0 { + t.Error("Expected GetRulesByCategory('network') to return rules") + } + + // Test GetRuleCategories + categories := GetRuleCategories() + if len(categories) == 0 { + t.Error("Expected GetRuleCategories to return non-empty list") + } + + // Test AddTerminalRule + err := AddTerminalRule( + "TestGlobalRule", + `^test$`, + "test", + "Global test rule", + token.TokenWord, + 25, + []string{"test"}, + ) + if err != nil { + t.Errorf("Failed to add terminal rule: %v", err) + } + + // Verify the rule was added + allRules := GetTerminalRules() + found := false + for _, rule := range allRules { + if rule.Name == "TestGlobalRule" { + found = true + break + } + } + if !found { + t.Error("TestGlobalRule not found after adding") + } + + // Test GetRuleStats + stats := GetRuleStats() + if stats.TotalRules == 0 { + t.Error("Expected GetRuleStats to return non-zero total rules") + } +} diff --git a/pkg/logs/patterns/automaton/tokenizer.go b/pkg/logs/patterns/automaton/tokenizer.go index f99c56f19688..84eae7ae8ff9 100644 --- a/pkg/logs/patterns/automaton/tokenizer.go +++ b/pkg/logs/patterns/automaton/tokenizer.go @@ -8,7 +8,6 @@ package automaton import ( - "regexp" "unicode" "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" @@ -55,7 +54,7 @@ func (t *Tokenizer) Tokenize() *token.TokenList { } } - t.flushBuffer() + t.handleLastToken() t.classifyTokens() return token.NewTokenListWithTokens(t.tokens) @@ -69,18 +68,26 @@ func (t *Tokenizer) classifyTokens() { continue } + // Skip classification for punctuation (already marked as NotWildcard in createSpecialToken) + if tok.Wildcard == token.NotWildcard { + continue + } + classifiedType := t.classifyToken(tok.Value) - if classifiedType == token.TokenUnknown { + + 
// If classification returns TokenWord or TokenUnknown, keep current state + // TokenWord = "generic word, no specific classification" + // TokenUnknown = "should not happen, but keep current state" + if classifiedType == token.TokenWord || classifiedType == token.TokenUnknown { continue } - // Update token type + // Update token type to the more specific classification t.tokens[i].Type = classifiedType - // Parse date components for date tokens - if classifiedType == token.TokenDate { - t.tokens[i].DateInfo = parseDateComponents(tok.Value) - } + // Set wildcard potential based on classified type + t.tokens[i].Wildcard = getWildcardPotential(classifiedType) + } } @@ -186,209 +193,23 @@ func (t *Tokenizer) classifyToken(value string) token.TokenType { return globalTrie.Match(value) } -// parseDateComponents extracts structural information from date strings -// Uses the same comprehensive patterns as the multiline aggregation package -func parseDateComponents(dateStr string) *token.DateComponents { - // Comprehensive date patterns from multiline aggregation package - patterns := []struct { - regex *regexp.Regexp - format string - parser func([]string) *token.DateComponents - }{ - // RFC3339: 2006-01-02T15:04:05Z07:00 - { - regexp.MustCompile(`^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(\.\d+)?(Z|[\+\-]\d{2}:?\d{2})?`), - "RFC3339", - func(matches []string) *token.DateComponents { - return &token.DateComponents{ - Year: matches[1], Month: matches[2], Day: matches[3], - Hour: matches[4], Minute: matches[5], Second: matches[6], - Format: "RFC3339", - } - }, - }, - // Standard timestamp: 2021-07-08 05:08:19,214 - { - regexp.MustCompile(`^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})(,\d+)?`), - "YYYY-MM-DD HH:mm:ss", - func(matches []string) *token.DateComponents { - return &token.DateComponents{ - Year: matches[1], Month: matches[2], Day: matches[3], - Hour: matches[4], Minute: matches[5], Second: matches[6], - Format: "YYYY-MM-DD HH:mm:ss", - } - }, - }, - // 
Date only: 2021-01-31 (with strict month/day validation) - { - regexp.MustCompile(`^(\d{4})-(1[012]|0?[1-9])-([12][0-9]|3[01]|0?[1-9])`), - "YYYY-MM-DD", - func(matches []string) *token.DateComponents { - return &token.DateComponents{ - Year: matches[1], Month: matches[2], Day: matches[3], - Format: "YYYY-MM-DD", - } - }, - }, - // Slash format: 2023/02/20 14:33:24 - { - regexp.MustCompile(`^(\d{4})/(\d{2})/(\d{2}) (\d{2}):(\d{2}):(\d{2})`), - "YYYY/MM/DD HH:mm:ss", - func(matches []string) *token.DateComponents { - return &token.DateComponents{ - Year: matches[1], Month: matches[2], Day: matches[3], - Hour: matches[4], Minute: matches[5], Second: matches[6], - Format: "YYYY/MM/DD HH:mm:ss", - } - }, - }, - // Java SimpleFormatter: January 31, 2021 2:30:45 PM - { - regexp.MustCompile(`^([A-Za-z_]+) (\d+), (\d{4}) (\d+):(\d+):(\d+) (AM|PM)`), - "Month DD, YYYY HH:mm:ss AM/PM", - func(matches []string) *token.DateComponents { - return &token.DateComponents{ - Month: matches[1], Day: matches[2], Year: matches[3], - Hour: matches[4], Minute: matches[5], Second: matches[6], - Format: "Month DD, YYYY HH:mm:ss AM/PM", - } - }, - }, - // ANSIC: Mon Jan _2 15:04:05 2006 - { - regexp.MustCompile(`^([A-Za-z_]+) ([A-Za-z_]+) +(\d+) (\d+):(\d+):(\d+) (\d+)`), - "ANSIC", - func(matches []string) *token.DateComponents { - return &token.DateComponents{ - Month: matches[2], Day: matches[3], Year: matches[7], - Hour: matches[4], Minute: matches[5], Second: matches[6], - Format: "ANSIC", - } - }, - }, - // UnixDate: Mon Jan _2 15:04:05 MST 2006 - { - regexp.MustCompile(`^([A-Za-z_]+) ([A-Za-z_]+) +(\d+) (\d+):(\d+):(\d+)( [A-Za-z_]+ (\d+))?`), - "UnixDate", - func(matches []string) *token.DateComponents { - year := matches[7] - if year == "" && len(matches) > 8 { - year = matches[8] - } - return &token.DateComponents{ - Month: matches[2], Day: matches[3], Year: year, - Hour: matches[4], Minute: matches[5], Second: matches[6], - Format: "UnixDate", - } - }, - }, - // RubyDate: Mon Jan 
02 15:04:05 -0700 2006 - { - regexp.MustCompile(`^([A-Za-z_]+) ([A-Za-z_]+) (\d+) (\d+):(\d+):(\d+) ([\-\+]\d+) (\d+)`), - "RubyDate", - func(matches []string) *token.DateComponents { - return &token.DateComponents{ - Month: matches[2], Day: matches[3], Year: matches[8], - Hour: matches[4], Minute: matches[5], Second: matches[6], - Format: "RubyDate", - } - }, - }, - // RFC822: 02 Jan 06 15:04 MST - { - regexp.MustCompile(`^(\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+) ([A-Za-z_]+)`), - "RFC822", - func(matches []string) *token.DateComponents { - return &token.DateComponents{ - Day: matches[1], Month: matches[2], Year: matches[3], - Hour: matches[4], Minute: matches[5], - Format: "RFC822", - } - }, - }, - // RFC822Z: 02 Jan 06 15:04 -0700 - { - regexp.MustCompile(`^(\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+) (-\d+)`), - "RFC822Z", - func(matches []string) *token.DateComponents { - return &token.DateComponents{ - Day: matches[1], Month: matches[2], Year: matches[3], - Hour: matches[4], Minute: matches[5], - Format: "RFC822Z", - } - }, - }, - // RFC850: Monday, 02-Jan-06 15:04:05 MST - { - regexp.MustCompile(`^([A-Za-z_]+), (\d+)-([A-Za-z_]+)-(\d+) (\d+):(\d+):(\d+) ([A-Za-z_]+)`), - "RFC850", - func(matches []string) *token.DateComponents { - return &token.DateComponents{ - Day: matches[2], Month: matches[3], Year: matches[4], - Hour: matches[5], Minute: matches[6], Second: matches[7], - Format: "RFC850", - } - }, - }, - // RFC1123: Mon, 02 Jan 2006 15:04:05 MST - { - regexp.MustCompile(`^([A-Za-z_]+), (\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+):(\d+) ([A-Za-z_]+)`), - "RFC1123", - func(matches []string) *token.DateComponents { - return &token.DateComponents{ - Day: matches[2], Month: matches[3], Year: matches[4], - Hour: matches[5], Minute: matches[6], Second: matches[7], - Format: "RFC1123", - } - }, - }, - // RFC1123Z: Mon, 02 Jan 2006 15:04:05 -0700 - { - regexp.MustCompile(`^([A-Za-z_]+), (\d+) ([A-Za-z_]+) (\d+) (\d+):(\d+):(\d+) (-\d+)`), - "RFC1123Z", - func(matches []string) 
*token.DateComponents { - return &token.DateComponents{ - Day: matches[2], Month: matches[3], Year: matches[4], - Hour: matches[5], Minute: matches[6], Second: matches[7], - Format: "RFC1123Z", - } - }, - }, - // RFC3339Nano: 2006-01-02T15:04:05.999999999Z07:00 - { - regexp.MustCompile(`^(\d+)-(\d+)-(\d+)([A-Za-z_]+)(\d+):(\d+):(\d+)\.(\d+)([A-Za-z_]+)(\d+):(\d+)`), - "RFC3339Nano", - func(matches []string) *token.DateComponents { - return &token.DateComponents{ - Year: matches[1], Month: matches[2], Day: matches[3], - Hour: matches[5], Minute: matches[6], Second: matches[7], - Format: "RFC3339Nano", - } - }, - }, +// getWildcardPotential determines if a token type can potentially become a wildcard +// Returns either NotWildcard (0%) or PotentialWildcard (50%) +// Note: IsWildcard (100%) is only set during pattern merging, never during tokenization +func getWildcardPotential(tokenType token.TokenType) token.WildcardStatus { + // Only whitespace cannot become a wildcard + if tokenType == token.TokenWhitespace { + return token.NotWildcard } - for _, pattern := range patterns { - if matches := pattern.regex.FindStringSubmatch(dateStr); matches != nil { - return pattern.parser(matches) - } - } - - return nil // Couldn't parse -} - -// hasNumericPattern checks if a word contains numbers -func hasNumericPattern(word string) bool { - return regexp.MustCompile(`\d`).MatchString(word) -} - -// shouldSetPossiblyWildcard determines if a token should have the possiblyWildcard flag -// Words with numeric patterns (user123, admin456) can be wildcarded during merging -func shouldSetPossiblyWildcard(tokenType token.TokenType, value string) bool { - return tokenType == token.TokenWord && hasNumericPattern(value) + // Everything else can potentially become wildcards + // Dates wildcard if they have the same format (both TokenDate means same structure) + return token.PotentialWildcard } +// ================================================ // Helper functions +// 
================================================ // isURLScheme checks if current buffer looks like a URL scheme func (t *Tokenizer) isURLScheme() bool { @@ -414,52 +235,55 @@ func (t *Tokenizer) bufferToString() string { return string(t.buffer) } -func (t *Tokenizer) flushBuffer() { +func (t *Tokenizer) handleLastToken() { if len(t.buffer) > 0 { - // Create remaining content as word token - t.createWordToken() + // Create token from remaining buffer content based on current state + switch t.state { + case StateNumeric: + t.createNumericToken() + case StateWhitespace: + t.createWhitespaceToken() + case StateSpecial: + t.createSpecialToken() + default: + t.createWordToken() + } } } +// ================================================ // Token creation methods +// ================================================ func (t *Tokenizer) createWordToken() { value := t.bufferToString() - tokenType := t.classifyToken(value) - - tok := token.NewTokenWithFlags(tokenType, value, false, shouldSetPossiblyWildcard(tokenType, value)) + // Create as basic Word type - classification happens later in classifyTokens() + tok := token.NewToken(token.TokenWord, value, token.PotentialWildcard) t.tokens = append(t.tokens, tok) t.clearBuffer() } func (t *Tokenizer) createNumericToken() { value := t.bufferToString() - t.tokens = append(t.tokens, token.Token{ - Type: token.TokenNumeric, - Value: value, - PossiblyWildcard: true, // Numeric tokens can be merged (25 vs 62 → *) - }) + // Numeric tokens are potential wildcards - will be classified later + tok := token.NewToken(token.TokenNumeric, value, token.PotentialWildcard) + t.tokens = append(t.tokens, tok) t.clearBuffer() } func (t *Tokenizer) createWhitespaceToken() { value := t.bufferToString() - t.tokens = append(t.tokens, token.Token{ - Type: token.TokenWhitespace, - Value: value, - PossiblyWildcard: false, // Whitespace tokens are not mergeable - }) + // Whitespace never becomes wildcard + tok := token.NewToken(token.TokenWhitespace, 
value, token.NotWildcard) + t.tokens = append(t.tokens, tok) t.clearBuffer() } func (t *Tokenizer) createSpecialToken() { value := t.bufferToString() - tokenType := t.classifyToken(value) - - t.tokens = append(t.tokens, token.Token{ - Type: tokenType, - Value: value, - PossiblyWildcard: shouldSetPossiblyWildcard(tokenType, value), - }) + // Special characters (punctuation, symbols) should not wildcard - only merge if identical + // Examples: ":", "[", "@" - structural markers that must stay consistent + tok := token.NewToken(token.TokenWord, value, token.NotWildcard) + t.tokens = append(t.tokens, tok) t.clearBuffer() } diff --git a/pkg/logs/patterns/automaton/trie_test.go b/pkg/logs/patterns/automaton/trie_test.go new file mode 100644 index 000000000000..1bcbf3dcf2cd --- /dev/null +++ b/pkg/logs/patterns/automaton/trie_test.go @@ -0,0 +1,249 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +package automaton + +import ( + "testing" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +func TestGlobalTrie_ExactMatch(t *testing.T) { + tests := []struct { + input string + expected token.TokenType + }{ + {"GET", token.TokenHttpMethod}, + {"POST", token.TokenHttpMethod}, + {"ERROR", token.TokenSeverityLevel}, + {"INFO", token.TokenSeverityLevel}, + {"debug", token.TokenSeverityLevel}, // lowercase + {" ", token.TokenWhitespace}, + {"\t", token.TokenWhitespace}, + {"unknown", token.TokenWord}, // no rule matches - generic word + } + + for _, test := range tests { + result := globalTrie.Match(test.input) + if result != test.expected { + t.Errorf("globalTrie.Match('%s'): expected %v, got %v", + test.input, test.expected, result) + } + } +} + +func TestGlobalTrie_TerminalRules(t *testing.T) { + tests := []struct { + input string + expected token.TokenType + }{ + {"200", token.TokenHttpStatus}, + {"404", token.TokenHttpStatus}, + {"500", token.TokenHttpStatus}, + {"192.168.1.1", token.TokenIPv4}, + {"10.0.0.1", token.TokenIPv4}, + {"test@example.com", token.TokenEmail}, + {"user@domain.org", token.TokenEmail}, + {"/api/users", token.TokenAbsolutePath}, + {"/var/log/app.log", token.TokenAbsolutePath}, + {"2023-12-25", token.TokenDate}, + {"2023-12-25T14:30:00", token.TokenDate}, + {"1234", token.TokenNumeric}, // 4 digits won't match HTTP status + {"0", token.TokenNumeric}, + {"https://example.com", token.TokenURI}, + {"http://api.domain.com/path", token.TokenURI}, + } + + for _, test := range tests { + result := globalTrie.Match(test.input) + if result != test.expected { + t.Errorf("globalTrie.Match('%s'): expected %v, got %v", + test.input, test.expected, result) + } + } +} + +func TestTrieStats(t *testing.T) { + stats := globalTrie.GetStats() + + if stats.ExactPatterns == 0 { + t.Error("Expected some exact patterns in trie") + } + if stats.TerminalRules == 0 { + t.Error("Expected some terminal rules") + } + if stats.TrieNodes == 0 { + 
t.Error("Expected some trie nodes") + } + + t.Logf("Trie Stats: %d exact patterns, %d terminal rules, %d nodes, max depth %d", + stats.ExactPatterns, stats.TerminalRules, stats.TrieNodes, stats.MaxDepth) +} + +func TestTrie_AddExactPattern(t *testing.T) { + // Create a new trie for testing + testTrie := NewTrie() + + // Add a custom pattern + testTrie.AddExactPattern("CUSTOM", token.TokenWord) + + // Test that it matches + result := testTrie.Match("CUSTOM") + if result != token.TokenWord { + t.Errorf("Expected TokenWord for 'CUSTOM', got %v", result) + } + + // Test that unknown patterns fall back to TokenWord (generic word) + result = testTrie.Match("unknown") + if result != token.TokenWord { + t.Errorf("Expected TokenWord for 'unknown', got %v", result) + } +} + +func TestTrie_AddTerminalRule(t *testing.T) { + // Test adding terminal rule to global rule manager instead + err := AddTerminalRule( + "TestRule", + `^TEST\d+$`, + "test", + "Test rule for testing", + token.TokenNumeric, + PriorityHigh, // Higher priority than existing rules + []string{"TEST123"}, + ) + if err != nil { + t.Fatalf("Failed to add terminal rule: %v", err) + } + + // Test that it matches using global trie + result := globalTrie.Match("TEST123") + if result != token.TokenNumeric { + t.Errorf("Expected TokenNumeric for 'TEST123', got %v", result) + } + + // Test that non-matching patterns don't match + result = globalTrie.Match("TESTXYZ") + if result == token.TokenNumeric { + t.Error("Should not match non-numeric pattern") + } + + // Clean up - remove the test rule + globalRuleManager.RemoveRule("TestRule") +} + +func TestTrie_InvalidTerminalRule(t *testing.T) { + // Try to add invalid regex to global rule manager + err := AddTerminalRule( + "InvalidRule", + `[invalid(regex`, + "test", + "Invalid rule", + token.TokenWord, + PriorityMedium, + []string{}, + ) + if err == nil { + t.Error("Expected error for invalid regex pattern") + } +} + +func TestTrie_ExactMatchPriority(t *testing.T) { + 
testTrie := NewTrie() + + // Add exact pattern + testTrie.AddExactPattern("TEST", token.TokenWord) + + // Add terminal rule that would also match + testTrie.AddTerminalRule(`^TEST$`, token.TokenNumeric, PriorityHigh) + + // Exact match should take priority + result := testTrie.Match("TEST") + if result != token.TokenWord { + t.Errorf("Exact match should take priority, expected TokenWord, got %v", result) + } +} + +func TestTrie_EmptyInput(t *testing.T) { + result := globalTrie.Match("") + if result != token.TokenUnknown { + t.Errorf("Empty input should return TokenUnknown, got %v", result) + } +} + +func TestValidationFunctions(t *testing.T) { + // Test IPv4 validation + validIPv4 := []string{"192.168.1.1", "10.0.0.1", "255.255.255.255", "0.0.0.0"} + invalidIPv4 := []string{"256.1.1.1", "192.168.1", "192.168.1.1.1", "abc.def.ghi.jkl"} + + for _, ip := range validIPv4 { + if !validateIPv4(ip) { + t.Errorf("validateIPv4('%s') should return true", ip) + } + } + + for _, ip := range invalidIPv4 { + if validateIPv4(ip) { + t.Errorf("validateIPv4('%s') should return false", ip) + } + } + + // Test email validation + validEmails := []string{"test@example.com", "user@domain.org", "admin@company.co.uk"} + invalidEmails := []string{"invalid", "test@", "@domain.com", "test@@domain.com"} + + for _, email := range validEmails { + if !validateEmail(email) { + t.Errorf("validateEmail('%s') should return true", email) + } + } + + for _, email := range invalidEmails { + if validateEmail(email) { + t.Errorf("validateEmail('%s') should return false", email) + } + } + + // Test date validation + validDates := []string{"2023-12-25", "2023-12-25T14:30:00", "12/25/2023", "2023-12-25T14:30:00.123Z"} + invalidDates := []string{"invalid", "123", "abc", ""} + + for _, date := range validDates { + if !validateDate(date) { + t.Errorf("validateDate('%s') should return true", date) + } + } + + for _, date := range invalidDates { + if validateDate(date) { + t.Errorf("validateDate('%s') should 
return false", date) + } + } +} + +func TestTrieNodeStructure(t *testing.T) { + testTrie := NewTrie() + testTrie.AddExactPattern("ABC", token.TokenWord) + + // Verify trie structure + stats := testTrie.GetStats() + if stats.TrieNodes < 4 { // root + A + B + C + t.Errorf("Expected at least 4 trie nodes, got %d", stats.TrieNodes) + } + if stats.ExactPatterns < 1 { + t.Errorf("Expected at least 1 exact pattern, got %d", stats.ExactPatterns) + } +} + +func TestTrieDepthCalculation(t *testing.T) { + testTrie := NewTrie() + testTrie.AddExactPattern("A", token.TokenWord) + testTrie.AddExactPattern("ABCDEFGHIJ", token.TokenWord) // 10 chars deep + + stats := testTrie.GetStats() + if stats.MaxDepth < 10 { + t.Errorf("Expected max depth >= 10, got %d", stats.MaxDepth) + } +} diff --git a/pkg/logs/patterns/clustering/cluster.go b/pkg/logs/patterns/clustering/cluster.go index f49ce1cbd7c3..0e5473a9d591 100644 --- a/pkg/logs/patterns/clustering/cluster.go +++ b/pkg/logs/patterns/clustering/cluster.go @@ -129,7 +129,7 @@ func (c *Cluster) GeneratePattern() *token.TokenList { for i := 0; i < pattern.Length(); i++ { tok := pattern.Tokens[i] - if tok.IsWildcard { + if tok.Wildcard == token.IsWildcard { c.WildcardMap[i] = true // Special handling for path wildcards @@ -201,7 +201,12 @@ func (c *Cluster) GetPatternString() string { var parts []string for _, tok := range c.Pattern.Tokens { - parts = append(parts, tok.Value) + // Use "*" for wildcard positions, actual value otherwise + if tok.Wildcard == token.IsWildcard { + parts = append(parts, "*") + } else { + parts = append(parts, tok.Value) + } } return strings.Join(parts, "") } diff --git a/pkg/logs/patterns/clustering/cluster_test.go b/pkg/logs/patterns/clustering/cluster_test.go new file mode 100644 index 000000000000..f6152bc46067 --- /dev/null +++ b/pkg/logs/patterns/clustering/cluster_test.go @@ -0,0 +1,256 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License 
Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package clustering + +import ( + "testing" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +func TestCluster_NewCluster(t *testing.T) { + // Create a simple TokenList + tokens := []token.Token{ + {Value: "GET", Type: token.TokenHttpMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api", Type: token.TokenAbsolutePath}, + } + tokenList := token.NewTokenListWithTokens(tokens) + signature := token.NewSignature(tokenList) + + cluster := NewCluster(signature, tokenList) + + if cluster.Size() != 1 { + t.Errorf("Expected cluster size 1, got %d", cluster.Size()) + } + + if !cluster.Signature.Equals(signature) { + t.Error("Cluster signature doesn't match expected signature") + } + + if cluster.Pattern != nil { + t.Error("Pattern should be nil initially (computed lazily)") + } +} + +func TestCluster_Add(t *testing.T) { + // Create first TokenList + tokens1 := []token.Token{ + {Value: "GET", Type: token.TokenHttpMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api", Type: token.TokenAbsolutePath}, + } + tokenList1 := token.NewTokenListWithTokens(tokens1) + signature1 := token.NewSignature(tokenList1) + + cluster := NewCluster(signature1, tokenList1) + + // Create second TokenList with same signature but different values + tokens2 := []token.Token{ + {Value: "POST", Type: token.TokenHttpMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/users", Type: token.TokenAbsolutePath}, + } + tokenList2 := token.NewTokenListWithTokens(tokens2) + + // Should add successfully (same signature) + if !cluster.Add(tokenList2) { + t.Error("Failed to add TokenList with matching signature") + } + + if cluster.Size() != 2 { + t.Errorf("Expected cluster size 2, got %d", cluster.Size()) + } + + // Create third TokenList with different signature + tokens3 := []token.Token{ + {Value: "ERROR", 
Type: token.TokenSeverityLevel}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "failed", Type: token.TokenWord}, + } + tokenList3 := token.NewTokenListWithTokens(tokens3) + + // Should fail to add (different signature) + if cluster.Add(tokenList3) { + t.Error("Should not add TokenList with different signature") + } + + if cluster.Size() != 2 { + t.Errorf("Expected cluster size to remain 2, got %d", cluster.Size()) + } +} + +func TestCluster_GeneratePattern_NoWildcards(t *testing.T) { + // Create cluster with identical TokenLists + tokens := []token.Token{ + {Value: "GET", Type: token.TokenHttpMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api", Type: token.TokenAbsolutePath}, + } + tokenList1 := token.NewTokenListWithTokens(tokens) + tokenList2 := token.NewTokenListWithTokens(tokens) // Identical + + cluster := NewCluster(token.NewSignature(tokenList1), tokenList1) + cluster.Add(tokenList2) + + pattern := cluster.GeneratePattern() + + if pattern == nil { + t.Fatal("Pattern should not be nil") + } + + // Should have no wildcards since all values are identical + if cluster.HasWildcards() { + t.Error("Should not have wildcards for identical TokenLists") + } + + // Pattern should match original tokens + if pattern.Length() != 3 { + t.Errorf("Expected pattern length 3, got %d", pattern.Length()) + } + + if pattern.Tokens[0].Value != "GET" { + t.Errorf("Expected first token 'GET', got '%s'", pattern.Tokens[0].Value) + } +} + +func TestCluster_GeneratePattern_WithWildcards(t *testing.T) { + // Create cluster with different values at some positions + tokens1 := []token.Token{ + {Value: "GET", Type: token.TokenHttpMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api", Type: token.TokenAbsolutePath}, + } + tokens2 := []token.Token{ + {Value: "POST", Type: token.TokenHttpMethod}, // Different value + {Value: " ", Type: token.TokenWhitespace}, // Same value + {Value: "/users", Type: token.TokenAbsolutePath}, // Different value + } + 
+ tokenList1 := token.NewTokenListWithTokens(tokens1) + tokenList2 := token.NewTokenListWithTokens(tokens2) + + cluster := NewCluster(token.NewSignature(tokenList1), tokenList1) + cluster.Add(tokenList2) + + pattern := cluster.GeneratePattern() + + if pattern == nil { + t.Fatal("Pattern should not be nil") + } + + // Should have wildcards at positions 0 and 2 + if !cluster.HasWildcards() { + t.Error("Should have wildcards for different values") + } + + wildcardPositions := cluster.GetWildcardPositions() + expectedPositions := map[int]bool{0: true, 2: true} + + if len(wildcardPositions) != 2 { + t.Errorf("Expected 2 wildcard positions, got %d", len(wildcardPositions)) + } + + for _, pos := range wildcardPositions { + if !expectedPositions[pos] { + t.Errorf("Unexpected wildcard position: %d", pos) + } + } + + // Check pattern tokens + // Position 0: Wildcard token (empty value, Wildcard field indicates status) + if pattern.Tokens[0].Wildcard != token.IsWildcard { + t.Error("Position 0 should be a wildcard") + } + + if pattern.Tokens[1].Value != " " || pattern.Tokens[1].Wildcard == token.IsWildcard { + t.Error("Position 1 should not be a wildcard") + } + + // Position 2: Path wildcard (special case - value is set to path pattern) + if pattern.Tokens[2].Value != "/*" || pattern.Tokens[2].Wildcard != token.IsWildcard { + t.Error("Position 2 should be a wildcard with path pattern") + } +} + +func TestCluster_GeneratePattern_SingleTokenList(t *testing.T) { + // Create cluster with single TokenList + tokens := []token.Token{ + {Value: "ERROR", Type: token.TokenSeverityLevel}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "failed", Type: token.TokenWord}, + } + tokenList := token.NewTokenListWithTokens(tokens) + + cluster := NewCluster(token.NewSignature(tokenList), tokenList) + pattern := cluster.GeneratePattern() + + if pattern == nil { + t.Fatal("Pattern should not be nil") + } + + // Single TokenList should have no wildcards + if cluster.HasWildcards() { + 
t.Error("Single TokenList should not have wildcards") + } + + // Pattern should be identical to original + if pattern.Length() != tokenList.Length() { + t.Error("Pattern length should match original TokenList") + } + + for i, tok := range pattern.Tokens { + if tok.Value != tokenList.Tokens[i].Value { + t.Errorf("Pattern token %d value mismatch: expected '%s', got '%s'", + i, tokenList.Tokens[i].Value, tok.Value) + } + } +} + +func TestCluster_GeneratePattern_Caching(t *testing.T) { + // Create cluster + tokens1 := []token.Token{ + {Value: "GET", Type: token.TokenHttpMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/api", Type: token.TokenAbsolutePath}, + } + tokens2 := []token.Token{ + {Value: "POST", Type: token.TokenHttpMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/users", Type: token.TokenAbsolutePath}, + } + + tokenList1 := token.NewTokenListWithTokens(tokens1) + tokenList2 := token.NewTokenListWithTokens(tokens2) + + cluster := NewCluster(token.NewSignature(tokenList1), tokenList1) + cluster.Add(tokenList2) + + // Generate pattern twice + pattern1 := cluster.GeneratePattern() + pattern2 := cluster.GeneratePattern() + + // Should return the same cached instance + if pattern1 != pattern2 { + t.Error("Pattern should be cached and return same instance") + } + + // Add another TokenList - should invalidate cache + tokens3 := []token.Token{ + {Value: "PUT", Type: token.TokenHttpMethod}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "/items", Type: token.TokenAbsolutePath}, + } + tokenList3 := token.NewTokenListWithTokens(tokens3) + cluster.Add(tokenList3) + + pattern3 := cluster.GeneratePattern() + + // Should be a new instance (cache was invalidated) + if pattern1 == pattern3 { + t.Error("Pattern cache should be invalidated after adding new TokenList") + } +} diff --git a/pkg/logs/patterns/clustering/merging/merging.go b/pkg/logs/patterns/clustering/merging/merging.go index aae8bcd66842..b10f2a316615 100644 --- 
a/pkg/logs/patterns/clustering/merging/merging.go +++ b/pkg/logs/patterns/clustering/merging/merging.go @@ -9,8 +9,6 @@ package merging import ( - "strings" - "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" ) @@ -35,7 +33,7 @@ func ShouldProtectPosition(position int, tokenType token.TokenType) bool { // CanMergeTokenLists checks if two TokenLists can be merged into a unified pattern. // Returns true only if all token positions are either identical or mergeable according -// to their mergeability levels and protection rules. +// to their comparison results and protection rules. func CanMergeTokenLists(tl1, tl2 *token.TokenList) bool { if tl1.Length() != tl2.Length() { return false @@ -45,20 +43,20 @@ func CanMergeTokenLists(tl1, tl2 *token.TokenList) bool { tok1 := &tl1.Tokens[i] tok2 := &tl2.Tokens[i] - level := tok1.GetMergeabilityLevel(tok2) + result := tok1.Compare(tok2) - // If tokens match exactly, continue - if level == token.FitsAsItIs { - continue + // If tokens conflict, reject + if result == token.Conflict { + return false } - // If tokens can't merge at all, reject - if !level.IsMergeable() { - return false + // If tokens are identical, continue + if result == token.Identical { + continue } - // Check protection rules - if position is protected and tokens differ, reject - if ShouldProtectPosition(i, tok1.Type) { + // For wildcard result, check protection rules + if result == token.Wildcard && ShouldProtectPosition(i, tok1.Type) { return false } } @@ -70,7 +68,7 @@ func CanMergeTokenLists(tl1, tl2 *token.TokenList) bool { // with wildcards at positions where tokens differ but are mergeable. // Returns nil if the TokenLists cannot be merged. 
func MergeTokenLists(tl1, tl2 *token.TokenList) *token.TokenList { - if !CanMergeTokenLists(tl1, tl2) { + if tl1.Length() != tl2.Length() { return nil } @@ -80,99 +78,28 @@ func MergeTokenLists(tl1, tl2 *token.TokenList) *token.TokenList { tok1 := &tl1.Tokens[i] tok2 := &tl2.Tokens[i] - level := tok1.GetMergeabilityLevel(tok2) + result := tok1.Compare(tok2) - if level == token.FitsAsItIs { - // Tokens are identical, keep as-is - merged.Add(*tok1) - continue - } + switch result { + case token.Conflict: + return nil // Abort entire merge + + case token.Identical: + merged.Add(*tok1) // Keep same - // Handle different merge types - switch level { - case token.MergeableWithWiderRange: - // Special handling for structured tokens (e.g., dates with partial wildcards) - if tok1.Type == token.TokenDate && tok1.DateInfo != nil && tok2.DateInfo != nil { - merged.Add(createPartialDateWildcard(tok1.DateInfo, tok2.DateInfo)) - } else { - // Fallback to full wildcard - merged.AddWildcardToken(tok1.Type) + case token.Wildcard: + // Check protection rules before wildcarding + if ShouldProtectPosition(i, tok1.Type) { + return nil // Abort: protected position cannot be wildcarded } - case token.MergeableAsWildcard: - // Create a full wildcard for this position - merged.AddWildcardToken(tok1.Type) - default: - // Shouldn't reach here if CanMergeTokenLists passed, but be defensive - merged.Add(*tok1) + // Create wildcard, preserving the first token's value as representative + merged.AddToken(tok1.Type, tok1.Value, token.IsWildcard) } } return merged } -// createPartialDateWildcard creates a date token with wildcards in differing components. -// This allows for more precise patterns like "2024-01-* 10:30:45" instead of just "*". 
-func createPartialDateWildcard(d1, d2 *token.DateComponents) token.Token { - // Create a pattern where differing components become wildcards - var pattern strings.Builder - - switch d1.Format { - case "RFC3339", "ISO8601": - // Format: YYYY-MM-DDTHH:MM:SS - if d1.Year == d2.Year { - pattern.WriteString(d1.Year) - } else { - pattern.WriteString("*") - } - pattern.WriteString("-") - - if d1.Month == d2.Month { - pattern.WriteString(d1.Month) - } else { - pattern.WriteString("*") - } - pattern.WriteString("-") - - if d1.Day == d2.Day { - pattern.WriteString(d1.Day) - } else { - pattern.WriteString("*") - } - pattern.WriteString("T") - - if d1.Hour == d2.Hour { - pattern.WriteString(d1.Hour) - } else { - pattern.WriteString("*") - } - pattern.WriteString(":") - - if d1.Minute == d2.Minute { - pattern.WriteString(d1.Minute) - } else { - pattern.WriteString("*") - } - pattern.WriteString(":") - - if d1.Second == d2.Second { - pattern.WriteString(d1.Second) - } else { - pattern.WriteString("*") - } - - default: - // For other formats, just use a full wildcard - return token.NewWildcardToken(token.TokenDate) - } - - return token.Token{ - Type: token.TokenDate, - Value: pattern.String(), - IsWildcard: true, - DateInfo: d1, // Keep the first date's structure for reference - } -} - // FindMergeableGroups analyzes a list of TokenLists and groups them by mergeability. // This is used to detect heterogeneous clusters that should be split into multiple patterns. // Returns a list of groups where each group contains mutually mergeable TokenLists. 
diff --git a/pkg/logs/patterns/clustering/merging/merging_test.go b/pkg/logs/patterns/clustering/merging/merging_test.go index 305fc443733f..355655e5eab4 100644 --- a/pkg/logs/patterns/clustering/merging/merging_test.go +++ b/pkg/logs/patterns/clustering/merging/merging_test.go @@ -56,15 +56,15 @@ func TestShouldProtectPosition(t *testing.T) { func TestCanMergeTokenLists_IdenticalLists(t *testing.T) { tl1 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "hello"), - token.NewToken(token.TokenWhitespace, " "), - token.NewToken(token.TokenWord, "world"), + token.NewToken(token.TokenWord, "hello", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "world", token.NotWildcard), }) tl2 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "hello"), - token.NewToken(token.TokenWhitespace, " "), - token.NewToken(token.TokenWord, "world"), + token.NewToken(token.TokenWord, "hello", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "world", token.NotWildcard), }) assert.True(t, CanMergeTokenLists(tl1, tl2)) @@ -72,15 +72,15 @@ func TestCanMergeTokenLists_IdenticalLists(t *testing.T) { func TestCanMergeTokenLists_PossiblyWildcardTokens(t *testing.T) { tl1 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged"), - token.NewToken(token.TokenWhitespace, " "), - token.NewPossiblyWildcardToken(token.TokenWord, "user123"), + token.NewToken(token.TokenWord, "logged", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "user123", token.PotentialWildcard), }) tl2 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged"), - token.NewToken(token.TokenWhitespace, " "), - token.NewPossiblyWildcardToken(token.TokenWord, "admin456"), + token.NewToken(token.TokenWord, 
"logged", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "admin456", token.PotentialWildcard), }) assert.True(t, CanMergeTokenLists(tl1, tl2)) @@ -89,15 +89,15 @@ func TestCanMergeTokenLists_PossiblyWildcardTokens(t *testing.T) { func TestCanMergeTokenLists_GenericWords(t *testing.T) { // Generic words without possiblyWildcard flag should not merge tl1 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "bob"), - token.NewToken(token.TokenWhitespace, " "), - token.NewToken(token.TokenWord, "likes"), + token.NewToken(token.TokenWord, "bob", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "likes", token.NotWildcard), }) tl2 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "cat"), - token.NewToken(token.TokenWhitespace, " "), - token.NewToken(token.TokenWord, "likes"), + token.NewToken(token.TokenWord, "cat", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "likes", token.NotWildcard), }) assert.False(t, CanMergeTokenLists(tl1, tl2)) @@ -105,13 +105,13 @@ func TestCanMergeTokenLists_GenericWords(t *testing.T) { func TestCanMergeTokenLists_DifferentLengths(t *testing.T) { tl1 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "hello"), - token.NewToken(token.TokenWhitespace, " "), - token.NewToken(token.TokenWord, "world"), + token.NewToken(token.TokenWord, "hello", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "world", token.NotWildcard), }) tl2 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "hello"), + token.NewToken(token.TokenWord, "hello", token.NotWildcard), }) assert.False(t, CanMergeTokenLists(tl1, tl2)) @@ -120,15 +120,15 @@ func 
TestCanMergeTokenLists_DifferentLengths(t *testing.T) { func TestCanMergeTokenLists_FirstWordProtection(t *testing.T) { // First word protection should prevent merge even with possiblyWildcard tl1 := token.NewTokenListWithTokens([]token.Token{ - token.NewPossiblyWildcardToken(token.TokenWord, "user123"), - token.NewToken(token.TokenWhitespace, " "), - token.NewToken(token.TokenWord, "logged"), + token.NewToken(token.TokenWord, "user123", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "logged", token.NotWildcard), }) tl2 := token.NewTokenListWithTokens([]token.Token{ - token.NewPossiblyWildcardToken(token.TokenWord, "admin456"), - token.NewToken(token.TokenWhitespace, " "), - token.NewToken(token.TokenWord, "logged"), + token.NewToken(token.TokenWord, "admin456", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "logged", token.NotWildcard), }) assert.False(t, CanMergeTokenLists(tl1, tl2), "First word should be protected from wildcarding") @@ -136,103 +136,59 @@ func TestCanMergeTokenLists_FirstWordProtection(t *testing.T) { func TestMergeTokenLists_CreateWildcard(t *testing.T) { tl1 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged"), - token.NewToken(token.TokenWhitespace, " "), - token.NewPossiblyWildcardToken(token.TokenWord, "user123"), + token.NewToken(token.TokenWord, "logged", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "user123", token.PotentialWildcard), }) tl2 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged"), - token.NewToken(token.TokenWhitespace, " "), - token.NewPossiblyWildcardToken(token.TokenWord, "admin456"), + token.NewToken(token.TokenWord, "logged", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + 
token.NewToken(token.TokenWord, "admin456", token.PotentialWildcard), }) merged := MergeTokenLists(tl1, tl2) assert.NotNil(t, merged) assert.Equal(t, 3, merged.Length()) assert.Equal(t, "logged", merged.Tokens[0].Value) - assert.False(t, merged.Tokens[0].IsWildcard) + assert.Equal(t, token.NotWildcard, merged.Tokens[0].Wildcard) assert.Equal(t, " ", merged.Tokens[1].Value) - assert.Equal(t, "*", merged.Tokens[2].Value) - assert.True(t, merged.Tokens[2].IsWildcard) + // Wildcard token has empty value - the Wildcard field tracks status + assert.Equal(t, token.IsWildcard, merged.Tokens[2].Wildcard) + assert.Equal(t, token.TokenWord, merged.Tokens[2].Type) } func TestMergeTokenLists_UnmergeableReturnsNil(t *testing.T) { tl1 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "bob"), - token.NewToken(token.TokenWhitespace, " "), - token.NewToken(token.TokenWord, "likes"), + token.NewToken(token.TokenWord, "bob", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "likes", token.NotWildcard), }) tl2 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "cat"), - token.NewToken(token.TokenWhitespace, " "), - token.NewToken(token.TokenWord, "likes"), + token.NewToken(token.TokenWord, "cat", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "likes", token.NotWildcard), }) merged := MergeTokenLists(tl1, tl2) assert.Nil(t, merged, "Unmergeable TokenLists should return nil") } -func TestMergeTokenLists_DateMerging(t *testing.T) { - dateInfo1 := &token.DateComponents{ - Year: "2024", - Month: "01", - Day: "15", - Hour: "10", - Minute: "30", - Second: "45", - Format: "RFC3339", - } - - dateInfo2 := &token.DateComponents{ - Year: "2024", - Month: "01", - Day: "15", - Hour: "14", - Minute: "22", - Second: "30", - Format: "RFC3339", - } - - tl1 := token.NewTokenListWithTokens([]token.Token{ - 
token.NewToken(token.TokenWord, "Log"), - token.NewToken(token.TokenWhitespace, " "), - token.NewDateToken("2024-01-15T10:30:45Z", dateInfo1), - }) - - tl2 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "Log"), - token.NewToken(token.TokenWhitespace, " "), - token.NewDateToken("2024-01-15T14:22:30Z", dateInfo2), - }) - - merged := MergeTokenLists(tl1, tl2) - assert.NotNil(t, merged) - assert.Equal(t, 3, merged.Length()) - - // Date token should have partial wildcard for time components - dateToken := merged.Tokens[2] - assert.True(t, dateToken.IsWildcard) - assert.Equal(t, token.TokenDate, dateToken.Type) - // Should preserve date, wildcard time: 2024-01-15T*:*:* - assert.Contains(t, dateToken.Value, "2024-01-15") -} - func TestFindMergeableGroups_SingleGroup(t *testing.T) { tl1 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged"), - token.NewPossiblyWildcardToken(token.TokenWord, "user123"), + token.NewToken(token.TokenWord, "logged", token.NotWildcard), + token.NewToken(token.TokenWord, "user123", token.PotentialWildcard), }) tl2 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged"), - token.NewPossiblyWildcardToken(token.TokenWord, "admin456"), + token.NewToken(token.TokenWord, "logged", token.NotWildcard), + token.NewToken(token.TokenWord, "admin456", token.PotentialWildcard), }) tl3 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged"), - token.NewPossiblyWildcardToken(token.TokenWord, "guest789"), + token.NewToken(token.TokenWord, "logged", token.NotWildcard), + token.NewToken(token.TokenWord, "guest789", token.PotentialWildcard), }) groups := FindMergeableGroups([]*token.TokenList{tl1, tl2, tl3}) @@ -243,24 +199,24 @@ func TestFindMergeableGroups_SingleGroup(t *testing.T) { func TestFindMergeableGroups_MultipleGroups(t *testing.T) { // Group 1: mergeable user logs tl1 := token.NewTokenListWithTokens([]token.Token{ 
- token.NewToken(token.TokenWord, "logged"), - token.NewPossiblyWildcardToken(token.TokenWord, "user123"), + token.NewToken(token.TokenWord, "logged", token.NotWildcard), + token.NewToken(token.TokenWord, "user123", token.PotentialWildcard), }) tl2 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged"), - token.NewPossiblyWildcardToken(token.TokenWord, "admin456"), + token.NewToken(token.TokenWord, "logged", token.NotWildcard), + token.NewToken(token.TokenWord, "admin456", token.PotentialWildcard), }) // Group 2: unmergeable generic words tl3 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged"), - token.NewToken(token.TokenWord, "cat"), + token.NewToken(token.TokenWord, "logged", token.NotWildcard), + token.NewToken(token.TokenWord, "cat", token.NotWildcard), }) tl4 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged"), - token.NewToken(token.TokenWord, "dog"), + token.NewToken(token.TokenWord, "logged", token.NotWildcard), + token.NewToken(token.TokenWord, "dog", token.NotWildcard), }) groups := FindMergeableGroups([]*token.TokenList{tl1, tl2, tl3, tl4}) @@ -283,7 +239,7 @@ func TestFindMergeableGroups_EmptyInput(t *testing.T) { func TestFindMergeableGroups_SingleTokenList(t *testing.T) { tl1 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "hello"), + token.NewToken(token.TokenWord, "hello", token.NotWildcard), }) groups := FindMergeableGroups([]*token.TokenList{tl1}) @@ -294,15 +250,15 @@ func TestFindMergeableGroups_SingleTokenList(t *testing.T) { func TestMergeTokenLists_ProtectionRulesEnforced(t *testing.T) { // Try to merge when first token is a word but differs tl1 := token.NewTokenListWithTokens([]token.Token{ - token.NewPossiblyWildcardToken(token.TokenWord, "Login"), - token.NewToken(token.TokenWhitespace, " "), - token.NewToken(token.TokenWord, "successful"), + token.NewToken(token.TokenWord, "Login", 
token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "successful", token.NotWildcard), }) tl2 := token.NewTokenListWithTokens([]token.Token{ - token.NewPossiblyWildcardToken(token.TokenWord, "Logout"), - token.NewToken(token.TokenWhitespace, " "), - token.NewToken(token.TokenWord, "successful"), + token.NewToken(token.TokenWord, "Logout", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "successful", token.NotWildcard), }) // Should fail because first word is protected @@ -313,33 +269,34 @@ func TestMergeTokenLists_ProtectionRulesEnforced(t *testing.T) { func TestMergeTokenLists_ProgressiveMerging(t *testing.T) { // Test merging multiple TokenLists progressively tl1 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "Request"), - token.NewToken(token.TokenWhitespace, " "), - token.NewPossiblyWildcardToken(token.TokenNumeric, "123"), + token.NewToken(token.TokenWord, "Request", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenNumeric, "123", token.PotentialWildcard), }) tl2 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "Request"), - token.NewToken(token.TokenWhitespace, " "), - token.NewPossiblyWildcardToken(token.TokenNumeric, "456"), + token.NewToken(token.TokenWord, "Request", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenNumeric, "456", token.PotentialWildcard), }) tl3 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "Request"), - token.NewToken(token.TokenWhitespace, " "), - token.NewPossiblyWildcardToken(token.TokenNumeric, "789"), + token.NewToken(token.TokenWord, "Request", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + 
token.NewToken(token.TokenNumeric, "789", token.PotentialWildcard), }) // Merge first two merged12 := MergeTokenLists(tl1, tl2) assert.NotNil(t, merged12) - assert.True(t, merged12.Tokens[2].IsWildcard) + assert.Equal(t, token.IsWildcard, merged12.Tokens[2].Wildcard) // Merge result with third merged123 := MergeTokenLists(merged12, tl3) assert.NotNil(t, merged123) assert.Equal(t, 3, merged123.Length()) assert.Equal(t, "Request", merged123.Tokens[0].Value) - assert.Equal(t, "*", merged123.Tokens[2].Value) - assert.True(t, merged123.Tokens[2].IsWildcard) + // Wildcard token has empty value - the Wildcard field tracks status + assert.Equal(t, token.IsWildcard, merged123.Tokens[2].Wildcard) + assert.Equal(t, token.TokenNumeric, merged123.Tokens[2].Type) } diff --git a/pkg/logs/patterns/token/token.go b/pkg/logs/patterns/token/token.go index 37fc9fcd20fa..84023dbfe779 100644 --- a/pkg/logs/patterns/token/token.go +++ b/pkg/logs/patterns/token/token.go @@ -36,98 +36,54 @@ const ( TokenDate ) -// MergeabilityLevel represents how two tokens can be merged -type MergeabilityLevel int +// WildcardStatus describes a token's relationship to wildcards +type WildcardStatus int const ( - Unmergeable MergeabilityLevel = iota - MergeableAsNewType - MergeableAsWildcard - MergeableWithWiderRange - Fits - FitsAsItIs + // NotWildcard - This token cannot become a wildcard + // Examples: dates, whitespace + NotWildcard WildcardStatus = iota + + // PotentialWildcard - This token can become a wildcard + // Examples: all words ("connection", "user123"), HTTP methods, IPs, numbers + // Note: First word position is protected during merge + PotentialWildcard + + // IsWildcard - This token is already a wildcard + // Example: wildcard position in a pattern + IsWildcard ) -// IsMergeable returns true if the mergeability level allows merging -func (m MergeabilityLevel) IsMergeable() bool { - return m > Unmergeable -} +// MergeResult describes the result of comparing two tokens +type MergeResult int 
-// Compare returns the comparison result with another mergeability level -func (m1 MergeabilityLevel) Compare(m2 MergeabilityLevel) int { - return int(m1) - int(m2) -} +const ( + // Conflict - Tokens cannot merge, abort pattern creation + // Examples: different types, generic words with different values + Conflict MergeResult = iota -// DateComponents represents parsed components of a date token -type DateComponents struct { - Year string - Month string - Day string - Hour string - Minute string - Second string - Format string // Original format pattern -} + // Identical - Tokens are the same, keep as-is + // Examples: "Error" vs "Error", wildcard vs any value of same type + Identical + + // Wildcard - Tokens can merge into wildcard + // Examples: "connection" vs "replication", "user123" vs "admin456", "GET" vs "POST" + Wildcard +) // Token represents a single token in a log message type Token struct { - Type TokenType - Value string - IsWildcard bool - PossiblyWildcard bool // Indicates if this token can merge into a wildcard during batch consolidation - - // Advanced token structure information - DateInfo *DateComponents // For TokenDate - parsed date components + Type TokenType + Value string + Wildcard WildcardStatus // NotWildcard, PotentialWildcard, or IsWildcard } -// NewToken creates a new token with the given type and value -func NewToken(tokenType TokenType, value string) Token { +// NewToken creates a token with the specified wildcard status +func NewToken(tokenType TokenType, value string, wildcard WildcardStatus) Token { return Token{ - Type: tokenType, - Value: value, - IsWildcard: false, - PossiblyWildcard: false, - } -} - -// NewTokenWithFlags creates a new token with explicit wildcard flags -func NewTokenWithFlags(tokenType TokenType, value string, isWildcard, possiblyWildcard bool) Token { - return Token{ - Type: tokenType, - Value: value, - IsWildcard: isWildcard, - PossiblyWildcard: possiblyWildcard, - } -} - -// NewWildcardToken creates a 
wildcard token of the given type -func NewWildcardToken(tokenType TokenType) Token { - return Token{ - Type: tokenType, - Value: "*", - IsWildcard: true, - PossiblyWildcard: true, - } -} - -// NewPossiblyWildcardToken creates a token that can potentially become a wildcard -func NewPossiblyWildcardToken(tokenType TokenType, value string) Token { - return Token{ - Type: tokenType, - Value: value, - IsWildcard: false, - PossiblyWildcard: true, - } -} - -// NewDateToken creates a date token with parsed components -func NewDateToken(value string, dateInfo *DateComponents) Token { - return Token{ - Type: TokenDate, - Value: value, - IsWildcard: false, - PossiblyWildcard: false, - DateInfo: dateInfo, + Type: tokenType, + Value: value, + Wildcard: wildcard, } } @@ -177,72 +133,41 @@ func (tt TokenType) String() string { // String returns a string representation of the token func (t *Token) String() string { - if t.IsWildcard { - return fmt.Sprintf("%s(*)", t.Type) - } return fmt.Sprintf("%s(%s)", t.Type, t.Value) } -// GetMergeabilityLevel determines how this token can merge with another token -func (t1 *Token) GetMergeabilityLevel(t2 *Token) MergeabilityLevel { - // Same token type and value - if t1.Type == t2.Type && t1.Value == t2.Value { - return FitsAsItIs +// Compare checks if two tokens can merge and returns the result +func (t1 *Token) Compare(t2 *Token) MergeResult { + // Different types cannot merge + if t1.Type != t2.Type { + return Conflict } - // Same token type but different values - if t1.Type == t2.Type { - // Special handling for structured date tokens - if t1.Type == TokenDate && t1.DateInfo != nil && t2.DateInfo != nil { - return getDateMergeabilityLevel(t1.DateInfo, t2.DateInfo) - } - - // For Word tokens, only merge if both have possiblyWildcard flag - // This prevents generic words like "bob" and "cat" from merging - if t1.Type == TokenWord { - if t1.PossiblyWildcard && t2.PossiblyWildcard { - return MergeableAsWildcard - } - // Generic words without 
numeric patterns don't merge - return Unmergeable - } - - // For non-Word tokens (HttpMethod, HttpStatus, AbsolutePath, Numeric, etc.) - // they are mergeable by default since they represent structured data - // e.g., "GET" vs "POST", "/api" vs "/users", "200" vs "404" - return MergeableAsWildcard + // Identical value + if t1.Value == t2.Value { + return Identical } - // Different token types - return Unmergeable -} - -// getDateMergeabilityLevel determines how two date tokens can merge based on their structure -func getDateMergeabilityLevel(d1, d2 *DateComponents) MergeabilityLevel { - // Must have same format to be mergeable - different formats = different log sources - if d1.Format != d2.Format { - return Unmergeable + // t1 is wildcard - matches any value of same type + if t1.Wildcard == IsWildcard { + return Identical } - // Simple rule: Only merge if same date, different time (same log source over time) - // Everything else is likely different log sources and shouldn't merge - sameDate := d1.Year == d2.Year && d1.Month == d2.Month && d1.Day == d2.Day - sameTime := d1.Hour == d2.Hour && d1.Minute == d2.Minute && d1.Second == d2.Second - - if sameDate && sameTime { - return FitsAsItIs + // Different values - check if they can merge into wildcard + // Whitespace never wildcards (structural) + if t1.Type == TokenWhitespace { + return Conflict } - if sameDate && !sameTime { - // Same date, different time = same log source at different times - return MergeableWithWiderRange + // Words only wildcard if both are PotentialWildcard + if t1.Type == TokenWord { + if t1.Wildcard == PotentialWildcard && t2.Wildcard == PotentialWildcard { + return Wildcard + } + return Conflict } - // Different dates = different log sources/periods = don't merge - return Unmergeable + // Structured types (HTTP, IP, Numeric, Date, etc.) 
wildcard if same type + // Same TokenDate type means same format structure (e.g., both RFC3339) + return Wildcard } - -// NOTE: MergeWith() and createPartialDateWildcard() have been moved to the -// clustering/merging package. Token now only provides data comparison via -// GetMergeabilityLevel(), while merge execution is handled as business logic -// in the merging package. diff --git a/pkg/logs/patterns/token/token_test.go b/pkg/logs/patterns/token/token_test.go new file mode 100644 index 000000000000..9a941be5a18e --- /dev/null +++ b/pkg/logs/patterns/token/token_test.go @@ -0,0 +1,88 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package token + +import ( + "testing" +) + +func TestTokenType_String(t *testing.T) { + tests := []struct { + tokenType TokenType + expected string + }{ + {TokenUnknown, "Unknown"}, + {TokenWord, "Word"}, + {TokenNumeric, "Numeric"}, + {TokenWhitespace, "Whitespace"}, + {TokenIPv4, "IPv4"}, + {TokenIPv6, "IPv6"}, + {TokenEmail, "Email"}, + {TokenURI, "URI"}, + {TokenAbsolutePath, "AbsolutePath"}, + {TokenHttpMethod, "HttpMethod"}, + {TokenHttpStatus, "HttpStatus"}, + {TokenSeverityLevel, "SeverityLevel"}, + {TokenDate, "Date"}, + } + + for _, test := range tests { + result := test.tokenType.String() + if result != test.expected { + t.Errorf("TokenType %v: expected %s, got %s", test.tokenType, test.expected, result) + } + } +} + +func TestToken_IsHTTP(t *testing.T) { + httpToken := Token{Type: TokenHttpMethod, Value: "GET"} + if !httpToken.IsHTTP() { + t.Error("HttpMethod token should be HTTP") + } + + statusToken := Token{Type: TokenHttpStatus, Value: "200"} + if !statusToken.IsHTTP() { + t.Error("HttpStatus token should be HTTP") + } + + wordToken := Token{Type: TokenWord, Value: "test"} + if wordToken.IsHTTP() { + t.Error("Word 
token should not be HTTP") + } +} + +func TestToken_IsNetwork(t *testing.T) { + ipv4Token := Token{Type: TokenIPv4, Value: "192.168.1.1"} + if !ipv4Token.IsNetwork() { + t.Error("IPv4 token should be network") + } + + emailToken := Token{Type: TokenEmail, Value: "test@example.com"} + if !emailToken.IsNetwork() { + t.Error("Email token should be network") + } + + wordToken := Token{Type: TokenWord, Value: "test"} + if wordToken.IsNetwork() { + t.Error("Word token should not be network") + } +} + +func TestToken_String(t *testing.T) { + // Regular token + token := Token{Type: TokenWord, Value: "hello"} + expected := "Word(hello)" + if token.String() != expected { + t.Errorf("Expected %s, got %s", expected, token.String()) + } + + // Wildcard token - still shows the value, not "*" + wildcardToken := Token{Type: TokenWord, Value: "test", Wildcard: IsWildcard} + expectedWildcard := "Word(test)" + if wildcardToken.String() != expectedWildcard { + t.Errorf("Expected %s, got %s", expectedWildcard, wildcardToken.String()) + } +} diff --git a/pkg/logs/patterns/token/tokenlist.go b/pkg/logs/patterns/token/tokenlist.go index 7312a8e0cb5c..8cdbfa915fe7 100644 --- a/pkg/logs/patterns/token/tokenlist.go +++ b/pkg/logs/patterns/token/tokenlist.go @@ -31,18 +31,8 @@ func (tl *TokenList) Add(tokens ...Token) { } // AddToken creates and adds a new token with the given type and value -func (tl *TokenList) AddToken(tokenType TokenType, value string) { - tl.Tokens = append(tl.Tokens, NewToken(tokenType, value)) -} - -// AddWildcardToken creates and adds a wildcard token of the given type -func (tl *TokenList) AddWildcardToken(tokenType TokenType) { - tl.Tokens = append(tl.Tokens, NewWildcardToken(tokenType)) -} - -// AddPossiblyWildcardToken creates and adds a token that can potentially become a wildcard -func (tl *TokenList) AddPossiblyWildcardToken(tokenType TokenType, value string) { - tl.Tokens = append(tl.Tokens, NewPossiblyWildcardToken(tokenType, value)) +func (tl *TokenList) 
AddToken(tokenType TokenType, value string, wildcard WildcardStatus) { + tl.Tokens = append(tl.Tokens, NewToken(tokenType, value, wildcard)) } // Length returns the number of tokens From b8af4938066811356c8edf770c86081aa1d233fa Mon Sep 17 00:00:00 2001 From: yoon nguyen Date: Mon, 3 Nov 2025 16:45:37 -0500 Subject: [PATCH 03/16] good for e2e testing --- pkg/logs/message/message.go | 11 +- pkg/logs/patterns/automaton/rules.go | 124 +---- pkg/logs/patterns/automaton/rules_test.go | 371 +++---------- pkg/logs/patterns/automaton/tokenizer.go | 69 ++- pkg/logs/patterns/automaton/tokenizer_test.go | 360 +++++++++++++ pkg/logs/patterns/clustering/cluster.go | 76 ++- .../patterns/clustering/cluster_manager.go | 216 ++++++++ .../clustering/cluster_manager_test.go | 489 ++++++++++++++++++ .../patterns/clustering/merging/merging.go | 20 +- .../clustering/merging/merging_test.go | 2 +- pkg/logs/patterns/token/token.go | 59 +-- pkg/logs/patterns/token/token_test.go | 139 ++--- pkg/logs/patterns/token/tokentype_string.go | 36 ++ pkg/logs/sender/dumb_strategy.go | 230 ++++---- pkg/logs/sender/grpc/grpc_sender.go | 5 +- pkg/logs/sender/grpc/stream_worker.go | 153 +++++- 16 files changed, 1664 insertions(+), 696 deletions(-) create mode 100644 pkg/logs/patterns/automaton/tokenizer_test.go create mode 100644 pkg/logs/patterns/clustering/cluster_manager.go create mode 100644 pkg/logs/patterns/clustering/cluster_manager_test.go create mode 100644 pkg/logs/patterns/token/tokentype_string.go diff --git a/pkg/logs/message/message.go b/pkg/logs/message/message.go index 5cb9f9050ed8..86f5504c6c10 100644 --- a/pkg/logs/message/message.go +++ b/pkg/logs/message/message.go @@ -127,7 +127,9 @@ type MessageContent struct { //nolint:revive content []byte // structured content structuredContent StructuredContent - State MessageContentState + // rendered content preserved for pattern extraction (before encoding overwrites content) + renderedContent []byte + State MessageContentState } // 
MessageContentState is used to represent the MessageContent state. @@ -189,6 +191,7 @@ func (m *MessageContent) SetContent(content []byte) { // SetRendered sets the content for the MessageContent and sets MessageContent state to rendered. func (m *MessageContent) SetRendered(content []byte) { m.content = content + m.renderedContent = content // Preserve for pattern extraction m.State = StateRendered } @@ -198,6 +201,12 @@ func (m *MessageContent) SetEncoded(content []byte) { m.State = StateEncoded } +// GetRenderedContent returns the preserved rendered content (before encoding). +// This is used for pattern extraction which needs plain text, not encoded binary. +func (m *MessageContent) GetRenderedContent() []byte { + return m.renderedContent +} + // ParsingExtra ships extra information parsers want to make available // to the rest of the pipeline. // E.g. Timestamp is used by the docker parsers to transmit a tailing offset. diff --git a/pkg/logs/patterns/automaton/rules.go b/pkg/logs/patterns/automaton/rules.go index 1183cd7e797c..1c1c3800bedf 100644 --- a/pkg/logs/patterns/automaton/rules.go +++ b/pkg/logs/patterns/automaton/rules.go @@ -15,6 +15,10 @@ import ( ) // Priority constants for rule evaluation order +// +// Rules are sorted by priority (highest first) and evaluated sequentially until the first match. +// Priority is based on the specificity of the pattern. The more specific the pattern, the higher the priority. +// Higher priority = evaluated first = more specific classification. 
const ( PriorityHigh = 3 // Very specific patterns like IPv4, IPv6, Email PriorityMedium = 2 // Structured patterns like URI, Dates, HTTPStatus @@ -55,11 +59,25 @@ func NewRuleManager() *RuleManager { // AddRule adds a new terminal rule func (rm *RuleManager) AddRule(name, pattern, category, description string, tokenType token.TokenType, priority int, examples []string) error { + // Check for duplicate rule name + if rm.GetRule(name) != nil { + return fmt.Errorf("rule '%s' already exists", name) + } + + // Compile and validate regex pattern regex, err := regexp.Compile(pattern) if err != nil { return fmt.Errorf("invalid regex pattern '%s': %v", pattern, err) } + // Validate examples match the pattern + for _, example := range examples { + if !regex.MatchString(example) { + return fmt.Errorf("example '%s' does not match pattern '%s'", example, pattern) + } + } + + // Create and insert rule rule := &TerminalRule{ Name: name, Pattern: regex, @@ -70,12 +88,6 @@ func (rm *RuleManager) AddRule(name, pattern, category, description string, toke Examples: examples, } - for _, example := range examples { - if !regex.MatchString(example) { - return fmt.Errorf("example '%s' does not match pattern '%s'", example, pattern) - } - } - rm.insertRuleByPriority(rule) rm.addToCategory(rule) @@ -130,7 +142,9 @@ func (rm *RuleManager) LoadPredefinedRules() error { return nil } +// ================================================ // Helper methods +// ================================================ func (rm *RuleManager) insertRuleByPriority(rule *TerminalRule) { // Insert in priority order (higher priority first) @@ -215,30 +229,6 @@ func (rm *RuleManager) GetCategories() []string { return categories } -// ValidateRule checks if a rule would work correctly -func (rm *RuleManager) ValidateRule(name, pattern string, examples []string) error { - regex, err := regexp.Compile(pattern) - if err != nil { - return fmt.Errorf("invalid regex: %v", err) - } - - // Check for conflicts with 
existing rules - for _, existing := range rm.rules { - if existing.Name == name { - return fmt.Errorf("rule '%s' already exists", name) - } - } - - // Validate examples - for _, example := range examples { - if !regex.MatchString(example) { - return fmt.Errorf("example '%s' does not match pattern", example) - } - } - - return nil -} - // GetRuleStats returns statistics about the rule system func (rm *RuleManager) GetRuleStats() RuleStats { stats := RuleStats{ @@ -493,77 +483,3 @@ func GetPredefinedRules() []*TerminalRule { return rules } - -// GetRuleByPriority returns rules with a specific priority -func (rm *RuleManager) GetRuleByPriority(priority int) []*TerminalRule { - result := make([]*TerminalRule, 0) - for _, rule := range rm.rules { - if rule.Priority == priority { - result = append(result, rule) - } - } - return result -} - -// GetHighestPriorityRules returns rules with the highest priority -func (rm *RuleManager) GetHighestPriorityRules() []*TerminalRule { - if len(rm.rules) == 0 { - return []*TerminalRule{} - } - - highestPriority := rm.rules[0].Priority - result := make([]*TerminalRule, 0) - - for _, rule := range rm.rules { - if rule.Priority == highestPriority { - result = append(result, rule) - } else { - break // Rules are sorted by priority - } - } - return result -} - -// UpdateRulePriority changes the priority of an existing rule -func (rm *RuleManager) UpdateRulePriority(name string, newPriority int) error { - rule := rm.GetRule(name) - if rule == nil { - return fmt.Errorf("rule '%s' not found", name) - } - - // Remove the rule and re-add with new priority - if !rm.RemoveRule(name) { - return fmt.Errorf("failed to remove rule '%s'", name) - } - - return rm.AddRule( - rule.Name, - rule.Pattern.String(), - rule.Category, - rule.Description, - rule.TokenType, - newPriority, - rule.Examples, - ) -} - -// GetCategoryDescription returns the description for a category -func (rm *RuleManager) GetCategoryDescription(category string) string { - if cat, 
exists := rm.categories[category]; exists { - return cat.Description - } - return "" -} - -// SetCategoryDescription updates the description for a category -func (rm *RuleManager) SetCategoryDescription(category, description string) { - if rm.categories[category] == nil { - rm.categories[category] = &RuleCategory{ - Name: category, - Description: description, - Rules: make([]*TerminalRule, 0), - } - } else { - rm.categories[category].Description = description - } -} diff --git a/pkg/logs/patterns/automaton/rules_test.go b/pkg/logs/patterns/automaton/rules_test.go index 9c318d424100..78f70c5abf90 100644 --- a/pkg/logs/patterns/automaton/rules_test.go +++ b/pkg/logs/patterns/automaton/rules_test.go @@ -8,23 +8,21 @@ package automaton import ( "testing" + "github.com/stretchr/testify/assert" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" ) +// TestNewRuleManager tests the creation of a new rule manager func TestNewRuleManager(t *testing.T) { rm := NewRuleManager() - if rm.rules == nil { - t.Error("Expected rules slice to be initialized") - } - if rm.categories == nil { - t.Error("Expected categories map to be initialized") - } - if len(rm.rules) != 0 { - t.Errorf("Expected empty rules slice, got %d rules", len(rm.rules)) - } + assert.NotNil(t, rm.rules, "Expected rules slice to be initialized") + assert.NotNil(t, rm.categories, "Expected categories map to be initialized") + assert.Equal(t, 0, len(rm.rules), "Expected empty rules slice") } +// TestRuleManager_AddRule tests the addition of a new rule func TestRuleManager_AddRule(t *testing.T) { rm := NewRuleManager() @@ -38,29 +36,17 @@ func TestRuleManager_AddRule(t *testing.T) { []string{"192.168.1.1", "10.0.0.1"}, ) - if err != nil { - t.Fatalf("Failed to add rule: %v", err) - } - - if len(rm.rules) != 1 { - t.Errorf("Expected 1 rule, got %d", len(rm.rules)) - } + assert.NoError(t, err, "Failed to add rule") + assert.Equal(t, 1, len(rm.rules), "Expected 1 rule") rule := rm.rules[0] - if rule.Name != 
"TestIPv4" { - t.Errorf("Expected rule name 'TestIPv4', got '%s'", rule.Name) - } - if rule.TokenType != token.TokenIPv4 { - t.Errorf("Expected token type TokenIPv4, got %v", rule.TokenType) - } - if rule.Priority != 100 { - t.Errorf("Expected priority 100, got %d", rule.Priority) - } - if rule.Category != "network" { - t.Errorf("Expected category 'network', got '%s'", rule.Category) - } + assert.Equal(t, "TestIPv4", rule.Name, "Expected rule name 'TestIPv4'") + assert.Equal(t, token.TokenIPv4, rule.TokenType, "Expected token type TokenIPv4") + assert.Equal(t, 100, rule.Priority, "Expected priority 100") + assert.Equal(t, "network", rule.Category, "Expected category 'network'") } +// TestRuleManager_AddRule_InvalidPattern tests the addition of a new rule with an invalid regex pattern func TestRuleManager_AddRule_InvalidPattern(t *testing.T) { rm := NewRuleManager() @@ -74,11 +60,10 @@ func TestRuleManager_AddRule_InvalidPattern(t *testing.T) { []string{}, ) - if err == nil { - t.Error("Expected error for invalid regex pattern") - } + assert.Error(t, err, "Expected error for invalid regex pattern") } +// TestRuleManager_AddRule_InvalidExample tests the addition of a new rule with an invalid example func TestRuleManager_AddRule_InvalidExample(t *testing.T) { rm := NewRuleManager() @@ -92,57 +77,59 @@ func TestRuleManager_AddRule_InvalidExample(t *testing.T) { []string{"123", "abc"}, // "abc" doesn't match ^\d+$ ) - if err == nil { - t.Error("Expected error for example that doesn't match pattern") - } + assert.Error(t, err, "Expected error for example that doesn't match pattern") } +// TestRuleManager_AddRule_Duplicate tests the addition of a duplicate rule +func TestRuleManager_AddRule_Duplicate(t *testing.T) { + rm := NewRuleManager() + + // Add first rule + err := rm.AddRule("TestRule", `^\d+$`, "test", "Numeric", token.TokenNumeric, 50, []string{"123"}) + assert.NoError(t, err, "Failed to add first rule") + + // Try to add duplicate rule + err = 
rm.AddRule("TestRule", `^[a-z]+$`, "test", "Alpha", token.TokenWord, 50, []string{"abc"}) + assert.Error(t, err, "Expected error when adding duplicate rule name") + assert.Contains(t, err.Error(), "already exists", "Expected 'already exists' error") +} + +// TestRuleManager_RemoveRule tests the removal of a rule func TestRuleManager_RemoveRule(t *testing.T) { rm := NewRuleManager() // Add a rule first rm.AddRule("TestRule", `^\d+$`, "test", "Test", token.TokenNumeric, 50, []string{"123"}) - if len(rm.rules) != 1 { - t.Fatalf("Expected 1 rule before removal") - } + assert.Equal(t, 1, len(rm.rules), "Expected 1 rule before removal") // Remove the rule removed := rm.RemoveRule("TestRule") - if !removed { - t.Error("Expected RemoveRule to return true") - } - - if len(rm.rules) != 0 { - t.Errorf("Expected 0 rules after removal, got %d", len(rm.rules)) - } + assert.True(t, removed, "Expected RemoveRule to return true") + assert.Equal(t, 0, len(rm.rules), "Expected 0 rules after removal") // Try to remove non-existent rule removed = rm.RemoveRule("NonExistent") - if removed { - t.Error("Expected RemoveRule to return false for non-existent rule") - } + assert.False(t, removed, "Expected RemoveRule to return false for non-existent rule") } +// TestRuleManager_GetRule tests the retrieval of a rule by name func TestRuleManager_GetRule(t *testing.T) { rm := NewRuleManager() rm.AddRule("TestRule", `^\d+$`, "test", "Test", token.TokenNumeric, 50, []string{"123"}) rule := rm.GetRule("TestRule") - if rule == nil { - t.Fatal("Expected to find rule 'TestRule'") - } - if rule.Name != "TestRule" { - t.Errorf("Expected rule name 'TestRule', got '%s'", rule.Name) + assert.NotNil(t, rule, "Expected to find rule 'TestRule'") + if rule != nil { + assert.Equal(t, "TestRule", rule.Name, "Expected rule name 'TestRule'") } notFound := rm.GetRule("NonExistent") - if notFound != nil { - t.Error("Expected nil for non-existent rule") - } + assert.Nil(t, notFound, "Expected nil for non-existent 
rule") } +// TestRuleManager_PriorityOrdering tests the ordering of rules by priority func TestRuleManager_PriorityOrdering(t *testing.T) { rm := NewRuleManager() @@ -152,24 +139,19 @@ func TestRuleManager_PriorityOrdering(t *testing.T) { rm.AddRule("Medium", `medium`, "test", "Medium priority", token.TokenWord, 50, []string{"medium"}) rules := rm.ListRules() - if len(rules) != 3 { - t.Fatalf("Expected 3 rules, got %d", len(rules)) - } + assert.Equal(t, 3, len(rules), "Expected 3 rules") // Should be ordered by priority (highest first) expectedOrder := []string{"High", "Medium", "Low"} expectedPriorities := []int{100, 50, 10} for i, rule := range rules { - if rule.Name != expectedOrder[i] { - t.Errorf("Rule %d: expected name '%s', got '%s'", i, expectedOrder[i], rule.Name) - } - if rule.Priority != expectedPriorities[i] { - t.Errorf("Rule %d: expected priority %d, got %d", i, expectedPriorities[i], rule.Priority) - } + assert.Equal(t, expectedOrder[i], rule.Name, "Rule %d name mismatch", i) + assert.Equal(t, expectedPriorities[i], rule.Priority, "Rule %d priority mismatch", i) } } +// TestRuleManager_ApplyRules tests the application of rules to a value func TestRuleManager_ApplyRules(t *testing.T) { rm := NewRuleManager() @@ -190,12 +172,11 @@ func TestRuleManager_ApplyRules(t *testing.T) { for _, test := range tests { result := rm.ApplyRules(test.input) - if result != test.expected { - t.Errorf("ApplyRules('%s'): expected %v, got %v", test.input, test.expected, result) - } + assert.Equal(t, test.expected, result, "ApplyRules('%s') mismatch", test.input) } } +// TestRuleManager_GetRulesByCategory tests the retrieval of rules by category func TestRuleManager_GetRulesByCategory(t *testing.T) { rm := NewRuleManager() @@ -204,21 +185,16 @@ func TestRuleManager_GetRulesByCategory(t *testing.T) { rm.AddRule("Numeric", `num`, "numeric", "Number", token.TokenNumeric, 50, []string{"num"}) networkRules := rm.GetRulesByCategory("network") - if len(networkRules) != 2 { - 
t.Errorf("Expected 2 network rules, got %d", len(networkRules)) - } + assert.Equal(t, 2, len(networkRules), "Expected 2 network rules") numericRules := rm.GetRulesByCategory("numeric") - if len(numericRules) != 1 { - t.Errorf("Expected 1 numeric rule, got %d", len(numericRules)) - } + assert.Equal(t, 1, len(numericRules), "Expected 1 numeric rule") emptyRules := rm.GetRulesByCategory("nonexistent") - if len(emptyRules) != 0 { - t.Errorf("Expected 0 rules for nonexistent category, got %d", len(emptyRules)) - } + assert.Equal(t, 0, len(emptyRules), "Expected 0 rules for nonexistent category") } +// TestRuleManager_GetCategories tests the retrieval of categories func TestRuleManager_GetCategories(t *testing.T) { rm := NewRuleManager() @@ -227,19 +203,18 @@ func TestRuleManager_GetCategories(t *testing.T) { rm.AddRule("Rule3", `r3`, "network", "Rule 3", token.TokenWord, 50, []string{"r3"}) categories := rm.GetCategories() - if len(categories) != 2 { - t.Errorf("Expected 2 categories, got %d", len(categories)) - } + assert.Equal(t, 2, len(categories), "Expected 2 categories") // Categories should be sorted expectedCategories := []string{"network", "time"} for i, expected := range expectedCategories { - if i >= len(categories) || categories[i] != expected { - t.Errorf("Expected category %d to be '%s', got '%s'", i, expected, categories[i]) + if assert.Less(t, i, len(categories), "Category %d should exist", i) { + assert.Equal(t, expected, categories[i], "Expected category %d to be '%s'", i, expected) } } } +// TestRuleManager_GetRuleStats tests the retrieval of rule statistics func TestRuleManager_GetRuleStats(t *testing.T) { rm := NewRuleManager() @@ -249,29 +224,18 @@ func TestRuleManager_GetRuleStats(t *testing.T) { stats := rm.GetRuleStats() - if stats.TotalRules != 3 { - t.Errorf("Expected TotalRules=3, got %d", stats.TotalRules) - } - if stats.Categories != 2 { - t.Errorf("Expected Categories=2, got %d", stats.Categories) - } - if stats.ByCategory["network"] != 2 { 
- t.Errorf("Expected 2 network rules, got %d", stats.ByCategory["network"]) - } - if stats.ByCategory["numeric"] != 1 { - t.Errorf("Expected 1 numeric rule, got %d", stats.ByCategory["numeric"]) - } - if stats.ByTokenType[token.TokenIPv4] != 1 { - t.Errorf("Expected 1 IPv4 token rule, got %d", stats.ByTokenType[token.TokenIPv4]) - } + assert.Equal(t, 3, stats.TotalRules, "Expected TotalRules=3") + assert.Equal(t, 2, stats.Categories, "Expected Categories=2") + assert.Equal(t, 2, stats.ByCategory["network"], "Expected 2 network rules") + assert.Equal(t, 1, stats.ByCategory["numeric"], "Expected 1 numeric rule") + assert.Equal(t, 1, stats.ByTokenType[token.TokenIPv4], "Expected 1 IPv4 token rule") } +// TestGetPredefinedRules tests the retrieval of predefined rules func TestGetPredefinedRules(t *testing.T) { rules := GetPredefinedRules() - if len(rules) == 0 { - t.Error("Expected predefined rules to be non-empty") - } + assert.NotEqual(t, 0, len(rules), "Expected predefined rules to be non-empty") // Check that we have the expected rule types foundRules := make(map[string]bool) @@ -279,226 +243,45 @@ func TestGetPredefinedRules(t *testing.T) { foundRules[rule.Name] = true // Validate rule structure - if rule.Pattern == nil { - t.Errorf("Rule '%s' has nil pattern", rule.Name) - } - if rule.Name == "" { - t.Error("Found rule with empty name") - } - if rule.Category == "" { - t.Errorf("Rule '%s' has empty category", rule.Name) - } - if len(rule.Examples) == 0 { - t.Errorf("Rule '%s' has no examples", rule.Name) - } + assert.NotNil(t, rule.Pattern, "Rule '%s' has nil pattern", rule.Name) + assert.NotEqual(t, "", rule.Name, "Found rule with empty name") + assert.NotEqual(t, "", rule.Category, "Rule '%s' has empty category", rule.Name) + assert.NotEqual(t, 0, len(rule.Examples), "Rule '%s' has no examples", rule.Name) // Test examples against pattern for _, example := range rule.Examples { - if !rule.Pattern.MatchString(example) { - t.Errorf("Rule '%s': example '%s' 
doesn't match pattern", rule.Name, example) - } + assert.True(t, rule.Pattern.MatchString(example), + "Rule '%s': example '%s' doesn't match pattern", rule.Name, example) } } expectedRules := []string{"IPv4Address", "EmailAddress", "URI", "HTTPStatus", "Numeric"} for _, expected := range expectedRules { - if !foundRules[expected] { - t.Errorf("Expected predefined rule '%s' not found", expected) - } + assert.True(t, foundRules[expected], "Expected predefined rule '%s' not found", expected) } } +// TestRuleManager_LoadPredefinedRules tests the loading of predefined rules func TestRuleManager_LoadPredefinedRules(t *testing.T) { rm := NewRuleManager() err := rm.LoadPredefinedRules() - if err != nil { - t.Fatalf("Failed to load predefined rules: %v", err) - } + assert.NoError(t, err, "Failed to load predefined rules") rules := rm.ListRules() - if len(rules) == 0 { - t.Error("Expected predefined rules to be loaded") - } + assert.NotEqual(t, 0, len(rules), "Expected predefined rules to be loaded") // Verify some key rules exist ipv4Rule := rm.GetRule("IPv4Address") - if ipv4Rule == nil { - t.Error("Expected IPv4Address rule to be loaded") - } + assert.NotNil(t, ipv4Rule, "Expected IPv4Address rule to be loaded") emailRule := rm.GetRule("EmailAddress") - if emailRule == nil { - t.Error("Expected EmailAddress rule to be loaded") - } + assert.NotNil(t, emailRule, "Expected EmailAddress rule to be loaded") // Test that rules are working result := rm.ApplyRules("192.168.1.1") - if result != token.TokenIPv4 { - t.Errorf("Expected IPv4 token for '192.168.1.1', got %v", result) - } + assert.Equal(t, token.TokenIPv4, result, "Expected IPv4 token for '192.168.1.1'") result = rm.ApplyRules("test@example.com") - if result != token.TokenEmail { - t.Errorf("Expected Email token for 'test@example.com', got %v", result) - } -} - -// Test the priority management functions -func TestRuleManager_GetRuleByPriority(t *testing.T) { - rm := NewRuleManager() - - rm.AddRule("High1", `high1`, 
"test", "High 1", token.TokenWord, 100, []string{"high1"}) - rm.AddRule("High2", `high2`, "test", "High 2", token.TokenWord, 100, []string{"high2"}) - rm.AddRule("Medium", `medium`, "test", "Medium", token.TokenWord, 50, []string{"medium"}) - - highRules := rm.GetRuleByPriority(100) - if len(highRules) != 2 { - t.Errorf("Expected 2 rules with priority 100, got %d", len(highRules)) - } - - mediumRules := rm.GetRuleByPriority(50) - if len(mediumRules) != 1 { - t.Errorf("Expected 1 rule with priority 50, got %d", len(mediumRules)) - } - - noRules := rm.GetRuleByPriority(999) - if len(noRules) != 0 { - t.Errorf("Expected 0 rules with priority 999, got %d", len(noRules)) - } -} - -func TestRuleManager_GetHighestPriorityRules(t *testing.T) { - rm := NewRuleManager() - - // Empty rule manager - highRules := rm.GetHighestPriorityRules() - if len(highRules) != 0 { - t.Errorf("Expected 0 highest priority rules for empty manager, got %d", len(highRules)) - } - - rm.AddRule("High1", `high1`, "test", "High 1", token.TokenWord, 100, []string{"high1"}) - rm.AddRule("High2", `high2`, "test", "High 2", token.TokenWord, 100, []string{"high2"}) - rm.AddRule("Medium", `medium`, "test", "Medium", token.TokenWord, 50, []string{"medium"}) - - highRules = rm.GetHighestPriorityRules() - if len(highRules) != 2 { - t.Errorf("Expected 2 highest priority rules, got %d", len(highRules)) - } - - for _, rule := range highRules { - if rule.Priority != 100 { - t.Errorf("Expected priority 100, got %d", rule.Priority) - } - } -} - -func TestRuleManager_UpdateRulePriority(t *testing.T) { - rm := NewRuleManager() - - rm.AddRule("TestRule", `test`, "test", "Test", token.TokenWord, 50, []string{"test"}) - - err := rm.UpdateRulePriority("TestRule", 100) - if err != nil { - t.Fatalf("Failed to update rule priority: %v", err) - } - - rule := rm.GetRule("TestRule") - if rule == nil { - t.Fatal("Rule not found after priority update") - } - if rule.Priority != 100 { - t.Errorf("Expected priority 100, got %d", 
rule.Priority) - } - - // Test updating non-existent rule - err = rm.UpdateRulePriority("NonExistent", 200) - if err == nil { - t.Error("Expected error when updating non-existent rule") - } -} - -func TestRuleManager_CategoryDescription(t *testing.T) { - rm := NewRuleManager() - - // Test empty description - desc := rm.GetCategoryDescription("network") - if desc != "" { - t.Errorf("Expected empty description for non-existent category, got '%s'", desc) - } - - // Set category description - rm.SetCategoryDescription("network", "Network-related rules") - desc = rm.GetCategoryDescription("network") - if desc != "Network-related rules" { - t.Errorf("Expected 'Network-related rules', got '%s'", desc) - } - - // Add a rule to existing category and check description is preserved - rm.AddRule("IPv4", `ipv4`, "network", "IPv4", token.TokenIPv4, 100, []string{"ipv4"}) - desc = rm.GetCategoryDescription("network") - if desc != "Network-related rules" { - t.Errorf("Expected description to be preserved, got '%s'", desc) - } - - // Update existing category description - rm.SetCategoryDescription("network", "Updated network description") - desc = rm.GetCategoryDescription("network") - if desc != "Updated network description" { - t.Errorf("Expected 'Updated network description', got '%s'", desc) - } -} - -// Test global functions that provide external access to terminal rules -func TestGlobalTerminalRuleFunctions(t *testing.T) { - // Test GetTerminalRules - rules := GetTerminalRules() - if len(rules) == 0 { - t.Error("Expected GetTerminalRules to return non-empty list") - } - - // Test GetRulesByCategory - networkRules := GetRulesByCategory("network") - if len(networkRules) == 0 { - t.Error("Expected GetRulesByCategory('network') to return rules") - } - - // Test GetRuleCategories - categories := GetRuleCategories() - if len(categories) == 0 { - t.Error("Expected GetRuleCategories to return non-empty list") - } - - // Test AddTerminalRule - err := AddTerminalRule( - 
"TestGlobalRule", - `^test$`, - "test", - "Global test rule", - token.TokenWord, - 25, - []string{"test"}, - ) - if err != nil { - t.Errorf("Failed to add terminal rule: %v", err) - } - - // Verify the rule was added - allRules := GetTerminalRules() - found := false - for _, rule := range allRules { - if rule.Name == "TestGlobalRule" { - found = true - break - } - } - if !found { - t.Error("TestGlobalRule not found after adding") - } - - // Test GetRuleStats - stats := GetRuleStats() - if stats.TotalRules == 0 { - t.Error("Expected GetRuleStats to return non-zero total rules") - } + assert.Equal(t, token.TokenEmail, result, "Expected Email token for 'test@example.com'") } diff --git a/pkg/logs/patterns/automaton/tokenizer.go b/pkg/logs/patterns/automaton/tokenizer.go index 84eae7ae8ff9..b45943ae4964 100644 --- a/pkg/logs/patterns/automaton/tokenizer.go +++ b/pkg/logs/patterns/automaton/tokenizer.go @@ -8,9 +8,11 @@ package automaton import ( + "fmt" "unicode" "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" + "github.com/DataDog/datadog-agent/pkg/util/log" ) // TokenizerState represents the current state of the FSA @@ -24,6 +26,15 @@ const ( StateSpecial // Operators, punctuation, symbols ) +const ( + // These numbers could be ran with some more testing on more log samples to optimize these values. + // tokenizerBufferCapacity is the initial capacity for the rune buffer. + tokenizerBufferCapacity = 128 + + // tokenizerTokensCapacity is the initial capacity for the tokens slice. 
+ tokenizerTokensCapacity = 24 +) + // Tokenizer implements a finite state automaton for log tokenization type Tokenizer struct { input string @@ -41,8 +52,8 @@ func NewTokenizer(input string) *Tokenizer { pos: 0, length: len(input), state: StateStart, - buffer: make([]rune, 0, 64), // Pre-allocate buffer - tokens: make([]token.Token, 0, 32), // Pre-allocate tokens slice + buffer: make([]rune, 0, tokenizerBufferCapacity), + tokens: make([]token.Token, 0, tokenizerTokensCapacity), } } @@ -60,37 +71,48 @@ func (t *Tokenizer) Tokenize() *token.TokenList { return token.NewTokenListWithTokens(t.tokens) } -// classifyTokens applies terminal rules for token classification +// classifyTokens upgrades generic tokens to specific types. +// The FSA first creates generic tokens (TokenWord, TokenNumeric), then this function uses +// pattern matching to identify structured types: +// - "192.168.1.1" → TokenNumeric upgraded to TokenIPv4 +// - "user@example.com" → TokenWord upgraded to TokenEmail +// - "GET" → TokenWord upgraded to TokenHttpMethod func (t *Tokenizer) classifyTokens() { for i, tok := range t.tokens { - // Only classify word-like and numeric tokens that might be structured - if tok.Type != token.TokenWord && tok.Type != token.TokenNumeric { + // Skip if not eligible for classification + if !t.shouldClassify(&tok) { continue } - // Skip classification for punctuation (already marked as NotWildcard in createSpecialToken) - if tok.Wildcard == token.NotWildcard { + // identify specific structured types (IP, Email, Date, HTTP, etc.) + // fallback to word token if can't upgrade to specific type + classifiedType, err := t.classifyToken(tok.Value) + if err != nil { + log.Warnf("Failed to classify token '%s': %v. 
Falling back to word token type", tok.Value, err) continue } - classifiedType := t.classifyToken(tok.Value) - - // If classification returns TokenWord or TokenUnknown, keep current state - // TokenWord = "generic word, no specific classification" - // TokenUnknown = "should not happen, but keep current state" - if classifiedType == token.TokenWord || classifiedType == token.TokenUnknown { + // fallback to word token if can't upgrade to specific type + if classifiedType == token.TokenWord { continue } - // Update token type to the more specific classification + // Upgrade token to the more specific type t.tokens[i].Type = classifiedType - - // Set wildcard potential based on classified type t.tokens[i].Wildcard = getWildcardPotential(classifiedType) - } } +// shouldClassify determines if a token is eligible for pattern-based classification. +// Returns true only for generic Word/Numeric tokens that are PotentialWildcard. +// Excludes: whitespace, punctuation (NotWildcard) +func (t *Tokenizer) shouldClassify(tok *token.Token) bool { + isGenericType := tok.Type == token.TokenWord || tok.Type == token.TokenNumeric + canVary := tok.Wildcard != token.NotWildcard + + return isGenericType && canVary +} + // processNextToken advances the automaton by one token func (t *Tokenizer) processNextToken() bool { if t.pos >= t.length { @@ -149,7 +171,7 @@ func (t *Tokenizer) handleWordState(char rune) bool { } // handleNumericState processes numeric tokens -// Allows digits and special chars for dates (2024-01-15), times (10:30:45), IPs (192.168.1.1) +// Allows digits and special chars for dates (2024-01-15), times (10:30:45) or IPs (192.168.1.1) func (t *Tokenizer) handleNumericState(char rune) bool { switch { case unicode.IsDigit(char), char == '.', char == '-', char == '/', char == ':': @@ -187,10 +209,12 @@ func (t *Tokenizer) handleSpecialState(char rune) bool { return true } -// classifyToken attempts to classify a single token's type using terminal rules -// Takes a token 
value and returns a more specific type if a rule matches, or TokenUnknown -func (t *Tokenizer) classifyToken(value string) token.TokenType { - return globalTrie.Match(value) +// classifyToken attempts to classify a single token's type using terminal rules. +func (t *Tokenizer) classifyToken(value string) (token.TokenType, error) { + if len(value) == 0 { + return token.TokenUnknown, fmt.Errorf("cannot classify empty srting token value") + } + return globalTrie.Match(value), nil } // getWildcardPotential determines if a token type can potentially become a wildcard @@ -203,7 +227,6 @@ func getWildcardPotential(tokenType token.TokenType) token.WildcardStatus { } // Everything else can potentially become wildcards - // Dates wildcard if they have the same format (both TokenDate means same structure) return token.PotentialWildcard } diff --git a/pkg/logs/patterns/automaton/tokenizer_test.go b/pkg/logs/patterns/automaton/tokenizer_test.go new file mode 100644 index 000000000000..182f5a4b4088 --- /dev/null +++ b/pkg/logs/patterns/automaton/tokenizer_test.go @@ -0,0 +1,360 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +package automaton + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +// TestTokenizer_SimpleTokenization tests basic tokenization and type classification +func TestTokenizer_SimpleTokenization(t *testing.T) { + input := "GET /api 200" + tokenizer := NewTokenizer(input) + tokenList := tokenizer.Tokenize() + + assert.NotEqual(t, 0, tokenList.Length(), "Expected tokens, got empty list") + + // Should have: GET, whitespace, /api, whitespace, 200 + assert.Equal(t, 5, tokenList.Length(), "Expected 5 tokens") + + // Verify token types + expectedTypes := []token.TokenType{ + token.TokenHttpMethod, // GET + token.TokenWhitespace, // space + token.TokenAbsolutePath, // /api + token.TokenWhitespace, // space + token.TokenHttpStatus, // 200 + } + + for i, expected := range expectedTypes { + if assert.Less(t, i, tokenList.Length(), "Token %d should exist", i) { + assert.Equal(t, expected, tokenList.Tokens[i].Type, + "Token %d (value: '%s') should be type %v", i, tokenList.Tokens[i].Value, expected) + } + } +} + +// TestTokenizer_StateTransitions tests the state transitions of the tokenizer +func TestTokenizer_StateTransitions(t *testing.T) { + tests := []struct { + input string + expectedStates []TokenizerState + description string + }{ + {"GET", []TokenizerState{StateStart, StateWord}, "Simple word"}, + {"123", []TokenizerState{StateStart, StateNumeric}, "Simple numeric"}, + {" ", []TokenizerState{StateStart, StateWhitespace}, "Single whitespace"}, + {"/path", []TokenizerState{StateStart, StateWord}, "Path starts as word character"}, + {"192.168.1.1", []TokenizerState{StateStart, StateNumeric}, "IPv4 stays in numeric state initially"}, + } + + for _, test := range tests { + tokenizer := NewTokenizer(test.input) + + // Capture state transitions + var states []TokenizerState + states = append(states, tokenizer.state) + + for tokenizer.pos < tokenizer.length { + if !tokenizer.processNextToken() 
{ + break + } + states = append(states, tokenizer.state) + } + + // For simple cases, check exact sequence + if test.input != "192.168.1.1" { + assert.GreaterOrEqual(t, len(states), len(test.expectedStates), + "Input '%s' (%s): expected at least %d states", test.input, test.description, len(test.expectedStates)) + + // Check that expected states appear in sequence + for i, expected := range test.expectedStates { + if assert.Less(t, i, len(states), "State %d should exist for input '%s'", i, test.input) { + assert.Equal(t, expected, states[i], + "Input '%s' (%s): expected state %d to be %v", test.input, test.description, i, expected) + } + } + } else { + // For IPv4 with simplified FSA, check basic state transitions + hasStart := false + hasNumeric := false + + for _, state := range states { + switch state { + case StateStart: + hasStart = true + case StateNumeric: + hasNumeric = true + } + } + + assert.True(t, hasStart, "IPv4 test: expected to see StateStart") + assert.True(t, hasNumeric, "IPv4 test: expected to see StateNumeric") + } + } +} + +// TestTokenTypePreservation tests that TokenNumeric stays TokenNumeric when no pattern matches +// This is critical: classification should upgrade OR preserve, never downgrade +func TestTokenTypePreservation(t *testing.T) { + // Test that generic number stays TokenNumeric (not downgraded to TokenWord) + tokenList := TokenizeString("User 12345 logged in") + + // Find the numeric token + var numericToken *token.Token + for i := range tokenList.Tokens { + if tokenList.Tokens[i].Value == "12345" { + numericToken = &tokenList.Tokens[i] + break + } + } + + assert.NotNil(t, numericToken, "Expected to find numeric token '12345'") + + // Should stay TokenNumeric, not become TokenWord + if numericToken != nil { + assert.Equal(t, token.TokenNumeric, numericToken.Type, + "Token '12345' should stay TokenNumeric") + } + + // Test that numeric upgrades when pattern matches + tokenList = TokenizeString("User 192.168.1.1 logged in") + + // 
Find the IP token + var ipToken *token.Token + for i := range tokenList.Tokens { + if tokenList.Tokens[i].Value == "192.168.1.1" { + ipToken = &tokenList.Tokens[i] + break + } + } + + assert.NotNil(t, ipToken, "Expected to find IP token '192.168.1.1'") + + // Should be upgraded to TokenIPv4 + if ipToken != nil { + assert.Equal(t, token.TokenIPv4, ipToken.Type, + "Token '192.168.1.1' should be TokenIPv4") + } +} + +// TestWildcardStatus tests that tokens are correctly marked as NotWildcard or PotentialWildcard +func TestWildcardStatus(t *testing.T) { + tests := []struct { + input string + tokenValue string + expectedWildcard token.WildcardStatus + description string + }{ + {" ", " ", token.NotWildcard, "Whitespace should be NotWildcard"}, + {":", ":", token.NotWildcard, "Punctuation should be NotWildcard"}, + {"hello", "hello", token.PotentialWildcard, "Generic word should be PotentialWildcard"}, + {"12345", "12345", token.PotentialWildcard, "Generic number should be PotentialWildcard"}, + {"INFO User logged in", "INFO", token.PotentialWildcard, "Severity level should be PotentialWildcard"}, + } + + for _, test := range tests { + t.Run(test.description, func(t *testing.T) { + tokenList := TokenizeString(test.input) + + var targetToken *token.Token + for i := range tokenList.Tokens { + if tokenList.Tokens[i].Value == test.tokenValue { + targetToken = &tokenList.Tokens[i] + break + } + } + + assert.NotNil(t, targetToken, "Expected to find token '%s'", test.tokenValue) + + if targetToken != nil { + assert.Equal(t, test.expectedWildcard, targetToken.Wildcard, test.description) + } + }) + } +} + +// Test the complete data flow +func TestArchitectureCompliance(t *testing.T) { + // Test the exact call graph + // automaton.TokenizeString → NewTokenizer → Tokenizer.Tokenize → processNextToken → classifyToken → globalTrie.Match + + input := "GET /api/users 200" + + // Step 1: automaton.TokenizeString (main entry point) + tokenList := TokenizeString(input) + + // Verify 
TokenList creation + assert.NotNil(t, tokenList, "TokenizeString returned nil") + + // Step 2: Verify token classification used globalTrie.Match + var httpMethod, httpStatus, path *token.Token + + for i := range tokenList.Tokens { + switch tokenList.Tokens[i].Type { + case token.TokenHttpMethod: + httpMethod = &tokenList.Tokens[i] + case token.TokenHttpStatus: + httpStatus = &tokenList.Tokens[i] + case token.TokenAbsolutePath: + path = &tokenList.Tokens[i] + } + } + + if assert.NotNil(t, httpMethod, "HTTP method token not found - trie classification failed") { + assert.Equal(t, "GET", httpMethod.Value, "Expected HTTP method 'GET'") + } + + if assert.NotNil(t, httpStatus, "HTTP status token not found - trie classification failed") { + assert.Equal(t, "200", httpStatus.Value, "Expected HTTP status '200'") + } + + if assert.NotNil(t, path, "Path token not found - state machine failed") { + assert.Equal(t, "/api/users", path.Value, "Expected path '/api/users'") + } + + // Step 3: Verify signature generation works + signature := token.NewSignature(tokenList) + assert.False(t, signature.IsEmpty(), "Signature generation failed") + + expectedPosition := "HttpMethod|Whitespace|AbsolutePath|Whitespace|HttpStatus" + assert.Equal(t, expectedPosition, signature.Position, "Signature position mismatch") +} + +// TestComplexLogScenarios tests complex log scenarios +func TestComplexLogScenarios(t *testing.T) { + tests := []struct { + name string + input string + expected []token.TokenType + }{ + { + name: "HTTP Request", + input: "GET /api/users 200", + expected: []token.TokenType{ + token.TokenHttpMethod, token.TokenWhitespace, + token.TokenAbsolutePath, token.TokenWhitespace, + token.TokenHttpStatus, + }, + }, + { + name: "Error Message", + input: "ERROR Database connection failed", + expected: []token.TokenType{ + token.TokenSeverityLevel, token.TokenWhitespace, + token.TokenWord, token.TokenWhitespace, + token.TokenWord, token.TokenWhitespace, + token.TokenWord, + }, + }, + { + 
name: "User Login", + input: "INFO User 12345 logged in", + expected: []token.TokenType{ + token.TokenSeverityLevel, token.TokenWhitespace, + token.TokenWord, token.TokenWhitespace, + token.TokenNumeric, token.TokenWhitespace, + token.TokenWord, token.TokenWhitespace, + token.TokenWord, + }, + }, + { + name: "Complex with Email and IP", + input: "user@domain.com from 192.168.1.1", + expected: []token.TokenType{ + token.TokenEmail, token.TokenWhitespace, + token.TokenWord, token.TokenWhitespace, + token.TokenIPv4, + }, + }, + { + name: "URL with Scheme", + input: "Visit https://example.com/docs", + expected: []token.TokenType{ + token.TokenWord, token.TokenWhitespace, + token.TokenURI, + }, + }, + { + name: "Date in Context", + input: "Event on 2024-01-15", + expected: []token.TokenType{ + token.TokenWord, token.TokenWhitespace, + token.TokenWord, token.TokenWhitespace, + token.TokenDate, + }, + }, + { + name: "False Positive - Single @ is not Email", + input: "Price @ $10 each", + expected: []token.TokenType{ + token.TokenWord, // Price + token.TokenWhitespace, // space + token.TokenWord, // @ (with trailing space) + token.TokenWord, // $1 + token.TokenNumeric, // 0 + token.TokenWhitespace, // space + token.TokenWord, // each + }, + }, + { + name: "False Positive - Division operator is not Path", + input: "Calculate 10 / 2 = 5", + expected: []token.TokenType{ + token.TokenWord, // Calculate + token.TokenWhitespace, // space + token.TokenNumeric, // 10 + token.TokenWhitespace, // space + token.TokenWord, // / + token.TokenWhitespace, // space + token.TokenNumeric, // 2 + token.TokenWhitespace, // space + token.TokenWord, // = (with trailing space) + token.TokenNumeric, // 5 + }, + }, + { + name: "False Positive - Phone number is not Date", + input: "Phone: 123-456-7890", + expected: []token.TokenType{ + token.TokenWord, // Phone + token.TokenWord, // : (with trailing space) + token.TokenNumeric, // 123-456-7890 stays numeric, not date + }, + }, + } + + for _, test 
:= range tests { + t.Run(test.name, func(t *testing.T) { + tokenList := TokenizeString(test.input) + + assert.Equal(t, len(test.expected), tokenList.Length(), + "Expected %d tokens, got: %v", len(test.expected), tokenTypesToString(tokenList.Tokens)) + + for i, expected := range test.expected { + if assert.Less(t, i, tokenList.Length(), "Token %d should exist", i) { + assert.Equal(t, expected, tokenList.Tokens[i].Type, + "Token %d (value: '%s') should be type %v", i, tokenList.Tokens[i].Value, expected) + } + } + }) + } +} + +// =============================== +// Helper functions +// =============================== +func tokenTypesToString(tokens []token.Token) []string { + result := make([]string, len(tokens)) + for i, tok := range tokens { + result[i] = tok.String() + } + return result +} diff --git a/pkg/logs/patterns/clustering/cluster.go b/pkg/logs/patterns/clustering/cluster.go index 0e5473a9d591..12be88b1ac89 100644 --- a/pkg/logs/patterns/clustering/cluster.go +++ b/pkg/logs/patterns/clustering/cluster.go @@ -99,7 +99,7 @@ func (c *Cluster) GeneratePattern() *token.TokenList { primaryGroup = group } } - // TODO: Log warning that cluster is heterogeneous and we're only using primary group + // TODO: Need to handle semantic mergeability of different patterns in the group } else { primaryGroup = groups[0] } @@ -146,7 +146,7 @@ func (c *Cluster) GeneratePattern() *token.TokenList { return c.Pattern } -// GetWildcardPositions returns wildcard positions. +// GetWildcardPositions returns wildcard token positions (indices in token array). func (c *Cluster) GetWildcardPositions() []int { if c.Pattern == nil { c.GeneratePattern() @@ -160,6 +160,34 @@ func (c *Cluster) GetWildcardPositions() []int { return positions } +// GetWildcardCharPositions returns character positions where wildcards appear in the pattern string. +// This is used for stateful encoding where the intake needs to know where to insert dynamic values. 
+func (c *Cluster) GetWildcardCharPositions() []int { + if c.Pattern == nil { + c.GeneratePattern() + } + + var charPositions []int + currentPos := 0 + + for _, tok := range c.Pattern.Tokens { + // Clean the token value for proper length calculation + cleaned := sanitizeForTemplate(tok.Value) + + if tok.Wildcard == token.IsWildcard { + // Record the current character position for this wildcard + charPositions = append(charPositions, currentPos) + // Wildcard is represented as "*" (1 character) + currentPos += 1 + } else if cleaned != "" { + // Add the length of the cleaned token value + currentPos += len(cleaned) + } + } + + return charPositions +} + // HasWildcards returns true if this cluster contains wildcard positions. func (c *Cluster) HasWildcards() bool { if c.Pattern == nil { @@ -169,6 +197,31 @@ func (c *Cluster) HasWildcards() bool { return len(c.WildcardMap) > 0 } +// GetWildcardValues extracts the actual values from the most recent token list that correspond to wildcard positions +func (c *Cluster) GetWildcardValues() []string { + if c.Pattern == nil { + c.GeneratePattern() + } + + // Get the most recent token list + if len(c.TokenLists) == 0 { + return nil + } + lastTokenList := c.TokenLists[len(c.TokenLists)-1] + + // Extract values at wildcard positions + var values []string + for i, tok := range c.Pattern.Tokens { + if tok.Wildcard == token.IsWildcard { + if i < len(lastTokenList.Tokens) { + values = append(values, lastTokenList.Tokens[i].Value) + } + } + } + + return values +} + // ExtractWildcardValues extracts the wildcard values from a specific TokenList func (c *Cluster) ExtractWildcardValues(tokenList *token.TokenList) []string { if c.Pattern == nil { @@ -205,12 +258,29 @@ func (c *Cluster) GetPatternString() string { if tok.Wildcard == token.IsWildcard { parts = append(parts, "*") } else { - parts = append(parts, tok.Value) + // Only use printable ASCII/UTF-8 characters in the template + cleaned := sanitizeForTemplate(tok.Value) + if cleaned 
!= "" { + parts = append(parts, cleaned) + } } } return strings.Join(parts, "") } +// sanitizeForTemplate removes non-printable characters from template strings +func sanitizeForTemplate(s string) string { + runes := []rune(s) + result := make([]rune, 0, len(runes)) + for _, r := range runes { + // Keep only printable characters (space and above, excluding DEL) + if r >= ' ' && r != 0x7F && r < 0xFFFD { + result = append(result, r) + } + } + return string(result) +} + // GetPatternID returns the pattern ID for this cluster func (c *Cluster) GetPatternID() uint64 { return c.PatternID diff --git a/pkg/logs/patterns/clustering/cluster_manager.go b/pkg/logs/patterns/clustering/cluster_manager.go new file mode 100644 index 000000000000..96ab52c9e5a3 --- /dev/null +++ b/pkg/logs/patterns/clustering/cluster_manager.go @@ -0,0 +1,216 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package clustering provides clustering functionality for grouping similar TokenLists +// and identifying wildcard positions for pattern extraction. +package clustering + +import ( + "crypto/rand" + "encoding/binary" + "time" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +// PatternChangeType indicates what changed when adding a TokenList to the cluster manager +type PatternChangeType int + +const ( + // PatternNoChange means the TokenList was added to an existing cluster without structural changes + PatternNoChange PatternChangeType = iota + // PatternNew means a brand new pattern was created (first time seeing this signature) + PatternNew + // PatternUpdated means an existing pattern's structure changed (more wildcards added) + PatternUpdated +) + +// ClusterManager manages the clustering of TokenLists using hash-based bucketing. 
+type ClusterManager struct {
+	// hashBuckets maps a signature hash to every cluster whose signature
+	// produced that hash; Add and GetCluster disambiguate with Signature.Equals.
+	hashBuckets     map[uint64][]*Cluster
+	totalTokenLists int // number of token lists accepted by Add
+	totalClusters   int // number of clusters created by Add
+}
+
+// NewClusterManager creates an empty ClusterManager.
+func NewClusterManager() *ClusterManager {
+	return &ClusterManager{
+		hashBuckets:     make(map[uint64][]*Cluster),
+		totalTokenLists: 0,
+		totalClusters:   0,
+	}
+}
+
+// Add processes a TokenList and adds it to the cluster with an equal signature,
+// creating a new cluster (with a freshly generated pattern ID) when none exists.
+//
+// It returns the cluster and a PatternChangeType:
+//   - PatternNew when a cluster was created,
+//   - PatternUpdated when the matching cluster already held more than one
+//     token list and had a non-nil Pattern before this call,
+//   - PatternNoChange otherwise.
+//
+// A nil or empty tokenList is ignored and yields (nil, PatternNoChange).
+//
+// NOTE(review): PatternUpdated is derived from the cluster's size and pattern
+// presence only; it does not verify that the template actually changed, and a
+// cluster of size 1 with a generated pattern reports PatternNoChange on its
+// second add. Confirm this is the intended contract for callers that emit
+// pattern updates.
+func (cm *ClusterManager) Add(tokenList *token.TokenList) (*Cluster, PatternChangeType) {
+	if tokenList == nil || tokenList.IsEmpty() {
+		return nil, PatternNoChange
+	}
+
+	signature := token.NewSignature(tokenList)
+	hash := signature.Hash
+
+	clusters := cm.hashBuckets[hash]
+
+	for _, cluster := range clusters {
+		if cluster.Signature.Equals(signature) {
+			// Decide the change type from the cluster state *before* the add.
+			willUpdate := cluster.Size() > 1 && cluster.Pattern != nil
+
+			cluster.Add(tokenList)
+			cm.totalTokenLists++
+
+			if willUpdate {
+				return cluster, PatternUpdated
+			}
+			return cluster, PatternNoChange
+		}
+	}
+
+	// Creating a new cluster means a new pattern
+	newCluster := NewCluster(signature, tokenList)
+	newCluster.SetPatternID(generatePatternID())
+	cm.hashBuckets[hash] = append(clusters, newCluster)
+
+	cm.totalTokenLists++
+	cm.totalClusters++
+
+	return newCluster, PatternNew
+}
+
+// GetCluster retrieves the cluster with the given signature, or nil when no
+// cluster with an equal signature has been created.
+func (cm *ClusterManager) GetCluster(signature token.Signature) *Cluster {
+	hash := signature.Hash
+
+	clusters, exists := cm.hashBuckets[hash]
+	if !exists {
+		return nil
+	}
+
+	for _, cluster := range clusters {
+		if cluster.Signature.Equals(signature) {
+			return cluster
+		}
+	}
+
+	return nil
+}
+
+// GetClustersWithPatterns returns all clusters that have patterns defined.
+// This is useful for re-sending pattern state after stream rotation.
+// The order of the returned slice follows map iteration and is not stable.
+func (cm *ClusterManager) GetClustersWithPatterns() []*Cluster {
+	var clustersWithPatterns []*Cluster
+
+	for _, clusters := range cm.hashBuckets {
+		for _, cluster := range clusters {
+			// Only include clusters with actual patterns
+			if cluster.Pattern != nil {
+				clustersWithPatterns = append(clustersWithPatterns, cluster)
+			}
+		}
+	}
+
+	return clustersWithPatterns
+}
+
+// Clear removes all clusters and resets statistics.
+func (cm *ClusterManager) Clear() {
+	cm.hashBuckets = make(map[uint64][]*Cluster)
+	cm.totalTokenLists = 0
+	cm.totalClusters = 0
+}
+
+// GetAllClusters returns all clusters in the manager, in map iteration order.
+func (cm *ClusterManager) GetAllClusters() []*Cluster {
+	var allClusters []*Cluster
+
+	for _, clusters := range cm.hashBuckets {
+		allClusters = append(allClusters, clusters...)
+	}
+
+	return allClusters
+}
+
+// GetClustersByLength returns the clusters whose signature length equals length.
+func (cm *ClusterManager) GetClustersByLength(length int) []*Cluster {
+	var result []*Cluster
+
+	for _, clusters := range cm.hashBuckets {
+		for _, cluster := range clusters {
+			if cluster.Signature.Length == length {
+				result = append(result, cluster)
+			}
+		}
+	}
+
+	return result
+}
+
+// GetClustersByHash returns a copy of the bucket stored under hash, or an
+// empty (non-nil) slice when the hash is unknown.
+func (cm *ClusterManager) GetClustersByHash(hash uint64) []*Cluster {
+	if clusters, exists := cm.hashBuckets[hash]; exists {
+		result := make([]*Cluster, len(clusters))
+		copy(result, clusters)
+		return result
+	}
+
+	return []*Cluster{}
+}
+
+// ClusterStats holds aggregate statistics about the clustering.
+type ClusterStats struct {
+	TotalTokenLists    int     // token lists accepted by Add
+	TotalClusters      int     // clusters created by Add
+	HashBuckets        int     // distinct signature hashes in use
+	AverageClusterSize float64 // TotalTokenLists / TotalClusters, 0 when there are no clusters
+}
+
+// GetStats returns current clustering statistics.
+func (cm *ClusterManager) GetStats() ClusterStats {
+	avgSize := 0.0
+	if cm.totalClusters > 0 {
+		avgSize = float64(cm.totalTokenLists) / float64(cm.totalClusters)
+	}
+
+	return ClusterStats{
+		TotalTokenLists:    cm.totalTokenLists,
+		TotalClusters:      cm.totalClusters,
+		HashBuckets:        len(cm.hashBuckets),
+		AverageClusterSize: avgSize,
+	}
+}
+
+// GetLargestClusters returns the N largest clusters, largest first.
+// n is clamped to [0, number of clusters], so a negative n yields an empty
+// slice instead of a slice-bounds panic.
+func (cm *ClusterManager) GetLargestClusters(n int) []*Cluster {
+	allClusters := cm.GetAllClusters()
+
+	// Simple bubble sort (descending by size) for small N
+	for i := 0; i < len(allClusters)-1; i++ {
+		for j := 0; j < len(allClusters)-i-1; j++ {
+			if allClusters[j].Size() < allClusters[j+1].Size() {
+				allClusters[j], allClusters[j+1] = allClusters[j+1], allClusters[j]
+			}
+		}
+	}
+
+	if n < 0 {
+		n = 0
+	}
+	if n > len(allClusters) {
+		n = len(allClusters)
+	}
+
+	return allClusters[:n]
+}
+
+// generatePatternID generates a pattern ID from 8 random bytes, falling back
+// to the current Unix time in nanoseconds when the random source fails.
+func generatePatternID() uint64 {
+	var buf [8]byte
+	_, err := rand.Read(buf[:])
+	if err != nil {
+		return uint64(time.Now().UnixNano())
+	}
+	return binary.BigEndian.Uint64(buf[:])
+}
diff --git a/pkg/logs/patterns/clustering/cluster_manager_test.go b/pkg/logs/patterns/clustering/cluster_manager_test.go
new file mode 100644
index 000000000000..0ade17a247a6
--- /dev/null
+++ b/pkg/logs/patterns/clustering/cluster_manager_test.go
@@ -0,0 +1,489 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+package clustering
+
+import (
+	"testing"
+
+	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
+)
+
+func TestClusterManager_NewClusterManager(t *testing.T) {
+	cm := NewClusterManager()
+
+	if cm == nil {
+		t.Fatal("ClusterManager should not be nil")
+	}
+
+	stats := cm.GetStats()
+	if stats.TotalTokenLists != 0 || stats.TotalClusters != 0 || stats.HashBuckets != 0 {
+		t.Error("New ClusterManager should have zero stats")
+	}
+}
+
+func TestClusterManager_Add_NewCluster(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Create TokenList
+	tokens := []token.Token{
+		{Value: "GET", Type: token.TokenHttpMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api", Type: token.TokenAbsolutePath},
+	}
+	tokenList := token.NewTokenListWithTokens(tokens)
+
+	cluster, changeType := cm.Add(tokenList)
+
+	if cluster == nil {
+		t.Fatal("Should return a cluster")
+	}
+
+	if cluster.Size() != 1 {
+		t.Errorf("Cluster should have size 1, got %d", cluster.Size())
+	}
+
+	if changeType != PatternNew {
+		t.Errorf("Expected PatternNew for first add, got %v", changeType)
+	}
+
+	stats := cm.GetStats()
+	if stats.TotalTokenLists != 1 || stats.TotalClusters != 1 {
+		t.Errorf("Expected 1 TokenList and 1 cluster, got %d TokenLists and %d clusters",
+			stats.TotalTokenLists, stats.TotalClusters)
+	}
+}
+
+func TestClusterManager_Add_ExistingCluster(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Create two TokenLists with same signature
+	tokens1 := []token.Token{
+		{Value: "GET", Type: token.TokenHttpMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api", Type: token.TokenAbsolutePath},
+	}
+	tokens2 := []token.Token{
+		{Value: "POST", Type: token.TokenHttpMethod}, // Different value, same type
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/users", Type: token.TokenAbsolutePath}, // Different value, same type
+	}
+
+	tokenList1 := token.NewTokenListWithTokens(tokens1)
+	tokenList2 := token.NewTokenListWithTokens(tokens2)
+
+	cluster1, changeType1 := cm.Add(tokenList1)
+	cluster2, changeType2 := cm.Add(tokenList2)
+
+	// Should be the same cluster
+	if cluster1 != cluster2 {
+		t.Error("TokenLists with same signature should go to same cluster")
+	}
+
+	if cluster1.Size() != 2 {
+		t.Errorf("Cluster should have size 2, got %d", cluster1.Size())
+	}
+
+	if changeType1 != PatternNew {
+		t.Errorf("Expected PatternNew for first add, got %v", changeType1)
+	}
+
+	if changeType2 != PatternNoChange {
+		t.Errorf("Expected PatternNoChange for second add to same cluster, got %v", changeType2)
+	}
+
+	stats := cm.GetStats()
+	if stats.TotalTokenLists != 2 || stats.TotalClusters != 1 {
+		t.Errorf("Expected 2 TokenLists and 1 cluster, got %d TokenLists and %d clusters",
+			stats.TotalTokenLists, stats.TotalClusters)
+	}
+}
+
+func TestClusterManager_Add_DifferentSignatures(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Create TokenLists with different signatures
+	tokens1 := []token.Token{
+		{Value: "GET", Type: token.TokenHttpMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api", Type: token.TokenAbsolutePath},
+	}
+	tokens2 := []token.Token{
+		{Value: "ERROR", Type: token.TokenSeverityLevel}, // Different type
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "failed", Type: token.TokenWord}, // Different type
+	}
+
+	tokenList1 := token.NewTokenListWithTokens(tokens1)
+	tokenList2 := token.NewTokenListWithTokens(tokens2)
+
+	cluster1, _ := cm.Add(tokenList1)
+	cluster2, _ := cm.Add(tokenList2)
+
+	// Should be different clusters
+	if cluster1 == cluster2 {
+		t.Error("TokenLists with different signatures should go to different clusters")
+	}
+
+	stats := cm.GetStats()
+	if stats.TotalTokenLists != 2 || stats.TotalClusters != 2 {
+		t.Errorf("Expected 2 TokenLists and 2 clusters, got %d TokenLists and %d clusters",
+			stats.TotalTokenLists, stats.TotalClusters)
+	}
+}
+
+func TestClusterManager_GetCluster(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Create and add TokenList
+	tokens := []token.Token{
+		{Value: "GET", Type: token.TokenHttpMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api", Type: token.TokenAbsolutePath},
+	}
+	tokenList := token.NewTokenListWithTokens(tokens)
+	signature := token.NewSignature(tokenList)
+
+	addedCluster, _ := cm.Add(tokenList)
+
+	// Retrieve cluster by signature
+	retrievedCluster := cm.GetCluster(signature)
+
+	if retrievedCluster != addedCluster {
+		t.Error("Retrieved cluster should be the same as added cluster")
+	}
+
+	// Try to get non-existent cluster
+	differentTokens := []token.Token{
+		{Value: "ERROR", Type: token.TokenSeverityLevel},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "failed", Type: token.TokenWord},
+	}
+	differentTokenList := token.NewTokenListWithTokens(differentTokens)
+	differentSignature := token.NewSignature(differentTokenList)
+
+	nonExistentCluster := cm.GetCluster(differentSignature)
+	if nonExistentCluster != nil {
+		t.Error("Should return nil for non-existent cluster")
+	}
+}
+
+func TestClusterManager_GetAllClusters(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Add multiple clusters
+	tokens1 := []token.Token{
+		{Value: "GET", Type: token.TokenHttpMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api", Type: token.TokenAbsolutePath},
+	}
+	tokens2 := []token.Token{
+		{Value: "ERROR", Type: token.TokenSeverityLevel},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "failed", Type: token.TokenWord},
+	}
+	tokens3 := []token.Token{
+		{Value: "192.168.1.1", Type: token.TokenIPv4},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "connected", Type: token.TokenWord},
+	}
+
+	tokenList1 := token.NewTokenListWithTokens(tokens1)
+	tokenList2 := token.NewTokenListWithTokens(tokens2)
+	tokenList3 := token.NewTokenListWithTokens(tokens3)
+
+	cm.Add(tokenList1)
+	cm.Add(tokenList2)
+	cm.Add(tokenList3)
+
+	allClusters := cm.GetAllClusters()
+
+	if len(allClusters) != 3 {
+		t.Errorf("Expected 3 clusters, got %d", len(allClusters))
+	}
+}
+
+func TestClusterManager_GetClustersByLength(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Add TokenLists of different lengths with different signatures
+	tokens1 := []token.Token{
+		{Value: "GET", Type: token.TokenHttpMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+	} // Length 2
+
+	tokens2 := []token.Token{
+		{Value: "ERROR", Type: token.TokenSeverityLevel},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "failed", Type: token.TokenWord},
+	} // Length 3
+
+	tokens3 := []token.Token{
+		{Value: "192.168.1.1", Type: token.TokenIPv4},
+		{Value: " ", Type: token.TokenWhitespace},
+	} // Length 2 (different signature than tokens1)
+
+	tokenList1 := token.NewTokenListWithTokens(tokens1)
+	tokenList2 := token.NewTokenListWithTokens(tokens2)
+	tokenList3 := token.NewTokenListWithTokens(tokens3)
+
+	cm.Add(tokenList1)
+	cm.Add(tokenList2)
+	cm.Add(tokenList3)
+
+	// Get clusters of length 2 - should have 2 different clusters
+	length2Clusters := cm.GetClustersByLength(2)
+	if len(length2Clusters) != 2 {
+		t.Errorf("Expected 2 clusters of length 2, got %d", len(length2Clusters))
+	}
+
+	// Get clusters of length 3
+	length3Clusters := cm.GetClustersByLength(3)
+	if len(length3Clusters) != 1 {
+		t.Errorf("Expected 1 cluster of length 3, got %d", len(length3Clusters))
+	}
+
+	// Get clusters of non-existent length
+	length5Clusters := cm.GetClustersByLength(5)
+	if len(length5Clusters) != 0 {
+		t.Errorf("Expected 0 clusters of length 5, got %d", len(length5Clusters))
+	}
+}
+
+func TestClusterManager_GetLargestClusters(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Create clusters of different sizes
+	// Cluster 1: size 3
+	tokens1 := []token.Token{
+		{Value: "GET", Type: token.TokenHttpMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api", Type: token.TokenAbsolutePath},
+	}
+	tokenList1a := token.NewTokenListWithTokens(tokens1)
+	tokenList1b := token.NewTokenListWithTokens([]token.Token{
+		{Value: "POST", Type: token.TokenHttpMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/users", Type: token.TokenAbsolutePath},
+	})
+	tokenList1c := token.NewTokenListWithTokens([]token.Token{
+		{Value: "PUT", Type: token.TokenHttpMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/items", Type: token.TokenAbsolutePath},
+	})
+
+	// Cluster 2: size 1
+	tokens2 := []token.Token{
+		{Value: "ERROR", Type: token.TokenSeverityLevel},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "failed", Type: token.TokenWord},
+	}
+	tokenList2 := token.NewTokenListWithTokens(tokens2)
+
+	// Cluster 3: size 2
+	tokens3 := []token.Token{
+		{Value: "192.168.1.1", Type: token.TokenIPv4},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "connected", Type: token.TokenWord},
+	}
+	tokenList3a := token.NewTokenListWithTokens(tokens3)
+	tokenList3b := token.NewTokenListWithTokens([]token.Token{
+		{Value: "10.0.0.1", Type: token.TokenIPv4},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "disconnected", Type: token.TokenWord},
+	})
+
+	cm.Add(tokenList1a)
+	cm.Add(tokenList1b)
+	cm.Add(tokenList1c)
+	cm.Add(tokenList2)
+	cm.Add(tokenList3a)
+	cm.Add(tokenList3b)
+
+	// Get top 2 largest clusters
+	largest := cm.GetLargestClusters(2)
+
+	if len(largest) != 2 {
+		t.Errorf("Expected 2 largest clusters, got %d", len(largest))
+	}
+
+	// Should be ordered by size (largest first)
+	if largest[0].Size() != 3 {
+		t.Errorf("Largest cluster should have size 3, got %d", largest[0].Size())
+	}
+
+	if largest[1].Size() != 2 {
+		t.Errorf("Second largest cluster should have size 2, got %d", largest[1].Size())
+	}
+}
+
+func TestClusterManager_Clear(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Add some data
+	tokens := []token.Token{
+		{Value: "GET", Type: token.TokenHttpMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api", Type: token.TokenAbsolutePath},
+	}
+	tokenList := token.NewTokenListWithTokens(tokens)
+	cm.Add(tokenList)
+
+	// Verify data exists
+	stats := cm.GetStats()
+	if stats.TotalTokenLists == 0 || stats.TotalClusters == 0 {
+		t.Error("Should have data before clear")
+	}
+
+	// Clear
+	cm.Clear()
+
+	// Verify data is gone
+	stats = cm.GetStats()
+	if stats.TotalTokenLists != 0 || stats.TotalClusters != 0 || stats.HashBuckets != 0 {
+		t.Error("Should have no data after clear")
+	}
+
+	allClusters := cm.GetAllClusters()
+	if len(allClusters) != 0 {
+		t.Error("Should have no clusters after clear")
+	}
+}
+
+func TestClusterManager_Stats(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Add TokenLists to create clusters of different sizes
+	tokens1 := []token.Token{
+		{Value: "GET", Type: token.TokenHttpMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api", Type: token.TokenAbsolutePath},
+	}
+	tokens2 := []token.Token{
+		{Value: "POST", Type: token.TokenHttpMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/users", Type: token.TokenAbsolutePath},
+	}
+	tokens3 := []token.Token{
+		{Value: "ERROR", Type: token.TokenSeverityLevel},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "failed", Type: token.TokenWord},
+	}
+
+	tokenList1 := token.NewTokenListWithTokens(tokens1)
+	tokenList2 := token.NewTokenListWithTokens(tokens2)
+	tokenList3 := token.NewTokenListWithTokens(tokens3)
+
+	cm.Add(tokenList1)
+	cm.Add(tokenList2) // Same cluster as tokenList1
+	cm.Add(tokenList3) // Different cluster
+
+	stats := cm.GetStats()
+
+	if stats.TotalTokenLists != 3 {
+		t.Errorf("Expected 3 total TokenLists, got %d", stats.TotalTokenLists)
+	}
+
+	if stats.TotalClusters != 2 {
+		t.Errorf("Expected 2 total clusters, got %d", stats.TotalClusters)
+	}
+
+	expectedAvg := 3.0 / 2.0 // 3 TokenLists / 2 clusters
+	if stats.AverageClusterSize != expectedAvg {
+		t.Errorf("Expected average cluster size %.2f, got %.2f", expectedAvg, stats.AverageClusterSize)
+	}
+
+	if stats.HashBuckets == 0 {
+		t.Error("Should have at least one hash bucket")
+	}
+}
+
+func TestClusterManager_PatternChangeType(t *testing.T) {
+	cm := NewClusterManager()
+
+	// Create token lists with same signature (HTTP method, space, path)
+	tokens1 := []token.Token{
+		{Value: "GET", Type: token.TokenHttpMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api/users", Type: token.TokenAbsolutePath},
+	}
+	tokens2 := []token.Token{
+		{Value: "POST", Type: token.TokenHttpMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api/orders", Type: token.TokenAbsolutePath},
+	}
+	tokens3 := []token.Token{
+		{Value: "PUT", Type: token.TokenHttpMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api/items", Type: token.TokenAbsolutePath},
+	}
+	tokens4 := []token.Token{
+		{Value: "DELETE", Type: token.TokenHttpMethod},
+		{Value: " ", Type: token.TokenWhitespace},
+		{Value: "/api/products", Type: token.TokenAbsolutePath},
+	}
+
+	tokenList1 := token.NewTokenListWithTokens(tokens1)
+	tokenList2 := token.NewTokenListWithTokens(tokens2)
+	tokenList3 := token.NewTokenListWithTokens(tokens3)
+	tokenList4 := token.NewTokenListWithTokens(tokens4)
+
+	// First add - should create a new pattern
+	cluster1, changeType1 := cm.Add(tokenList1)
+	if changeType1 != PatternNew {
+		t.Errorf("Expected PatternNew for first add, got %v", changeType1)
+	}
+	t.Logf("✅ Add #1: PatternNew (created cluster with PatternID=%d)", cluster1.GetPatternID())
+
+	// Second add - same signature, but pattern not yet generated, so no change
+	cluster2, changeType2 := cm.Add(tokenList2)
+	if changeType2 != PatternNoChange {
+		t.Errorf("Expected PatternNoChange for second add, got %v", changeType2)
+	}
+	if cluster1 != cluster2 {
+		t.Error("Should return same cluster for same signature")
+	}
+	t.Logf("✅ Add #2: PatternNoChange (added to existing cluster, size=%d)", cluster2.Size())
+
+	// Generate pattern to set up for PatternUpdated
+	pattern := cluster2.GeneratePattern()
+	if pattern == nil {
+		t.Fatal("Pattern should be generated")
+	}
+	t.Logf(" Pattern after 2 logs: '%s'", cluster2.GetPatternString())
+
+	// Third add - pattern exists, so it will be updated
+	cluster3, changeType3 := cm.Add(tokenList3)
+	if changeType3 != PatternUpdated {
+		t.Errorf("Expected PatternUpdated for third add (pattern exists), got %v", changeType3)
+	}
+	if cluster1 != cluster3 {
+		t.Error("Should return same cluster for same signature")
+	}
+	t.Logf("✅ Add #3: PatternUpdated (pattern will change, size=%d)", cluster3.Size())
+
+	// Regenerate pattern to see the change
+	newPattern := cluster3.GeneratePattern()
+	if newPattern == nil {
+		t.Fatal("Pattern should be regenerated")
+	}
+	t.Logf(" Pattern after 3 logs: '%s'", cluster3.GetPatternString())
+
+	// Fourth add - pattern exists, so updated again
+	cluster4, changeType4 := cm.Add(tokenList4)
+	if changeType4 != PatternUpdated {
+		t.Errorf("Expected PatternUpdated for fourth add (pattern exists), got %v", changeType4)
+	}
+	t.Logf("✅ Add #4: PatternUpdated (pattern will change, size=%d)", cluster4.Size())
+
+	// Final pattern
+	cluster4.GeneratePattern()
+	t.Logf(" Final pattern after 4 logs: '%s'", cluster4.GetPatternString())
+
+	// Verify every add landed in the same cluster
+	if cluster1.Size() != 4 {
+		t.Errorf("Expected cluster size 4, got %d", cluster1.Size())
+	}
+}
diff --git a/pkg/logs/patterns/clustering/merging/merging.go b/pkg/logs/patterns/clustering/merging/merging.go
index b10f2a316615..20c5b9cf42f7 100644
--- a/pkg/logs/patterns/clustering/merging/merging.go
+++ b/pkg/logs/patterns/clustering/merging/merging.go
@@ -12,22 +12,12 @@ import (
 	"github.com/DataDog/datadog-agent/pkg/logs/patterns/token"
 )
 
-// ShouldProtectPosition determines if a position should never be wildcarded.
-// Protection rules ensure pattern quality by preventing wildcarding of
-// semantically important positions.
-func ShouldProtectPosition(position int, tokenType token.TokenType) bool {
-	// Rule 1: Never wildcard the first word token
-	// The first word typically indicates the action/command and is semantically critical
-	// e.g., "Login successful" vs "Error occurred" should not merge to "* *"
+// shouldProtectPosition reports whether the token is the first word token, which must never be wildcarded.
+func shouldProtectPosition(position int, tokenType token.TokenType) bool {
 	if position == 0 && tokenType == token.TokenWord {
 		return true
 	}
 
-	// Future: Add more protection rules
-	// - Never wildcard HTTP methods?
-	// - Never wildcard severity levels?
-	// - Protect first N tokens?
-
 	return false
 }
 
@@ -56,7 +46,7 @@ func CanMergeTokenLists(tl1, tl2 *token.TokenList) bool {
 		}
 
 		// For wildcard result, check protection rules
-		if result == token.Wildcard && ShouldProtectPosition(i, tok1.Type) {
+		if result == token.Wildcard && shouldProtectPosition(i, tok1.Type) {
 			return false
 		}
 	}
@@ -89,8 +79,8 @@ func MergeTokenLists(tl1, tl2 *token.TokenList) *token.TokenList {
 
 		case token.Wildcard:
 			// Check protection rules before wildcarding
-			if ShouldProtectPosition(i, tok1.Type) {
-				return nil // Abort: protected position cannot be wildcarded
+			if shouldProtectPosition(i, tok1.Type) {
+				return nil
 			}
 			// Create wildcard, preserving the first token's value as representative
 			merged.AddToken(tok1.Type, tok1.Value, token.IsWildcard)
diff --git a/pkg/logs/patterns/clustering/merging/merging_test.go b/pkg/logs/patterns/clustering/merging/merging_test.go
index 355655e5eab4..ef436455318e 100644
--- a/pkg/logs/patterns/clustering/merging/merging_test.go
+++ b/pkg/logs/patterns/clustering/merging/merging_test.go
@@ -48,7 +48,7 @@ func TestShouldProtectPosition(t *testing.T) {
 
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result := ShouldProtectPosition(tt.position, tt.tokenType)
+			result := shouldProtectPosition(tt.position, tt.tokenType)
 			assert.Equal(t, tt.expected, result)
 		})
 	}
diff --git a/pkg/logs/patterns/token/token.go b/pkg/logs/patterns/token/token.go
index 84023dbfe779..6fc00d0569a5 100644
--- a/pkg/logs/patterns/token/token.go
+++ b/pkg/logs/patterns/token/token.go
@@ -10,7 +10,12 @@ import (
 	"fmt"
 )
 
+//go:generate stringer -type=TokenType -trimprefix=Token
+
 // TokenType represents the type of a token
+//
+// TokenType.String() method is auto-generated by stringer
+// Run: go generate ./pkg/logs/patterns/token to regenerate the stringer file if you make changes to the TokenType enum
 type TokenType int
 
 const (
@@ -41,16 +46,14 @@ type WildcardStatus int
 
 const (
 	// NotWildcard - This token cannot become a wildcard
-	// Examples: dates, whitespace
+	// Examples: whitespace or first word token
 	NotWildcard WildcardStatus = iota
 
 	// PotentialWildcard - This token can become a wildcard
-	// Examples: all words ("connection", "user123"), HTTP methods, IPs, numbers
-	// Note: First word position is protected during merge
+	// Examples: all non-whitespace tokens
 	PotentialWildcard
 
 	// IsWildcard - This token is already a wildcard
-	// Example: wildcard position in a pattern
 	IsWildcard
 )
 
@@ -59,7 +62,7 @@ type MergeResult int
 
 const (
 	// Conflict - Tokens cannot merge, abort pattern creation
-	// Examples: different types, generic words with different values
+	// Examples: different types, words with different values
 	Conflict MergeResult = iota
 
 	// Identical - Tokens are the same, keep as-is
@@ -75,7 +78,7 @@ const (
 type Token struct {
 	Type     TokenType
 	Value    string
-	Wildcard WildcardStatus // NotWildcard, PotentialWildcard, or IsWildcard
+	Wildcard WildcardStatus
 }
 
 // NewToken creates a token with the specified wildcard status
@@ -87,50 +90,6 @@ func NewToken(tokenType TokenType, value string, wildcard WildcardStatus) Token
 	}
 }
 
-// IsHTTP returns true if the token is HTTP-related
-func (t *Token) IsHTTP() bool {
-	return t.Type == TokenHttpMethod || t.Type == TokenHttpStatus
-}
-
-// IsNetwork returns true if the token is network-related
-func (t *Token) IsNetwork() bool {
-	return t.Type == TokenIPv4 || t.Type == TokenIPv6 || t.Type == TokenEmail || t.Type == TokenURI
-}
-
-// String returns the string representation of a TokenType
-func (tt TokenType) String() string {
-	switch tt {
-	case TokenUnknown:
-		return "Unknown"
-	case TokenWord:
-		return "Word"
-	case TokenNumeric:
-		return "Numeric"
-	case TokenWhitespace:
-		return "Whitespace"
-	case TokenIPv4:
-		return "IPv4"
-	case TokenIPv6:
-		return "IPv6"
-	case TokenEmail:
-		return "Email"
-	case TokenURI:
-		return "URI"
-	case TokenAbsolutePath:
-		return "AbsolutePath"
-	case TokenHttpMethod:
-		return "HttpMethod"
-	case TokenHttpStatus:
-		return "HttpStatus"
-	case TokenSeverityLevel:
-		return "SeverityLevel"
-	case TokenDate:
-		return "Date"
-	default:
-		return fmt.Sprintf("TokenType(%d)", int(tt))
-	}
-}
-
 // String returns a string representation of the token
 func (t *Token) String() string {
 	return fmt.Sprintf("%s(%s)", t.Type, t.Value)
diff --git a/pkg/logs/patterns/token/token_test.go b/pkg/logs/patterns/token/token_test.go
index 9a941be5a18e..b9a81b68479e 100644
--- a/pkg/logs/patterns/token/token_test.go
+++ b/pkg/logs/patterns/token/token_test.go
@@ -7,82 +7,97 @@ package token
 
 import (
 	"testing"
+
+	"github.com/stretchr/testify/assert"
 )
 
-func TestTokenType_String(t *testing.T) {
-	tests := []struct {
-		tokenType TokenType
-		expected  string
-	}{
-		{TokenUnknown, "Unknown"},
-		{TokenWord, "Word"},
-		{TokenNumeric, "Numeric"},
-		{TokenWhitespace, "Whitespace"},
-		{TokenIPv4, "IPv4"},
-		{TokenIPv6, "IPv6"},
-		{TokenEmail, "Email"},
-		{TokenURI, "URI"},
-		{TokenAbsolutePath, "AbsolutePath"},
-		{TokenHttpMethod, "HttpMethod"},
-		{TokenHttpStatus, "HttpStatus"},
-		{TokenSeverityLevel, "SeverityLevel"},
-		{TokenDate, "Date"},
-	}
-
-	for _, test := range tests {
-		result := test.tokenType.String()
-		if result != test.expected {
-			t.Errorf("TokenType %v: expected %s, got %s", test.tokenType, test.expected, result)
-		}
-	}
+func TestNewToken(t *testing.T) {
+	token := NewToken(TokenWord, "test", PotentialWildcard)
+
+	assert.Equal(t, TokenWord, token.Type, "Expected TokenWord")
+	assert.Equal(t, "test", token.Value, "Expected 'test'")
+	assert.Equal(t, PotentialWildcard, token.Wildcard, "Expected PotentialWildcard")
+}
+
+func TestToken_Compare_DifferentTypes(t *testing.T) {
+	word := NewToken(TokenWord, "hello", PotentialWildcard)
+	number := NewToken(TokenNumeric, "123", PotentialWildcard)
+
+	result := word.Compare(&number)
+	assert.Equal(t, Conflict, result, "Different types should return Conflict")
+}
+
+func TestToken_Compare_SameValue(t *testing.T) {
+	token1 := NewToken(TokenWord, "hello", PotentialWildcard)
+	token2 := NewToken(TokenWord, "hello", PotentialWildcard)
+
+	result := token1.Compare(&token2)
+	assert.Equal(t, Identical, result, "Same values should return Identical")
+}
+
+func TestToken_Compare_WildcardMatches(t *testing.T) {
+	wildcard := NewToken(TokenWord, "anything", IsWildcard)
+	concrete := NewToken(TokenWord, "hello", PotentialWildcard)
+
+	result := wildcard.Compare(&concrete)
+	assert.Equal(t, Identical, result, "Wildcard should match any value of same type")
 }
 
-func TestToken_IsHTTP(t *testing.T) {
-	httpToken := Token{Type: TokenHttpMethod, Value: "GET"}
-	if !httpToken.IsHTTP() {
-		t.Error("HttpMethod token should be HTTP")
-	}
-
-	statusToken := Token{Type: TokenHttpStatus, Value: "200"}
-	if !statusToken.IsHTTP() {
-		t.Error("HttpStatus token should be HTTP")
-	}
-
-	wordToken := Token{Type: TokenWord, Value: "test"}
-	if wordToken.IsHTTP() {
-		t.Error("Word token should not be HTTP")
-	}
+func TestToken_Compare_WhitespaceConflict(t *testing.T) {
+	space1 := NewToken(TokenWhitespace, " ", NotWildcard)
+	space2 := NewToken(TokenWhitespace, " ", NotWildcard)
+
+	result := space1.Compare(&space2)
+	assert.Equal(t, Conflict, result, "Different whitespace should return Conflict")
+}
+
+func TestToken_Compare_WordsWithDifferentValues(t *testing.T) {
+	// Both PotentialWildcard - should merge to wildcard
+	word1 := NewToken(TokenWord, "hello", PotentialWildcard)
+	word2 := NewToken(TokenWord, "world", PotentialWildcard)
+
+	result := word1.Compare(&word2)
+	assert.Equal(t, Wildcard, result, "Different PotentialWildcard words should return Wildcard")
+
+	// One is NotWildcard - should conflict
+	word3 := NewToken(TokenWord, "INFO", NotWildcard)
+	word4 := NewToken(TokenWord, "ERROR", PotentialWildcard)
+
+	result2 := word3.Compare(&word4)
+	assert.Equal(t, Conflict, result2, "Words with NotWildcard should return Conflict")
 }
 
-func TestToken_IsNetwork(t *testing.T) {
-	ipv4Token := Token{Type: TokenIPv4, Value: "192.168.1.1"}
-	if !ipv4Token.IsNetwork() {
-		t.Error("IPv4 token should be network")
-	}
-
-	emailToken := Token{Type: TokenEmail, Value: "test@example.com"}
-	if !emailToken.IsNetwork() {
-		t.Error("Email token should be network")
-	}
-
-	wordToken := Token{Type: TokenWord, Value: "test"}
-	if wordToken.IsNetwork() {
-		t.Error("Word token should not be network")
-	}
+func TestToken_Compare_StructuredTypes(t *testing.T) {
+	// Different IPs should merge to wildcard
+	ip1 := NewToken(TokenIPv4, "192.168.1.1", PotentialWildcard)
+	ip2 := NewToken(TokenIPv4, "10.0.0.1", PotentialWildcard)
+
+	result := ip1.Compare(&ip2)
+	assert.Equal(t, Wildcard, result, "Different structured types (same type) should return Wildcard")
+
+	// Different numbers should merge to wildcard
+	num1 := NewToken(TokenNumeric, "123", PotentialWildcard)
+	num2 := NewToken(TokenNumeric, "456", PotentialWildcard)
+
+	result2 := num1.Compare(&num2)
+	assert.Equal(t, Wildcard, result2, "Different numeric values should return Wildcard")
+
+	// Different dates should merge to wildcard
+	date1 := NewToken(TokenDate, "2023-01-01", PotentialWildcard)
+	date2 := NewToken(TokenDate, "2023-12-31", PotentialWildcard)
+
+	result3 := date1.Compare(&date2)
+	assert.Equal(t, Wildcard, result3, "Different dates should return Wildcard")
 }
 
 func TestToken_String(t *testing.T) {
 	// Regular token
 	token := Token{Type: TokenWord, Value: "hello"}
 	expected := "Word(hello)"
-	if token.String() != expected {
-		t.Errorf("Expected %s, got %s", expected, token.String())
-	}
+	assert.Equal(t, expected, token.String(), "Token String() should format correctly")
 
 	// Wildcard token - still shows the value, not "*"
 	wildcardToken := Token{Type: TokenWord, Value: "test", Wildcard: IsWildcard}
 	expectedWildcard := "Word(test)"
-	if wildcardToken.String() != expectedWildcard {
-		t.Errorf("Expected %s, got %s", expectedWildcard, wildcardToken.String())
-	}
+	assert.Equal(t, expectedWildcard, wildcardToken.String(), "Wildcard token String() should show value")
 }
diff --git a/pkg/logs/patterns/token/tokentype_string.go b/pkg/logs/patterns/token/tokentype_string.go
new file mode 100644
index 000000000000..6b6cc1c05042
--- /dev/null
+++ b/pkg/logs/patterns/token/tokentype_string.go
@@ -0,0 +1,36 @@
+// Code generated by "stringer -type=TokenType -trimprefix=Token"; DO NOT EDIT.
+
+package token
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[TokenUnknown-0]
+	_ = x[TokenWord-1]
+	_ = x[TokenNumeric-2]
+	_ = x[TokenWhitespace-3]
+	_ = x[TokenIPv4-4]
+	_ = x[TokenIPv6-5]
+	_ = x[TokenEmail-6]
+	_ = x[TokenURI-7]
+	_ = x[TokenAbsolutePath-8]
+	_ = x[TokenHttpMethod-9]
+	_ = x[TokenHttpStatus-10]
+	_ = x[TokenSeverityLevel-11]
+	_ = x[TokenDate-12]
+}
+
+const _TokenType_name = "UnknownWordNumericWhitespaceIPv4IPv6EmailURIAbsolutePathHttpMethodHttpStatusSeverityLevelDate"
+
+var _TokenType_index = [...]uint8{0, 7, 11, 18, 28, 32, 36, 41, 44, 56, 66, 76, 89, 93}
+
+func (i TokenType) String() string {
+	idx := int(i) - 0
+	if i < 0 || idx >= len(_TokenType_index)-1 {
+		return "TokenType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _TokenType_name[_TokenType_index[idx]:_TokenType_index[idx+1]]
+}
diff --git a/pkg/logs/sender/dumb_strategy.go b/pkg/logs/sender/dumb_strategy.go
index 5c3c89a9a36f..c2d8781ee724 100644
--- a/pkg/logs/sender/dumb_strategy.go
+++ b/pkg/logs/sender/dumb_strategy.go
@@ -8,7 +8,7 @@ package sender
 
 import (
 	"bytes"
-	"encoding/json"
+	"encoding/gob"
 	"unsafe"
 
 	"github.com/DataDog/datadog-agent/pkg/logs/message"
@@ -34,13 +34,21 @@ type dumbStrategy struct {
 	buffer []*message.Message
 }
 
-// Simple pattern payload for POC - just the essential fields
-type PatternPayload struct {
-	PatternID   uint64 `json:"pattern_id"`
-	Pattern     string `json:"pattern"`
-	ParamCount  int    `json:"param_count"`
-	WildcardPos []int  `json:"wildcard_positions"`
-	// OriginalMsg string `json:"original_message"` // For debugging and double checking if pattern is correct base on the original message. Might remove it after POC. Protobuf might not be happy with this.
+// PatternData is a simple intermediate format for patterns (avoids import cycles with grpc package)
+// stream_worker will convert this to protobuf PatternDefine or PatternUpdate
+type PatternData struct {
+	PatternID  uint64
+	Template   string
+	ParamCount uint32
+	PosList    []uint32
+	IsUpdate   bool // true for PatternUpdate, false for PatternDefine
+}
+
+// LogData represents a log with pattern reference and wildcard values
+type LogData struct {
+	PatternID      uint64
+	WildcardValues []string
+	Timestamp      uint64
 }
 
 // NewDumbStrategy returns a strategy that sends one message per payload using the
@@ -117,152 +125,148 @@ func (s *dumbStrategy) flushBuffer() {
 }
 
 func (s *dumbStrategy) processMessage(m *message.Message) {
-	content := m.GetContent()
+	// Use rendered content for pattern extraction (plain text), not encoded content (binary)
+	content := m.GetRenderedContent()
 	if len(content) == 0 {
 		return
 	}
 
+	// Debug only: preview the content; kept at debug level so raw log content is not written for every message
+	previewLen := 100
+	if len(content) < previewLen {
+		previewLen = len(content)
+	}
+	log.Debugf("🔍 Tokenizing rendered content (first %d chars): %q", previewLen, content[:previewLen])
+
+	contentStr := bytesToString(content)
+
 	// Simple pattern extraction for POC
-	tokenList := automaton.TokenizeString(bytesToString(content))
+	tokenList := automaton.TokenizeString(contentStr)
 	if tokenList != nil && !tokenList.IsEmpty() {
-		if cluster := s.clusterManager.Add(tokenList); cluster != nil {
+		cluster, changeType := s.clusterManager.Add(tokenList)
+		if cluster != nil {
 			cluster.GeneratePattern()
 
-			// Build simple pattern payload
-			payload, err := s.buildSimplePatternPayload(m, cluster)
+			// Log pattern changes
+			switch changeType {
+			case clustering.PatternNew:
+				log.Infof("📝 NEW pattern discovered: PatternID=%d, Template='%s', Size=%d",
+					cluster.GetPatternID(), cluster.GetPatternString(), cluster.Size())
+			case clustering.PatternUpdated:
+				log.Infof("🔄 Pattern UPDATED: PatternID=%d, Template='%s', Size=%d",
+					cluster.GetPatternID(), cluster.GetPatternString(), cluster.Size())
+			case clustering.PatternNoChange:
+				log.Debugf("Pattern matched: PatternID=%d", cluster.GetPatternID())
+			}
+
+			// Step 1: Send pattern change (define/update) if needed
+			if changeType == clustering.PatternNew || changeType == clustering.PatternUpdated {
+				patternPayload, err := s.buildPatternChangePayload(m, cluster, changeType)
+				if err != nil {
+					log.Warn("Failed to build pattern change payload", err)
+					return
+				}
+				log.Debugf("⏫ Queuing pattern payload (changeType=%v, patternID=%d) to outputChan", changeType, cluster.GetPatternID())
+				s.outputChan <- patternPayload
+				log.Debugf("✅ Pattern payload queued successfully")
+			}
+
+			// Step 2: Send log with pattern reference + wildcard values
+			logPayload, err := s.buildLogPayload(m, cluster)
 			if err != nil {
-				log.Warn("Failed to build payload", err)
+				log.Warn("Failed to build log payload", err)
 				return
 			}
-
-			s.outputChan <- payload
+			log.Debugf("⏫ Queuing log payload (patternID=%d) to outputChan", cluster.GetPatternID())
+			s.outputChan <- logPayload
+			log.Debugf("✅ Log payload queued successfully")
 		}
 	}
 }
 
-// Simple pattern payload builder for POC
-func (s *dumbStrategy) buildSimplePatternPayload(m *message.Message, cluster *clustering.Cluster) (*message.Payload, error) {
-	patternPayload := PatternPayload{
-		PatternID:   cluster.GetPatternID(),
-		Pattern:     cluster.GetPatternString(),
-		ParamCount:  len(cluster.GetWildcardPositions()),
-		WildcardPos: cluster.GetWildcardPositions(),
-		// OriginalMsg: bytesToString(m.GetContent()), // Keep for POC debugging
+// buildPatternChangePayload creates a payload for PatternDefine or PatternUpdate
+func (s *dumbStrategy) buildPatternChangePayload(m *message.Message, cluster *clustering.Cluster, changeType clustering.PatternChangeType) (*message.Payload, error) {
+	// Get character positions where wildcards appear in the template string
+	charPos := cluster.GetWildcardCharPositions()
+	posList := make([]uint32, len(charPos))
+	for
i, pos := range charPos { + posList[i] = uint32(pos) } - // Use existing serialization with compression - intake handles decompression - return s.serializePayload(patternPayload, m) -} - -// ========== COMMENTED OUT COMPLEX LOGIC FOR POC ========== -/* -func (s *dumbStrategy) buildPayload(m *message.Message) (*message.Payload, error) { - if s.cluster != nil && s.cluster.NeedsSending() { - // Pattern needs to be sent (new or updated) - if s.cluster.IsNewPattern() { - return s.buildPatternCreationPayload(m) - } else if s.cluster.WasUpdatedSinceLastSent() { - return s.buildPatternUpdatePayload(m) - } - } else if s.cluster != nil { - // Pattern already sent, just send wildcards - return s.buildWildcardPayload(m) + // Create pattern data + patternData := &PatternData{ + PatternID: cluster.GetPatternID(), + Template: cluster.GetPatternString(), + ParamCount: uint32(len(charPos)), + PosList: posList, + IsUpdate: changeType == clustering.PatternUpdated, } - // No pattern, send raw message (fallback) - return s.buildRawPayload(m) + // Serialize to binary format + return s.serializePattern(patternData, m) } -func (s *dumbStrategy) buildPatternCreationPayload(m *message.Message) (*message.Payload, error) { - patternPayload := PatternPayload{ - StateChange: "pattern_create", - PatternID: s.cluster.GetPatternID(), - Pattern: s.cluster.GetPatternString(), - ParamCount: len(s.cluster.GetWildcardPositions()), - WildcardPos: s.cluster.GetWildcardPositions(), - OriginalMsg: bytesToString(m.GetContent()), - } - - s.cluster.MarkAsSent() - return s.serializePayload(patternPayload, m) -} +// buildLogPayload creates a payload for Log with StructuredLog (pattern_id + wildcard values) +func (s *dumbStrategy) buildLogPayload(m *message.Message, cluster *clustering.Cluster) (*message.Payload, error) { + // Extract wildcard values from the cluster + wildcardValues := cluster.GetWildcardValues() -func (s *dumbStrategy) buildPatternUpdatePayload(m *message.Message) (*message.Payload, error) { - 
patternPayload := PatternPayload{ - StateChange: "pattern_update", - PatternID: s.cluster.GetPatternID(), - Pattern: s.cluster.GetPatternString(), - ParamCount: len(s.cluster.GetWildcardPositions()), - WildcardPos: s.cluster.GetWildcardPositions(), + // Create log data + logData := &LogData{ + PatternID: cluster.GetPatternID(), + WildcardValues: wildcardValues, + Timestamp: uint64(m.IngestionTimestamp), } - s.cluster.MarkAsSent() - return s.serializePayload(patternPayload, m) + // Serialize to binary format + return s.serializeLog(logData, m) } -func (s *dumbStrategy) buildWildcardPayload(m *message.Message) (*message.Payload, error) { - // Extract wildcard values from the current message - tokenList := automaton.TokenizeString(bytesToString(m.GetContent())) - var wildcardValues []string - if tokenList != nil { - wildcardValues = s.cluster.ExtractWildcardValues(tokenList) - } +// serializePattern serializes pattern data using gob encoding +func (s *dumbStrategy) serializePattern(pattern *PatternData, m *message.Message) (*message.Payload, error) { + var buf bytes.Buffer + encoder := gob.NewEncoder(&buf) - patternPayload := struct { - PatternID uint64 `json:"pattern_id"` - DynamicValues []string `json:"dynamic_values"` - }{ - PatternID: s.cluster.GetPatternID(), - DynamicValues: wildcardValues, + if err := encoder.Encode(pattern); err != nil { + return nil, err } - return s.serializePayload(patternPayload, m) -} - -func (s *dumbStrategy) buildRawPayload(m *message.Message) (*message.Payload, error) { - rawPayload := struct { - Message string `json:"raw_message"` - }{ - Message: bytesToString(m.GetContent()), + // Create payload with original message metadata + metaCopy := m.MessageMetadata + // Add pattern change indicator to processing tags + if pattern.IsUpdate { + metaCopy.ProcessingTags = append(metaCopy.ProcessingTags, "data_type:pattern_update") + } else { + metaCopy.ProcessingTags = append(metaCopy.ProcessingTags, "data_type:pattern_define") } - return 
s.serializePayload(rawPayload, m) + return message.NewPayload( + []*message.MessageMetadata{&metaCopy}, // original message metadata with pattern tag + buf.Bytes(), // gob-encoded pattern data + "", // no content encoding - gRPC handles compression + buf.Len(), // gob size + ), nil } -*/ -func (s *dumbStrategy) serializePayload(payload interface{}, m *message.Message) (*message.Payload, error) { - s.serializer.Reset() +// serializeLog serializes log data using gob encoding +func (s *dumbStrategy) serializeLog(logData *LogData, m *message.Message) (*message.Payload, error) { + var buf bytes.Buffer + encoder := gob.NewEncoder(&buf) - patternBytes, err := json.Marshal(payload) - if err != nil { + if err := encoder.Encode(logData); err != nil { return nil, err } - // Compress the JSON data directly - var encodedPayload bytes.Buffer - compressor := s.compression.NewStreamCompressor(&encodedPayload) - - if _, err := compressor.Write(patternBytes); err != nil { - compressor.Close() - return nil, err - } - - if err := compressor.Close(); err != nil { - return nil, err - } - - // Potentially seed some log payload instead here - // Create payload with original message metadata metaCopy := m.MessageMetadata - // Add pattern indicator to processing tags instead of encoding - metaCopy.ProcessingTags = append(metaCopy.ProcessingTags, "data_type:pattern") + // Add log with pattern reference indicator + metaCopy.ProcessingTags = append(metaCopy.ProcessingTags, "data_type:log_with_pattern") return message.NewPayload( - []*message.MessageMetadata{&metaCopy}, // original message metadata with pattern tag - encodedPayload.Bytes(), // compressed pattern payload (sent as-is like HTTP/TCP) - s.compression.ContentEncoding(), // regular "gzip" or "zstd" - len(patternBytes), // uncompressed pattern JSON size + []*message.MessageMetadata{&metaCopy}, // original message metadata with log tag + buf.Bytes(), // gob-encoded log data + "", // no content encoding - gRPC handles compression + 
buf.Len(), // gob size ), nil } diff --git a/pkg/logs/sender/grpc/grpc_sender.go b/pkg/logs/sender/grpc/grpc_sender.go index e2d1dd7400e8..58afffa5f61b 100644 --- a/pkg/logs/sender/grpc/grpc_sender.go +++ b/pkg/logs/sender/grpc/grpc_sender.go @@ -192,9 +192,10 @@ func (s *GRPCSender) createConnection() error { } // Configure keepalive + // Note: Increased Time from 30s to 5min to avoid "too_many_pings" errors from intake keepaliveParams := keepalive.ClientParameters{ - Time: 30 * time.Second, - Timeout: 5 * time.Second, + Time: 5 * time.Minute, // Send ping every 5 minutes (was 30s) + Timeout: 10 * time.Second, // Wait 10 seconds for response (was 5s) PermitWithoutStream: true, } opts = append(opts, grpc.WithKeepaliveParams(keepaliveParams)) diff --git a/pkg/logs/sender/grpc/stream_worker.go b/pkg/logs/sender/grpc/stream_worker.go index b4b729b3f486..10679c815663 100644 --- a/pkg/logs/sender/grpc/stream_worker.go +++ b/pkg/logs/sender/grpc/stream_worker.go @@ -6,7 +6,9 @@ package grpc import ( + "bytes" "context" + "encoding/gob" "errors" "fmt" "io" @@ -97,6 +99,10 @@ type StreamWorker struct { pendingPayloads map[uint32]*message.Payload // batchID -> payload pendingPayloadsMu sync.Mutex // Protects pendingPayloads map + // Poor man's snapshot: cache pattern payloads for re-sending after rotation + patternCache []*message.Payload // Recently sent pattern define/update payloads + patternCacheMu sync.Mutex // Protects patternCache + // Control stopChan chan struct{} done chan struct{} @@ -125,6 +131,7 @@ func NewStreamWorker( rotationType: RotationTypeNone, signalStreamRotate: make(chan StreamRotateSignal, 1), // Size-1 buffer for drop-old semantics pendingPayloads: make(map[uint32]*message.Payload), // Initialize batch tracking map + patternCache: make([]*message.Payload, 0, 100), // Cache for pattern payloads stopChan: make(chan struct{}), done: make(chan struct{}), } @@ -528,30 +535,50 @@ func (s *StreamWorker) payloadToBatch(payload *message.Payload) 
*StatefulBatch { Data: make([]*Datum, 0, payload.Count()), } - // Check if this is a pattern payload by looking at metadata tags - isPattern := false + // Check payload type by looking at metadata tags + payloadType := "" for _, meta := range payload.MessageMetas { for _, tag := range meta.ProcessingTags { - if tag == "data_type:pattern" { - isPattern = true + if tag == "data_type:pattern_define" || tag == "data_type:pattern_update" || tag == "data_type:log_with_pattern" { + payloadType = tag break } } - if isPattern { + if payloadType != "" { break } } - if isPattern { - // Handle pattern payload - hardcode for POC testing - datum := s.createHardcodedPatternDatum() - if datum != nil { + switch payloadType { + case "data_type:pattern_define", "data_type:pattern_update": + // Handle pattern change (define or update) + datum, err := s.decodePatternDatum(payload) + if err != nil { + log.Errorf("Worker %s: Failed to decode pattern: %v", s.workerID, err) + } else if datum != nil { batch.Data = append(batch.Data, datum) + if payloadType == "data_type:pattern_define" { + log.Infof("šŸ“¤ Worker %s: Sending PatternDefine (ID=%d, template='%s')", + s.workerID, datum.GetPatternDefine().PatternId, datum.GetPatternDefine().Template) + } else { + log.Infof("šŸ“¤ Worker %s: Sending PatternUpdate (ID=%d, template='%s')", + s.workerID, datum.GetPatternUpdate().PatternId, datum.GetPatternUpdate().NewTemplate) + } } - // Commented out to reduce log noise - // log.Infof("šŸ“¤ PATTERN BATCH SENT: %v", batch) - } else { - // Handle regular log payload + + case "data_type:log_with_pattern": + // Handle log with pattern reference + wildcard values + datum, err := s.decodeLogDatum(payload) + if err != nil { + log.Errorf("Worker %s: Failed to decode log: %v", s.workerID, err) + } else if datum != nil { + batch.Data = append(batch.Data, datum) + log.Debugf("šŸ“¤ Worker %s: Sending Log with pattern_id=%d, %d wildcard values", + s.workerID, datum.GetLogs().GetStructured().PatternId, 
len(datum.GetLogs().GetStructured().DynamicValues)) + } + + default: + // Handle regular log payload (no pattern) datum := &Datum{ Data: &Datum_Logs{ Logs: &Log{ @@ -567,26 +594,96 @@ func (s *StreamWorker) payloadToBatch(payload *message.Payload) *StatefulBatch { return batch } -// PatternPayload represents the JSON structure from dumb_strategy -type PatternPayload struct { - PatternID uint64 `json:"pattern_id"` - Pattern string `json:"pattern"` - ParamCount int `json:"param_count"` - WildcardPos []int `json:"wildcard_positions"` +// PatternData matches the structure from dumb_strategy (sender package) +// We define it here to avoid import cycles +type PatternData struct { + PatternID uint64 + Template string + ParamCount uint32 + PosList []uint32 + IsUpdate bool } -// createHardcodedPatternDatum creates a hardcoded pattern for POC testing -func (s *StreamWorker) createHardcodedPatternDatum() *Datum { - // log.Infof("Worker %s: Sending hardcoded pattern for POC testing", s.workerID) +// LogData matches the structure from dumb_strategy (sender package) +type LogData struct { + PatternID uint64 + WildcardValues []string + Timestamp uint64 +} + +// decodePatternDatum decodes a pattern payload from gob format to protobuf (PatternDefine or PatternUpdate) +func (s *StreamWorker) decodePatternDatum(payload *message.Payload) (*Datum, error) { + // Decode gob-encoded PatternData + var patternData PatternData + decoder := gob.NewDecoder(bytes.NewReader(payload.Encoded)) + if err := decoder.Decode(&patternData); err != nil { + return nil, fmt.Errorf("failed to decode pattern data: %w", err) + } + + // Convert to protobuf PatternDefine or PatternUpdate + if patternData.IsUpdate { + patternUpdate := &PatternUpdate{ + PatternId: patternData.PatternID, + NewTemplate: patternData.Template, + ParamCount: patternData.ParamCount, + PosList: patternData.PosList, + } + return &Datum{ + Data: &Datum_PatternUpdate{ + PatternUpdate: patternUpdate, + }, + }, nil + } + + patternDefine := 
&PatternDefine{ + PatternId: patternData.PatternID, + Template: patternData.Template, + ParamCount: patternData.ParamCount, + PosList: patternData.PosList, + } return &Datum{ Data: &Datum_PatternDefine{ - PatternDefine: &PatternDefine{ - PatternId: 12345, - Template: "User * logged in from *", - ParamCount: 2, - PosList: []uint32{1, 4}, - }, + PatternDefine: patternDefine, }, + }, nil +} + +// decodeLogDatum decodes a log payload from gob format to protobuf Log with StructuredLog +func (s *StreamWorker) decodeLogDatum(payload *message.Payload) (*Datum, error) { + // Decode gob-encoded LogData + var logData LogData + decoder := gob.NewDecoder(bytes.NewReader(payload.Encoded)) + + if err := decoder.Decode(&logData); err != nil { + return nil, fmt.Errorf("failed to decode log data: %w", err) + } + + // Convert wildcard values to DynamicValue protobuf + dynamicValues := make([]*DynamicValue, len(logData.WildcardValues)) + for i, val := range logData.WildcardValues { + dynamicValues[i] = &DynamicValue{ + Value: &DynamicValue_StringValue{ + StringValue: val, + }, + } } + + // Create StructuredLog + structuredLog := &StructuredLog{ + PatternId: logData.PatternID, + DynamicValues: dynamicValues, + } + + // Wrap in Log and Datum + return &Datum{ + Data: &Datum_Logs{ + Logs: &Log{ + Timestamp: logData.Timestamp, + Content: &Log_Structured{ + Structured: structuredLog, + }, + }, + }, + }, nil } From 607d8b8b1b8413ec3a4fab45e303486ee692e523 Mon Sep 17 00:00:00 2001 From: yoon nguyen Date: Mon, 3 Nov 2025 17:12:43 -0500 Subject: [PATCH 04/16] Remove documentation and demo files --- clustering_architecture.md | 279 ----------- pkg/logs/patterns/comprehensive_demo.go | 146 ------ pkg/logs/patterns/merging.md | 626 ------------------------ 3 files changed, 1051 deletions(-) delete mode 100644 clustering_architecture.md delete mode 100644 pkg/logs/patterns/comprehensive_demo.go delete mode 100644 pkg/logs/patterns/merging.md diff --git a/clustering_architecture.md 
b/clustering_architecture.md deleted file mode 100644 index 032312911a69..000000000000 --- a/clustering_architecture.md +++ /dev/null @@ -1,279 +0,0 @@ -# Log Pattern Clustering Architecture - -## Main Data Flow Pipeline - -```mermaid -flowchart TD - A[Raw Log Messages
GET /api/users 200
POST /api/users 201] --> B[Tokenization] - - B --> C["Token Classification
automaton.TokenizeString()"] - C --> D["Token List Creation
token.NewTokenList()"] - - D --> E["Signature Generation
.Signature()"] - E --> F["Hash Computation
computeHash()"] - - F --> G["Cluster Manager
clustering.Add()"] - G --> H{Hash Bucket
Lookup} - - H -->|Existing Cluster| I["Add to Cluster
cluster.Add()"] - H -->|New Signature| J["Create New Cluster
NewCluster()"] - - I --> K["Pattern Generation
cluster.GeneratePattern()"] - J --> K - - K --> L[Wildcard Patterns
* /api/users *
ERROR * failed] - - style A fill:#e1f5fe - style L fill:#c8e6c9 - style G fill:#fff3e0 -``` - -## Core Function Call Graph - -```mermaid -graph TD - A[automaton.TokenizeString] --> B[NewTokenizer] - A --> C[Tokenizer.Tokenize] - - C --> D[processNextToken] - C --> E[consumeWhitespace] - C --> F[extractWord] - C --> G[classifyToken] - - G --> H[globalTrie.Match] - G --> I[GetTerminalRules] - - A --> J[token.NewTokenList] - J --> K[TokenList.Signature] - - K --> L[PositionSignature] - K --> M[CountSignature] - K --> N[computeHash] - - O[clustering.NewClusterManager] --> P[ClusterManager.Add] - P --> Q[hashBuckets lookup] - P --> R[cluster.Signature.Equals] - P --> S[NewCluster] - - S --> T[Cluster.Add] - T --> U[Cluster.GeneratePattern] - - style A fill:#ffecb3 - style P fill:#f3e5f5 - style U fill:#e8f5e8 -``` - -## Hash Bucket Architecture - -```mermaid -graph TB - A[ClusterManager] --> B["hashBuckets: map[uint64][]*Cluster"] - - B --> C["Hash: 12345"] - B --> D["Hash: 67890"] - - C --> E["Cluster1
HTTP Requests"] - C --> F["Cluster2
Hash Collision"] - - E --> G["TokenLists:
GET /api 200
POST /api 201
PUT /api 200"] - E --> H["Pattern: * /api *
Wildcards: positions 0, 4"] - - D --> I["Cluster3
Error Messages"] - I --> J["TokenLists:
ERROR DB failed
ERROR Auth failed"] - I --> K["Pattern: ERROR * failed
Wildcards: position 2"] - - style A fill:#f9f,stroke:#333,stroke-width:2px - style E fill:#bbf,stroke:#333,stroke-width:2px - style I fill:#fbb,stroke:#333,stroke-width:2px -``` - -## Memory Layout and Data Structure - -```mermaid -classDiagram - class ClusterManager { - +map~uint64~[]Cluster hashBuckets - +int totalTokenLists - +int totalClusters - +Add(tokenList) Cluster - +GetCluster(signature) Cluster - } - - class Cluster { - +Signature signature - +[]TokenList tokenLists - +TokenList pattern - +map~int~bool wildcardMap - +Add(tokenList) bool - +GeneratePattern() TokenList - } - - class TokenList { - +[]Token tokens - +Signature() Signature - +PositionSignature() string - +CountSignature() string - } - - class Token { - +string Value - +TokenType Type - +bool IsWildcard - } - - ClusterManager --> Cluster : contains - Cluster --> TokenList : groups - TokenList --> Token : contains -``` - -## Performance Characteristics - -### Algorithm Complexity by Operation - -```mermaid -graph LR - subgraph "Tokenization Pipeline" - A["Raw Log
O(n) time
O(k) space"] --> B["Token Classification
O(1) per token
Trie + Rules"] - B --> C["TokenList
O(k) creation
O(k) memory"] - end - - subgraph "Clustering Pipeline" - C --> D["Signature Generation
O(k) time
O(1) space"] - D --> E["Hash Lookup
O(1) avg
O(m) worst"] - E --> F["Cluster Assignment
O(1) insertion
O(1) space"] - end - - subgraph "Pattern Pipeline" - F --> G["Pattern Generation
O(k × c) time
O(k) space"] - G --> H["Wildcard Detection
O(k × c) comparison
Lazy evaluation"] - end - - style A fill:#ffecb3 - style E fill:#f3e5f5 - style G fill:#e8f5e8 -``` - -### Performance Analysis - -```mermaid -graph TB - subgraph "Performance Characteristics" - A["🚀 Tokenization
O(n) always
Single-pass processing"] - B["📊 Signature
O(k) linear
Cached result"] - C["🔍 Hash Lookup
O(1) avg, O(m) worst
Rare collisions"] - D["🎯 Clustering
O(1) typical
Hit existing clusters"] - E["🎨 Pattern Gen
O(k) single, O(k×c) multiple
Lazy evaluation"] - end - - A --> B - B --> C - C --> D - D --> E - - style A fill:#e3f2fd - style B fill:#f3e5f5 - style C fill:#fff3e0 - style D fill:#e8f5e8 - style E fill:#fce4ec -``` - -### Test Results from Codebase - -From the actual test suite (`TestClusteringPerformance`): -- **Input**: 400 similar log messages -- **Output**: 3 clusters created -- **Demonstrates**: Effective pattern consolidation for similar structured logs - -### Algorithm Variables - -```mermaid -graph LR - subgraph "Input Variables" - A["n: String Length
Character count
Linear tokenization cost"] - B["k: Tokens per Message
After tokenization
Affects signature generation"] - end - - subgraph "System Variables" - C["m: Clusters per Bucket
Hash collisions
Usually 1 cluster"] - D["c: Messages per Cluster
Pattern generation cost
Compression vs speed trade-off"] - end - - style A fill:#e3f2fd - style B fill:#e3f2fd - style C fill:#fff3e0 - style D fill:#fff3e0 -``` - -### Key Optimizations - -```mermaid -graph TB - subgraph "Memory Optimizations" - A["String Interning
Common tokens cached
GET, POST, ERROR reused"] - B["Lazy Evaluation
Patterns generated on-demand
Reduces memory footprint"] - end - - subgraph "CPU Optimizations" - C["Hash Pre-computation
Signatures include cached hash
Avoids repeated calculations"] - D["Trie Lookup
O(1) for HTTP methods
O(1) for severity levels"] - end - - subgraph "Reliability Features" - E["Collision Handling
Graceful hash collision recovery
Exact signature fallback"] - F["Input Validation
UTF-8 safety checks
Defensive programming"] - end - - style A fill:#e8f5e8 - style B fill:#e8f5e8 - style C fill:#fff3e0 - style D fill:#fff3e0 - style E fill:#fce4ec - style F fill:#fce4ec -``` - -## Production Data Flow Example - -```mermaid -sequenceDiagram - participant L as Log Message - participant T as Tokenizer - participant TL as TokenList - participant CM as ClusterManager - participant C as Cluster - - L->>T: "GET /api/users 200" - T->>T: TokenizeString() - T->>TL: [HttpMethod(GET), Whitespace( ), AbsolutePath(/api/users), ...] - TL->>TL: Generate Signature() - TL->>TL: "HttpMethod,Whitespace,AbsolutePath,Whitespace,HttpStatus" - TL->>TL: Hash: 0x1a2b3c4d - - TL->>CM: ClusterManager.Add(tokenList) - CM->>CM: hashBuckets[0x1a2b3c4d] lookup - CM->>C: Found existing cluster - C->>C: cluster.Add(tokenList) - C->>C: GeneratePattern() - C-->>CM: Pattern: "* /api/users *" - CM-->>L: Clustered successfully - - Note over C: Wildcards at positions [0, 4]
for HTTP method and status code -``` - -## Key Production Functions - -### Core Pipeline -- `automaton.TokenizeString()` - Entry point -- `ClusterManager.Add()` - Main clustering logic -- `Cluster.GeneratePattern()` - Pattern extraction -- `TokenList.Signature()` - Clustering key generation - -### Support Functions -- `NewClusterManager()` - Initialization -- `NewCluster()` - Cluster creation -- `Cluster.Add()` - Add TokenList to existing cluster -- `ClusterManager.GetCluster()` - Retrieve by signature - -### Infrastructure -- `globalTrie.Match()` - Fast token classification -- `Signature.Equals()` - Hash collision resolution -- `computeHash()` - Signature hashing for buckets \ No newline at end of file diff --git a/pkg/logs/patterns/comprehensive_demo.go b/pkg/logs/patterns/comprehensive_demo.go deleted file mode 100644 index 0343ee2bdadc..000000000000 --- a/pkg/logs/patterns/comprehensive_demo.go +++ /dev/null @@ -1,146 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2016-present Datadog, Inc. - -// Package patterns provides a simple demo of pattern extraction -package main - -import ( - "fmt" - "strings" - - "github.com/DataDog/datadog-agent/pkg/logs/patterns/automaton" - "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering" - "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" -) - -func main() { - fmt.Println("=== Log Pattern Extraction Demo ===") - - // Step 1: Setup - runBasicDemo() - - // Step 2: Advanced features - runAdvancedDemo() - - fmt.Println("=== Demo Complete ===") -} - -func runBasicDemo() { - fmt.Println("1. 
BASIC PATTERN EXTRACTION") - fmt.Println(" Processing HTTP requests to find patterns...") - - clusterManager := clustering.NewClusterManager() - - // Simple HTTP logs - httpLogs := []string{ - "GET /api/users 200", - "POST /api/users 201", - "PUT /api/users 200", - "GET /api/orders 200", - "DELETE /api/users 204", - } - - // Process logs and show tokenization - for i, logMsg := range httpLogs { - fmt.Printf(" Log %d: %s\n", i+1, logMsg) - - // Tokenize and show breakdown - tokenList := automaton.TokenizeString(logMsg) - fmt.Printf(" → Tokens: %s\n", formatTokens(tokenList)) - - // Add to clustering - cluster := clusterManager.Add(tokenList) - fmt.Printf(" → Cluster size: %d\n\n", cluster.Size()) - } - - // Show discovered patterns - showPatterns(clusterManager, "HTTP API Requests") -} - -func runAdvancedDemo() { - fmt.Println("2. ADVANCED TOKENIZATION") - fmt.Println(" Showing specialized token detection...") - - clusterManager := clustering.NewClusterManager() - - // Advanced logs with different data types - advancedLogs := []string{ - "ERROR Database connection to 192.168.1.100 failed", - "ERROR Database connection to 192.168.1.101 failed", - "ERROR Database connection to 192.168.1.102 failed", - "INFO User admin@company.com logged in at 2024-01-15", - "INFO User john@company.com logged in at 2024-01-16", - "INFO User jane@company.com logged in at 2024-01-17", - } - - for i, logMsg := range advancedLogs { - fmt.Printf(" Log %d: %s\n", i+1, logMsg) - - tokenList := automaton.TokenizeString(logMsg) - fmt.Printf(" → Specialized tokens: %s\n", formatSpecializedTokens(tokenList)) - - cluster := clusterManager.Add(tokenList) - fmt.Printf(" → Cluster size: %d\n\n", cluster.Size()) - } - - showPatterns(clusterManager, "Advanced Tokenization") -} - -func formatTokens(tokenList *token.TokenList) string { - if tokenList.IsEmpty() { - return "none" - } - - var parts []string - for _, tok := range tokenList.Tokens { - parts = append(parts, fmt.Sprintf("%s", tok.Value)) - } - 
return strings.Join(parts, " | ") -} - -func formatSpecializedTokens(tokenList *token.TokenList) string { - if tokenList.IsEmpty() { - return "none" - } - - var parts []string - for _, tok := range tokenList.Tokens { - if tok.Type.String() != "Word" && tok.Type.String() != "Whitespace" { - parts = append(parts, fmt.Sprintf("%s(%s)", tok.Type, tok.Value)) - } - } - - if len(parts) == 0 { - return "no specialized tokens" - } - return strings.Join(parts, ", ") -} - -func showPatterns(clusterManager *clustering.ClusterManager, title string) { - fmt.Printf(" PATTERNS DISCOVERED in %s:\n", title) - - allClusters := clusterManager.GetAllClusters() - patternCount := 0 - - for _, cluster := range allClusters { - if cluster.Size() >= 3 { // Lower threshold for demo - patternStr := cluster.GetPatternString() - if patternStr != "" { - patternCount++ - fmt.Printf(" → Pattern %d: %s (found %d times)\n", - patternCount, patternStr, cluster.Size()) - } - } - } - - if patternCount == 0 { - fmt.Printf(" → No patterns found (need at least 3 similar messages)\n") - } - - // Show stats - stats := clusterManager.GetStats() - fmt.Printf(" → Stats: %d messages processed, %d clusters created\n\n", - stats.TotalTokenLists, stats.TotalClusters) -} diff --git a/pkg/logs/patterns/merging.md b/pkg/logs/patterns/merging.md deleted file mode 100644 index 4e68836310fc..000000000000 --- a/pkg/logs/patterns/merging.md +++ /dev/null @@ -1,626 +0,0 @@ -# Java Mergeability Implementation Guide - -## šŸŽÆ Overview - -This guide shows how to implement the Java approach to pattern merging in your Go library. The Java approach uses **token-level mergeability** with discrete levels instead of continuous similarity scoring. 
- -## 🧠 Key Discovery: How Java Actually Works - -After thorough analysis of the Java codebase, the Java approach uses a **two-phase process**: - -### Phase 1: Real-time Document Processing -- **Tokenization**: Each log is tokenized using `DefaultLuceneTokenizingAutomatonBuilder` -- **Clustering Key**: Uses `PatternClusteringKey` which only considers: - - **Tags** (metadata) - - **Token count** (number of tokens) -- **Real-time Merging**: Documents with same clustering key go to same bucket, but **only merge if tokens are identical** - -### Phase 2: Batch Consolidation (The Magic!) -The key insight is in `MergeableRootNode.mergeClusters()` - this is where wildcards are actually created: - -```java -// Groups clusters by (tags, token_count) -clusters.stream() - .collect(Collectors.groupingBy( - cluster -> Pair.of(cluster.getTags(), cluster.getRootToken().size()))) - .values() - .forEach(similarClusters -> { - // For each group of similar clusters... - while (!similarClusters.isEmpty()) { - final MergeableNode cluster = similarClusters.remove(similarClusters.size() - 1); - final ListIterator> iter = similarClusters.listIterator(); - while (iter.hasNext()) { - final MergeableNode candidate = iter.next(); - if (cluster.mergeTokensIfFits(candidate)) { - iter.remove(); // Merge successful! - } - } - } - }); -``` - -### The `possiblyWildcard` Flag -- **Only Word tokens with numeric patterns have `possiblyWildcard = true`** -- This means **only words like `user123`, `session456` can merge into wildcards** -- Generic words like `bob`, `cat` are **not mergeable** and stay separate -- The `WildcardableWord.mergeWith()` method handles the actual wildcard creation - -### Example: `user123 logged in successfully` vs `user456 logged in successfully` -1. **Tokenization**: Both become `[Word("user123"), Word("logged"), Word("in"), Word("successfully")]` -2. **Clustering Key**: Both get `(tags, 4)` → Same bucket -3. **Real-time**: Can't merge (different tokens) -4. 
**Batch Consolidation**: - - `Word("user123")` vs `Word("user456")` → `MERGEABLE_AS_WILDCARD` (both have numeric patterns) - - `Word("logged")` vs `Word("logged")` → `FITS_AS_IT_IS` (same text) - - `Word("in")` vs `Word("in")` → `FITS_AS_IT_IS` (same text) - - `Word("successfully")` vs `Word("successfully")` → `FITS_AS_IT_IS` (same text) -5. **Result**: Pattern becomes `[user* logged in successfully]` - -### Example: `bob loves eat 25` vs `cat loves eat 62` -1. **Tokenization**: Both become `[Word("bob"), Word("loves"), Word("eat"), NumericValue(25)]` -2. **Clustering Key**: Both get `(tags, 4)` → Same bucket -3. **Real-time**: Can't merge (different tokens) -4. **Batch Consolidation**: - - `Word("bob")` vs `Word("cat")` → `UNMERGEABLE` (generic words, no numeric patterns) - - **Result**: Separate patterns (no merge) āœ… - -## šŸ—ļø Project Structure - -``` -your-go-library/ -ā”œā”€ā”€ internal/ -│ ā”œā”€ā”€ token/ -│ │ ā”œā”€ā”€ token.go # Token interface and MergeabilityLevel -│ │ ā”œā”€ā”€ word.go # Word token implementation -│ │ ā”œā”€ā”€ numeric.go # NumericValue token implementation -│ │ ā”œā”€ā”€ special.go # SpecialCharacter token implementation -│ │ └── token_list.go # TokenList implementation -│ ā”œā”€ā”€ tokenization/ -│ │ ā”œā”€ā”€ tokenizer.go # Tokenization engine -│ │ └── parser.go # Parser interface -│ ā”œā”€ā”€ clustering/ -│ │ ā”œā”€ā”€ clusterer.go # Clustering interfaces -│ │ ā”œā”€ā”€ realtime.go # RealTimeClusterer -│ │ └── consolidation.go # Batch consolidation -│ └── patterns/ -│ ā”œā”€ā”€ extractor.go # PatternExtractor -│ └── matcher.go # Pattern matching -ā”œā”€ā”€ pkg/ -│ └── patterns/ -│ └── patterns.go # Public API -└── go.mod -``` - -## šŸš€ Implementation Steps - -### Step 1: Core Token System (45 minutes) - -**File: `internal/token/token.go`** - -```go -package token - -type Token interface { - IsWildcard() bool - GetPatternString() string - GetMergeabilityLevel(other Token) MergeabilityLevel - MergeWith(other Token) Token -} - 
-type MergeabilityLevel int - -const ( - UNMERGEABLE MergeabilityLevel = iota - MERGEABLE_AS_NEW_TYPE - MERGEABLE_AS_WILDCARD - MERGEABLE_WITH_WIDER_RANGE - FITS - FITS_AS_IT_IS -) - -func (m MergeabilityLevel) Compare(other MergeabilityLevel) int { - return int(m) - int(other) -} - -func (m MergeabilityLevel) IsMergeable() bool { - return m > UNMERGEABLE -} -``` - -### Step 2: Word Token Implementation (30 minutes) - -**File: `internal/token/word.go`** - -```go -package token - -type Word struct { - text string - hasDigits bool - possiblyWildcard bool - wildcardSummary WildcardSummary -} - -func NewWord(text string, possiblyWildcard, withSummaries bool) *Word { - return &Word{ - text: text, - hasDigits: containsDigits(text), - possiblyWildcard: possiblyWildcard, - wildcardSummary: createWildcardSummary(text, withSummaries), - } -} - -func (w *Word) GetMergeabilityLevel(other Token) MergeabilityLevel { - if otherWord, ok := other.(*Word); ok { - return w.getMergeabilityWithWord(otherWord) - } else if numericValue, ok := other.(*NumericValue); ok { - return w.getMergeabilityWithNumeric(numericValue) - } - return UNMERGEABLE -} - -func (w *Word) getMergeabilityWithWord(other *Word) MergeabilityLevel { - if w.text != "" && other.text != "" { - if w.text == other.text { - if w.possiblyWildcard && !other.possiblyWildcard { - return FITS - } - return FITS_AS_IT_IS - } else if w.possiblyWildcard && other.possiblyWildcard { - return MERGEABLE_AS_WILDCARD // Both have numeric patterns - } else { - return UNMERGEABLE // Generic words don't merge - } - } - - if w.possiblyWildcard { - return MERGEABLE_AS_WILDCARD - } - - return UNMERGEABLE // Generic words are not mergeable -} - -func (w *Word) MergeWith(other Token) Token { - if otherWord, ok := other.(*Word); ok { - return w.mergeWithWord(otherWord) - } else if numericValue, ok := other.(*NumericValue); ok { - return w.mergeWithNumeric(numericValue) - } - return w -} - -func (w *Word) mergeWithWord(other *Word) *Word { - 
merged := &Word{ - text: w.text, - hasDigits: w.hasDigits || other.hasDigits, - possiblyWildcard: w.possiblyWildcard, - wildcardSummary: w.wildcardSummary, - } - - // If both have text and they're different, make wildcard - if w.text != "" && other.text != "" && w.text != other.text { - merged.possiblyWildcard = true - merged.wildcardSummary = mergeWildcardSummaries(w.wildcardSummary, other.wildcardSummary) - } - - return merged -} -``` - -### Step 3: TokenList Implementation (20 minutes) - -**File: `internal/token/token_list.go`** - -```go -package token - -type TokenList struct { - tokens []Token -} - -func NewTokenList(tokens []Token) *TokenList { - return &TokenList{tokens: tokens} -} - -func (tl *TokenList) GetMergeabilityLevel(other Token) MergeabilityLevel { - otherList, ok := other.(*TokenList) - if !ok { - return UNMERGEABLE - } - - if len(tl.tokens) != len(otherList.tokens) { - return UNMERGEABLE - } - - minLevel := FITS_AS_IT_IS - for i := 0; i < len(tl.tokens); i++ { - level := tl.tokens[i].GetMergeabilityLevel(otherList.tokens[i]) - if level.Compare(minLevel) < 0 { - if level == UNMERGEABLE { - return UNMERGEABLE - } - minLevel = level - } - } - return minLevel -} - -func (tl *TokenList) MergeWith(other Token) Token { - otherList := other.(*TokenList) - mergedTokens := make([]Token, len(tl.tokens)) - for i := 0; i < len(tl.tokens); i++ { - mergedTokens[i] = tl.tokens[i].MergeWith(otherList.tokens[i]) - } - return NewTokenList(mergedTokens) -} -``` - -### Step 4: Two-Phase Clustering System (60 minutes) - -**File: `internal/clustering/realtime.go`** - -```go -package clustering - -import ( - "sync" - "github.com/your-library/internal/token" -) - -type ClusteringKey struct { - Tags map[string]interface{} - TokenCount int -} - -type RealTimeClusterer struct { - clusters map[ClusteringKey][]*MergeableNode - mutex sync.RWMutex -} - -type MergeableNode struct { - rootToken *token.TokenList - messages []string - count int - tags map[string]interface{} -} - 
-func NewRealTimeClusterer() *RealTimeClusterer { - return &RealTimeClusterer{ - clusters: make(map[ClusteringKey][]*MergeableNode), - } -} - -func (rtc *RealTimeClusterer) ProcessDocument(message string, rootToken *token.TokenList, tags map[string]interface{}) *MergeableNode { - key := ClusteringKey{ - Tags: tags, - TokenCount: len(rootToken.GetTokens()), - } - - rtc.mutex.Lock() - defer rtc.mutex.Unlock() - - // Try to find existing cluster that can accept this document - if clusters, exists := rtc.clusters[key]; exists { - for _, cluster := range clusters { - if cluster.ProcessIfMergeable(rootToken) { - cluster.AddMessage(message) - return cluster - } - } - } - - // Create new cluster - newCluster := &MergeableNode{ - rootToken: rootToken, - messages: []string{message}, - count: 1, - tags: tags, - } - rtc.clusters[key] = append(rtc.clusters[key], newCluster) - return newCluster -} - -func (mn *MergeableNode) ProcessIfMergeable(rootToken *token.TokenList) bool { - if mn.rootToken.GetMergeabilityLevel(rootToken).IsMergeable() { - mn.rootToken = mn.rootToken.MergeWith(rootToken).(*token.TokenList) - return true - } - return false -} - -func (mn *MergeableNode) AddMessage(message string) { - mn.messages = append(mn.messages, message) - mn.count++ -} -``` - -### Step 5: Batch Consolidation (45 minutes) - -**File: `internal/clustering/consolidation.go`** - -```go -package clustering - -func (rtc *RealTimeClusterer) ConsolidateClusters() []*MergeableNode { - rtc.mutex.Lock() - defer rtc.mutex.Unlock() - - var consolidatedClusters []*MergeableNode - - // Group clusters by (tags, token_count) - same as Java - for _, clusters := range rtc.clusters { - consolidatedClusters = append(consolidatedClusters, - rtc.mergeClusters(clusters)...) 
- } - - return consolidatedClusters -} - -func (rtc *RealTimeClusterer) mergeClusters(clusters []*MergeableNode) []*MergeableNode { - var consolidatedClusters []*MergeableNode - - // Java-style consolidation algorithm - for len(clusters) > 0 { - cluster := clusters[len(clusters)-1] - clusters = clusters[:len(clusters)-1] - - var remainingClusters []*MergeableNode - for _, candidate := range clusters { - if cluster.MergeTokensIfFits(candidate) { - // Merge successful, candidate is absorbed - continue - } else if candidate.MergeTokensIfFits(cluster) { - // Candidate can absorb cluster, use candidate as base - cluster = candidate - continue - } else { - // No merge possible, keep candidate - remainingClusters = append(remainingClusters, candidate) - } - } - - consolidatedClusters = append(consolidatedClusters, cluster) - clusters = remainingClusters - } - - return consolidatedClusters -} - -func (mn *MergeableNode) MergeTokensIfFits(other *MergeableNode) bool { - if mn.rootToken.GetMergeabilityLevel(other.rootToken).IsMergeable() { - mn.rootToken = mn.rootToken.MergeWith(other.rootToken).(*token.TokenList) - mn.messages = append(mn.messages, other.messages...) 
- mn.count += other.count - return true - } - return false -} -``` - -### Step 6: Pattern Extractor Integration (30 minutes) - -**File: `pkg/patterns/patterns.go`** - -```go -package patterns - -import ( - "github.com/your-library/internal/clustering" - "github.com/your-library/internal/tokenization" -) - -type PatternExtractor struct { - tokenizer tokenization.Tokenizer - clusterer *clustering.RealTimeClusterer -} - -func NewPatternExtractor() *PatternExtractor { - return &PatternExtractor{ - tokenizer: tokenization.NewDefaultTokenizer(), - clusterer: clustering.NewRealTimeClusterer(), - } -} - -func (pe *PatternExtractor) ExtractPatterns(messages []string) ([]*Pattern, error) { - // Phase 1: Real-time processing - for _, message := range messages { - tokens, err := pe.tokenizer.Tokenize(message) - if err != nil { - return nil, err - } - - tokenList := token.NewTokenList(tokens) - pe.clusterer.ProcessDocument(message, tokenList, make(map[string]interface{})) - } - - // Phase 2: Batch consolidation - clusters := pe.clusterer.ConsolidateClusters() - - // Convert to patterns - patterns := make([]*Pattern, len(clusters)) - for i, cluster := range clusters { - patterns[i] = &Pattern{ - Template: cluster.rootToken.GetPatternString(), - Count: cluster.count, - Messages: cluster.messages, - } - } - - return patterns, nil -} - -type Pattern struct { - Template string - Count int - Messages []string -} -``` - -### Step 7: Basic Tokenization (30 minutes) - -**File: `internal/tokenization/tokenizer.go`** - -```go -package tokenization - -import ( - "strconv" - "strings" - "github.com/your-library/internal/token" -) - -type Tokenizer interface { - Tokenize(input string) ([]token.Token, error) -} - -type DefaultTokenizer struct{} - -func NewDefaultTokenizer() *DefaultTokenizer { - return &DefaultTokenizer{} -} - -func (dt *DefaultTokenizer) Tokenize(input string) ([]token.Token, error) { - var tokens []token.Token - - // Simple word-based tokenization - words := 
strings.Fields(input) - for _, word := range words { - if isNumeric(word) { - tokens = append(tokens, token.NewNumericValue(word, false)) - } else if hasNumericPattern(word) { - // Only words with numeric patterns can be wildcards - tokens = append(tokens, token.NewWord(word, true, false)) // possiblyWildcard=true - } else { - // Generic words are not mergeable - tokens = append(tokens, token.NewWord(word, false, false)) // possiblyWildcard=false - } - } - - return tokens, nil -} - -func hasNumericPattern(word string) bool { - // Check if word contains numbers (user123, session456, etc.) - return regexp.MustCompile(`\d`).MatchString(word) -} - -func isNumeric(s string) bool { - _, err := strconv.ParseFloat(s, 64) - return err == nil -} -``` - -## 🧪 Testing Implementation (30 minutes) - -**File: `internal/token/word_test.go`** - -```go -package token - -import ( - "testing" - "github.com/stretchr/testify/assert" -) - -func TestWordMergeability(t *testing.T) { - tests := []struct { - name string - token1 *Word - token2 *Word - expected MergeabilityLevel - }{ - { - name: "Same text, both wildcard", - token1: NewWord("GET", true, false), - token2: NewWord("GET", true, false), - expected: FITS_AS_IT_IS, - }, - { - name: "Different text, both wildcard", - token1: NewWord("GET", true, false), - token2: NewWord("POST", true, false), - expected: MERGEABLE_AS_WILDCARD, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := tt.token1.GetMergeabilityLevel(tt.token2) - assert.Equal(t, tt.expected, result) - }) - } -} - -func TestPatternClustering(t *testing.T) { - extractor := NewPatternExtractor() - - logMessages := []string{ - "user123 logged in successfully", - "user456 logged in successfully", - "user789 logged in successfully", - } - - patterns, err := extractor.ExtractPatterns(logMessages) - require.NoError(t, err) - - // Should create one pattern with wildcards - assert.Len(t, patterns, 1) - assert.Equal(t, "user* logged in 
successfully", patterns[0].Template) - assert.Equal(t, 3, patterns[0].Count) -} -``` - -## 📦 Go Module Setup (5 minutes) - -**File: `go.mod`** - -```go -module github.com/your-org/your-patterns-library - -go 1.21 - -require ( - github.com/stretchr/testify v1.8.4 -) -``` - -## 🎯 Key Differences from Go Approach - -### Go Approach (Current) -- **Similarity-based**: Uses Jaccard similarity with 50% threshold -- **Single-phase**: All processing happens in real-time -- **Continuous scoring**: Similarity values between 0.0 and 1.0 -- **Constant word similarity**: Additional check prevents merging very different patterns - -### Java Approach (Proposed) -- **Mergeability-based**: Uses discrete mergeability levels -- **Two-phase**: Real-time processing + batch consolidation -- **Binary decisions**: Either mergeable or not mergeable -- **Token-level rules**: Each token type defines its own mergeability logic -- **`possiblyWildcard` flag**: Enables wildcard creation for different word tokens - -### Why Java Approach Works Better - -1. **No Similarity Thresholds**: The `possiblyWildcard` flag eliminates the need for similarity calculations -2. **Batch Optimization**: Consolidation happens after all documents are processed, allowing better pattern discovery -3. **Predictable Behavior**: Discrete levels make the system more debuggable -4. **Performance**: O(1) token-level checks vs O(n²) similarity calculations -5. **Semantic Awareness**: Different token types have different mergeability rules - -## ✅ Benefits of This Implementation - -1. **Performance**: O(1) token-level checks vs O(n²) similarity calculations -2. **Predictability**: Discrete mergeability levels make behavior more predictable -3. **Type Safety**: Each token type defines its own mergeability rules -4. **Extensibility**: Easy to add new token types with custom mergeability -5. **Semantic Awareness**: Can distinguish between different types of content -6. 
**Backward Compatibility**: Can fall back to Go approach if needed -7. **Wildcard Creation**: The `possiblyWildcard` flag enables automatic wildcard creation during batch consolidation - -## 🎯 Summary - -**Total Estimated Time: ~4 hours for complete implementation from scratch** - -This implementation provides a high-performance, predictable pattern merging system that scales well under load while maintaining semantic awareness of different token types. The discrete mergeability levels make the system more maintainable and debuggable compared to continuous similarity scoring. - -**The key insight is the `possiblyWildcard` flag that enables automatic wildcard creation during batch consolidation, eliminating the need for similarity thresholds.** \ No newline at end of file From ce1503888614108ce197de18ecf072be7f01b3cb Mon Sep 17 00:00:00 2001 From: yoon nguyen Date: Mon, 3 Nov 2025 17:15:28 -0500 Subject: [PATCH 05/16] Add missing trie.go file for token classification --- pkg/logs/patterns/automaton/trie.go | 320 ++++++++++++++++++++++++++++ 1 file changed, 320 insertions(+) create mode 100644 pkg/logs/patterns/automaton/trie.go diff --git a/pkg/logs/patterns/automaton/trie.go b/pkg/logs/patterns/automaton/trie.go new file mode 100644 index 000000000000..c713c2c65f19 --- /dev/null +++ b/pkg/logs/patterns/automaton/trie.go @@ -0,0 +1,320 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package automaton provides log message tokenization using finite state automaton +// and trie-based pattern matching for token classification. 
+package automaton + +import ( + "regexp" + "strings" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +// TrieNode represents a node in the classification trie +type TrieNode struct { + children map[rune]*TrieNode + tokenType token.TokenType + isTerminal bool +} + +// Trie implements a prefix tree for token classification +type Trie struct { + root *TrieNode + terminalRules []*TerminalRule +} + +// GlobalRuleManager manages terminal rules +var globalRuleManager *RuleManager + +// globalTrie is the singleton trie instance +var globalTrie *Trie + +// init initializes the global trie and rule manager +// todo: componentilize this eventually +func init() { + globalTrie = NewTrie() + globalRuleManager = NewRuleManager() + globalRuleManager.LoadPredefinedRules() + globalTrie.buildPredefinedPatterns() +} + +// NewTrie creates a new trie +func NewTrie() *Trie { + return &Trie{ + root: &TrieNode{ + children: make(map[rune]*TrieNode), + }, + terminalRules: make([]*TerminalRule, 0), + } +} + +// Match performs token classification +func (trie *Trie) Match(value string) token.TokenType { + if len(value) == 0 { + return token.TokenUnknown + } + + if tokenType := trie.exactMatch(value); tokenType != token.TokenUnknown { + return tokenType + } + + return trie.applyTerminalRules(value) +} + +// exactMatch performs exact string matching +func (trie *Trie) exactMatch(value string) token.TokenType { + node := trie.root + + for _, char := range value { + child, exists := node.children[char] + if !exists { + return token.TokenUnknown + } + node = child + } + + if node.isTerminal { + return node.tokenType + } + + return token.TokenUnknown +} + +// applyTerminalRules applies regex-based terminal rules +func (trie *Trie) applyTerminalRules(value string) token.TokenType { + return globalRuleManager.ApplyRules(value) +} + +// AddExactPattern adds an exact string pattern to the trie +func (trie *Trie) AddExactPattern(pattern string, tokenType token.TokenType) { + node := 
trie.root + + for _, char := range pattern { + if _, exists := node.children[char]; !exists { + node.children[char] = &TrieNode{ + children: make(map[rune]*TrieNode), + } + } + node = node.children[char] + } + + node.isTerminal = true + node.tokenType = tokenType +} + +// AddTerminalRule adds a regex-based pattern rule +func (trie *Trie) AddTerminalRule(pattern string, tokenType token.TokenType, priority int) error { + regex, err := regexp.Compile(pattern) + if err != nil { + return err + } + + rule := &TerminalRule{ + Name: "AnonymousRule", + Pattern: regex, + TokenType: tokenType, + Priority: priority, + Category: "default", + Description: "Anonymous terminal rule", + Examples: []string{}, + } + + inserted := false + for i, existing := range trie.terminalRules { + if priority > existing.Priority { + trie.terminalRules = append(trie.terminalRules[:i], append([]*TerminalRule{rule}, trie.terminalRules[i:]...)...) + inserted = true + break + } + } + + if !inserted { + trie.terminalRules = append(trie.terminalRules, rule) + } + + return nil +} + +// buildPredefinedPatterns populates the trie with predefined patterns +func (trie *Trie) buildPredefinedPatterns() { + httpMethods := []string{"GET", "POST", "PUT", "DELETE", "HEAD", "OPTIONS", "PATCH", "TRACE", "CONNECT"} + for _, method := range httpMethods { + trie.AddExactPattern(method, token.TokenHttpMethod) + } + + severityLevels := []string{"TRACE", "DEBUG", "INFO", "WARN", "WARNING", "ERROR", "FATAL", "PANIC", "EMERGENCY", "ALERT", "CRITICAL", "NOTICE"} + for _, level := range severityLevels { + trie.AddExactPattern(level, token.TokenSeverityLevel) + trie.AddExactPattern(strings.ToLower(level), token.TokenSeverityLevel) + } + + trie.AddExactPattern(" ", token.TokenWhitespace) + trie.AddExactPattern("\t", token.TokenWhitespace) + trie.AddExactPattern("\n", token.TokenWhitespace) + trie.AddExactPattern("\r\n", token.TokenWhitespace) + + 
trie.AddTerminalRule(`^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`, token.TokenIPv4, PriorityHigh) + trie.AddTerminalRule(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`, token.TokenEmail, PriorityHigh) + trie.AddTerminalRule(`^https?://[^\s]+$`, token.TokenURI, PriorityMedium) + trie.AddTerminalRule(`^\d{4}-\d{2}-\d{2}`, token.TokenDate, PriorityMedium) + trie.AddTerminalRule(`^\d{2}/\d{2}/\d{4}`, token.TokenDate, PriorityMedium) + trie.AddTerminalRule(`^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}`, token.TokenDate, PriorityMedium) + trie.AddTerminalRule(`^[1-5][0-9][0-9]$`, token.TokenHttpStatus, PriorityMedium) + trie.AddTerminalRule(`^/[^\s]+$`, token.TokenAbsolutePath, PriorityMedium) + trie.AddTerminalRule(`^([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}$`, token.TokenIPv6, PriorityHigh) + trie.AddTerminalRule(`^\d+$`, token.TokenNumeric, PriorityLow) +} + +// TokenizeString is the main entry point +func TokenizeString(input string) *token.TokenList { + if len(input) == 0 { + return token.NewTokenList() + } + + tokenizer := NewTokenizer(input) + return tokenizer.Tokenize() +} + +// Helper functions + +// GetTerminalRules returns all terminal rules +func GetTerminalRules() []*TerminalRule { + return globalRuleManager.ListRules() +} + +// GetRulesByCategory returns rules by category +func GetRulesByCategory(category string) []*TerminalRule { + return globalRuleManager.GetRulesByCategory(category) +} + +// GetRuleCategories returns all rule categories +func GetRuleCategories() []string { + return globalRuleManager.GetCategories() +} + +// AddTerminalRule adds a new terminal rule +func AddTerminalRule(name, pattern, category, description string, tokenType token.TokenType, priority int, examples []string) error { + return globalRuleManager.AddRule(name, pattern, category, description, tokenType, priority, examples) +} + +// GetRuleStats returns rule statistics +func GetRuleStats() RuleStats { + return 
globalRuleManager.GetRuleStats() +} + +// Statistics + +// Stats returns trie statistics +type TrieStats struct { + ExactPatterns int + TerminalRules int + TrieNodes int + MaxDepth int +} + +// GetStats returns trie statistics +func (trie *Trie) GetStats() TrieStats { + nodeCount, maxDepth := trie.countNodes(trie.root, 0) + + return TrieStats{ + ExactPatterns: trie.countExactPatterns(trie.root), + TerminalRules: len(trie.terminalRules), + TrieNodes: nodeCount, + MaxDepth: maxDepth, + } +} + +func (trie *Trie) countNodes(node *TrieNode, depth int) (int, int) { + count := 1 + maxDepth := depth + + for _, child := range node.children { + childCount, childDepth := trie.countNodes(child, depth+1) + count += childCount + if childDepth > maxDepth { + maxDepth = childDepth + } + } + + return count, maxDepth +} + +func (trie *Trie) countExactPatterns(node *TrieNode) int { + count := 0 + if node.isTerminal { + count = 1 + } + + for _, child := range node.children { + count += trie.countExactPatterns(child) + } + + return count +} + +// Validation helpers + +// validateIPv4 validates IPv4 addresses +func validateIPv4(value string) bool { + parts := strings.Split(value, ".") + if len(parts) != 4 { + return false + } + + for _, part := range parts { + if len(part) == 0 || len(part) > 3 { + return false + } + + // Convert to number and check range + num := 0 + for _, char := range part { + if char < '0' || char > '9' { + return false + } + num = num*10 + int(char-'0') + } + + if num > 255 { + return false + } + } + return true +} + +// validateEmail validates email addresses +func validateEmail(value string) bool { + atCount := strings.Count(value, "@") + if atCount != 1 { + return false + } + + parts := strings.Split(value, "@") + if len(parts) != 2 || len(parts[0]) == 0 || len(parts[1]) == 0 { + return false + } + + return strings.Contains(parts[1], ".") +} + +// validateDate validates date strings +func validateDate(value string) bool { + hasDateChars := 
strings.Contains(value, "-") || strings.Contains(value, ":") || strings.Contains(value, "/") + if !hasDateChars { + return false + } + + hasDigits := false + for _, char := range value { + if char >= '0' && char <= '9' { + hasDigits = true + break + } + } + + return hasDigits && len(value) >= 8 && len(value) <= 64 +} From 13d906d3160cc4332a534d76f93a04e04141eb51 Mon Sep 17 00:00:00 2001 From: yoon nguyen Date: Mon, 3 Nov 2025 17:24:57 -0500 Subject: [PATCH 06/16] Fix merge conflict markers and restore mrfEnabled variable --- comp/logs/agent/config/config.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/comp/logs/agent/config/config.go b/comp/logs/agent/config/config.go index d34b6593c89c..82db19ac08d8 100644 --- a/comp/logs/agent/config/config.go +++ b/comp/logs/agent/config/config.go @@ -119,6 +119,8 @@ func BuildEndpointsWithConfig(coreConfig pkgconfigmodel.Reader, logsConfig *Logs "please use '%s' and '%s' instead", logsConfig.getConfigKey("logs_dd_url"), logsConfig.getConfigKey("logs_no_ssl")) } + mrfEnabled := coreConfig.GetBool("multi_region_failover.enabled") + // logs_config.logs_dd_url might specify a HTTP(S) proxy. Never fall back to TCP in this case. 
haveHTTPProxy := false if logsDDURL, defined := logsConfig.logsDDURL(); defined { From 7abb7b7d911b9eacc3854b6bee068da50b519acf Mon Sep 17 00:00:00 2001 From: yoon nguyen Date: Tue, 4 Nov 2025 11:32:14 -0500 Subject: [PATCH 07/16] lint + trie clean up --- pkg/logs/patterns/automaton/rules.go | 2 +- pkg/logs/patterns/automaton/tokenizer.go | 14 +- pkg/logs/patterns/automaton/tokenizer_test.go | 14 +- pkg/logs/patterns/automaton/trie.go | 160 ++---------------- pkg/logs/patterns/automaton/trie_test.go | 79 +++------ pkg/logs/patterns/token/signature_test.go | 12 +- pkg/logs/patterns/token/token.go | 44 ++--- pkg/logs/patterns/token/tokenlist_test.go | 20 +-- pkg/logs/patterns/token/tokentype_string.go | 6 +- 9 files changed, 86 insertions(+), 265 deletions(-) diff --git a/pkg/logs/patterns/automaton/rules.go b/pkg/logs/patterns/automaton/rules.go index 1c1c3800bedf..5d21f7bc81b2 100644 --- a/pkg/logs/patterns/automaton/rules.go +++ b/pkg/logs/patterns/automaton/rules.go @@ -445,7 +445,7 @@ func GetPredefinedRules() []*TerminalRule { { Name: "HTTPStatus", Pattern: regexp.MustCompile(`^[1-5][0-9][0-9]$`), - TokenType: token.TokenHttpStatus, + TokenType: token.TokenHTTPStatus, Priority: PriorityMedium, Category: "http", Description: "Matches HTTP status codes", diff --git a/pkg/logs/patterns/automaton/tokenizer.go b/pkg/logs/patterns/automaton/tokenizer.go index b45943ae4964..d092b64477ae 100644 --- a/pkg/logs/patterns/automaton/tokenizer.go +++ b/pkg/logs/patterns/automaton/tokenizer.go @@ -19,11 +19,11 @@ import ( type TokenizerState int const ( - StateStart TokenizerState = iota - StateWord // Letters, digits, and common separators for structured tokens - StateNumeric // Pure numbers - StateWhitespace // Spaces, tabs, newlines - StateSpecial // Operators, punctuation, symbols + StateStart TokenizerState = iota // StateStart is the initial state + StateWord // StateWord is letters, digits, and common separators for structured tokens + StateNumeric // StateNumeric is 
pure numbers + StateWhitespace // StateWhitespace is spaces, tabs, newlines + StateSpecial // StateSpecial is operators, punctuation, symbols ) const ( @@ -76,7 +76,7 @@ func (t *Tokenizer) Tokenize() *token.TokenList { // pattern matching to identify structured types: // - "192.168.1.1" → TokenNumeric upgraded to TokenIPv4 // - "user@example.com" → TokenWord upgraded to TokenEmail -// - "GET" → TokenWord upgraded to TokenHttpMethod +// - "GET" → TokenWord upgraded to TokenHTTPMethod func (t *Tokenizer) classifyTokens() { for i, tok := range t.tokens { // Skip if not eligible for classification @@ -209,7 +209,7 @@ func (t *Tokenizer) handleSpecialState(char rune) bool { return true } -// classifyToken attempts to classify a single token's type using terminal rules. +// classifyToken attempts to classify a single token's type using trie and terminal rules. func (t *Tokenizer) classifyToken(value string) (token.TokenType, error) { if len(value) == 0 { return token.TokenUnknown, fmt.Errorf("cannot classify empty srting token value") diff --git a/pkg/logs/patterns/automaton/tokenizer_test.go b/pkg/logs/patterns/automaton/tokenizer_test.go index 182f5a4b4088..284b9200a92d 100644 --- a/pkg/logs/patterns/automaton/tokenizer_test.go +++ b/pkg/logs/patterns/automaton/tokenizer_test.go @@ -26,11 +26,11 @@ func TestTokenizer_SimpleTokenization(t *testing.T) { // Verify token types expectedTypes := []token.TokenType{ - token.TokenHttpMethod, // GET + token.TokenHTTPMethod, // GET token.TokenWhitespace, // space token.TokenAbsolutePath, // /api token.TokenWhitespace, // space - token.TokenHttpStatus, // 200 + token.TokenHTTPStatus, // 200 } for i, expected := range expectedTypes { @@ -199,9 +199,9 @@ func TestArchitectureCompliance(t *testing.T) { for i := range tokenList.Tokens { switch tokenList.Tokens[i].Type { - case token.TokenHttpMethod: + case token.TokenHTTPMethod: httpMethod = &tokenList.Tokens[i] - case token.TokenHttpStatus: + case token.TokenHTTPStatus: httpStatus = 
&tokenList.Tokens[i] case token.TokenAbsolutePath: path = &tokenList.Tokens[i] @@ -224,7 +224,7 @@ func TestArchitectureCompliance(t *testing.T) { signature := token.NewSignature(tokenList) assert.False(t, signature.IsEmpty(), "Signature generation failed") - expectedPosition := "HttpMethod|Whitespace|AbsolutePath|Whitespace|HttpStatus" + expectedPosition := "HTTPMethod|Whitespace|AbsolutePath|Whitespace|HTTPStatus" assert.Equal(t, expectedPosition, signature.Position, "Signature position mismatch") } @@ -239,9 +239,9 @@ func TestComplexLogScenarios(t *testing.T) { name: "HTTP Request", input: "GET /api/users 200", expected: []token.TokenType{ - token.TokenHttpMethod, token.TokenWhitespace, + token.TokenHTTPMethod, token.TokenWhitespace, token.TokenAbsolutePath, token.TokenWhitespace, - token.TokenHttpStatus, + token.TokenHTTPStatus, }, }, { diff --git a/pkg/logs/patterns/automaton/trie.go b/pkg/logs/patterns/automaton/trie.go index c713c2c65f19..01b7f0a9fcb2 100644 --- a/pkg/logs/patterns/automaton/trie.go +++ b/pkg/logs/patterns/automaton/trie.go @@ -8,7 +8,6 @@ package automaton import ( - "regexp" "strings" "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" @@ -23,8 +22,7 @@ type TrieNode struct { // Trie implements a prefix tree for token classification type Trie struct { - root *TrieNode - terminalRules []*TerminalRule + root *TrieNode } // GlobalRuleManager manages terminal rules @@ -48,7 +46,6 @@ func NewTrie() *Trie { root: &TrieNode{ children: make(map[rune]*TrieNode), }, - terminalRules: make([]*TerminalRule, 0), } } @@ -106,67 +103,28 @@ func (trie *Trie) AddExactPattern(pattern string, tokenType token.TokenType) { node.tokenType = tokenType } -// AddTerminalRule adds a regex-based pattern rule -func (trie *Trie) AddTerminalRule(pattern string, tokenType token.TokenType, priority int) error { - regex, err := regexp.Compile(pattern) - if err != nil { - return err - } - - rule := &TerminalRule{ - Name: "AnonymousRule", - Pattern: regex, - 
TokenType: tokenType, - Priority: priority, - Category: "default", - Description: "Anonymous terminal rule", - Examples: []string{}, - } - - inserted := false - for i, existing := range trie.terminalRules { - if priority > existing.Priority { - trie.terminalRules = append(trie.terminalRules[:i], append([]*TerminalRule{rule}, trie.terminalRules[i:]...)...) - inserted = true - break - } - } - - if !inserted { - trie.terminalRules = append(trie.terminalRules, rule) - } - - return nil -} - -// buildPredefinedPatterns populates the trie with predefined patterns +// buildPredefinedPatterns populates the trie with exact-match patterns +// for fast classification of known strings (HTTP methods, severity levels, whitespace). +// Regex-based terminal rules are handled by globalRuleManager via LoadPredefinedRules(). func (trie *Trie) buildPredefinedPatterns() { + // HTTP methods - exact string matching httpMethods := []string{"GET", "POST", "PUT", "DELETE", "HEAD", "OPTIONS", "PATCH", "TRACE", "CONNECT"} for _, method := range httpMethods { - trie.AddExactPattern(method, token.TokenHttpMethod) + trie.AddExactPattern(method, token.TokenHTTPMethod) } + // Severity levels - exact string matching (both uppercase and lowercase) severityLevels := []string{"TRACE", "DEBUG", "INFO", "WARN", "WARNING", "ERROR", "FATAL", "PANIC", "EMERGENCY", "ALERT", "CRITICAL", "NOTICE"} for _, level := range severityLevels { trie.AddExactPattern(level, token.TokenSeverityLevel) trie.AddExactPattern(strings.ToLower(level), token.TokenSeverityLevel) } + // Whitespace - exact character matching trie.AddExactPattern(" ", token.TokenWhitespace) trie.AddExactPattern("\t", token.TokenWhitespace) trie.AddExactPattern("\n", token.TokenWhitespace) trie.AddExactPattern("\r\n", token.TokenWhitespace) - - trie.AddTerminalRule(`^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$`, token.TokenIPv4, PriorityHigh) - 
trie.AddTerminalRule(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`, token.TokenEmail, PriorityHigh) - trie.AddTerminalRule(`^https?://[^\s]+$`, token.TokenURI, PriorityMedium) - trie.AddTerminalRule(`^\d{4}-\d{2}-\d{2}`, token.TokenDate, PriorityMedium) - trie.AddTerminalRule(`^\d{2}/\d{2}/\d{4}`, token.TokenDate, PriorityMedium) - trie.AddTerminalRule(`^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}`, token.TokenDate, PriorityMedium) - trie.AddTerminalRule(`^[1-5][0-9][0-9]$`, token.TokenHttpStatus, PriorityMedium) - trie.AddTerminalRule(`^/[^\s]+$`, token.TokenAbsolutePath, PriorityMedium) - trie.AddTerminalRule(`^([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}$`, token.TokenIPv6, PriorityHigh) - trie.AddTerminalRule(`^\d+$`, token.TokenNumeric, PriorityLow) } // TokenizeString is the main entry point @@ -179,36 +137,9 @@ func TokenizeString(input string) *token.TokenList { return tokenizer.Tokenize() } -// Helper functions - -// GetTerminalRules returns all terminal rules -func GetTerminalRules() []*TerminalRule { - return globalRuleManager.ListRules() -} - -// GetRulesByCategory returns rules by category -func GetRulesByCategory(category string) []*TerminalRule { - return globalRuleManager.GetRulesByCategory(category) -} - -// GetRuleCategories returns all rule categories -func GetRuleCategories() []string { - return globalRuleManager.GetCategories() -} - -// AddTerminalRule adds a new terminal rule -func AddTerminalRule(name, pattern, category, description string, tokenType token.TokenType, priority int, examples []string) error { - return globalRuleManager.AddRule(name, pattern, category, description, tokenType, priority, examples) -} - -// GetRuleStats returns rule statistics -func GetRuleStats() RuleStats { - return globalRuleManager.GetRuleStats() -} - // Statistics -// Stats returns trie statistics +// TrieStats is the stats of the trie type TrieStats struct { ExactPatterns int TerminalRules int @@ -216,13 +147,19 @@ type TrieStats struct { MaxDepth int } -// GetStats 
returns trie statistics +// GetStats returns trie statistics for testing purposes func (trie *Trie) GetStats() TrieStats { nodeCount, maxDepth := trie.countNodes(trie.root, 0) + // Terminal rules are managed by globalRuleManager, not the trie itself + terminalRuleCount := 0 + if globalRuleManager != nil { + terminalRuleCount = len(globalRuleManager.rules) + } + return TrieStats{ ExactPatterns: trie.countExactPatterns(trie.root), - TerminalRules: len(trie.terminalRules), + TerminalRules: terminalRuleCount, TrieNodes: nodeCount, MaxDepth: maxDepth, } @@ -255,66 +192,3 @@ func (trie *Trie) countExactPatterns(node *TrieNode) int { return count } - -// Validation helpers - -// validateIPv4 validates IPv4 addresses -func validateIPv4(value string) bool { - parts := strings.Split(value, ".") - if len(parts) != 4 { - return false - } - - for _, part := range parts { - if len(part) == 0 || len(part) > 3 { - return false - } - - // Convert to number and check range - num := 0 - for _, char := range part { - if char < '0' || char > '9' { - return false - } - num = num*10 + int(char-'0') - } - - if num > 255 { - return false - } - } - return true -} - -// validateEmail validates email addresses -func validateEmail(value string) bool { - atCount := strings.Count(value, "@") - if atCount != 1 { - return false - } - - parts := strings.Split(value, "@") - if len(parts) != 2 || len(parts[0]) == 0 || len(parts[1]) == 0 { - return false - } - - return strings.Contains(parts[1], ".") -} - -// validateDate validates date strings -func validateDate(value string) bool { - hasDateChars := strings.Contains(value, "-") || strings.Contains(value, ":") || strings.Contains(value, "/") - if !hasDateChars { - return false - } - - hasDigits := false - for _, char := range value { - if char >= '0' && char <= '9' { - hasDigits = true - break - } - } - - return hasDigits && len(value) >= 8 && len(value) <= 64 -} diff --git a/pkg/logs/patterns/automaton/trie_test.go 
b/pkg/logs/patterns/automaton/trie_test.go index 1bcbf3dcf2cd..8ae73f8d5d4c 100644 --- a/pkg/logs/patterns/automaton/trie_test.go +++ b/pkg/logs/patterns/automaton/trie_test.go @@ -16,8 +16,8 @@ func TestGlobalTrie_ExactMatch(t *testing.T) { input string expected token.TokenType }{ - {"GET", token.TokenHttpMethod}, - {"POST", token.TokenHttpMethod}, + {"GET", token.TokenHTTPMethod}, + {"POST", token.TokenHTTPMethod}, {"ERROR", token.TokenSeverityLevel}, {"INFO", token.TokenSeverityLevel}, {"debug", token.TokenSeverityLevel}, // lowercase @@ -40,9 +40,9 @@ func TestGlobalTrie_TerminalRules(t *testing.T) { input string expected token.TokenType }{ - {"200", token.TokenHttpStatus}, - {"404", token.TokenHttpStatus}, - {"500", token.TokenHttpStatus}, + {"200", token.TokenHTTPStatus}, + {"404", token.TokenHTTPStatus}, + {"500", token.TokenHTTPStatus}, {"192.168.1.1", token.TokenIPv4}, {"10.0.0.1", token.TokenIPv4}, {"test@example.com", token.TokenEmail}, @@ -104,8 +104,8 @@ func TestTrie_AddExactPattern(t *testing.T) { } func TestTrie_AddTerminalRule(t *testing.T) { - // Test adding terminal rule to global rule manager instead - err := AddTerminalRule( + // Test adding terminal rule to global rule manager + err := globalRuleManager.AddRule( "TestRule", `^TEST\d+$`, "test", @@ -136,7 +136,7 @@ func TestTrie_AddTerminalRule(t *testing.T) { func TestTrie_InvalidTerminalRule(t *testing.T) { // Try to add invalid regex to global rule manager - err := AddTerminalRule( + err := globalRuleManager.AddRule( "InvalidRule", `[invalid(regex`, "test", @@ -157,13 +157,24 @@ func TestTrie_ExactMatchPriority(t *testing.T) { testTrie.AddExactPattern("TEST", token.TokenWord) // Add terminal rule that would also match - testTrie.AddTerminalRule(`^TEST$`, token.TokenNumeric, PriorityHigh) + globalRuleManager.AddRule( + "ExactMatchTestRule", + `^TEST$`, + "test", + "Test rule for exact match priority", + token.TokenNumeric, + PriorityHigh, + []string{"TEST"}, + ) // Exact match should take 
priority result := testTrie.Match("TEST") if result != token.TokenWord { t.Errorf("Exact match should take priority, expected TokenWord, got %v", result) } + + // Clean up + globalRuleManager.RemoveRule("ExactMatchTestRule") } func TestTrie_EmptyInput(t *testing.T) { @@ -173,56 +184,6 @@ func TestTrie_EmptyInput(t *testing.T) { } } -func TestValidationFunctions(t *testing.T) { - // Test IPv4 validation - validIPv4 := []string{"192.168.1.1", "10.0.0.1", "255.255.255.255", "0.0.0.0"} - invalidIPv4 := []string{"256.1.1.1", "192.168.1", "192.168.1.1.1", "abc.def.ghi.jkl"} - - for _, ip := range validIPv4 { - if !validateIPv4(ip) { - t.Errorf("validateIPv4('%s') should return true", ip) - } - } - - for _, ip := range invalidIPv4 { - if validateIPv4(ip) { - t.Errorf("validateIPv4('%s') should return false", ip) - } - } - - // Test email validation - validEmails := []string{"test@example.com", "user@domain.org", "admin@company.co.uk"} - invalidEmails := []string{"invalid", "test@", "@domain.com", "test@@domain.com"} - - for _, email := range validEmails { - if !validateEmail(email) { - t.Errorf("validateEmail('%s') should return true", email) - } - } - - for _, email := range invalidEmails { - if validateEmail(email) { - t.Errorf("validateEmail('%s') should return false", email) - } - } - - // Test date validation - validDates := []string{"2023-12-25", "2023-12-25T14:30:00", "12/25/2023", "2023-12-25T14:30:00.123Z"} - invalidDates := []string{"invalid", "123", "abc", ""} - - for _, date := range validDates { - if !validateDate(date) { - t.Errorf("validateDate('%s') should return true", date) - } - } - - for _, date := range invalidDates { - if validateDate(date) { - t.Errorf("validateDate('%s') should return false", date) - } - } -} - func TestTrieNodeStructure(t *testing.T) { testTrie := NewTrie() testTrie.AddExactPattern("ABC", token.TokenWord) diff --git a/pkg/logs/patterns/token/signature_test.go b/pkg/logs/patterns/token/signature_test.go index 
cde682bdff73..bb4bbd4c0234 100644 --- a/pkg/logs/patterns/token/signature_test.go +++ b/pkg/logs/patterns/token/signature_test.go @@ -19,16 +19,16 @@ func TestNewSignature(t *testing.T) { // Non-empty TokenList tokens := []Token{ - {Type: TokenHttpMethod, Value: "GET"}, + {Type: TokenHTTPMethod, Value: "GET"}, {Type: TokenWhitespace, Value: " "}, {Type: TokenAbsolutePath, Value: "/api"}, {Type: TokenWhitespace, Value: " "}, - {Type: TokenHttpStatus, Value: "200"}, + {Type: TokenHTTPStatus, Value: "200"}, } tl := NewTokenListWithTokens(tokens) sig := NewSignature(tl) - expectedPosition := "HttpMethod|Whitespace|AbsolutePath|Whitespace|HttpStatus" + expectedPosition := "HTTPMethod|Whitespace|AbsolutePath|Whitespace|HTTPStatus" if sig.Position != expectedPosition { t.Errorf("Expected position signature '%s', got '%s'", expectedPosition, sig.Position) } @@ -120,12 +120,12 @@ func TestSignature_IsEmpty(t *testing.T) { func TestSignature_HasSameStructure(t *testing.T) { // Same structure, different values tokens1 := []Token{ - {Type: TokenHttpMethod, Value: "GET"}, + {Type: TokenHTTPMethod, Value: "GET"}, {Type: TokenWhitespace, Value: " "}, {Type: TokenAbsolutePath, Value: "/api"}, } tokens2 := []Token{ - {Type: TokenHttpMethod, Value: "POST"}, + {Type: TokenHTTPMethod, Value: "POST"}, {Type: TokenWhitespace, Value: " "}, {Type: TokenAbsolutePath, Value: "/users"}, } @@ -192,7 +192,7 @@ func TestComputeHash(t *testing.T) { func TestSignature_ConsistentHashing(t *testing.T) { // Test that identical TokenLists produce identical signatures with same hash tokens := []Token{ - {Type: TokenHttpMethod, Value: "GET"}, + {Type: TokenHTTPMethod, Value: "GET"}, {Type: TokenWhitespace, Value: " "}, {Type: TokenAbsolutePath, Value: "/api"}, } diff --git a/pkg/logs/patterns/token/token.go b/pkg/logs/patterns/token/token.go index 6fc00d0569a5..2f40c36ab8d4 100644 --- a/pkg/logs/patterns/token/token.go +++ b/pkg/logs/patterns/token/token.go @@ -12,33 +12,33 @@ import ( //go:generate 
stringer -type=TokenType -trimprefix=Token -// TokenType represents the type of a token // TokenType.String() method is auto-generated by stringer // Run: go generate ./pkg/logs/patterns/token to regenerate the stringer file if you make changes to the TokenType enum +// TokenType represents the type of a token type TokenType int const ( // Basic token types - TokenUnknown TokenType = iota - TokenWord - TokenNumeric - TokenWhitespace + TokenUnknown TokenType = iota // TokenUnknown is the unknown token type + TokenWord // TokenWord is the word token type + TokenNumeric // TokenNumeric is the numeric token type + TokenWhitespace // TokenWhitespace is the whitespace token type // Network-related tokens - TokenIPv4 - TokenIPv6 - TokenEmail - TokenURI - TokenAbsolutePath + TokenIPv4 // TokenIPv4 is the IPv4 token type + TokenIPv6 // TokenIPv6 is the IPv6 token type + TokenEmail // TokenEmail is the email token type + TokenURI // TokenURI is the URI token type + TokenAbsolutePath // TokenAbsolutePath is the absolute path token type // HTTP-related tokens - TokenHttpMethod - TokenHttpStatus + TokenHTTPMethod // TokenHTTPMethod is the HTTP method token type + TokenHTTPStatus // TokenHTTPStatus is the HTTP status token type // Log-related tokens - TokenSeverityLevel - TokenDate + TokenSeverityLevel // TokenSeverityLevel is the severity level token type + TokenDate // TokenDate is the date token type ) // WildcardStatus describes a token's relationship to wildcards @@ -96,31 +96,31 @@ func (t *Token) String() string { } // Compare checks if two tokens can merge and returns the result -func (t1 *Token) Compare(t2 *Token) MergeResult { +func (t *Token) Compare(t2 *Token) MergeResult { // Different types cannot merge - if t1.Type != t2.Type { + if t.Type != t2.Type { return Conflict } // Identical value - if t1.Value == t2.Value { + if t.Value == t2.Value { return Identical } - // t1 is wildcard - matches any value of same type - if t1.Wildcard == IsWildcard { + // t is wildcard 
- matches any value of same type + if t.Wildcard == IsWildcard { return Identical } // Different values - check if they can merge into wildcard // Whitespace never wildcards (structural) - if t1.Type == TokenWhitespace { + if t.Type == TokenWhitespace { return Conflict } // Words only wildcard if both are PotentialWildcard - if t1.Type == TokenWord { - if t1.Wildcard == PotentialWildcard && t2.Wildcard == PotentialWildcard { + if t.Type == TokenWord { + if t.Wildcard == PotentialWildcard && t2.Wildcard == PotentialWildcard { return Wildcard } return Conflict diff --git a/pkg/logs/patterns/token/tokenlist_test.go b/pkg/logs/patterns/token/tokenlist_test.go index 06063897a724..8a6b571f6f3d 100644 --- a/pkg/logs/patterns/token/tokenlist_test.go +++ b/pkg/logs/patterns/token/tokenlist_test.go @@ -81,13 +81,13 @@ func TestTokenList_PositionSignature(t *testing.T) { // Non-empty token list tokens := []Token{ - {Type: TokenHttpMethod, Value: "GET"}, + {Type: TokenHTTPMethod, Value: "GET"}, {Type: TokenWhitespace, Value: " "}, {Type: TokenAbsolutePath, Value: "/api"}, } tl := NewTokenListWithTokens(tokens) - expectedPosition := "HttpMethod|Whitespace|AbsolutePath" + expectedPosition := "HTTPMethod|Whitespace|AbsolutePath" if positionSignature(tl) != expectedPosition { t.Errorf("Expected position signature '%s', got '%s'", expectedPosition, positionSignature(tl)) } @@ -96,7 +96,7 @@ func TestTokenList_PositionSignature(t *testing.T) { func TestTokenList_Signature(t *testing.T) { // Test that TokenList.Signature() creates a proper signature tokens := []Token{ - {Type: TokenHttpMethod, Value: "GET"}, + {Type: TokenHTTPMethod, Value: "GET"}, {Type: TokenWhitespace, Value: " "}, {Type: TokenAbsolutePath, Value: "/api"}, } @@ -113,17 +113,3 @@ func TestTokenList_Signature(t *testing.T) { t.Error("Signature position should not be empty") } } - -// Helper function to check if string contains substring -func containsSubstring(str, substr string) bool { - return len(str) >= 
len(substr) && findSubstring(str, substr) >= 0 -} - -func findSubstring(str, substr string) int { - for i := 0; i <= len(str)-len(substr); i++ { - if str[i:i+len(substr)] == substr { - return i - } - } - return -1 -} diff --git a/pkg/logs/patterns/token/tokentype_string.go b/pkg/logs/patterns/token/tokentype_string.go index 6b6cc1c05042..91b756894d27 100644 --- a/pkg/logs/patterns/token/tokentype_string.go +++ b/pkg/logs/patterns/token/tokentype_string.go @@ -17,13 +17,13 @@ func _() { _ = x[TokenEmail-6] _ = x[TokenURI-7] _ = x[TokenAbsolutePath-8] - _ = x[TokenHttpMethod-9] - _ = x[TokenHttpStatus-10] + _ = x[TokenHTTPMethod-9] + _ = x[TokenHTTPStatus-10] _ = x[TokenSeverityLevel-11] _ = x[TokenDate-12] } -const _TokenType_name = "UnknownWordNumericWhitespaceIPv4IPv6EmailURIAbsolutePathHttpMethodHttpStatusSeverityLevelDate" +const _TokenType_name = "UnknownWordNumericWhitespaceIPv4IPv6EmailURIAbsolutePathHTTPMethodHTTPStatusSeverityLevelDate" var _TokenType_index = [...]uint8{0, 7, 11, 18, 28, 32, 36, 41, 44, 56, 66, 76, 89, 93} From e265c33dc3313db8af734ba10c4e1065d65ad470 Mon Sep 17 00:00:00 2001 From: yoon nguyen Date: Tue, 4 Nov 2025 12:14:34 -0500 Subject: [PATCH 08/16] Restore AGENTS.md and CLAUDE.md to main versions --- AGENTS.md | 229 ++++++++++++++++++++++++++++++++++++++++++++++-------- CLAUDE.md | 1 - 2 files changed, 196 insertions(+), 34 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 5ae851e43fe3..9f6fce30a3cb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,33 +1,196 @@ -# Repository Guidelines - -## Project Structure & Module Organization -- `cmd/` hosts the binaries: `cmd/agent` for the core Agent, `cmd/cluster-agent`, `cmd/dogstatsd`, `cmd/trace-agent`, and eBPF tooling under `cmd/system-probe`. -- Shared Go packages live in `pkg/` (e.g., `pkg/aggregator`, `pkg/collector`, `pkg/config`), while componentized logic is in `comp/` for incremental adoption. 
-- Python invoke tasks reside in `tasks/`; docs and contributor references are under `docs/` and `docs/dev/`. -- Development configs live in `dev/dist/`; the main runtime config copies to `bin/agent/dist/datadog.yaml` after builds. - -## Build, Test, and Development Commands -- `dda inv install-tools` installs the Go, Python, and system tools required for local builds. -- `dda inv agent.build --build-exclude=systemd` produces the primary agent binary without systemd assets; swap in component-specific targets such as `dda inv dogstatsd.build` or `dda inv trace-agent.build` when iterating on those services. -- `dda inv test --targets=./pkg/aggregator` scopes unit tests to a package; omit `--targets` to exercise the full suite. -- `dda inv linter.go` runs `golangci-lint`; prefer `dda inv linter.all` before large merges to surface cross-language issues early. - -## Coding Style & Naming Conventions -- Format Go sources with `gofmt` (tabs for indentation, camelCase for identifiers) and rely on `golangci-lint` to enforce project rules. -- Python tooling in `tasks/` follows PEP 8; run `dda inv linter.python` if you touch those scripts. -- Favor descriptive package paths (`pkg/network/`, `comp/core/telemetry`) and snake_case filenames for YAML configs. - -## Testing Guidelines -- Go tests use the standard framework; function names must follow `TestXxx`. Table-driven subtests are preferred for coverage clarity. -- Python checks leverage `pytest`; mirror module names with `test_*.py` files. -- Investigate coverage gaps with `dda inv test --targets= --coverage` and document notable exclusions in the PR. - -## Commit & Pull Request Guidelines -- Recent history shows conventional prefixes (`feat:`, `fix:`, `docs:`) and ticket tags (`[CXP-####]`); follow that pattern and keep subjects under 72 characters. -- Reference issues in the body, outline testing performed, and attach logs or screenshots when UI or observability output changes. 
-- Pull requests should describe scope, risks, and rollout considerations; note configuration updates so reviewers can flag downstream impacts. - -## Security & Configuration Tips -- Never commit secrets; use the secret backend or redacted fixtures for tests. -- Store experimental configuration under `dev/` and guard runtime features with the appropriate Go build tags (see `tasks/build_tags.py`). -- Review changes touching `system-probe` or `security-agent` with dedicated owners—these components ship kernel-space code and warrant extra scrutiny. +# Datadog Agent - Project Overview for AI coding assistant + +## Project Summary +The Datadog Agent is a comprehensive monitoring and observability agent written primarily in Go. It collects metrics, traces, logs, and security events from systems and applications, forwarding them to the Datadog platform. This is the main repository for Agent versions 6 and 7. + +## Project Structure + +### Core Directories +- `/cmd/` - Entry points for various agent components + - `agent/` - Main agent binary + - `cluster-agent/` - Kubernetes cluster agent + - `dogstatsd/` - StatsD metrics daemon + - `trace-agent/` - APM trace collection agent + - `system-probe/` - System-level monitoring (eBPF) + - `security-agent/` - Security monitoring + - `process-agent/` - Process monitoring + +- `/pkg/` - Core Go packages and libraries + - `aggregator/` - Metrics aggregation + - `collector/` - Check scheduling and execution + - `config/` - Configuration management + - `logs/` - Log collection and processing + - `metrics/` - Metrics types and handling + - `network/` - Network monitoring + - `security/` - Security monitoring components + - `trace/` - APM tracing components + +- `/comp/` - Component-based architecture modules + - `core/` - Core components + - `metadata/` - Metadata collection + - `logs/` - Log components + - `trace/` - Trace components + +- `/tasks/` - Python invoke tasks for development + - Build, test, lint, and deployment automation 
+ +- `/rtloader/` - Runtime loader for Python checks + +## Development Workflow + +### Common Commands + +#### Building + +```bash +# Install dda on macOS +brew install --cask dda + +# Install development tools +dda inv install-tools + +# Build the main agent +dda inv agent.build --build-exclude=systemd + +# Build specific components +dda inv dogstatsd.build +dda inv trace-agent.build +dda inv system-probe.build +``` + +#### Testing +```bash +# Run all tests +dda inv test + +# Test specific package +dda inv test --targets=./pkg/aggregator + +# Run Go linters +dda inv linter.go + +# Run all linters +dda inv linter.all +``` + +#### Running Locally +```bash +# Create dev config with testing API key +echo "api_key: 0000001" > dev/dist/datadog.yaml + +# Run the agent +./bin/agent/agent run -c bin/agent/dist/datadog.yaml +``` + +### Development Configuration +The development configuration file should be placed at `dev/dist/datadog.yaml`. After building, it gets copied to `bin/agent/dist/datadog.yaml`. + +## Key Components + +### Check System +- Checks are Python or Go modules that collect metrics +- Located in `cmd/agent/dist/checks/` +- Can be autodiscovered via Kubernetes annotations/labels + +### Configuration +- Main config: `datadog.yaml` +- Check configs: `conf.d/<check_name>.d/conf.yaml` +- Supports environment variable overrides with `DD_` prefix + +## Testing Strategy + +### Unit Tests +- Go tests using standard `go test` +- Python tests using pytest +- Run with `dda inv test --targets=<package>` + +### End-to-End Tests +- E2E framework in `test/new-e2e/` + +### Linting +- Go: golangci-lint via `dda inv linter.go` +- Python: various linters via `dda inv linter.python` +- YAML: yamllint +- Shell: shellcheck + +## Build System + +### Invoke Tasks +The project uses Python's Invoke framework with custom tasks.
Main task categories: +- `agent.*` - Core agent tasks +- `test` - Testing tasks +- `linter.*` - Linting tasks +- `docker.*` - Docker image tasks +- `release.*` - Release management + +### Build Tags +Go build tags control feature inclusion; some examples are: +- `kubeapiserver` - Kubernetes API server support +- `containerd` - containerd support +- `docker` - Docker support +- `ebpf` - eBPF support +- `python` - Python check support +- and MANY more, refer to ./tasks/build_tags.py for a full reference. + +## Important Files + +### Configuration +- `datadog.yaml` - Main agent configuration +- `modules.yml` - Go module definitions +- `release.json` - Release version information +- `.gitlab-ci.yml` - CI/CD pipeline configuration + +### Documentation +- `/docs/` - Internal documentation +- `/docs/dev/` - Developer guides +- `README.md` - Project overview +- `CONTRIBUTING.md` - Contribution guidelines + +## CI/CD Pipeline + +### GitLab CI +- Primary CI system +- Defined in `.gitlab-ci.yml` and `.gitlab/` directory +- Runs tests, builds, and deployments + +### GitHub Actions +- Secondary CI for specific workflows +- Checks on pull-request settings and repository configuration +- Release automation workflows + +## Security Considerations + +### Sensitive Data +- Never commit API keys or secrets +- Use secret backend for credentials + +## Module System +The project uses Go modules with multiple sub-modules. +TODO: Describe specific strategies for managing modules, including any invoke +tasks. + +## Platform Support +- **Linux**: Full support (amd64, arm64) +- **Windows**: Full support (Server 2016+, Windows 10+) +- **macOS**: Supported +- **AIX**: No support in this codebase +- **Container**: Docker, Kubernetes, ECS, containerd, and more + +## Best Practices + +1. **Always run linters before committing**: `dda inv linter.go` +2. **Always test your changes**: `dda inv test --targets=<package>` +3. **Follow Go conventions**: Use gofmt, follow project structure +4.
**Update documentation**: Keep docs in sync with code changes +5. **Check for security implications**: Review security-sensitive changes carefully + +## Troubleshooting Development Issues + +### Common Build Issues +- **Missing tools**: Run `dda inv install-tools` +- **CMake errors**: Run `dda inv rtloader.clean` + +### Testing Issues +- **Flaky tests**: Check `flakes.yaml` for known issues +- **Coverage issues**: Use `--coverage` flag + diff --git a/CLAUDE.md b/CLAUDE.md index a41daf630e23..4e006f51f5fb 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,3 +1,2 @@ @AGENTS.md @CLAUDE_PERSONAL.md -@stateful_encoding_design.md \ No newline at end of file From 6f5d7989b048e1422017688a99d6cdd27d2dfb2e Mon Sep 17 00:00:00 2001 From: Joy Zhang Date: Thu, 4 Sep 2025 12:12:20 +0200 Subject: [PATCH 09/16] Initial implementation of stateful gRPC stream for Logs agent Hooks gRPC stream to Logs agent pipeline --- comp/logs/agent/config/config.go | 9 +- comp/logs/agent/config/config_keys.go | 14 + comp/logs/agent/config/config_test.go | 6 +- comp/logs/agent/config/endpoints.go | 21 +- comp/logs/agent/config/endpoints_test.go | 32 + pkg/config/setup/config.go | 4 + pkg/logs/message/message.go | 8 + pkg/logs/pipeline/pipeline.go | 18 +- pkg/logs/pipeline/provider.go | 5 +- pkg/logs/sender/grpc/batch_strategy.go | 238 +++++ pkg/logs/sender/grpc/batch_strategy_test.go | 469 +++++++++ pkg/logs/sender/grpc/inflight.go | 145 +++ pkg/logs/sender/grpc/inflight_test.go | 482 +++++++++ pkg/logs/sender/grpc/mock_encoder.go | 23 + pkg/logs/sender/grpc/mock_state.go | 75 ++ pkg/logs/sender/grpc/sender.go | 264 +++++ pkg/logs/sender/grpc/stateful_encoding.pb.go | 968 ++++++++++++++++++ pkg/logs/sender/grpc/stateful_encoding.proto | 107 ++ .../sender/grpc/stateful_encoding_grpc.pb.go | 115 +++ pkg/logs/sender/grpc/stream_worker.go | 680 ++++++++++++ pkg/logs/sender/grpc/stream_worker_test.go | 789 ++++++++++++++ pkg/logs/sender/grpc/streamstate_string.go | 27 + pkg/logs/sender/message_buffer.go |
9 +- 23 files changed, 4499 insertions(+), 9 deletions(-) create mode 100644 pkg/logs/sender/grpc/batch_strategy.go create mode 100644 pkg/logs/sender/grpc/batch_strategy_test.go create mode 100644 pkg/logs/sender/grpc/inflight.go create mode 100644 pkg/logs/sender/grpc/inflight_test.go create mode 100644 pkg/logs/sender/grpc/mock_encoder.go create mode 100644 pkg/logs/sender/grpc/mock_state.go create mode 100644 pkg/logs/sender/grpc/sender.go create mode 100644 pkg/logs/sender/grpc/stateful_encoding.pb.go create mode 100644 pkg/logs/sender/grpc/stateful_encoding.proto create mode 100644 pkg/logs/sender/grpc/stateful_encoding_grpc.pb.go create mode 100644 pkg/logs/sender/grpc/stream_worker.go create mode 100644 pkg/logs/sender/grpc/stream_worker_test.go create mode 100644 pkg/logs/sender/grpc/streamstate_string.go diff --git a/comp/logs/agent/config/config.go b/comp/logs/agent/config/config.go index 44d9c98a4303..69e932c3f49b 100644 --- a/comp/logs/agent/config/config.go +++ b/comp/logs/agent/config/config.go @@ -124,7 +124,7 @@ func BuildEndpointsWithConfig(coreConfig pkgconfigmodel.Reader, logsConfig *Logs if logsDDURL, defined := logsConfig.logsDDURL(); defined { haveHTTPProxy = strings.HasPrefix(logsDDURL, "http://") || strings.HasPrefix(logsDDURL, "https://") } - if logsConfig.isForceHTTPUse() || haveHTTPProxy || logsConfig.obsPipelineWorkerEnabled() || (bool(httpConnectivity) && !(logsConfig.isForceTCPUse() || logsConfig.isSocks5ProxySet() || logsConfig.hasAdditionalEndpoints())) { + if logsConfig.isGRPCUse() || logsConfig.isForceHTTPUse() || haveHTTPProxy || logsConfig.obsPipelineWorkerEnabled() || (bool(httpConnectivity) && !(logsConfig.isForceTCPUse() || logsConfig.isSocks5ProxySet() || logsConfig.hasAdditionalEndpoints())) { return BuildHTTPEndpointsWithConfig(coreConfig, logsConfig, endpointPrefix, intakeTrackType, intakeProtocol, intakeOrigin) } log.Warnf("You are currently sending Logs to Datadog through TCP (either because %s or %s is set or the HTTP 
connectivity test has failed) "+ @@ -373,7 +373,7 @@ func buildHTTPEndpoints(coreConfig pkgconfigmodel.Reader, logsConfig *LogsConfig batchMaxContentSize := logsConfig.batchMaxContentSize() inputChanSize := logsConfig.inputChanSize() - return NewEndpointsWithBatchSettings(main, additionals, false, true, batchWait, batchMaxConcurrentSend, batchMaxSize, batchMaxContentSize, inputChanSize), nil + return NewEndpointsWithBatchSettings(main, additionals, false, true, logsConfig.isGRPCUse(), batchWait, batchMaxConcurrentSend, batchMaxSize, batchMaxContentSize, inputChanSize), nil } type defaultParseAddressFunc func(string) (host string, port int, err error) @@ -447,6 +447,11 @@ func TaggerWarmupDuration(coreConfig pkgconfigmodel.Reader) time.Duration { return defaultLogsConfigKeys(coreConfig).taggerWarmupDuration() } +// StreamLifetime returns the duration for gRPC stream lifetime before rotation. +func StreamLifetime(coreConfig pkgconfigmodel.Reader) time.Duration { + return defaultLogsConfigKeys(coreConfig).streamLifetime() +} + // AggregationTimeout is used when performing aggregation operations func AggregationTimeout(coreConfig pkgconfigmodel.Reader) time.Duration { return defaultLogsConfigKeys(coreConfig).aggregationTimeout() diff --git a/comp/logs/agent/config/config_keys.go b/comp/logs/agent/config/config_keys.go index bf6f9313c28c..110243d41068 100644 --- a/comp/logs/agent/config/config_keys.go +++ b/comp/logs/agent/config/config_keys.go @@ -101,6 +101,10 @@ func (l *LogsConfigKeys) isForceHTTPUse() bool { l.getConfig().GetBool(l.getConfigKey("force_use_http")) } +func (l *LogsConfigKeys) isGRPCUse() bool { + return l.getConfig().GetBool(l.getConfigKey("use_grpc")) +} + func (l *LogsConfigKeys) logsNoSSL() bool { return l.getConfig().GetBool(l.getConfigKey("logs_no_ssl")) } @@ -292,6 +296,16 @@ func (l *LogsConfigKeys) senderRecoveryReset() bool { return l.getConfig().GetBool(l.getConfigKey("sender_recovery_reset")) } +func (l *LogsConfigKeys) streamLifetime() 
time.Duration { + key := l.getConfigKey("stream_lifetime") + streamLifetime := l.getConfig().GetInt(key) + if streamLifetime <= 0 { + log.Warnf("Invalid %s: %v should be > 0, fallback on %v", key, streamLifetime, pkgconfigsetup.DefaultLogsStreamLifetime) + return time.Duration(pkgconfigsetup.DefaultLogsStreamLifetime) * time.Second + } + return time.Duration(streamLifetime) * time.Second +} + // AggregationTimeout is used when performing aggregation operations func (l *LogsConfigKeys) aggregationTimeout() time.Duration { return l.getConfig().GetDuration(l.getConfigKey("aggregation_timeout")) * time.Millisecond diff --git a/comp/logs/agent/config/config_test.go b/comp/logs/agent/config/config_test.go index ee9049f48d9a..1de098e78668 100644 --- a/comp/logs/agent/config/config_test.go +++ b/comp/logs/agent/config/config_test.go @@ -287,7 +287,7 @@ func (suite *ConfigTestSuite) TestMultipleHttpEndpointsEnvVar() { isReliable: true, } - expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) + expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, false, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) endpoints, err := BuildHTTPEndpoints(suite.config, "test-track", "test-proto", "test-source") suite.Nil(err) @@ -414,7 +414,7 @@ func (suite *ConfigTestSuite) TestMultipleHttpEndpointsInConfig() { isReliable: true, } - expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, 
1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) + expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, false, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) endpoints, err := BuildHTTPEndpoints(suite.config, "test-track", "test-proto", "test-source") suite.Nil(err) @@ -504,7 +504,7 @@ func (suite *ConfigTestSuite) TestMultipleHttpEndpointsInConfig2() { isReliable: true, } - expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) + expectedEndpoints := NewEndpointsWithBatchSettings(expectedMainEndpoint, []Endpoint{expectedAdditionalEndpoint1, expectedAdditionalEndpoint2}, false, true, false, 1*time.Second, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, pkgconfigsetup.DefaultBatchMaxContentSize, pkgconfigsetup.DefaultInputChanSize) endpoints, err := BuildHTTPEndpoints(suite.config, "test-track", "test-proto", "test-source") suite.Nil(err) diff --git a/comp/logs/agent/config/endpoints.go b/comp/logs/agent/config/endpoints.go index 6771f20d6d25..238c222c38ff 100644 --- a/comp/logs/agent/config/endpoints.go +++ b/comp/logs/agent/config/endpoints.go @@ -343,6 +343,7 @@ type Endpoints struct { Endpoints []Endpoint UseProto bool UseHTTP bool + UseGRPC bool BatchWait time.Duration BatchMaxConcurrentSend int BatchMaxSize int @@ -369,6 +370,23 @@ func NewEndpoints(main Endpoint, additionalEndpoints []Endpoint, 
useProto bool, additionalEndpoints, useProto, useHTTP, + false, // useGRPC defaults to false for backward compatibility + pkgconfigsetup.DefaultBatchWait, + pkgconfigsetup.DefaultBatchMaxConcurrentSend, + pkgconfigsetup.DefaultBatchMaxSize, + pkgconfigsetup.DefaultBatchMaxContentSize, + pkgconfigsetup.DefaultInputChanSize, + ) +} + +// NewEndpointsWithGRPC returns a new endpoints composite with gRPC support +func NewEndpointsWithGRPC(main Endpoint, additionalEndpoints []Endpoint, useProto bool, useHTTP bool, useGRPC bool) *Endpoints { + return NewEndpointsWithBatchSettings( + main, + additionalEndpoints, + useProto, + useHTTP, + useGRPC, pkgconfigsetup.DefaultBatchWait, pkgconfigsetup.DefaultBatchMaxConcurrentSend, pkgconfigsetup.DefaultBatchMaxSize, @@ -378,12 +396,13 @@ func NewEndpoints(main Endpoint, additionalEndpoints []Endpoint, useProto bool, } // NewEndpointsWithBatchSettings returns a new endpoints composite with non-default batching settings specified -func NewEndpointsWithBatchSettings(main Endpoint, additionalEndpoints []Endpoint, useProto bool, useHTTP bool, batchWait time.Duration, batchMaxConcurrentSend int, batchMaxSize int, batchMaxContentSize int, inputChanSize int) *Endpoints { +func NewEndpointsWithBatchSettings(main Endpoint, additionalEndpoints []Endpoint, useProto bool, useHTTP bool, useGRPC bool, batchWait time.Duration, batchMaxConcurrentSend int, batchMaxSize int, batchMaxContentSize int, inputChanSize int) *Endpoints { return &Endpoints{ Main: main, Endpoints: append([]Endpoint{main}, additionalEndpoints...), UseProto: useProto, UseHTTP: useHTTP, + UseGRPC: useGRPC, BatchWait: batchWait, BatchMaxConcurrentSend: batchMaxConcurrentSend, BatchMaxSize: batchMaxSize, diff --git a/comp/logs/agent/config/endpoints_test.go b/comp/logs/agent/config/endpoints_test.go index 831e7b52113c..cfdba0590321 100644 --- a/comp/logs/agent/config/endpoints_test.go +++ b/comp/logs/agent/config/endpoints_test.go @@ -135,6 +135,24 @@ func (suite 
*EndpointsTestSuite) TestBuildEndpointsShouldSucceedWithValidHTTPCon suite.Equal("agent-http-intake.logs.datadoghq.com.", endpoint.Host) } +func (suite *EndpointsTestSuite) TestBuildEndpointsShouldSucceedWithValidGRPCConfig() { + var endpoints *Endpoints + var endpoint Endpoint + var err error + + suite.config.SetWithoutSource("logs_config.use_grpc", true) + + endpoints, err = BuildEndpoints(suite.config, HTTPConnectivityFailure, "test-track", "test-proto", "test-source") + suite.Nil(err) + suite.True(endpoints.UseGRPC) + suite.False(endpoints.UseHTTP) + suite.Equal(endpoints.BatchWait, 5*time.Second) + + endpoint = endpoints.Main + suite.True(endpoint.UseSSL()) + suite.Equal("agent-http-intake.logs.datadoghq.com.", endpoint.Host) +} + func (suite *EndpointsTestSuite) TestBuildEndpointsShouldSucceedWithValidHTTPConfigAndCompression() { var endpoints *Endpoints var endpoint Endpoint @@ -259,6 +277,7 @@ func (suite *EndpointsTestSuite) TestBuildEndpointsShouldTakeIntoAccountHTTPConn suite.config.SetWithoutSource("logs_config.force_use_tcp", "false") suite.config.SetWithoutSource("logs_config.use_http", "false") suite.config.SetWithoutSource("logs_config.force_use_http", "false") + suite.config.SetWithoutSource("logs_config.use_grpc", "false") suite.config.SetWithoutSource("logs_config.socks5_proxy_address", "") suite.config.SetWithoutSource("logs_config.additional_endpoints", []map[string]interface{}{}) } @@ -329,6 +348,19 @@ func (suite *EndpointsTestSuite) TestBuildEndpointsShouldTakeIntoAccountHTTPConn suite.config.SetWithoutSource("logs_config.socks5_proxy_address", "") }) + suite.Run("When use_grpc is true always create gRPC endpoints", func() { + defer resetHTTPConfigValuesToFalse() + suite.config.SetWithoutSource("logs_config.use_grpc", "true") + endpoints, err := BuildEndpoints(suite.config, HTTPConnectivitySuccess, "test-track", "test-proto", "test-source") + suite.Nil(err) + suite.True(endpoints.UseGRPC) + suite.False(endpoints.UseHTTP) + endpoints, err = 
BuildEndpoints(suite.config, HTTPConnectivityFailure, "test-track", "test-proto", "test-source") + suite.Nil(err) + suite.True(endpoints.UseGRPC) + suite.False(endpoints.UseHTTP) + }) + suite.Run("When additional_endpoints is not empty always create TCP endpoints", func() { defer resetHTTPConfigValuesToFalse() suite.config.SetWithoutSource("logs_config.additional_endpoints", []map[string]interface{}{ diff --git a/pkg/config/setup/config.go b/pkg/config/setup/config.go index bd96231222bd..835c4ee90a75 100644 --- a/pkg/config/setup/config.go +++ b/pkg/config/setup/config.go @@ -118,6 +118,9 @@ const ( // DefaultLogsSenderBackoffRecoveryInterval is the default logs sender backoff recovery interval DefaultLogsSenderBackoffRecoveryInterval = 2 + // DefaultLogsStreamLifetime is the default gRPC stream lifetime in seconds (15 minutes) + DefaultLogsStreamLifetime = 900 + // maxExternalMetricsProviderChunkSize ensures batch queries are limited in size. maxExternalMetricsProviderChunkSize = 35 @@ -2728,6 +2731,7 @@ func bindEnvAndSetLogsConfigKeys(config pkgconfigmodel.Setup, prefix string) { config.BindEnvAndSetDefault(prefix+"sender_backoff_max", DefaultLogsSenderBackoffMax) config.BindEnvAndSetDefault(prefix+"sender_recovery_interval", DefaultForwarderRecoveryInterval) config.BindEnvAndSetDefault(prefix+"sender_recovery_reset", false) + config.BindEnvAndSetDefault(prefix+"stream_lifetime", DefaultLogsStreamLifetime) config.BindEnvAndSetDefault(prefix+"use_v2_api", true) config.SetKnown(prefix + "dev_mode_no_ssl") //nolint:forbidigo // TODO: replace by 'SetDefaultAndBindEnv' } diff --git a/pkg/logs/message/message.go b/pkg/logs/message/message.go index 4f91ef10ffc4..a8f090390aea 100644 --- a/pkg/logs/message/message.go +++ b/pkg/logs/message/message.go @@ -70,6 +70,14 @@ type Message struct { MessageMetadata } +// StatefulMessage represents a log message for gRPC stateful streaming +// It contains a Datum (from stateful_encoding.proto) and associated metadata +// Datum is 
stored as `any` to avoid import cycle with sender/grpc package +type StatefulMessage struct { + Datum any // Will hold *grpc.Datum + Metadata *MessageMetadata +} + // MessageMetadata contains metadata information about a log message // //nolint:revive // exported: ignore package name struct conflict diff --git a/pkg/logs/pipeline/pipeline.go b/pkg/logs/pipeline/pipeline.go index 499209b0d313..0cbd05e143f8 100644 --- a/pkg/logs/pipeline/pipeline.go +++ b/pkg/logs/pipeline/pipeline.go @@ -20,6 +20,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/metrics" "github.com/DataDog/datadog-agent/pkg/logs/processor" "github.com/DataDog/datadog-agent/pkg/logs/sender" + grpcsender "github.com/DataDog/datadog-agent/pkg/logs/sender/grpc" compressioncommon "github.com/DataDog/datadog-agent/pkg/util/compression" ) @@ -54,6 +55,10 @@ func NewPipeline( } else { encoder = processor.JSONServerlessInitEncoder } + } else if endpoints.UseGRPC { + // Throwaway code to test with existing pipelines + // TODO change to real encoder once State component is ready + encoder = grpcsender.MockEncoder } else if endpoints.UseHTTP { encoder = processor.JSONEncoder } else if endpoints.UseProto { @@ -105,13 +110,24 @@ func getStrategy( compressor logscompression.Component, instanceID string, ) sender.Strategy { - if endpoints.UseHTTP || serverlessMeta.IsEnabled() { + if endpoints.UseGRPC || endpoints.UseHTTP || serverlessMeta.IsEnabled() { var encoder compressioncommon.Compressor encoder = compressor.NewCompressor(compressioncommon.NoneKind, 0) if endpoints.Main.UseCompression { encoder = compressor.NewCompressor(endpoints.Main.CompressionKind, endpoints.Main.CompressionLevel) } + if endpoints.UseGRPC { + // Throwaway code to test with existing pipelines + // TODO: Remove this once we have a real State component + // The interface of stateful transport layer is input channel to the GRPCBatchStrategy + // The input type is StatefulMessage, which should be emitted by the State component + // Here 
is the temporary translation from Message to StatefulMessage + statefulInputChan := make(chan *message.StatefulMessage, pkgconfigsetup.Datadog().GetInt("logs_config.message_channel_size")) + grpcsender.StartMessageTranslator(inputChan, statefulInputChan) + + return grpcsender.NewBatchStrategy(statefulInputChan, outputChan, flushChan, endpoints.BatchWait, endpoints.BatchMaxSize, endpoints.BatchMaxContentSize, "logs", encoder, pipelineMonitor, instanceID) + } return sender.NewBatchStrategy( inputChan, outputChan, diff --git a/pkg/logs/pipeline/provider.go b/pkg/logs/pipeline/provider.go index 9737f8a5c007..ad8c3002b800 100644 --- a/pkg/logs/pipeline/provider.go +++ b/pkg/logs/pipeline/provider.go @@ -22,6 +22,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/message" "github.com/DataDog/datadog-agent/pkg/logs/metrics" "github.com/DataDog/datadog-agent/pkg/logs/sender" + grpcsender "github.com/DataDog/datadog-agent/pkg/logs/sender/grpc" httpsender "github.com/DataDog/datadog-agent/pkg/logs/sender/http" tcpsender "github.com/DataDog/datadog-agent/pkg/logs/sender/tcp" "github.com/DataDog/datadog-agent/pkg/logs/status/statusinterface" @@ -88,7 +89,9 @@ func NewProvider( var senderImpl sender.PipelineComponent serverlessMeta := sender.NewServerlessMeta(serverless) - if endpoints.UseHTTP { + if endpoints.UseGRPC { + senderImpl = grpcsender.NewSender(numberOfPipelines, cfg, sink, endpoints, destinationsContext) + } else if endpoints.UseHTTP { senderImpl = httpSender(numberOfPipelines, cfg, sink, endpoints, destinationsContext, serverlessMeta, legacyMode) } else { senderImpl = tcpSender(numberOfPipelines, cfg, sink, endpoints, destinationsContext, status, serverlessMeta, legacyMode) diff --git a/pkg/logs/sender/grpc/batch_strategy.go b/pkg/logs/sender/grpc/batch_strategy.go new file mode 100644 index 000000000000..57b3775ab754 --- /dev/null +++ b/pkg/logs/sender/grpc/batch_strategy.go @@ -0,0 +1,238 @@ +// Unless explicitly stated otherwise all files in this 
repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//nolint:revive // TODO(AML) Fix revive linter +package grpc + +import ( + "time" + + "github.com/benbjohnson/clock" + "google.golang.org/protobuf/proto" + + "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/logs/metrics" + "github.com/DataDog/datadog-agent/pkg/logs/sender" + "github.com/DataDog/datadog-agent/pkg/telemetry" + "github.com/DataDog/datadog-agent/pkg/util/compression" + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +var ( + tlmDroppedTooLarge = telemetry.NewCounter("logs_sender_grpc_batch_strategy", "dropped_too_large", []string{"pipeline"}, "Number of payloads dropped due to being too large") +) + +// batchStrategy contains batching logic for gRPC sender without serializer +// It collects Datum objects from StatefulMessages and creates Payload with serialized DatumSequence +// Note: Serverless logs are not supported in this PoC implementation +type batchStrategy struct { + inputChan chan *message.StatefulMessage + outputChan chan *message.Payload + flushChan chan struct{} + buffer *sender.MessageBuffer + pipelineName string + batchWait time.Duration + compression compression.Compressor + stopChan chan struct{} // closed when the goroutine has finished + clock clock.Clock + + // For gRPC: store Datums separately since MessageBuffer only stores metadata + grpcDatums []*Datum + + // Telemetry + pipelineMonitor metrics.PipelineMonitor + utilization metrics.UtilizationMonitor + instanceID string +} + +// NewBatchStrategy returns a new gRPC batch strategy +func NewBatchStrategy(inputChan chan *message.StatefulMessage, + outputChan chan *message.Payload, + flushChan chan struct{}, + batchWait time.Duration, + maxBatchSize int, + maxContentSize int, + pipelineName string, + compression compression.Compressor, + 
pipelineMonitor metrics.PipelineMonitor, + instanceID string, +) sender.Strategy { + return newBatchStrategyWithClock(inputChan, outputChan, flushChan, batchWait, maxBatchSize, maxContentSize, pipelineName, clock.New(), compression, pipelineMonitor, instanceID) +} + +func newBatchStrategyWithClock(inputChan chan *message.StatefulMessage, + outputChan chan *message.Payload, + flushChan chan struct{}, + batchWait time.Duration, + maxBatchSize int, + maxContentSize int, + pipelineName string, + clock clock.Clock, + compression compression.Compressor, + pipelineMonitor metrics.PipelineMonitor, + instanceID string, +) sender.Strategy { + + return &batchStrategy{ + inputChan: inputChan, + outputChan: outputChan, + flushChan: flushChan, + buffer: sender.NewMessageBuffer(maxBatchSize, maxContentSize), + batchWait: batchWait, + compression: compression, + stopChan: make(chan struct{}), + pipelineName: pipelineName, + clock: clock, + grpcDatums: make([]*Datum, 0), + pipelineMonitor: pipelineMonitor, + utilization: pipelineMonitor.MakeUtilizationMonitor(metrics.StrategyTlmName, instanceID), + instanceID: instanceID, + } +} + +// Mostly copy/pasted from sender/batch_strategy.go +func (s *batchStrategy) Stop() { + close(s.inputChan) + <-s.stopChan +} + +// Mostly copy/pasted from sender/batch_strategy.go +func (s *batchStrategy) Start() { + go func() { + flushTicker := s.clock.Ticker(s.batchWait) + defer func() { + s.flushBuffer(s.outputChan) + flushTicker.Stop() + close(s.stopChan) + }() + for { + select { + case m, isOpen := <-s.inputChan: + if !isOpen { + // inputChan has been closed, no more payloads are expected + return + } + s.processMessage(m, s.outputChan) + case <-flushTicker.C: + // flush the payloads at a regular interval so pending messages don't wait here for too long. 
+ s.flushBuffer(s.outputChan) + case <-s.flushChan: + // flush payloads on demand, used for infrequently running serverless functions + s.flushBuffer(s.outputChan) + } + } + }() +} + +func (s *batchStrategy) addMessage(m *message.StatefulMessage) (bool, error) { + // No utilization tracking here - just trivial slice operations + // Real work (proto marshaling) is tracked in sendMessagesWithDatums() + + // Validate Datum first + if m.Datum == nil { + return false, log.Errorf("StatefulMessage has nil Datum") + } + datum, ok := m.Datum.(*Datum) + if !ok { + return false, log.Errorf("StatefulMessage Datum has wrong type: %T", m.Datum) + } + + // Try to add to buffer + if s.buffer.AddMessageWithSize(m.Metadata, m.Metadata.RawDataLen) { + s.grpcDatums = append(s.grpcDatums, datum) + return true, nil + } + + // Buffer full (not an error) + return false, nil +} + +// Mostly copy/pasted from batch.go +func (s *batchStrategy) processMessage(m *message.StatefulMessage, outputChan chan *message.Payload) { + // Track latency stats from metadata + if m.Metadata.Origin != nil { + m.Metadata.Origin.LogSource.LatencyStats.Add(m.Metadata.GetLatency()) + } + + added, err := s.addMessage(m) + if err != nil { + log.Warnf("Invalid message in pipeline=%s: %v - dropping", s.pipelineName, err) + return + } + if !added || s.buffer.IsFull() { + s.flushBuffer(outputChan) + } + if !added { + // it's possible that the message could not be added because the buffer was full + // so we need to retry once again + added, err = s.addMessage(m) + if err != nil { + log.Warnf("Invalid message in pipeline=%s: %v - dropping", s.pipelineName, err) + return + } + if !added { + log.Warnf("Dropped message in pipeline=%s reason=too-large ContentLength=%d ContentSizeLimit=%d", s.pipelineName, m.Metadata.RawDataLen, s.buffer.ContentSizeLimit()) + tlmDroppedTooLarge.Inc(s.pipelineName) + } + } +} + +// flushBuffer sends all the messages that are stored in the buffer and forwards them +// to the next stage of the 
pipeline. +func (s *batchStrategy) flushBuffer(outputChan chan *message.Payload) { + if s.buffer.IsEmpty() { + return + } + + s.utilization.Start() + + messagesMetadata := s.buffer.GetMessages() + s.buffer.Clear() + + // Use the collected Datums and clear them + grpcDatums := s.grpcDatums + s.grpcDatums = make([]*Datum, 0) + + s.sendMessagesWithDatums(messagesMetadata, grpcDatums, outputChan) +} + +func (s *batchStrategy) sendMessagesWithDatums(messagesMetadata []*message.MessageMetadata, grpcDatums []*Datum, outputChan chan *message.Payload) { + defer s.utilization.Stop() + + unencodedSize := 0 + for _, msgMeta := range messagesMetadata { + unencodedSize += msgMeta.RawDataLen + } + + // Create DatumSequence and marshal to bytes + datumSeq := &DatumSequence{ + Data: grpcDatums, + } + + serialized, err := proto.Marshal(datumSeq) + if err != nil { + log.Errorf("Failed to marshal DatumSequence: %v", err) + return + } + + // Compress the serialized protobuf data + compressed, err := s.compression.Compress(serialized) + if err != nil { + log.Errorf("Failed to compress DatumSequence: %v", err) + return + } + + // Create payload with compressed data + p := &message.Payload{ + MessageMetas: messagesMetadata, + Encoded: compressed, + Encoding: s.compression.ContentEncoding(), + UnencodedSize: unencodedSize, + } + + outputChan <- p + s.pipelineMonitor.ReportComponentEgress(p, metrics.StrategyTlmName, s.instanceID) + s.pipelineMonitor.ReportComponentIngress(p, metrics.SenderTlmName, metrics.SenderTlmInstanceID) +} diff --git a/pkg/logs/sender/grpc/batch_strategy_test.go b/pkg/logs/sender/grpc/batch_strategy_test.go new file mode 100644 index 000000000000..54ab52745cc4 --- /dev/null +++ b/pkg/logs/sender/grpc/batch_strategy_test.go @@ -0,0 +1,469 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). 
+// Copyright 2016-present Datadog, Inc. + +//go:build test + +package grpc + +import ( + "testing" + "time" + + "github.com/benbjohnson/clock" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/proto" + + compressionfx "github.com/DataDog/datadog-agent/comp/serializer/logscompression/fx-mock" + "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/logs/metrics" + "github.com/DataDog/datadog-agent/pkg/util/compression" +) + +// Helper to create test StatefulMessage with Datum +func createTestStatefulMessage(content string) *message.StatefulMessage { + msg := message.NewMessage([]byte(content), nil, "", 0) + msg.MessageMetadata.RawDataLen = len(content) + + datum := &Datum{ + Data: &Datum_Logs{ + Logs: &Log{ + Timestamp: 12345, + Content: &Log_Raw{ + Raw: content, + }, + }, + }, + } + + return &message.StatefulMessage{ + Metadata: &msg.MessageMetadata, + Datum: datum, + } +} + +func TestBatchStrategySendsPayloadWhenBufferIsFull(t *testing.T) { + input := make(chan *message.StatefulMessage) + output := make(chan *message.Payload) + flushChan := make(chan struct{}) + + s := NewBatchStrategy( + input, + output, + flushChan, + 100*time.Millisecond, + 2, // maxBatchSize + 1000, + "test", + compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1), + metrics.NewNoopPipelineMonitor(""), + "test") + s.Start() + + message1 := createTestStatefulMessage("a") + input <- message1 + + message2 := createTestStatefulMessage("b") + input <- message2 + + // Expect payload to be sent because buffer is full + payload := <-output + assert.Equal(t, 2, len(payload.MessageMetas)) + assert.Equal(t, message1.Metadata, payload.MessageMetas[0]) + assert.Equal(t, message2.Metadata, payload.MessageMetas[1]) + assert.Equal(t, "identity", payload.Encoding) + assert.Equal(t, 2, payload.UnencodedSize) + + // Verify the payload contains valid DatumSequence + var datumSeq DatumSequence + err 
:= proto.Unmarshal(payload.Encoded, &datumSeq) + require.NoError(t, err) + assert.Equal(t, 2, len(datumSeq.Data)) + assert.Equal(t, "a", datumSeq.Data[0].GetLogs().GetRaw()) + assert.Equal(t, "b", datumSeq.Data[1].GetLogs().GetRaw()) + + s.Stop() + + if _, isOpen := <-input; isOpen { + assert.Fail(t, "input should be closed") + } +} + +func TestBatchStrategySendsPayloadWhenBufferIsOutdated(t *testing.T) { + input := make(chan *message.StatefulMessage) + output := make(chan *message.Payload) + flushChan := make(chan struct{}) + timerInterval := 100 * time.Millisecond + + clk := clock.NewMock() + s := newBatchStrategyWithClock( + input, + output, + flushChan, + timerInterval, + 100, // maxBatchSize + 1000, + "test", + clk, + compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1), + metrics.NewNoopPipelineMonitor(""), + "test") + s.Start() + + for round := 0; round < 3; round++ { + m := createTestStatefulMessage("test") + input <- m + + // It should flush in this time + clk.Add(2 * timerInterval) + + payload := <-output + assert.EqualValues(t, m.Metadata, payload.MessageMetas[0]) + + // Verify payload contains valid DatumSequence + var datumSeq DatumSequence + err := proto.Unmarshal(payload.Encoded, &datumSeq) + require.NoError(t, err) + assert.Equal(t, 1, len(datumSeq.Data)) + } + + s.Stop() + if _, isOpen := <-input; isOpen { + assert.Fail(t, "input should be closed") + } +} + +func TestBatchStrategySendsPayloadWhenClosingInput(t *testing.T) { + input := make(chan *message.StatefulMessage) + output := make(chan *message.Payload) + flushChan := make(chan struct{}) + + clk := clock.NewMock() + s := newBatchStrategyWithClock( + input, + output, + flushChan, + 100*time.Millisecond, + 2, + 1000, + "test", + clk, + compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1), + metrics.NewNoopPipelineMonitor(""), + "test") + s.Start() + + message := createTestStatefulMessage("test") + input <- message + + go func() { + s.Stop() + }() + + if 
_, isOpen := <-input; isOpen { + assert.Fail(t, "input should be closed") + } + + // Expect payload to be sent before timer, so we never advance the clock; if this + // doesn't work, the test will hang + payload := <-output + assert.Equal(t, message.Metadata, payload.MessageMetas[0]) +} + +func TestBatchStrategyShouldNotBlockWhenStoppingGracefully(t *testing.T) { + input := make(chan *message.StatefulMessage) + output := make(chan *message.Payload) + flushChan := make(chan struct{}) + + s := NewBatchStrategy( + input, + output, + flushChan, + 100*time.Millisecond, + 2, + 1000, + "test", + compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1), + metrics.NewNoopPipelineMonitor(""), + "test") + s.Start() + + message := createTestStatefulMessage("test") + input <- message + + go func() { + s.Stop() + }() + + if _, isOpen := <-input; isOpen { + assert.Fail(t, "input should be closed") + } + + payload := <-output + assert.Equal(t, message.Metadata, payload.MessageMetas[0]) +} + +func TestBatchStrategySynchronousFlush(t *testing.T) { + input := make(chan *message.StatefulMessage) + output := make(chan *message.Payload) + flushChan := make(chan struct{}) + + // Batch size is large so it will not flush until we trigger it manually + // Flush time is large so it won't automatically trigger during this test + strategy := NewBatchStrategy( + input, + output, + flushChan, + time.Hour, + 100, + 10000, + "test", + compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1), + metrics.NewNoopPipelineMonitor(""), + "test") + strategy.Start() + + // All of these messages will get buffered + messages := []*message.StatefulMessage{ + createTestStatefulMessage("a"), + createTestStatefulMessage("b"), + createTestStatefulMessage("c"), + } + + messageMeta := make([]*message.MessageMetadata, len(messages)) + for idx, m := range messages { + input <- m + messageMeta[idx] = m.Metadata + } + + // Since the batch size is large there should be nothing on the 
output yet + select { + case <-output: + assert.Fail(t, "there should be nothing on the output channel yet") + default: + } + + go func() { + // Stop triggers the flush and make sure we can read the messages out now + strategy.Stop() + }() + + if _, isOpen := <-input; isOpen { + assert.Fail(t, "input should be closed") + } + + payload := <-output + assert.ElementsMatch(t, messageMeta, payload.MessageMetas) + + select { + case <-output: + assert.Fail(t, "the output channel should still be empty") + default: + } +} + +func TestBatchStrategyFlushChannel(t *testing.T) { + input := make(chan *message.StatefulMessage) + output := make(chan *message.Payload) + flushChan := make(chan struct{}) + + // Batch size is large so it will not flush until we trigger it manually + // Flush time is large so it won't automatically trigger during this test + strategy := NewBatchStrategy( + input, + output, + flushChan, + time.Hour, + 100, + 10000, + "test", + compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1), + metrics.NewNoopPipelineMonitor(""), + "test") + strategy.Start() + + // All of these messages will get buffered + messages := []*message.StatefulMessage{ + createTestStatefulMessage("a"), + createTestStatefulMessage("b"), + createTestStatefulMessage("c"), + } + messageMeta := make([]*message.MessageMetadata, len(messages)) + for idx, m := range messages { + input <- m + messageMeta[idx] = m.Metadata + } + + // Since the batch size is large there should be nothing on the output yet + select { + case <-output: + assert.Fail(t, "there should be nothing on the output channel yet") + default: + } + + // Trigger a manual flush + flushChan <- struct{}{} + + payload := <-output + assert.ElementsMatch(t, messageMeta, payload.MessageMetas) + + // Ensure we read all of the messages + select { + case <-output: + assert.Fail(t, "the output channel should still be empty") + default: + } + + // End the test strategy + go func() { + // Stop triggers the flush and make 
sure we can read the messages out now + strategy.Stop() + }() + + if _, isOpen := <-input; isOpen { + assert.Fail(t, "input should be closed") + } +} + +func TestBatchStrategyMessageTooLarge(t *testing.T) { + input := make(chan *message.StatefulMessage) + output := make(chan *message.Payload, 10) // Buffered to prevent deadlock + flushChan := make(chan struct{}) + + strategy := NewBatchStrategy( + input, + output, + flushChan, + time.Hour, + 100, + 10, // Small content size limit + "test", + compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1), + metrics.NewNoopPipelineMonitor(""), + "test") + strategy.Start() + + // Send a message that fits + normalMessage := createTestStatefulMessage("small") + input <- normalMessage + + // Send a message that's too large (will be dropped) + largeMessage := createTestStatefulMessage("this message is way too large for the content size limit") + input <- largeMessage + + // Trigger flush + flushChan <- struct{}{} + + // Should only receive the normal message + payload := <-output + assert.Equal(t, 1, len(payload.MessageMetas)) + assert.Equal(t, normalMessage.Metadata, payload.MessageMetas[0]) + + // Verify no more payloads + select { + case <-output: + assert.Fail(t, "should not receive more payloads") + default: + } + + strategy.Stop() +} + +func TestBatchStrategyInvalidDatum(t *testing.T) { + input := make(chan *message.StatefulMessage) + output := make(chan *message.Payload, 10) // Buffered to prevent deadlock + flushChan := make(chan struct{}) + + strategy := NewBatchStrategy( + input, + output, + flushChan, + time.Hour, + 100, + 1000, + "test", + compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1), + metrics.NewNoopPipelineMonitor(""), + "test") + strategy.Start() + + // Send message with nil Datum + msg1 := message.NewMessage([]byte("test"), nil, "", 0) + invalidMsg1 := &message.StatefulMessage{ + Metadata: &msg1.MessageMetadata, + Datum: nil, + } + input <- invalidMsg1 + + // Send 
message with wrong Datum type + msg2 := message.NewMessage([]byte("test"), nil, "", 0) + invalidMsg2 := &message.StatefulMessage{ + Metadata: &msg2.MessageMetadata, + Datum: "wrong type", + } + input <- invalidMsg2 + + // Send a valid message + validMsg := createTestStatefulMessage("valid") + input <- validMsg + + // Trigger flush + flushChan <- struct{}{} + + // Should only receive the valid message + payload := <-output + assert.Equal(t, 1, len(payload.MessageMetas)) + assert.Equal(t, validMsg.Metadata, payload.MessageMetas[0]) + + strategy.Stop() +} + +func TestBatchStrategyCompression(t *testing.T) { + input := make(chan *message.StatefulMessage) + output := make(chan *message.Payload, 10) // Buffered to prevent deadlock + flushChan := make(chan struct{}) + + // Use identity (no-op) compression for simplicity + // Testing actual compression behavior is covered by the compression package tests + compressor := compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1) + + strategy := NewBatchStrategy( + input, + output, + flushChan, + time.Hour, + 100, + 10000, + "test", + compressor, + metrics.NewNoopPipelineMonitor(""), + "test") + strategy.Start() + + // Send several messages + for i := 0; i < 5; i++ { + msg := createTestStatefulMessage("test message") + input <- msg + } + + // Trigger flush + flushChan <- struct{}{} + + payload := <-output + assert.Equal(t, 5, len(payload.MessageMetas)) + assert.Equal(t, "identity", payload.Encoding) + assert.NotEmpty(t, payload.Encoded) + + // Verify the payload contains valid DatumSequence (identity compression = no compression) + var datumSeq DatumSequence + err := proto.Unmarshal(payload.Encoded, &datumSeq) + require.NoError(t, err) + assert.Equal(t, 5, len(datumSeq.Data)) + for _, datum := range datumSeq.Data { + assert.Equal(t, "test message", datum.GetLogs().GetRaw()) + } + + strategy.Stop() +} diff --git a/pkg/logs/sender/grpc/inflight.go b/pkg/logs/sender/grpc/inflight.go new file mode 100644 index 
000000000000..fc79deaa892e --- /dev/null +++ b/pkg/logs/sender/grpc/inflight.go @@ -0,0 +1,145 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package grpc + +import ( + "github.com/DataDog/datadog-agent/pkg/logs/message" +) + +// inflightTracker is a bounded FIFO queue that tracks payloads in two regions: +// 1. Sent but awaiting acknowledgment (head to sentTail) +// 2. Buffered but not yet sent to the network (sentTail to tail) +// +// Queue Layout: +// [--sent awaiting ack--][--buffered not sent--] +// ^ ^ ^ +// head sentTail tail +// +// BatchID tracking: +// - Sent payloads have sequential batchIDs: [headBatchID, headBatchID+1, ..., headBatchID+sentSize-1] +// - Only tracks headBatchID (oldest sent) and nextBatchID (next to be assigned) +type inflightTracker struct { + items []*message.Payload + head int // Index of the oldest sent item (awaiting ack) + sentTail int // Index of the first buffered item that's not yet sent + tail int // Index of the next available slot for new buffered items + cap int // Maximum total capacity of the tracker + headBatchID uint32 // BatchID of the oldest sent payload (at head) + batchIDCounter uint32 // Next batchID to be assigned when markSent is called +} + +// newInflightTracker creates a new bounded inflight tracker with the given capacity +// Allocates capacity+1 slots to implement the "waste one slot" ring buffer pattern +func newInflightTracker(capacity int) *inflightTracker { + return &inflightTracker{ + items: make([]*message.Payload, capacity+1), + cap: capacity, + } +} + +// hasSpace returns true if there is at least one free slot +func (t *inflightTracker) hasSpace() bool { + return t.totalCount() < t.cap +} + +// append adds a new payload to the buffered region (not yet sent) +// Returns true if the payload was 
added, false if the tracker is full +func (t *inflightTracker) append(payload *message.Payload) bool { + if !t.hasSpace() { + return false + } + t.items[t.tail] = payload + t.tail = (t.tail + 1) % len(t.items) + return true +} + +// pop removes and returns the oldest sent payload (at head) after receiving an ack +// Returns nil if there are no sent payloads +func (t *inflightTracker) pop() *message.Payload { + if t.head == t.sentTail { + return nil + } + payload := t.items[t.head] + t.items[t.head] = nil // Allow GC + t.head = (t.head + 1) % len(t.items) + + // Advance headBatchID for the next payload + if t.head != t.sentTail { + t.headBatchID++ + } + + return payload +} + +// hasUnacked returns true if there are sent payloads awaiting acknowledgment +func (t *inflightTracker) hasUnacked() bool { + return t.head != t.sentTail +} + +// hasUnSent returns true if there are buffered payloads not yet sent +func (t *inflightTracker) hasUnSent() bool { + return t.sentTail != t.tail +} + +// getHeadBatchID returns the expected batchID at the head (oldest sent payload) +// Caller must check hasUnacked() first to ensure there are sent payloads +func (t *inflightTracker) getHeadBatchID() uint32 { + return t.headBatchID +} + +// nextBatchID returns the batchID that will be assigned to the next sent item +// This is a peek operation (idempotent, no mutation) +func (t *inflightTracker) nextBatchID() uint32 { + return t.batchIDCounter +} + +// markSent moves a buffered payload to the sent region and assigns it a batchID +// Returns true if successful, false if there are no buffered payloads +func (t *inflightTracker) markSent() bool { + if t.sentTail == t.tail { + return false + } + + // If this is the first sent item, set headBatchID + if t.head == t.sentTail { + t.headBatchID = t.batchIDCounter + } + + t.sentTail = (t.sentTail + 1) % len(t.items) + t.batchIDCounter++ // Increment counter for next batch + return true +} + +// nextToSend returns the next buffered payload ready 
to be sent (without removing it) +// Returns nil if there are no buffered payloads +func (t *inflightTracker) nextToSend() *message.Payload { + if t.sentTail == t.tail { + return nil + } + return t.items[t.sentTail] +} + +// sentCount returns the number of sent payloads awaiting ack +func (t *inflightTracker) sentCount() int { + return (t.sentTail - t.head + len(t.items)) % len(t.items) +} + +// totalCount returns the total number of tracked payloads +func (t *inflightTracker) totalCount() int { + return (t.tail - t.head + len(t.items)) % len(t.items) +} + +// resetOnRotation sets all un-acked payloads back to un-sent and resets the batchID. +func (t *inflightTracker) resetOnRotation() { + // Move all sent items back to buffered region by resetting sentTail to head + // This makes all items [head, tail) buffered again + t.sentTail = t.head + + // Reset batchID counter for the new stream + // Make the first batchID be 1, since 0 is reserved for the snapshot state + t.headBatchID = 1 + t.batchIDCounter = 1 +} diff --git a/pkg/logs/sender/grpc/inflight_test.go b/pkg/logs/sender/grpc/inflight_test.go new file mode 100644 index 000000000000..653db10b44c7 --- /dev/null +++ b/pkg/logs/sender/grpc/inflight_test.go @@ -0,0 +1,482 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +package grpc + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/DataDog/datadog-agent/pkg/logs/message" +) + +// Helper function to create test payloads +func createTestPayload(content string) *message.Payload { + return &message.Payload{ + Encoded: []byte(content), + } +} + +func TestNewInflightTracker(t *testing.T) { + tracker := newInflightTracker(10) + + assert.NotNil(t, tracker) + assert.Equal(t, 10, tracker.cap) + assert.Equal(t, 0, tracker.head) + assert.Equal(t, 0, tracker.sentTail) + assert.Equal(t, 0, tracker.tail) + assert.Equal(t, uint32(0), tracker.headBatchID) + assert.Equal(t, uint32(0), tracker.batchIDCounter) + assert.True(t, tracker.hasSpace()) + assert.False(t, tracker.hasUnacked()) + assert.False(t, tracker.hasUnSent()) +} + +func TestInflightTrackerAppend(t *testing.T) { + tracker := newInflightTracker(10) + + // Append first payload + payload1 := createTestPayload("test1") + assert.True(t, tracker.append(payload1)) + assert.Equal(t, 1, tracker.totalCount()) + assert.True(t, tracker.hasUnSent()) + assert.False(t, tracker.hasUnacked()) + + // Append second payload + payload2 := createTestPayload("test2") + assert.True(t, tracker.append(payload2)) + assert.Equal(t, 2, tracker.totalCount()) + assert.True(t, tracker.hasSpace()) + + // Append third payload + payload3 := createTestPayload("test3") + assert.True(t, tracker.append(payload3)) + assert.Equal(t, 3, tracker.totalCount()) +} + +func TestInflightTrackerAppendWhenFull(t *testing.T) { + // Test filling buffer to absolute capacity from empty state + tracker := newInflightTracker(3) + + // Fill to capacity (3 items) + assert.True(t, tracker.append(createTestPayload("test1"))) + assert.Equal(t, 1, tracker.totalCount()) + assert.True(t, tracker.hasSpace()) + + assert.True(t, tracker.append(createTestPayload("test2"))) + assert.Equal(t, 2, tracker.totalCount()) + assert.True(t, tracker.hasSpace()) + + assert.True(t, tracker.append(createTestPayload("test3"))) + 
assert.Equal(t, 3, tracker.totalCount()) + assert.False(t, tracker.hasSpace()) + + // Append should fail when full + assert.False(t, tracker.append(createTestPayload("test4"))) + assert.Equal(t, 3, tracker.totalCount()) +} + +func TestInflightTrackerMarkSent(t *testing.T) { + tracker := newInflightTracker(5) + + // Add buffered payloads + payload1 := createTestPayload("test1") + payload2 := createTestPayload("test2") + tracker.append(payload1) + tracker.append(payload2) + + assert.Equal(t, 0, tracker.sentCount()) + assert.True(t, tracker.hasUnSent()) + assert.False(t, tracker.hasUnacked()) + + // Mark first as sent + assert.True(t, tracker.markSent()) + assert.Equal(t, 1, tracker.sentCount()) + assert.Equal(t, uint32(0), tracker.getHeadBatchID()) + assert.Equal(t, uint32(1), tracker.nextBatchID()) + assert.True(t, tracker.hasUnacked()) + assert.True(t, tracker.hasUnSent()) + + // Mark second as sent + assert.True(t, tracker.markSent()) + assert.Equal(t, 2, tracker.sentCount()) + assert.Equal(t, uint32(0), tracker.getHeadBatchID()) + assert.Equal(t, uint32(2), tracker.nextBatchID()) + assert.True(t, tracker.hasUnacked()) + assert.False(t, tracker.hasUnSent()) + + // Try to mark sent when no buffered items + assert.False(t, tracker.markSent()) +} + +func TestInflightTrackerPop(t *testing.T) { + tracker := newInflightTracker(5) + + // Add and mark payloads as sent + payload1 := createTestPayload("test1") + payload2 := createTestPayload("test2") + tracker.append(payload1) + tracker.append(payload2) + tracker.markSent() + tracker.markSent() + + assert.Equal(t, 2, tracker.sentCount()) + assert.Equal(t, uint32(0), tracker.getHeadBatchID()) + + // Pop first payload + popped1 := tracker.pop() + assert.Equal(t, payload1, popped1) + assert.Equal(t, 1, tracker.sentCount()) + assert.Equal(t, uint32(1), tracker.getHeadBatchID()) + assert.True(t, tracker.hasUnacked()) + + // Pop second payload + popped2 := tracker.pop() + assert.Equal(t, payload2, popped2) + assert.Equal(t, 0, 
tracker.sentCount()) + assert.False(t, tracker.hasUnacked()) + + // Pop when empty should return nil + poppedNil := tracker.pop() + assert.Nil(t, poppedNil) +} + +func TestInflightTrackerNextToSend(t *testing.T) { + tracker := newInflightTracker(5) + + // NextToSend on empty tracker should return nil + assert.Nil(t, tracker.nextToSend()) + + // Add buffered payloads + payload1 := createTestPayload("test1") + payload2 := createTestPayload("test2") + tracker.append(payload1) + tracker.append(payload2) + + // NextToSend should return first buffered payload + next := tracker.nextToSend() + assert.Equal(t, payload1, next) + + // Mark first as sent + tracker.markSent() + + // NextToSend should return second buffered payload + next = tracker.nextToSend() + assert.Equal(t, payload2, next) + + // Mark second as sent + tracker.markSent() + + // NextToSend should return nil when no buffered payloads + next = tracker.nextToSend() + assert.Nil(t, next) +} + +func TestInflightTrackerBatchIDSequence(t *testing.T) { + tracker := newInflightTracker(5) + + // Add and send payloads + for i := 0; i < 3; i++ { + payload := createTestPayload("test") + tracker.append(payload) + } + + // Initial batchIDCounter should be 0 + assert.Equal(t, uint32(0), tracker.nextBatchID()) + + // Mark first as sent + tracker.markSent() + assert.Equal(t, uint32(0), tracker.getHeadBatchID()) + assert.Equal(t, uint32(1), tracker.nextBatchID()) + + // Mark second as sent + tracker.markSent() + assert.Equal(t, uint32(0), tracker.getHeadBatchID()) + assert.Equal(t, uint32(2), tracker.nextBatchID()) + + // Mark third as sent + tracker.markSent() + assert.Equal(t, uint32(0), tracker.getHeadBatchID()) + assert.Equal(t, uint32(3), tracker.nextBatchID()) + + // Pop first - headBatchID should advance + tracker.pop() + assert.Equal(t, uint32(1), tracker.getHeadBatchID()) + + // Pop second - headBatchID should advance + tracker.pop() + assert.Equal(t, uint32(2), tracker.getHeadBatchID()) +} + +func 
TestInflightTrackerResetOnRotation(t *testing.T) { + tracker := newInflightTracker(5) + + // Add payloads and mark some as sent + for i := 0; i < 3; i++ { + payload := createTestPayload("test") + tracker.append(payload) + tracker.markSent() + } + + // Pop one ack + tracker.pop() + + // State before reset: 2 sent (awaiting ack), 0 buffered + assert.Equal(t, 2, tracker.sentCount()) + assert.Equal(t, 0, tracker.totalCount()-tracker.sentCount()) + assert.Equal(t, uint32(1), tracker.getHeadBatchID()) + assert.Equal(t, uint32(3), tracker.nextBatchID()) + + // Reset on rotation + tracker.resetOnRotation() + + // After reset: 0 sent, 2 buffered (un-acked payloads become buffered) + assert.Equal(t, 0, tracker.sentCount()) + assert.Equal(t, 2, tracker.totalCount()) + assert.True(t, tracker.hasUnSent()) + assert.False(t, tracker.hasUnacked()) + + // Batch IDs should reset to 1 + assert.Equal(t, uint32(1), tracker.headBatchID) + assert.Equal(t, uint32(1), tracker.nextBatchID()) +} + +func TestInflightTrackerWrapAround(t *testing.T) { + // Test wrap-around behavior without filling to absolute capacity + tracker := newInflightTracker(6) + + // Fill and empty to advance head pointer + payload1 := createTestPayload("test1") + payload2 := createTestPayload("test2") + + // Add, send, and ack first two to advance pointers + tracker.append(payload1) + tracker.markSent() + tracker.pop() + + tracker.append(payload2) + tracker.markSent() + tracker.pop() + + // Now add more items that will wrap around in the ring buffer + payload3 := createTestPayload("test3") + payload4 := createTestPayload("test4") + payload5 := createTestPayload("test5") + + assert.True(t, tracker.append(payload3)) + assert.True(t, tracker.append(payload4)) + assert.True(t, tracker.append(payload5)) + + assert.Equal(t, 3, tracker.totalCount()) + assert.True(t, tracker.hasSpace()) + + // Mark all as sent and pop them + tracker.markSent() + tracker.markSent() + tracker.markSent() + + popped3 := tracker.pop() + popped4 := 
tracker.pop() + popped5 := tracker.pop() + + assert.Equal(t, payload3, popped3) + assert.Equal(t, payload4, popped4) + assert.Equal(t, payload5, popped5) + assert.Equal(t, 0, tracker.totalCount()) +} + +func TestInflightTrackerSentCount(t *testing.T) { + tracker := newInflightTracker(5) + + // Initially no sent items + assert.Equal(t, 0, tracker.sentCount()) + + // Add buffered payloads + tracker.append(createTestPayload("test1")) + tracker.append(createTestPayload("test2")) + tracker.append(createTestPayload("test3")) + + assert.Equal(t, 0, tracker.sentCount()) + + // Mark as sent + tracker.markSent() + assert.Equal(t, 1, tracker.sentCount()) + + tracker.markSent() + assert.Equal(t, 2, tracker.sentCount()) + + // Pop one + tracker.pop() + assert.Equal(t, 1, tracker.sentCount()) + + // Mark another as sent + tracker.markSent() + assert.Equal(t, 2, tracker.sentCount()) +} + +func TestInflightTrackerTotalCount(t *testing.T) { + tracker := newInflightTracker(5) + + // Initially empty + assert.Equal(t, 0, tracker.totalCount()) + + // Add buffered payloads + tracker.append(createTestPayload("test1")) + assert.Equal(t, 1, tracker.totalCount()) + + tracker.append(createTestPayload("test2")) + assert.Equal(t, 2, tracker.totalCount()) + + // Mark both as sent (doesn't change total count) + tracker.markSent() + tracker.markSent() + assert.Equal(t, 2, tracker.totalCount()) + + // Pop reduces total count + tracker.pop() + assert.Equal(t, 1, tracker.totalCount()) + + tracker.pop() + assert.Equal(t, 0, tracker.totalCount()) +} + +func TestInflightTrackerHasSpace(t *testing.T) { + tracker := newInflightTracker(10) + + // Initially has space + assert.True(t, tracker.hasSpace()) + + // Add several items + for i := 0; i < 5; i++ { + tracker.append(createTestPayload("test")) + } + assert.True(t, tracker.hasSpace()) + + // Pop one to verify space tracking + tracker.markSent() + tracker.pop() + assert.True(t, tracker.hasSpace()) +} + +func TestInflightTrackerMixedOperations(t 
*testing.T) { + // Test a realistic sequence of operations + tracker := newInflightTracker(5) + + // Add 3 buffered payloads + p1 := createTestPayload("msg1") + p2 := createTestPayload("msg2") + p3 := createTestPayload("msg3") + + tracker.append(p1) + tracker.append(p2) + tracker.append(p3) + + assert.Equal(t, 3, tracker.totalCount()) + assert.Equal(t, 0, tracker.sentCount()) + + // Send first 2 + tracker.markSent() + tracker.markSent() + + assert.Equal(t, 3, tracker.totalCount()) + assert.Equal(t, 2, tracker.sentCount()) + assert.True(t, tracker.hasUnacked()) + assert.True(t, tracker.hasUnSent()) + + // Receive ack for first + popped := tracker.pop() + assert.Equal(t, p1, popped) + assert.Equal(t, 2, tracker.totalCount()) + assert.Equal(t, 1, tracker.sentCount()) + + // Add more payloads + p4 := createTestPayload("msg4") + p5 := createTestPayload("msg5") + tracker.append(p4) + tracker.append(p5) + + assert.Equal(t, 4, tracker.totalCount()) + assert.Equal(t, 1, tracker.sentCount()) + + // Send remaining buffered + tracker.markSent() // p3 + tracker.markSent() // p4 + tracker.markSent() // p5 + + assert.Equal(t, 4, tracker.totalCount()) + assert.Equal(t, 4, tracker.sentCount()) + assert.False(t, tracker.hasUnSent()) + + // Receive all remaining acks + assert.Equal(t, p2, tracker.pop()) + assert.Equal(t, p3, tracker.pop()) + assert.Equal(t, p4, tracker.pop()) + assert.Equal(t, p5, tracker.pop()) + + assert.Equal(t, 0, tracker.totalCount()) + assert.False(t, tracker.hasUnacked()) +} + +func TestInflightTrackerResetOnRotationWithBuffered(t *testing.T) { + tracker := newInflightTracker(5) + + // Mix of sent and buffered payloads + tracker.append(createTestPayload("msg1")) + tracker.append(createTestPayload("msg2")) + tracker.append(createTestPayload("msg3")) + tracker.append(createTestPayload("msg4")) + + // Send first two + tracker.markSent() + tracker.markSent() + + // Ack first one + tracker.pop() + + // State: 1 sent, 2 buffered, total 3 + assert.Equal(t, 1, 
tracker.sentCount()) + assert.Equal(t, 3, tracker.totalCount()) + + // Reset on rotation + tracker.resetOnRotation() + + // All items should be buffered now + assert.Equal(t, 0, tracker.sentCount()) + assert.Equal(t, 3, tracker.totalCount()) + assert.True(t, tracker.hasUnSent()) + assert.False(t, tracker.hasUnacked()) + + // Batch IDs reset + assert.Equal(t, uint32(1), tracker.nextBatchID()) +} + +func TestInflightTrackerBatchIDAfterRotation(t *testing.T) { + tracker := newInflightTracker(5) + + // Add and send some payloads + tracker.append(createTestPayload("msg1")) + tracker.append(createTestPayload("msg2")) + tracker.markSent() + tracker.markSent() + + assert.Equal(t, uint32(0), tracker.getHeadBatchID()) + assert.Equal(t, uint32(2), tracker.nextBatchID()) + + // Reset on rotation + tracker.resetOnRotation() + + // Batch IDs should reset to 1 (0 is reserved for snapshot) + assert.Equal(t, uint32(1), tracker.nextBatchID()) + + // Send items with new batch IDs + tracker.markSent() + assert.Equal(t, uint32(1), tracker.getHeadBatchID()) + assert.Equal(t, uint32(2), tracker.nextBatchID()) + + tracker.markSent() + assert.Equal(t, uint32(1), tracker.getHeadBatchID()) + assert.Equal(t, uint32(3), tracker.nextBatchID()) +} diff --git a/pkg/logs/sender/grpc/mock_encoder.go b/pkg/logs/sender/grpc/mock_encoder.go new file mode 100644 index 000000000000..00e97fa58490 --- /dev/null +++ b/pkg/logs/sender/grpc/mock_encoder.go @@ -0,0 +1,23 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package grpc + +import ( + "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/logs/processor" +) + +// MockEncoder is a no-op encoder for gRPC stateful streaming. +// This is temporary scaffolding until the real State component is ready. 
+// Encoding happens in StartMessageTranslator instead of the processor. +var MockEncoder processor.Encoder = &mockEncoder{} + +type mockEncoder struct{} + +// Encode is a no-op implementation that satisfies the processor.Encoder interface +func (g *mockEncoder) Encode(_ *message.Message, _ string) error { + return nil +} diff --git a/pkg/logs/sender/grpc/mock_state.go b/pkg/logs/sender/grpc/mock_state.go new file mode 100644 index 000000000000..a21a5a90cb04 --- /dev/null +++ b/pkg/logs/sender/grpc/mock_state.go @@ -0,0 +1,75 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package grpc + +import ( + "strings" + "time" + "unicode/utf8" + + "github.com/DataDog/datadog-agent/pkg/logs/message" +) + +const nanoToMillis = 1000000 + +// StartMessageTranslator starts a goroutine that translates message.Message to message.StatefulMessage +// This is temporary scaffolding until the real State component is ready. 
+func StartMessageTranslator(inputChan chan *message.Message, outputChan chan *message.StatefulMessage) { + go func() { + defer close(outputChan) + + for msg := range inputChan { + // Get timestamp - prefer message timestamp if available + ts := time.Now().UTC() + if !msg.ServerlessExtra.Timestamp.IsZero() { + ts = msg.ServerlessExtra.Timestamp + } + + // Create the Log message using stateful_encoding.proto definitions + log := &Log{ + Timestamp: uint64(ts.UnixNano() / nanoToMillis), + Content: &Log_Raw{ + Raw: toValidUtf8(msg.GetContent()), + }, + } + + // Wrap the Log in a Datum + datum := &Datum{ + Data: &Datum_Logs{ + Logs: log, + }, + } + + // Create StatefulMessage with the Datum and metadata + statefulMsg := &message.StatefulMessage{ + Datum: datum, + Metadata: &msg.MessageMetadata, + } + + outputChan <- statefulMsg + } + }() +} + +// toValidUtf8 ensures all characters are UTF-8 +func toValidUtf8(data []byte) string { + if utf8.Valid(data) { + return string(data) + } + + var str strings.Builder + str.Grow(len(data)) + + for len(data) > 0 { + r, size := utf8.DecodeRune(data) + // in case of invalid utf-8, DecodeRune returns (utf8.RuneError, 1) + // and since RuneError is the same as unicode.ReplacementChar + // no need to handle the error explicitly + str.WriteRune(r) + data = data[size:] + } + return str.String() +} diff --git a/pkg/logs/sender/grpc/sender.go b/pkg/logs/sender/grpc/sender.go new file mode 100644 index 000000000000..5231e8e7a939 --- /dev/null +++ b/pkg/logs/sender/grpc/sender.go @@ -0,0 +1,264 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +// Package grpc implements gRPC-based log sender +package grpc + +import ( + "context" + "crypto/tls" + "fmt" + "time" + + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/keepalive" + + "github.com/DataDog/datadog-agent/comp/logs/agent/config" + pkgconfigmodel "github.com/DataDog/datadog-agent/pkg/config/model" + "github.com/DataDog/datadog-agent/pkg/logs/client" + "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/logs/metrics" + "github.com/DataDog/datadog-agent/pkg/logs/sender" + "github.com/DataDog/datadog-agent/pkg/util/log" + "github.com/DataDog/datadog-agent/pkg/version" + + "go.uber.org/atomic" +) + +const ( + // inputChanBufferSize is the buffer size for worker input channels - may become configurable + inputChanBufferSize = 100 +) + +// headerCredentials implements credentials.PerRPCCredentials to add headers to RPC calls +type headerCredentials struct { + endpoint config.Endpoint +} + +// GetRequestMetadata adds required headers to each RPC call +func (h *headerCredentials) GetRequestMetadata(_ context.Context, _ ...string) (map[string]string, error) { + headers := map[string]string{ + "dd-api-key": h.endpoint.GetAPIKey(), + } + + // Add protocol header if specified + if h.endpoint.Protocol != "" { + headers["dd-protocol"] = string(h.endpoint.Protocol) + } + + // Add origin headers if specified + if h.endpoint.Origin != "" { + headers["dd-evp-origin"] = string(h.endpoint.Origin) + headers["dd-evp-origin-version"] = version.AgentVersion + } + + return headers, nil +} + +// RequireTransportSecurity indicates whether the credentials require transport security +func (h *headerCredentials) RequireTransportSecurity() bool { + return false // We handle TLS separately via WithTransportCredentials +} + +// Sender implements PipelineComponent interface for gRPC log transmission. 
+// It manages multiple streamWorker instances (one per pipeline) using round-robin distribution. +// It is similar to Sender/Worker architecture +type Sender struct { + // Configuration + endpoint config.Endpoint + destinationsContext *client.DestinationsContext + cfg pkgconfigmodel.Reader + numberOfWorkers int + + // Pipeline integration + pipelineMonitor metrics.PipelineMonitor + + // Stream management (similar to Sender's workers and queues) + workers []*streamWorker + queues []chan *message.Payload + idx *atomic.Uint32 + + // Auditor integration + sink sender.Sink + + // gRPC connection management (shared across all streams) + conn *grpc.ClientConn + client StatefulLogsServiceClient +} + +// NewSender creates a new gRPC sender that implements PipelineComponent +// numberOfPipelines determines how many streamWorker to create (same as number of pipelines) +func NewSender( + numberOfPipelines int, + cfg pkgconfigmodel.Reader, + sink sender.Sink, + endpoints *config.Endpoints, + destinationsCtx *client.DestinationsContext, +) *Sender { + + // For now, use the first reliable endpoint + // TODO: Support multiple endpoints with failover + var endpoint config.Endpoint + if len(endpoints.GetReliableEndpoints()) > 0 { + endpoint = endpoints.GetReliableEndpoints()[0] + } else { + log.Error("No reliable gRPC endpoints configured") + return nil + } + + // For the moment, we use the number of pipelines as the number of workers + numberOfWorkers := numberOfPipelines + + // Get stream lifetime from config + streamLifetime := config.StreamLifetime(cfg) + + sender := &Sender{ + endpoint: endpoint, + destinationsContext: destinationsCtx, + cfg: cfg, + numberOfWorkers: numberOfWorkers, + pipelineMonitor: metrics.NewTelemetryPipelineMonitor(), + workers: make([]*streamWorker, 0, numberOfWorkers), + queues: make([]chan *message.Payload, numberOfWorkers), + idx: &atomic.Uint32{}, + sink: sink, + } + + // Note: outputChan will be set in each streamWorker's start() method when 
sink.Channel() is available + + // Create gRPC connection (shared by all streams inside streamWorkers) + if err := sender.createConnection(); err != nil { + log.Errorf("Failed to create gRPC connection: %v", err) + return nil + } + + // Create multiple streamWorker instances (like Sender creates Workers) + for i := 0; i < numberOfWorkers; i++ { + workerID := fmt.Sprintf("worker-%d", i) + + // Create input queue for this worker (like Sender creates queues) + sender.queues[i] = make(chan *message.Payload, inputChanBufferSize) + + // Create streamWorker instance + worker := newStreamWorker( + workerID, + sender.queues[i], + destinationsCtx, + sender.conn, + sender.client, + sender.sink, + endpoint, + streamLifetime, + ) + + sender.workers = append(sender.workers, worker) + } + + log.Infof("Created gRPC sender with %d streams for endpoint %s:%d", + numberOfWorkers, endpoint.Host, endpoint.Port) + return sender +} + +// createConnection establishes the shared gRPC connection +func (s *Sender) createConnection() error { + log.Infof("Creating gRPC connection to %s:%d", s.endpoint.Host, s.endpoint.Port) + + // Build connection options + var opts []grpc.DialOption + + // Configure TLS + if s.endpoint.UseSSL() { + tlsConfig := &tls.Config{ + ServerName: s.endpoint.Host, + } + opts = append(opts, grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))) + } else { + opts = append(opts, grpc.WithTransportCredentials(insecure.NewCredentials())) + } + + // Configure keepalive + keepaliveParams := keepalive.ClientParameters{ + Time: 30 * time.Second, + Timeout: 5 * time.Second, + PermitWithoutStream: true, + } + opts = append(opts, grpc.WithKeepaliveParams(keepaliveParams)) + + // Add user agent + userAgent := fmt.Sprintf("datadog-agent/%s", version.AgentVersion) + opts = append(opts, grpc.WithUserAgent(userAgent)) + + // Add headers via per-RPC credentials + headerCreds := &headerCredentials{endpoint: s.endpoint} + opts = append(opts, grpc.WithPerRPCCredentials(headerCreds)) 
+ + // Add load balancing configuration, to utilize all available LB IPs + opts = append(opts, grpc.WithDefaultServiceConfig( + `{"loadBalancingPolicy":"round_robin"}`, + )) + + // Create connection, lazy connection establishment, does not block + address := fmt.Sprintf("%s:%d", s.endpoint.Host, s.endpoint.Port) + conn, err := grpc.NewClient(address, opts...) + if err != nil { + return fmt.Errorf("failed to create gRPC connection: %w", err) + } + + s.conn = conn + s.client = NewStatefulLogsServiceClient(conn) + + log.Infof("Successfully created gRPC connection to %s", address) + return nil +} + +// PipelineComponent interface implementation + +// In returns the input channel using round-robin distribution (same as Sender.In()) +func (s *Sender) In() chan *message.Payload { + idx := s.idx.Inc() % uint32(len(s.queues)) + return s.queues[idx] +} + +// PipelineMonitor returns the pipeline monitor +func (s *Sender) PipelineMonitor() metrics.PipelineMonitor { + return s.pipelineMonitor +} + +// Start starts all streamWorker instances (same pattern as Sender.Start()) +func (s *Sender) Start() { + log.Infof("Starting gRPC sender with %d workers", len(s.workers)) + + for _, worker := range s.workers { + worker.start() + } + + log.Info("All streamWorkers started") +} + +// Stop stops all streamWorker instances and closes the connection +func (s *Sender) Stop() { + log.Info("Stopping gRPC sender") + + // Stop all workers (same pattern as Sender.Stop()) + for _, worker := range s.workers { + worker.stop() + } + + // Close all queues + for _, queue := range s.queues { + close(queue) + } + + // Close the shared connection + if s.conn != nil { + if err := s.conn.Close(); err != nil { + log.Warnf("Error closing gRPC connection: %v", err) + } + } + + log.Info("gRPC sender stopped") +} diff --git a/pkg/logs/sender/grpc/stateful_encoding.pb.go b/pkg/logs/sender/grpc/stateful_encoding.pb.go new file mode 100644 index 000000000000..bed1be08ec6c --- /dev/null +++ 
b/pkg/logs/sender/grpc/stateful_encoding.pb.go @@ -0,0 +1,968 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.36.10 +// protoc v5.29.3 +// source: stateful_encoding.proto + +package grpc + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" + unsafe "unsafe" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// See Status Code Mappings section below for more details +type BatchStatus_Status int32 + +const ( + BatchStatus_UNKNOWN BatchStatus_Status = 0 + BatchStatus_OK BatchStatus_Status = 1 +) + +// Enum value maps for BatchStatus_Status. +var ( + BatchStatus_Status_name = map[int32]string{ + 0: "UNKNOWN", + 1: "OK", + } + BatchStatus_Status_value = map[string]int32{ + "UNKNOWN": 0, + "OK": 1, + } +) + +func (x BatchStatus_Status) Enum() *BatchStatus_Status { + p := new(BatchStatus_Status) + *p = x + return p +} + +func (x BatchStatus_Status) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (BatchStatus_Status) Descriptor() protoreflect.EnumDescriptor { + return file_stateful_encoding_proto_enumTypes[0].Descriptor() +} + +func (BatchStatus_Status) Type() protoreflect.EnumType { + return &file_stateful_encoding_proto_enumTypes[0] +} + +func (x BatchStatus_Status) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use BatchStatus_Status.Descriptor instead. 
+func (BatchStatus_Status) EnumDescriptor() ([]byte, []int) { + return file_stateful_encoding_proto_rawDescGZIP(), []int{10, 0} +} + +type DictEntryDefine struct { + state protoimpl.MessageState `protogen:"open.v1"` + Id uint64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"` + Value string `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DictEntryDefine) Reset() { + *x = DictEntryDefine{} + mi := &file_stateful_encoding_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DictEntryDefine) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DictEntryDefine) ProtoMessage() {} + +func (x *DictEntryDefine) ProtoReflect() protoreflect.Message { + mi := &file_stateful_encoding_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DictEntryDefine.ProtoReflect.Descriptor instead. 
+func (*DictEntryDefine) Descriptor() ([]byte, []int) { + return file_stateful_encoding_proto_rawDescGZIP(), []int{0} +} + +func (x *DictEntryDefine) GetId() uint64 { + if x != nil { + return x.Id + } + return 0 +} + +func (x *DictEntryDefine) GetValue() string { + if x != nil { + return x.Value + } + return "" +} + +type DictEntryDelete struct { + state protoimpl.MessageState `protogen:"open.v1"` + Id uint64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DictEntryDelete) Reset() { + *x = DictEntryDelete{} + mi := &file_stateful_encoding_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DictEntryDelete) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DictEntryDelete) ProtoMessage() {} + +func (x *DictEntryDelete) ProtoReflect() protoreflect.Message { + mi := &file_stateful_encoding_proto_msgTypes[1] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DictEntryDelete.ProtoReflect.Descriptor instead. 
+func (*DictEntryDelete) Descriptor() ([]byte, []int) { + return file_stateful_encoding_proto_rawDescGZIP(), []int{1} +} + +func (x *DictEntryDelete) GetId() uint64 { + if x != nil { + return x.Id + } + return 0 +} + +// pos_list is used to indicate where dynamic values should be inserted +// it's more accurate than a marker +type PatternDefine struct { + state protoimpl.MessageState `protogen:"open.v1"` + PatternId uint64 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"` + Template string `protobuf:"bytes,2,opt,name=template,proto3" json:"template,omitempty"` + ParamCount uint32 `protobuf:"varint,3,opt,name=param_count,json=paramCount,proto3" json:"param_count,omitempty"` + PosList []uint32 `protobuf:"varint,4,rep,packed,name=pos_list,json=posList,proto3" json:"pos_list,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *PatternDefine) Reset() { + *x = PatternDefine{} + mi := &file_stateful_encoding_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *PatternDefine) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PatternDefine) ProtoMessage() {} + +func (x *PatternDefine) ProtoReflect() protoreflect.Message { + mi := &file_stateful_encoding_proto_msgTypes[2] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PatternDefine.ProtoReflect.Descriptor instead. 
+func (*PatternDefine) Descriptor() ([]byte, []int) { + return file_stateful_encoding_proto_rawDescGZIP(), []int{2} +} + +func (x *PatternDefine) GetPatternId() uint64 { + if x != nil { + return x.PatternId + } + return 0 +} + +func (x *PatternDefine) GetTemplate() string { + if x != nil { + return x.Template + } + return "" +} + +func (x *PatternDefine) GetParamCount() uint32 { + if x != nil { + return x.ParamCount + } + return 0 +} + +func (x *PatternDefine) GetPosList() []uint32 { + if x != nil { + return x.PosList + } + return nil +} + +type PatternDelete struct { + state protoimpl.MessageState `protogen:"open.v1"` + PatternId uint64 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *PatternDelete) Reset() { + *x = PatternDelete{} + mi := &file_stateful_encoding_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *PatternDelete) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PatternDelete) ProtoMessage() {} + +func (x *PatternDelete) ProtoReflect() protoreflect.Message { + mi := &file_stateful_encoding_proto_msgTypes[3] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PatternDelete.ProtoReflect.Descriptor instead. 
+func (*PatternDelete) Descriptor() ([]byte, []int) { + return file_stateful_encoding_proto_rawDescGZIP(), []int{3} +} + +func (x *PatternDelete) GetPatternId() uint64 { + if x != nil { + return x.PatternId + } + return 0 +} + +type Log struct { + state protoimpl.MessageState `protogen:"open.v1"` + Timestamp uint64 `protobuf:"varint,1,opt,name=timestamp,proto3" json:"timestamp,omitempty"` + // Types that are valid to be assigned to Content: + // + // *Log_Structured + // *Log_Raw + Content isLog_Content `protobuf_oneof:"content"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Log) Reset() { + *x = Log{} + mi := &file_stateful_encoding_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Log) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Log) ProtoMessage() {} + +func (x *Log) ProtoReflect() protoreflect.Message { + mi := &file_stateful_encoding_proto_msgTypes[4] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Log.ProtoReflect.Descriptor instead. 
+func (*Log) Descriptor() ([]byte, []int) { + return file_stateful_encoding_proto_rawDescGZIP(), []int{4} +} + +func (x *Log) GetTimestamp() uint64 { + if x != nil { + return x.Timestamp + } + return 0 +} + +func (x *Log) GetContent() isLog_Content { + if x != nil { + return x.Content + } + return nil +} + +func (x *Log) GetStructured() *StructuredLog { + if x != nil { + if x, ok := x.Content.(*Log_Structured); ok { + return x.Structured + } + } + return nil +} + +func (x *Log) GetRaw() string { + if x != nil { + if x, ok := x.Content.(*Log_Raw); ok { + return x.Raw + } + } + return "" +} + +type isLog_Content interface { + isLog_Content() +} + +type Log_Structured struct { + Structured *StructuredLog `protobuf:"bytes,2,opt,name=structured,proto3,oneof"` +} + +type Log_Raw struct { + Raw string `protobuf:"bytes,3,opt,name=raw,proto3,oneof"` +} + +func (*Log_Structured) isLog_Content() {} + +func (*Log_Raw) isLog_Content() {} + +type StructuredLog struct { + state protoimpl.MessageState `protogen:"open.v1"` + PatternId uint64 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"` + DynamicValues []*DynamicValue `protobuf:"bytes,2,rep,name=dynamic_values,json=dynamicValues,proto3" json:"dynamic_values,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *StructuredLog) Reset() { + *x = StructuredLog{} + mi := &file_stateful_encoding_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *StructuredLog) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StructuredLog) ProtoMessage() {} + +func (x *StructuredLog) ProtoReflect() protoreflect.Message { + mi := &file_stateful_encoding_proto_msgTypes[5] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use 
StructuredLog.ProtoReflect.Descriptor instead. +func (*StructuredLog) Descriptor() ([]byte, []int) { + return file_stateful_encoding_proto_rawDescGZIP(), []int{5} +} + +func (x *StructuredLog) GetPatternId() uint64 { + if x != nil { + return x.PatternId + } + return 0 +} + +func (x *StructuredLog) GetDynamicValues() []*DynamicValue { + if x != nil { + return x.DynamicValues + } + return nil +} + +// TODO not sure we need numeric type +type DynamicValue struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Types that are valid to be assigned to Value: + // + // *DynamicValue_IntValue + // *DynamicValue_FloatValue + // *DynamicValue_StringValue + // *DynamicValue_DictIndex + Value isDynamicValue_Value `protobuf_oneof:"value"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DynamicValue) Reset() { + *x = DynamicValue{} + mi := &file_stateful_encoding_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DynamicValue) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DynamicValue) ProtoMessage() {} + +func (x *DynamicValue) ProtoReflect() protoreflect.Message { + mi := &file_stateful_encoding_proto_msgTypes[6] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DynamicValue.ProtoReflect.Descriptor instead. 
+func (*DynamicValue) Descriptor() ([]byte, []int) { + return file_stateful_encoding_proto_rawDescGZIP(), []int{6} +} + +func (x *DynamicValue) GetValue() isDynamicValue_Value { + if x != nil { + return x.Value + } + return nil +} + +func (x *DynamicValue) GetIntValue() int64 { + if x != nil { + if x, ok := x.Value.(*DynamicValue_IntValue); ok { + return x.IntValue + } + } + return 0 +} + +func (x *DynamicValue) GetFloatValue() float64 { + if x != nil { + if x, ok := x.Value.(*DynamicValue_FloatValue); ok { + return x.FloatValue + } + } + return 0 +} + +func (x *DynamicValue) GetStringValue() string { + if x != nil { + if x, ok := x.Value.(*DynamicValue_StringValue); ok { + return x.StringValue + } + } + return "" +} + +func (x *DynamicValue) GetDictIndex() uint64 { + if x != nil { + if x, ok := x.Value.(*DynamicValue_DictIndex); ok { + return x.DictIndex + } + } + return 0 +} + +type isDynamicValue_Value interface { + isDynamicValue_Value() +} + +type DynamicValue_IntValue struct { + IntValue int64 `protobuf:"varint,1,opt,name=int_value,json=intValue,proto3,oneof"` +} + +type DynamicValue_FloatValue struct { + FloatValue float64 `protobuf:"fixed64,2,opt,name=float_value,json=floatValue,proto3,oneof"` +} + +type DynamicValue_StringValue struct { + StringValue string `protobuf:"bytes,3,opt,name=string_value,json=stringValue,proto3,oneof"` +} + +type DynamicValue_DictIndex struct { + DictIndex uint64 `protobuf:"varint,4,opt,name=dict_index,json=dictIndex,proto3,oneof"` +} + +func (*DynamicValue_IntValue) isDynamicValue_Value() {} + +func (*DynamicValue_FloatValue) isDynamicValue_Value() {} + +func (*DynamicValue_StringValue) isDynamicValue_Value() {} + +func (*DynamicValue_DictIndex) isDynamicValue_Value() {} + +type Datum struct { + state protoimpl.MessageState `protogen:"open.v1"` + // Types that are valid to be assigned to Data: + // + // *Datum_PatternDefine + // *Datum_PatternDelete + // *Datum_DictEntryDefine + // *Datum_DictEntryDelete + // *Datum_Logs + Data 
isDatum_Data `protobuf_oneof:"data"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Datum) Reset() { + *x = Datum{} + mi := &file_stateful_encoding_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Datum) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Datum) ProtoMessage() {} + +func (x *Datum) ProtoReflect() protoreflect.Message { + mi := &file_stateful_encoding_proto_msgTypes[7] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Datum.ProtoReflect.Descriptor instead. +func (*Datum) Descriptor() ([]byte, []int) { + return file_stateful_encoding_proto_rawDescGZIP(), []int{7} +} + +func (x *Datum) GetData() isDatum_Data { + if x != nil { + return x.Data + } + return nil +} + +func (x *Datum) GetPatternDefine() *PatternDefine { + if x != nil { + if x, ok := x.Data.(*Datum_PatternDefine); ok { + return x.PatternDefine + } + } + return nil +} + +func (x *Datum) GetPatternDelete() *PatternDelete { + if x != nil { + if x, ok := x.Data.(*Datum_PatternDelete); ok { + return x.PatternDelete + } + } + return nil +} + +func (x *Datum) GetDictEntryDefine() *DictEntryDefine { + if x != nil { + if x, ok := x.Data.(*Datum_DictEntryDefine); ok { + return x.DictEntryDefine + } + } + return nil +} + +func (x *Datum) GetDictEntryDelete() *DictEntryDelete { + if x != nil { + if x, ok := x.Data.(*Datum_DictEntryDelete); ok { + return x.DictEntryDelete + } + } + return nil +} + +func (x *Datum) GetLogs() *Log { + if x != nil { + if x, ok := x.Data.(*Datum_Logs); ok { + return x.Logs + } + } + return nil +} + +type isDatum_Data interface { + isDatum_Data() +} + +type Datum_PatternDefine struct { + PatternDefine *PatternDefine `protobuf:"bytes,1,opt,name=pattern_define,json=patternDefine,proto3,oneof"` +} + 
+type Datum_PatternDelete struct { + PatternDelete *PatternDelete `protobuf:"bytes,2,opt,name=pattern_delete,json=patternDelete,proto3,oneof"` +} + +type Datum_DictEntryDefine struct { + DictEntryDefine *DictEntryDefine `protobuf:"bytes,3,opt,name=dict_entry_define,json=dictEntryDefine,proto3,oneof"` +} + +type Datum_DictEntryDelete struct { + DictEntryDelete *DictEntryDelete `protobuf:"bytes,4,opt,name=dict_entry_delete,json=dictEntryDelete,proto3,oneof"` +} + +type Datum_Logs struct { + Logs *Log `protobuf:"bytes,5,opt,name=logs,proto3,oneof"` +} + +func (*Datum_PatternDefine) isDatum_Data() {} + +func (*Datum_PatternDelete) isDatum_Data() {} + +func (*Datum_DictEntryDefine) isDatum_Data() {} + +func (*Datum_DictEntryDelete) isDatum_Data() {} + +func (*Datum_Logs) isDatum_Data() {} + +// DatumSequence wraps a sequence of Datum messages +// Used for serialization in application-level compression +type DatumSequence struct { + state protoimpl.MessageState `protogen:"open.v1"` + Data []*Datum `protobuf:"bytes,1,rep,name=data,proto3" json:"data,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *DatumSequence) Reset() { + *x = DatumSequence{} + mi := &file_stateful_encoding_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *DatumSequence) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DatumSequence) ProtoMessage() {} + +func (x *DatumSequence) ProtoReflect() protoreflect.Message { + mi := &file_stateful_encoding_proto_msgTypes[8] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DatumSequence.ProtoReflect.Descriptor instead. 
+func (*DatumSequence) Descriptor() ([]byte, []int) { + return file_stateful_encoding_proto_rawDescGZIP(), []int{8} +} + +func (x *DatumSequence) GetData() []*Datum { + if x != nil { + return x.Data + } + return nil +} + +// data is sequence of pattern/dictionary changes + logs +// the ordering is significant, must be processed in order +type StatefulBatch struct { + state protoimpl.MessageState `protogen:"open.v1"` + BatchId uint32 `protobuf:"varint,1,opt,name=batch_id,json=batchId,proto3" json:"batch_id,omitempty"` + Data []byte `protobuf:"bytes,2,opt,name=data,proto3" json:"data,omitempty"` // Contains serialized DatumSequence + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *StatefulBatch) Reset() { + *x = StatefulBatch{} + mi := &file_stateful_encoding_proto_msgTypes[9] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *StatefulBatch) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StatefulBatch) ProtoMessage() {} + +func (x *StatefulBatch) ProtoReflect() protoreflect.Message { + mi := &file_stateful_encoding_proto_msgTypes[9] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StatefulBatch.ProtoReflect.Descriptor instead. 
+func (*StatefulBatch) Descriptor() ([]byte, []int) { + return file_stateful_encoding_proto_rawDescGZIP(), []int{9} +} + +func (x *StatefulBatch) GetBatchId() uint32 { + if x != nil { + return x.BatchId + } + return 0 +} + +func (x *StatefulBatch) GetData() []byte { + if x != nil { + return x.Data + } + return nil +} + +type BatchStatus struct { + state protoimpl.MessageState `protogen:"open.v1"` + BatchId int32 `protobuf:"varint,1,opt,name=batch_id,json=batchId,proto3" json:"batch_id,omitempty"` + Status BatchStatus_Status `protobuf:"varint,2,opt,name=status,proto3,enum=intake.BatchStatus_Status" json:"status,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *BatchStatus) Reset() { + *x = BatchStatus{} + mi := &file_stateful_encoding_proto_msgTypes[10] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *BatchStatus) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*BatchStatus) ProtoMessage() {} + +func (x *BatchStatus) ProtoReflect() protoreflect.Message { + mi := &file_stateful_encoding_proto_msgTypes[10] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use BatchStatus.ProtoReflect.Descriptor instead. 
+func (*BatchStatus) Descriptor() ([]byte, []int) { + return file_stateful_encoding_proto_rawDescGZIP(), []int{10} +} + +func (x *BatchStatus) GetBatchId() int32 { + if x != nil { + return x.BatchId + } + return 0 +} + +func (x *BatchStatus) GetStatus() BatchStatus_Status { + if x != nil { + return x.Status + } + return BatchStatus_UNKNOWN +} + +var File_stateful_encoding_proto protoreflect.FileDescriptor + +const file_stateful_encoding_proto_rawDesc = "" + + "\n" + + "\x17stateful_encoding.proto\x12\x06intake\"7\n" + + "\x0fDictEntryDefine\x12\x0e\n" + + "\x02id\x18\x01 \x01(\x04R\x02id\x12\x14\n" + + "\x05value\x18\x02 \x01(\tR\x05value\"!\n" + + "\x0fDictEntryDelete\x12\x0e\n" + + "\x02id\x18\x01 \x01(\x04R\x02id\"\x86\x01\n" + + "\rPatternDefine\x12\x1d\n" + + "\n" + + "pattern_id\x18\x01 \x01(\x04R\tpatternId\x12\x1a\n" + + "\btemplate\x18\x02 \x01(\tR\btemplate\x12\x1f\n" + + "\vparam_count\x18\x03 \x01(\rR\n" + + "paramCount\x12\x19\n" + + "\bpos_list\x18\x04 \x03(\rR\aposList\".\n" + + "\rPatternDelete\x12\x1d\n" + + "\n" + + "pattern_id\x18\x01 \x01(\x04R\tpatternId\"{\n" + + "\x03Log\x12\x1c\n" + + "\ttimestamp\x18\x01 \x01(\x04R\ttimestamp\x127\n" + + "\n" + + "structured\x18\x02 \x01(\v2\x15.intake.StructuredLogH\x00R\n" + + "structured\x12\x12\n" + + "\x03raw\x18\x03 \x01(\tH\x00R\x03rawB\t\n" + + "\acontent\"k\n" + + "\rStructuredLog\x12\x1d\n" + + "\n" + + "pattern_id\x18\x01 \x01(\x04R\tpatternId\x12;\n" + + "\x0edynamic_values\x18\x02 \x03(\v2\x14.intake.DynamicValueR\rdynamicValues\"\x9f\x01\n" + + "\fDynamicValue\x12\x1d\n" + + "\tint_value\x18\x01 \x01(\x03H\x00R\bintValue\x12!\n" + + "\vfloat_value\x18\x02 \x01(\x01H\x00R\n" + + "floatValue\x12#\n" + + "\fstring_value\x18\x03 \x01(\tH\x00R\vstringValue\x12\x1f\n" + + "\n" + + "dict_index\x18\x04 \x01(\x04H\x00R\tdictIndexB\a\n" + + "\x05value\"\xc0\x02\n" + + "\x05Datum\x12>\n" + + "\x0epattern_define\x18\x01 \x01(\v2\x15.intake.PatternDefineH\x00R\rpatternDefine\x12>\n" + + 
"\x0epattern_delete\x18\x02 \x01(\v2\x15.intake.PatternDeleteH\x00R\rpatternDelete\x12E\n" + + "\x11dict_entry_define\x18\x03 \x01(\v2\x17.intake.DictEntryDefineH\x00R\x0fdictEntryDefine\x12E\n" + + "\x11dict_entry_delete\x18\x04 \x01(\v2\x17.intake.DictEntryDeleteH\x00R\x0fdictEntryDelete\x12!\n" + + "\x04logs\x18\x05 \x01(\v2\v.intake.LogH\x00R\x04logsB\x06\n" + + "\x04data\"2\n" + + "\rDatumSequence\x12!\n" + + "\x04data\x18\x01 \x03(\v2\r.intake.DatumR\x04data\">\n" + + "\rStatefulBatch\x12\x19\n" + + "\bbatch_id\x18\x01 \x01(\rR\abatchId\x12\x12\n" + + "\x04data\x18\x02 \x01(\fR\x04data\"{\n" + + "\vBatchStatus\x12\x19\n" + + "\bbatch_id\x18\x01 \x01(\x05R\abatchId\x122\n" + + "\x06status\x18\x02 \x01(\x0e2\x1a.intake.BatchStatus.StatusR\x06status\"\x1d\n" + + "\x06Status\x12\v\n" + + "\aUNKNOWN\x10\x00\x12\x06\n" + + "\x02OK\x10\x012S\n" + + "\x13StatefulLogsService\x12<\n" + + "\n" + + "LogsStream\x12\x15.intake.StatefulBatch\x1a\x13.intake.BatchStatus(\x010\x01B7Z5github.com/DataDog/datadog-agent/pkg/logs/sender/grpcb\x06proto3" + +var ( + file_stateful_encoding_proto_rawDescOnce sync.Once + file_stateful_encoding_proto_rawDescData []byte +) + +func file_stateful_encoding_proto_rawDescGZIP() []byte { + file_stateful_encoding_proto_rawDescOnce.Do(func() { + file_stateful_encoding_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_stateful_encoding_proto_rawDesc), len(file_stateful_encoding_proto_rawDesc))) + }) + return file_stateful_encoding_proto_rawDescData +} + +var file_stateful_encoding_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_stateful_encoding_proto_msgTypes = make([]protoimpl.MessageInfo, 11) +var file_stateful_encoding_proto_goTypes = []any{ + (BatchStatus_Status)(0), // 0: intake.BatchStatus.Status + (*DictEntryDefine)(nil), // 1: intake.DictEntryDefine + (*DictEntryDelete)(nil), // 2: intake.DictEntryDelete + (*PatternDefine)(nil), // 3: intake.PatternDefine + (*PatternDelete)(nil), // 4: 
intake.PatternDelete + (*Log)(nil), // 5: intake.Log + (*StructuredLog)(nil), // 6: intake.StructuredLog + (*DynamicValue)(nil), // 7: intake.DynamicValue + (*Datum)(nil), // 8: intake.Datum + (*DatumSequence)(nil), // 9: intake.DatumSequence + (*StatefulBatch)(nil), // 10: intake.StatefulBatch + (*BatchStatus)(nil), // 11: intake.BatchStatus +} +var file_stateful_encoding_proto_depIdxs = []int32{ + 6, // 0: intake.Log.structured:type_name -> intake.StructuredLog + 7, // 1: intake.StructuredLog.dynamic_values:type_name -> intake.DynamicValue + 3, // 2: intake.Datum.pattern_define:type_name -> intake.PatternDefine + 4, // 3: intake.Datum.pattern_delete:type_name -> intake.PatternDelete + 1, // 4: intake.Datum.dict_entry_define:type_name -> intake.DictEntryDefine + 2, // 5: intake.Datum.dict_entry_delete:type_name -> intake.DictEntryDelete + 5, // 6: intake.Datum.logs:type_name -> intake.Log + 8, // 7: intake.DatumSequence.data:type_name -> intake.Datum + 0, // 8: intake.BatchStatus.status:type_name -> intake.BatchStatus.Status + 10, // 9: intake.StatefulLogsService.LogsStream:input_type -> intake.StatefulBatch + 11, // 10: intake.StatefulLogsService.LogsStream:output_type -> intake.BatchStatus + 10, // [10:11] is the sub-list for method output_type + 9, // [9:10] is the sub-list for method input_type + 9, // [9:9] is the sub-list for extension type_name + 9, // [9:9] is the sub-list for extension extendee + 0, // [0:9] is the sub-list for field type_name +} + +func init() { file_stateful_encoding_proto_init() } +func file_stateful_encoding_proto_init() { + if File_stateful_encoding_proto != nil { + return + } + file_stateful_encoding_proto_msgTypes[4].OneofWrappers = []any{ + (*Log_Structured)(nil), + (*Log_Raw)(nil), + } + file_stateful_encoding_proto_msgTypes[6].OneofWrappers = []any{ + (*DynamicValue_IntValue)(nil), + (*DynamicValue_FloatValue)(nil), + (*DynamicValue_StringValue)(nil), + (*DynamicValue_DictIndex)(nil), + } + 
file_stateful_encoding_proto_msgTypes[7].OneofWrappers = []any{ + (*Datum_PatternDefine)(nil), + (*Datum_PatternDelete)(nil), + (*Datum_DictEntryDefine)(nil), + (*Datum_DictEntryDelete)(nil), + (*Datum_Logs)(nil), + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_stateful_encoding_proto_rawDesc), len(file_stateful_encoding_proto_rawDesc)), + NumEnums: 1, + NumMessages: 11, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_stateful_encoding_proto_goTypes, + DependencyIndexes: file_stateful_encoding_proto_depIdxs, + EnumInfos: file_stateful_encoding_proto_enumTypes, + MessageInfos: file_stateful_encoding_proto_msgTypes, + }.Build() + File_stateful_encoding_proto = out.File + file_stateful_encoding_proto_goTypes = nil + file_stateful_encoding_proto_depIdxs = nil +} diff --git a/pkg/logs/sender/grpc/stateful_encoding.proto b/pkg/logs/sender/grpc/stateful_encoding.proto new file mode 100644 index 000000000000..d0e9c24f147d --- /dev/null +++ b/pkg/logs/sender/grpc/stateful_encoding.proto @@ -0,0 +1,107 @@ +syntax = "proto3"; + +package intake; +option go_package = "github.com/DataDog/datadog-agent/pkg/logs/sender/grpc"; + +// --------------------------------------------------------------------------- +// Dictionary-encoded +// --------------------------------------------------------------------------- + +message DictEntryDefine { + uint64 id = 1; + string value = 2; +} + +message DictEntryDelete { + uint64 id = 1; +} + +// --------------------------------------------------------------------------- +// Pattern dictionary +// --------------------------------------------------------------------------- + +// pos_list is used to indicate where dynamic values should be inserted +// it's more accurate than a marker +message PatternDefine { + uint64 pattern_id = 1; + string template = 2; + uint32 param_count = 3; + repeated uint32 
pos_list = 4; +} + +message PatternDelete { + uint64 pattern_id = 1; +} + +// --------------------------------------------------------------------------- +// Log payload +// --------------------------------------------------------------------------- + +message Log { + uint64 timestamp = 1; + oneof content { + StructuredLog structured = 2; + string raw = 3; + } +} + +message StructuredLog { + uint64 pattern_id = 1; + repeated DynamicValue dynamic_values = 2; +} + +// TODO not sure we need numeric type +message DynamicValue { + oneof value { + int64 int_value = 1; + double float_value = 2; + string string_value = 3; + uint64 dict_index = 4; + } +} + +// --------------------------------------------------------------------------- +// Streaming envelope +// --------------------------------------------------------------------------- + +message Datum { + oneof data { + PatternDefine pattern_define = 1; + PatternDelete pattern_delete = 2; + DictEntryDefine dict_entry_define = 3; + DictEntryDelete dict_entry_delete = 4; + Log logs = 5; + } +} + +// DatumSequence wraps a sequence of Datum messages +// Used for serialization in application-level compression +message DatumSequence { + repeated Datum data = 1; +} + +// data is sequence of pattern/dictionary changes + logs +// the ordering is significant, must be processed in order +message StatefulBatch { + uint32 batch_id = 1; + bytes data = 2; // Contains serialized DatumSequence +} + +message BatchStatus { + int32 batch_id = 1; + + // See Status Code Mappings section below for more details + enum Status { + UNKNOWN=0; + OK=1; + } + Status status = 2; +} + +// --------------------------------------------------------------------------- +// gRPC service definition (bi-directional streaming) +// --------------------------------------------------------------------------- + +service StatefulLogsService { + rpc LogsStream(stream StatefulBatch) returns (stream BatchStatus); +} diff --git 
a/pkg/logs/sender/grpc/stateful_encoding_grpc.pb.go b/pkg/logs/sender/grpc/stateful_encoding_grpc.pb.go new file mode 100644 index 000000000000..dfcdaf5e26f7 --- /dev/null +++ b/pkg/logs/sender/grpc/stateful_encoding_grpc.pb.go @@ -0,0 +1,115 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.5.1 +// - protoc v5.29.3 +// source: stateful_encoding.proto + +package grpc + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.64.0 or later. +const _ = grpc.SupportPackageIsVersion9 + +const ( + StatefulLogsService_LogsStream_FullMethodName = "/intake.StatefulLogsService/LogsStream" +) + +// StatefulLogsServiceClient is the client API for StatefulLogsService service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type StatefulLogsServiceClient interface { + LogsStream(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[StatefulBatch, BatchStatus], error) +} + +type statefulLogsServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewStatefulLogsServiceClient(cc grpc.ClientConnInterface) StatefulLogsServiceClient { + return &statefulLogsServiceClient{cc} +} + +func (c *statefulLogsServiceClient) LogsStream(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[StatefulBatch, BatchStatus], error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + stream, err := c.cc.NewStream(ctx, &StatefulLogsService_ServiceDesc.Streams[0], StatefulLogsService_LogsStream_FullMethodName, cOpts...) 
+ if err != nil { + return nil, err + } + x := &grpc.GenericClientStream[StatefulBatch, BatchStatus]{ClientStream: stream} + return x, nil +} + +// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. +type StatefulLogsService_LogsStreamClient = grpc.BidiStreamingClient[StatefulBatch, BatchStatus] + +// StatefulLogsServiceServer is the server API for StatefulLogsService service. +// All implementations must embed UnimplementedStatefulLogsServiceServer +// for forward compatibility. +type StatefulLogsServiceServer interface { + LogsStream(grpc.BidiStreamingServer[StatefulBatch, BatchStatus]) error + mustEmbedUnimplementedStatefulLogsServiceServer() +} + +// UnimplementedStatefulLogsServiceServer must be embedded to have +// forward compatible implementations. +// +// NOTE: this should be embedded by value instead of pointer to avoid a nil +// pointer dereference when methods are called. +type UnimplementedStatefulLogsServiceServer struct{} + +func (UnimplementedStatefulLogsServiceServer) LogsStream(grpc.BidiStreamingServer[StatefulBatch, BatchStatus]) error { + return status.Errorf(codes.Unimplemented, "method LogsStream not implemented") +} +func (UnimplementedStatefulLogsServiceServer) mustEmbedUnimplementedStatefulLogsServiceServer() {} +func (UnimplementedStatefulLogsServiceServer) testEmbeddedByValue() {} + +// UnsafeStatefulLogsServiceServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to StatefulLogsServiceServer will +// result in compilation errors. +type UnsafeStatefulLogsServiceServer interface { + mustEmbedUnimplementedStatefulLogsServiceServer() +} + +func RegisterStatefulLogsServiceServer(s grpc.ServiceRegistrar, srv StatefulLogsServiceServer) { + // If the following call pancis, it indicates UnimplementedStatefulLogsServiceServer was + // embedded by pointer and is nil. 
This will cause panics if an + // unimplemented method is ever invoked, so we test this at initialization + // time to prevent it from happening at runtime later due to I/O. + if t, ok := srv.(interface{ testEmbeddedByValue() }); ok { + t.testEmbeddedByValue() + } + s.RegisterService(&StatefulLogsService_ServiceDesc, srv) +} + +func _StatefulLogsService_LogsStream_Handler(srv interface{}, stream grpc.ServerStream) error { + return srv.(StatefulLogsServiceServer).LogsStream(&grpc.GenericServerStream[StatefulBatch, BatchStatus]{ServerStream: stream}) +} + +// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. +type StatefulLogsService_LogsStreamServer = grpc.BidiStreamingServer[StatefulBatch, BatchStatus] + +// StatefulLogsService_ServiceDesc is the grpc.ServiceDesc for StatefulLogsService service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var StatefulLogsService_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "intake.StatefulLogsService", + HandlerType: (*StatefulLogsServiceServer)(nil), + Methods: []grpc.MethodDesc{}, + Streams: []grpc.StreamDesc{ + { + StreamName: "LogsStream", + Handler: _StatefulLogsService_LogsStream_Handler, + ServerStreams: true, + ClientStreams: true, + }, + }, + Metadata: "stateful_encoding.proto", +} diff --git a/pkg/logs/sender/grpc/stream_worker.go b/pkg/logs/sender/grpc/stream_worker.go new file mode 100644 index 000000000000..f5dcdff2f416 --- /dev/null +++ b/pkg/logs/sender/grpc/stream_worker.go @@ -0,0 +1,680 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +package grpc + +import ( + "context" + "errors" + "io" + "time" + + "github.com/benbjohnson/clock" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/connectivity" + "google.golang.org/grpc/status" + + "github.com/DataDog/datadog-agent/comp/logs/agent/config" + "github.com/DataDog/datadog-agent/pkg/logs/client" + "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/logs/sender" + "github.com/DataDog/datadog-agent/pkg/util/backoff" + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +// TODO for PoC Stage 1 +// - implement snapshot state transmission +// - better handle unrecoverable errors - auth/perm, protocol, stream-level gRPC status +// - telemetry (send/recv, failure, rotations) + +// TODO for PoC Stage 2 +// - implement more graceful shutdown; in the current version we could lose some acks +// - currently, s.currentStream.stream.Send(batch) can still block, especially +// if we have a lot of buffered payloads to re-send after a stream rotation, +// especially if we are flow controlled. 
This will block the supervisor loop +// and potentially backpressure the input channel +// - implement proper "stream/ordered" backpressure + +// TODO for production +// - implement stream negotiation (state size, etc), able to downgrade to HTTP transport +// - Testing plan + +const ( + // Various constants - may become configurable + batchAckChanBuffer = 10 + maxInflight = 10000 + connectionTimeout = 10 * time.Second + drainTimeout = 5 * time.Second +) + +// streamState represents the current state of the stream worker +// +//go:generate stringer -type=streamState +type streamState int + +const ( + // disconnected is the initial state or stream creation failure backoff state + disconnected streamState = iota + // connecting is the state while waiting for asyncCreateNewStream to complete or fail + connecting + // active is the normal operating state with a valid stream + active + // draining waits for all acks to arrive before rotating to a new stream + draining +) + +// streamInfo holds all stream-related information +type streamInfo struct { + stream StatefulLogsService_LogsStreamClient + ctx context.Context + cancel context.CancelFunc +} + +// streamCreationResult represents the result of async stream creation +type streamCreationResult struct { + info *streamInfo + err error +} + +// batchAck wraps a batch acknowledgment with stream identity to prevent stale signals +type batchAck struct { + stream *streamInfo + status *BatchStatus +} + +// streamWorker manages a single gRPC bidirectional stream with Master-Slave threading model +// Architecture: One supervisor/sender goroutine + one receiver goroutine per worker +type streamWorker struct { + // Configuration + workerID string + destinationsContext *client.DestinationsContext + + // Pipeline integration + inputChan chan *message.Payload + outputChan chan *message.Payload // For auditor acknowledgments + sink sender.Sink // For getting auditor channel + + // gRPC connection management (shared with other streams) 
+ conn *grpc.ClientConn + client StatefulLogsServiceClient + + // Stream management + currentStream *streamInfo + streamState streamState + recvFailureCh chan *streamInfo // Signal receiver failure with stream identity + batchAckCh chan *batchAck // Signal batch acknowledgments with stream identity + streamReadyCh chan streamCreationResult // Signal when async stream creation completes + streamLifetime time.Duration + streamTimer *clock.Timer // Timer for stream lifetime, trigger soft rotation + drainTimer *clock.Timer // In case of unacked payloads, drain/wait before soft rotation + backoffTimer *clock.Timer // In case of stream creation failure, backoff before retrying + + // Inflight tracking - tracks sent (awaiting ack) and buffered (not sent) payloads + inflight *inflightTracker + + // Retry backoff + backoffPolicy backoff.Policy + nbErrors int + + // Control + stopChan chan struct{} + done chan struct{} + clock clock.Clock +} + +// newStreamWorker creates a new gRPC stream worker +func newStreamWorker( + workerID string, + inputChan chan *message.Payload, + destinationsCtx *client.DestinationsContext, + conn *grpc.ClientConn, + client StatefulLogsServiceClient, + sink sender.Sink, + endpoint config.Endpoint, + streamLifetime time.Duration, +) *streamWorker { + return newStreamWorkerWithClock(workerID, inputChan, destinationsCtx, conn, client, sink, + endpoint, streamLifetime, clock.New(), nil) +} + +// newStreamWorkerWithClock creates a new gRPC stream worker with injectable clock for testing +func newStreamWorkerWithClock( + workerID string, + inputChan chan *message.Payload, + destinationsCtx *client.DestinationsContext, + conn *grpc.ClientConn, + client StatefulLogsServiceClient, + sink sender.Sink, + endpoint config.Endpoint, + streamLifetime time.Duration, + clock clock.Clock, + inflightTracker *inflightTracker, +) *streamWorker { + backoffPolicy := backoff.NewExpBackoffPolicy( + endpoint.BackoffFactor, + endpoint.BackoffBase, + endpoint.BackoffMax, + 
endpoint.RecoveryInterval, + endpoint.RecoveryReset, + ) + + // Use provided inflightTracker (testing) or create default one + if inflightTracker == nil { + inflightTracker = newInflightTracker(maxInflight) + } + + worker := &streamWorker{ + workerID: workerID, + destinationsContext: destinationsCtx, + inputChan: inputChan, + outputChan: nil, + sink: sink, + conn: conn, + client: client, + streamState: disconnected, + recvFailureCh: make(chan *streamInfo), + batchAckCh: make(chan *batchAck, batchAckChanBuffer), + streamReadyCh: make(chan streamCreationResult), + streamLifetime: streamLifetime, + inflight: inflightTracker, + backoffPolicy: backoffPolicy, + nbErrors: 0, + stopChan: make(chan struct{}), + done: make(chan struct{}), + clock: clock, + streamTimer: createStoppedTimer(clock, 0), + backoffTimer: createStoppedTimer(clock, 0), + drainTimer: createStoppedTimer(clock, 0), + } + + return worker +} + +// start begins the supervisor goroutine & creates a new stream asynchronously +func (s *streamWorker) start() { + log.Infof("Starting gRPC stream worker %s", s.workerID) + s.outputChan = s.sink.Channel() + + // Start supervisor/sender goroutine (master) + go s.supervisorLoop() + + s.asyncCreateNewStream() + + log.Infof("Worker %s: Started", s.workerID) +} + +// stop shuts down the stream worker +func (s *streamWorker) stop() { + log.Infof("Stopping gRPC stream worker %s", s.workerID) + close(s.stopChan) + <-s.done + log.Infof("Worker %s: Stopped", s.workerID) +} + +// supervisorLoop is the master goroutine that handles sending and stream lifecycle +func (s *streamWorker) supervisorLoop() { + defer close(s.done) + + // supervisor loop starts without a stream, but asyncCreateNewStream is called + // right after in streamWorker's start(), so we are in connecting state right away + s.streamState = connecting + + for { + // Conditional inputChan - only enabled when inflight tracker has space + // This backpressures to upstream when at capacity + var inputChan <-chan 
*message.Payload + if s.inflight.hasSpace() { + inputChan = s.inputChan // Enable reading + } else { + inputChan = nil // Disable reading + } + + select { + case payload := <-inputChan: + // Fires in any state (gated only by inflight capacity), payload is always + // added to the inflight tracker. But we only proceed to send if we are + // in the active state with a valid stream + s.inflight.append(payload) + s.sendPayloads() + + case ack := <-s.batchAckCh: + // Fires in any state + s.handleBatchAck(ack) + + case failedStream := <-s.recvFailureCh: + // Fires in active/draining/connecting states + s.handleRecvFailure(failedStream) + + case result := <-s.streamReadyCh: + // Fires only in connecting state + s.handleStreamReady(result) + + case <-s.streamTimer.C: + // Fires only in active state (except rare timing race, it's in connecting) + s.handleStreamTimeout() + + case <-s.drainTimer.C: + // Fires in draining state or (rarely) in connecting/active state + // If in non-draining state, it means acks arrival at the same time + // as the drain timer expiration, so we will skip the signal + s.handleDrainTimeout() + + case <-s.backoffTimer.C: + // Fires only in disconnected state + s.handleBackoffTimeout() + + case <-s.stopChan: + // Fires in any state + s.handleShutdown() + return + } + } +} + +// sendPayloads attempts to send all buffered payloads when in Active state +// the same function is used to send new payload in normal operation, and +// to send (or resend) buffered payloads after a stream rotation +func (s *streamWorker) sendPayloads() { + if s.streamState != active { + return + } + + // Send all buffered payloads in order + for { + payload := s.inflight.nextToSend() + if payload == nil { + // No more buffered payloads to send + break + } + + batchID := s.inflight.nextBatchID() + batch := s.payloadToBatch(payload, batchID) + + // TODO Send call can block, by TCP/HTTP2 flow controls + if err := s.currentStream.stream.Send(batch); err != nil { + 
log.Warnf("Worker %s: Send failed, initiating stream rotation: %v", s.workerID, err) + s.beginStreamRotation() + return // stop sending, payloads remain buffered for next rotation + } + + // Successfully sent, mark as sent in the inflight tracker + s.inflight.markSent() + } +} + +// handleBatchAck processes a BatchStatus acknowledgment from the server +func (s *streamWorker) handleBatchAck(ack *batchAck) { + // Ignore stale acks from old streams + if ack.stream != s.currentStream { + return + } + + receivedBatchID := uint32(ack.status.BatchId) + + // The two errors below should never happen if Intake is implemented + // correctly, but we are being defensive. + + // Verify we have "sent payloads" awaiting ack + if !s.inflight.hasUnacked() { + log.Errorf("Worker %s: Received ack for batch %d but no sent payloads in inflight tracker, "+ + "irrecoverable error - initiating stream rotation", s.workerID, receivedBatchID) + s.beginStreamRotation() + return + } + + // Verify batchID matches expected sequence + expectedBatchID := s.inflight.getHeadBatchID() + if receivedBatchID != expectedBatchID { + log.Errorf("Worker %s: BatchID mismatch! Expected %d, received %d. 
"+ + "ut-of-order or duplicate ack, irrecoverable error - initiating stream rotation", + s.workerID, expectedBatchID, receivedBatchID) + s.beginStreamRotation() + return + } + + // Pop the acknowledged payload and send to auditor + payload := s.inflight.pop() + if s.outputChan != nil { + select { + case s.outputChan <- payload: + // Successfully sent to auditor + default: + log.Warnf("Worker %s: Auditor channel full, dropping ack for batch %d", s.workerID, receivedBatchID) + } + } + + // If in Draining state and all acks received, transition to Connecting + if s.streamState == draining && !s.inflight.hasUnacked() { + log.Infof("Worker %s: All acks received in draining state, proceeding with rotation", s.workerID) + s.drainTimer.Stop() + s.beginStreamRotation() + } +} + +// handleRecvFailure processes receiver failure signals +func (s *streamWorker) handleRecvFailure(failedStream *streamInfo) { + // Ignore if: stale signal OR not in active/draining state + if failedStream != s.currentStream || (s.streamState != active && s.streamState != draining) { + return + } + + log.Infof("Worker %s: Receiver reported failure (state: %v), initiating stream rotation", s.workerID, s.streamState) + s.beginStreamRotation() +} + +// handleStreamReady processes async stream creation results +func (s *streamWorker) handleStreamReady(result streamCreationResult) { + if s.streamState != connecting { + return + } + + if result.err != nil { + s.nbErrors = s.backoffPolicy.IncError(s.nbErrors) + s.handleStreamCreationFailure(result.err) + } else { + s.nbErrors = s.backoffPolicy.DecError(s.nbErrors) + s.finishStreamRotation(result.info) + } +} + +// handleStreamTimeout processes stream lifetime expiration +func (s *streamWorker) handleStreamTimeout() { + if s.streamState != active { + return + } + + if s.inflight.hasUnacked() { + log.Infof("Worker %s: Stream lifetime expired with %d unacked payloads, entering Draining state", + s.workerID, s.inflight.sentCount()) + s.streamState = draining + 
s.drainTimer.Reset(drainTimeout) + } else { + log.Infof("Worker %s: Stream lifetime expired with no unacked payloads, rotating immediately", + s.workerID) + s.beginStreamRotation() + } +} + +// handleDrainTimeout handles drain timer expiration +func (s *streamWorker) handleDrainTimeout() { + if s.streamState != draining { + return + } + + log.Warnf("Worker %s: Drain timer expired in draining state, proceeding with rotation (may lose some acks)", + s.workerID) + s.beginStreamRotation() +} + +// handleBackoffTimeout processes backoff timer expiration and retries stream creation +func (s *streamWorker) handleBackoffTimeout() { + if s.streamState != disconnected { + return + } + + log.Infof("Worker %s: Backoff timer expired, retrying stream creation (error count: %d)", s.workerID, s.nbErrors) + s.streamState = connecting + s.asyncCreateNewStream() +} + +// handleShutdown performs graceful shutdown cleanup +func (s *streamWorker) handleShutdown() { + log.Infof("Worker %s: Shutting down", s.workerID) + s.streamTimer.Stop() + s.backoffTimer.Stop() + s.drainTimer.Stop() + s.closeStream(s.currentStream) +} + +// beginStreamRotation initiates stream rotation +// Closes current stream and starts async creation of a new stream +func (s *streamWorker) beginStreamRotation() { + log.Infof("Worker %s: Beginning stream rotation (state: %v → connecting)", s.workerID, s.streamState) + + s.closeStream(s.currentStream) + s.currentStream = nil + s.streamTimer.Stop() + s.drainTimer.Stop() + s.backoffTimer.Stop() + + s.streamState = connecting + s.asyncCreateNewStream() +} + +// finishStreamRotation completes stream rotation (Connecting → Active transition) +// Activates the newly created stream and starts the receiver +// Transmits the snapshot state first, then (if any) the buffered payloads +func (s *streamWorker) finishStreamRotation(streamInfo *streamInfo) { + log.Infof("Worker %s: Finishing stream rotation (state: connecting → active)", s.workerID) + + s.currentStream = streamInfo + 
s.streamState = active + + go s.receiverLoop(streamInfo) + + s.streamTimer.Reset(s.streamLifetime) + + // Convert all the unacked items to buffered items by resetting inflight tracker + // because we need to resend them. + s.inflight.resetOnRotation() + + log.Infof("Worker %s: Stream rotation complete, now active", s.workerID) + + // TODO implement: transmit the snapshot state first + // Then send the remaining buffered payloads + if s.inflight.hasUnSent() { + s.sendPayloads() + } +} + +// handleStreamCreationFailure processes stream creation failures with exponential backoff +func (s *streamWorker) handleStreamCreationFailure(err error) { + backoffDuration := s.backoffPolicy.GetBackoffDuration(s.nbErrors) + + log.Warnf("Worker %s: Stream creation failed: %v. Backing off for %v (error count: %d)", + s.workerID, err, backoffDuration, s.nbErrors) + + s.streamState = disconnected + + if backoffDuration > 0 { + s.backoffTimer.Reset(backoffDuration) + } else { + // it shouldn't happen, but be defensive + // retry immediately by transitioning directly to connecting + log.Infof("Worker %s: Zero backoff duration, retrying immediately", s.workerID) + s.streamState = connecting + s.asyncCreateNewStream() + } +} + +// asyncCreateNewStream creates a new gRPC stream asynchronously +// Signals completion (success or failure) via streamReadyCh +func (s *streamWorker) asyncCreateNewStream() { + go func() { + log.Infof("Worker %s: Starting async stream creation", s.workerID) + + var result streamCreationResult + + // Ensure the connection is ready, can block up to connectionTimeout + err := s.ensureConnectionReady() + if err != nil { + log.Errorf("Worker %s: Async stream creation failed (connection failure) %v", s.workerID, err) + result = streamCreationResult{info: nil, err: err} + } else { + // Create per-stream context derived from destinations context + streamCtx, streamCancel := context.WithCancel(s.destinationsContext.Context()) + + // Create the stream, shouldn't block at 
this point. + stream, err := s.client.LogsStream(streamCtx) + + if err != nil { + streamCancel() + log.Errorf("Worker %s: Async stream creation failed (post-connection): %v", s.workerID, err) + result = streamCreationResult{info: nil, err: err} + } else { + log.Infof("Worker %s: Async stream creation succeeded", s.workerID) + result = streamCreationResult{ + info: &streamInfo{ + stream: stream, + ctx: streamCtx, + cancel: streamCancel, + }, + err: nil, + } + } + } + + // Signal result to supervisor (blocks until received or stopped) + select { + case s.streamReadyCh <- result: + case <-s.stopChan: + // Worker stopped before supervisor could receive result + // We own cleanup since supervisor never got the stream + if result.info != nil { + s.closeStream(result.info) + } + } + }() +} + +func (s *streamWorker) ensureConnectionReady() error { + // Skip connection check if conn is nil (for testing with mock clients) + if s.conn == nil { + return nil + } + + connCtx, cancel := context.WithTimeout(s.destinationsContext.Context(), connectionTimeout) + defer cancel() + + // Nudge dialing if idle; doesn't block + s.conn.Connect() + + for { + state := s.conn.GetState() + switch state { + case connectivity.Ready: + return nil + case connectivity.Shutdown: + return errors.New("gRPC conn is shutdown") + } + // Wait for state change or timeout/cancel. 
+ if !s.conn.WaitForStateChange(connCtx, state) { + // context done (timeout or cancellation) + return connCtx.Err() + } + } +} + +// closeStream safely closes a stream and cancels its context +func (s *streamWorker) closeStream(streamInfo *streamInfo) { + if streamInfo != nil { + if err := streamInfo.stream.CloseSend(); err != nil { + log.Debugf("Worker %s: Error closing stream send: %v", s.workerID, err) + } + streamInfo.cancel() + } +} + +// receiverLoop runs in the receiver goroutine to process server responses for a specific stream +// The receiver is stateless - it only forwards acks/errors to the supervisor +// This goroutine exits when the stream fails (after signaling the supervisor) +func (s *streamWorker) receiverLoop(streamInfo *streamInfo) { + stream := streamInfo.stream + for { + msg, err := stream.Recv() + if err == nil { + // Normal message (batch acknowledgment) - forward to supervisor + s.signalBatchAck(streamInfo, msg) + continue + } + + // Clean inbound close (server OK in trailers): policy = signal receiver failure + if errors.Is(err, io.EOF) { + log.Warnf("Worker %s: Stream closed by server", s.workerID) + s.signalRecvFailure(streamInfo) + return + } + + // Local cancel/deadline (supervisor rotated, worker shutdown): just exit + ctxErr := streamInfo.ctx.Err() + if errors.Is(ctxErr, context.Canceled) || errors.Is(ctxErr, context.DeadlineExceeded) { + log.Infof("Worker %s: Stream context cancelled, receiver exiting", s.workerID) + return + } + + // Stream-level gRPC status (non-OK): RPC is over → signal receiver failure or block terminal + if st, ok := status.FromError(err); ok { + switch st.Code() { + case codes.Unauthenticated, codes.PermissionDenied: + // Terminal until fixed; do not signal receiver failure here + s.handleIrrecoverableError("auth/perm: "+st.Message(), streamInfo) + return + case codes.InvalidArgument, codes.FailedPrecondition, codes.OutOfRange, codes.Unimplemented: + // Terminal protocol/semantic issue; do not signal receiver 
failure + s.handleIrrecoverableError("protocol: "+st.Message(), streamInfo) + return + default: + // All other non-OK statuses: signal receiver failure + log.Warnf("Worker %s: gRPC error (code %v): %v", s.workerID, st.Code(), err) + s.signalRecvFailure(streamInfo) + return + } + } + + // Transport error without status (RST/GOAWAY/TLS, socket close): signal receiver failure + log.Warnf("Worker %s: Transport error: %v", s.workerID, err) + s.signalRecvFailure(streamInfo) + return + } +} + +// signalRecvFailure signals the supervisor to rotate the stream +func (s *streamWorker) signalRecvFailure(streamInfo *streamInfo) { + // This signaling is blocking by design, it's okay to block the receiver, + // since the only way we get here is through an irrecoverable error. + select { + case s.recvFailureCh <- streamInfo: + case <-s.stopChan: + } +} + +// signalBatchAck forwards a batch acknowledgment to the supervisor +// If the worker is stopped, returns without delivering (shutdown is in progress anyway) +func (s *streamWorker) signalBatchAck(streamInfo *streamInfo, msg *BatchStatus) { + select { + case s.batchAckCh <- &batchAck{stream: streamInfo, status: msg}: + case <-s.stopChan: + } +} + +// handleIrrecoverableError handles errors that shouldn't be retried, and ideally +// should block the ingestion, until the error is resolved. +func (s *streamWorker) handleIrrecoverableError(_ string, streamInfo *streamInfo) { + // Currently this is treated as stream error, which will trigger a stream rotation + // and retry of the same payload, which loops on. this IS NOT the desired behavior. 
+ // TODO: Implement proper handling of irrecoverable errors, by blocking the ingestion + s.signalRecvFailure(streamInfo) +} + +// payloadToBatch converts a message payload to a StatefulBatch +// The payload.Encoded contains serialized DatumSequence (from batch_strategy) +func (s *streamWorker) payloadToBatch(payload *message.Payload, batchID uint32) *StatefulBatch { + batch := &StatefulBatch{ + BatchId: batchID, + Data: payload.Encoded, + } + + return batch +} + +// createStoppedTimer creates a timer that is stopped and has its channel drained +func createStoppedTimer(clk clock.Clock, d time.Duration) *clock.Timer { + t := clk.Timer(d) + if !t.Stop() { + <-t.C + } + return t +} diff --git a/pkg/logs/sender/grpc/stream_worker_test.go b/pkg/logs/sender/grpc/stream_worker_test.go new file mode 100644 index 000000000000..3fb08a5a4cb8 --- /dev/null +++ b/pkg/logs/sender/grpc/stream_worker_test.go @@ -0,0 +1,789 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +//go:build test + +package grpc + +import ( + "context" + "errors" + "io" + "sync" + "testing" + "time" + + "github.com/benbjohnson/clock" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + + "github.com/DataDog/datadog-agent/comp/logs/agent/config" + "github.com/DataDog/datadog-agent/pkg/logs/client" + "github.com/DataDog/datadog-agent/pkg/logs/message" +) + +const ( + testTimeout = 100 * time.Millisecond + testTickInterval = 10 * time.Millisecond + testShortWait = 50 * time.Millisecond +) + +// mockSink implements sender.Sink for testing +type mockSink struct { + outputChan chan *message.Payload +} + +func newMockSink() *mockSink { + return &mockSink{ + outputChan: make(chan *message.Payload, 100), + } +} + +func (m *mockSink) Channel() chan *message.Payload { + return m.outputChan +} + +// mockLogsStream implements StatefulLogsService_LogsStreamClient for testing +type mockLogsStream struct { + grpc.ClientStream + + mu sync.Mutex + + // Channels for communication + sendCh chan *StatefulBatch // Batches sent by client + recvCh chan *BatchStatus // Acks to send to client + errCh chan error // To inject immediate errors in Recv() + + // Error control + sendErr error // If set, next Send() will return this error + recvErr error // If set, next Recv() will return this error + + // Track sent batches + sentBatches []*StatefulBatch + + // Context + ctx context.Context +} + +func newMockLogsStream(ctx context.Context) *mockLogsStream { + return &mockLogsStream{ + sendCh: make(chan *StatefulBatch, 100), + recvCh: make(chan *BatchStatus, 100), + errCh: make(chan error, 1), + sentBatches: make([]*StatefulBatch, 0), + ctx: ctx, + } +} + +func (m *mockLogsStream) Send(batch *StatefulBatch) error { + m.mu.Lock() + if m.sendErr != nil { + err := m.sendErr + m.mu.Unlock() + return err + } + m.mu.Unlock() + + select { + case m.sendCh <- batch: + m.mu.Lock() + m.sentBatches = 
append(m.sentBatches, batch) + m.mu.Unlock() + return nil + case <-m.ctx.Done(): + return m.ctx.Err() + } +} + +func (m *mockLogsStream) Recv() (*BatchStatus, error) { + m.mu.Lock() + if m.recvErr != nil { + err := m.recvErr + m.mu.Unlock() + return nil, err + } + m.mu.Unlock() + + select { + case ack := <-m.recvCh: + return ack, nil + case err := <-m.errCh: + return nil, err + case <-m.ctx.Done(): + return nil, m.ctx.Err() + } +} + +func (m *mockLogsStream) CloseSend() error { + return nil +} + +// Helper to set send error +func (m *mockLogsStream) setSendError(err error) { + m.mu.Lock() + defer m.mu.Unlock() + m.sendErr = err +} + +// Helper to send an ack to the client +func (m *mockLogsStream) sendAck(batchID int32) { + m.recvCh <- &BatchStatus{ + BatchId: batchID, + } +} + +// Helper to inject an error immediately (unblocks Recv()) +func (m *mockLogsStream) injectRecvError(err error) { + m.errCh <- err +} + +// Helper to get sent batch count +func (m *mockLogsStream) getSentBatchCount() int { + m.mu.Lock() + defer m.mu.Unlock() + return len(m.sentBatches) +} + +// mockLogsClient implements StatefulLogsServiceClient for testing +type mockLogsClient struct { + mu sync.Mutex + + // Control stream creation + createStreamErr error // If set, LogsStream() will return this error + failStreamCreationFor int // Fail the next N stream creation attempts + currentStream *mockLogsStream + streamCtx context.Context + streamCancel context.CancelFunc +} + +func newMockLogsClient() *mockLogsClient { + return &mockLogsClient{} +} + +func (m *mockLogsClient) LogsStream(ctx context.Context, _ ...grpc.CallOption) (StatefulLogsService_LogsStreamClient, error) { + m.mu.Lock() + defer m.mu.Unlock() + + // Check counter-based failure first + if m.failStreamCreationFor > 0 { + m.failStreamCreationFor-- + err := m.createStreamErr + // Clear error when counter reaches 0 + if m.failStreamCreationFor == 0 { + m.createStreamErr = nil + } + return nil, err + } + + // Check error-based 
failure (only if counter is not in use) + if m.createStreamErr != nil { + return nil, m.createStreamErr + } + + // Create a new stream with a child context + m.streamCtx, m.streamCancel = context.WithCancel(ctx) + m.currentStream = newMockLogsStream(m.streamCtx) + return m.currentStream, nil +} + +// Helper to fail the next N stream creation attempts +func (m *mockLogsClient) failNextStreamCreations(count int, err error) { + m.mu.Lock() + defer m.mu.Unlock() + m.failStreamCreationFor = count + m.createStreamErr = err +} + +// Helper to get current stream +func (m *mockLogsClient) getCurrentStream() *mockLogsStream { + m.mu.Lock() + defer m.mu.Unlock() + return m.currentStream +} + +// testFixture holds all the components needed for testing +type testFixture struct { + t *testing.T + mockClock *clock.Mock + mockClient *mockLogsClient + mockSink *mockSink + inputChan chan *message.Payload + outputChan chan *message.Payload + destCtx *client.DestinationsContext + endpoint config.Endpoint + streamLifetime time.Duration + worker *streamWorker +} + +// newTestFixture creates all the test infrastructure +func newTestFixture(t *testing.T) *testFixture { + // Create mock client + mockClient := newMockLogsClient() + + // Create mock sink + mockSink := newMockSink() + + // Create input channel + inputChan := make(chan *message.Payload, 100) + + // Create mock destination context + destCtx := client.NewDestinationsContext() + destCtx.Start() + + // Create endpoint config with test backoff settings + endpoint := config.Endpoint{ + BackoffFactor: 2.0, + BackoffBase: 1.0, + BackoffMax: 10.0, + RecoveryInterval: 2, + RecoveryReset: false, + } + + // Create mock clock + mockClock := clock.NewMock() + + fixture := &testFixture{ + t: t, + mockClock: mockClock, + mockClient: mockClient, + mockSink: mockSink, + inputChan: inputChan, + outputChan: mockSink.outputChan, + destCtx: destCtx, + endpoint: endpoint, + streamLifetime: 10 * time.Second, + } + + return fixture +} + +// 
createWorker creates a streamWorker with the fixture's components +func (f *testFixture) createWorker() *streamWorker { + return f.createWorkerWithInflight(nil) // nil = use default maxInflight +} + +// createWorkerWithInflight creates a streamWorker with custom inflight capacity for testing +func (f *testFixture) createWorkerWithInflight(inflight *inflightTracker) *streamWorker { + worker := newStreamWorkerWithClock( + "test-worker", + f.inputChan, + f.destCtx, + nil, // conn not needed with mock client + f.mockClient, + f.mockSink, + f.endpoint, + f.streamLifetime, + f.mockClock, + inflight, + ) + f.worker = worker + return worker +} + +// cleanup shuts down all resources +func (f *testFixture) cleanup() { + if f.worker != nil { + // Check if worker is still running before stopping + select { + case <-f.worker.done: + // Already stopped + default: + f.worker.stop() + } + } + if f.destCtx != nil { + f.destCtx.Stop() + } +} + +// Helper to create test payload for stream worker tests +func createWorkerTestPayload(content string) *message.Payload { + return &message.Payload{ + Encoded: []byte(content), + MessageMetas: []*message.MessageMetadata{ + { + RawDataLen: len(content), + }, + }, + } +} + +// TestStreamWorkerBasicStartStop tests the basic lifecycle +func TestStreamWorkerBasicStartStop(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + worker := fixture.createWorker() + + // Start the worker + worker.start() + + // Wait for stream to become active (mocked stream creation should be quick) + require.Eventually(t, func() bool { + return worker.streamState == active + }, testTimeout, testTickInterval, "Worker should transition to active state") + + // Verify stream was created + stream := fixture.mockClient.getCurrentStream() + require.NotNil(t, stream, "Stream should be created") + + // Stop the worker + worker.stop() + + // Verify clean shutdown + select { + case <-worker.done: + // Success + case <-time.After(testTimeout): + 
t.Fatal("Worker did not shut down in time") + } +} + +// TestStreamWorkerSendReceive tests basic message flow from input to output +func TestStreamWorkerSendReceive(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + worker := fixture.createWorker() + worker.start() + + // Wait for active state + require.Eventually(t, func() bool { + return worker.streamState == active + }, testTimeout, testTickInterval) + + stream := fixture.mockClient.getCurrentStream() + require.NotNil(t, stream) + + // Send one message + payload := createWorkerTestPayload("test message") + fixture.inputChan <- payload + + // Wait for message to be sent to stream + require.Eventually(t, func() bool { + return stream.getSentBatchCount() == 1 + }, testTimeout, testTickInterval) + + // Send ack for batch 1 + stream.sendAck(1) + + // Verify message appears in output channel + select { + case output := <-fixture.outputChan: + require.Equal(t, payload, output) + case <-time.After(testTimeout): + t.Fatal("Message should appear in outputChan after ack") + } +} + +// TestStreamWorkerReceiverFailureRotation tests stream rotation on receiver failure +// with an inflight message that gets re-sent on the new stream +func TestStreamWorkerReceiverFailureRotation(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + worker := fixture.createWorker() + worker.start() + + // Wait for active state + require.Eventually(t, func() bool { + return worker.streamState == active + }, testTimeout, testTickInterval) + + stream1 := fixture.mockClient.getCurrentStream() + require.NotNil(t, stream1) + + // Send 1 message + payload := createWorkerTestPayload("test message") + fixture.inputChan <- payload + + // Wait for message to be sent to stream1 + require.Eventually(t, func() bool { + return stream1.getSentBatchCount() == 1 + }, testTimeout, testTickInterval) + + // Give receiverLoop time to enter Recv() and block + time.Sleep(testShortWait) + + // Inject receiver error 
immediately (this unblocks Recv() and triggers stream rotation) + // Note: We do NOT send an ack, so the message stays inflight + stream1.injectRecvError(io.EOF) + + // Wait for rotation to complete (stream changes and state is active again) + // Note: Rotation is very fast with mocks, so we just check for the new stream + var stream2 *mockLogsStream + require.Eventually(t, func() bool { + stream2 = fixture.mockClient.getCurrentStream() + return stream2 != nil && stream2 != stream1 && worker.streamState == active + }, testTimeout, testTickInterval, "Should complete stream rotation with new stream") + + // The inflight message should be re-sent on the new stream (after rotation reset, it's batch 1 again) + require.Eventually(t, func() bool { + return stream2.getSentBatchCount() == 1 + }, testTimeout, testTickInterval, "Inflight message should be re-sent on new stream") + + // Send ack for batch 1 on new stream + stream2.sendAck(1) + + // Verify message appears in output channel + select { + case output := <-fixture.outputChan: + require.Equal(t, payload, output) + case <-time.After(testTimeout): + t.Fatal("Message should appear in outputChan after ack on new stream") + } +} + +// TestStreamWorkerStreamTimeout tests stream rotation triggered by stream timer expiration +func TestStreamWorkerStreamTimeout(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + worker := fixture.createWorker() + worker.start() + + // Wait for active state + require.Eventually(t, func() bool { + return worker.streamState == active + }, testTimeout, testTickInterval) + + stream1 := fixture.mockClient.getCurrentStream() + require.NotNil(t, stream1) + + // Advance clock past stream lifetime to trigger stream timeout + fixture.mockClock.Add(fixture.streamLifetime + time.Second) + + // Wait for rotation to complete (new stream created and active) + var stream2 *mockLogsStream + require.Eventually(t, func() bool { + stream2 = fixture.mockClient.getCurrentStream() + return 
stream2 != nil && stream2 != stream1 && worker.streamState == active + }, testTimeout, testTickInterval, "Should rotate to new stream after timer expires") + + // Send a message on the new stream + payload := createWorkerTestPayload("test on stream2") + fixture.inputChan <- payload + + // Wait for message to be sent on stream2 + require.Eventually(t, func() bool { + return stream2.getSentBatchCount() == 1 + }, testTimeout, testTickInterval, "Message should be sent on new stream") + + // Send ack + stream2.sendAck(1) + + // Verify message appears in output + select { + case output := <-fixture.outputChan: + require.Equal(t, payload, output) + case <-time.After(testTimeout): + t.Fatal("Message should appear in outputChan after ack") + } +} + +// TestStreamWorkerStreamTimeoutWithDrain tests graceful rotation when stream timer expires with inflight messages +func TestStreamWorkerStreamTimeoutWithDrain(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + worker := fixture.createWorker() + worker.start() + + // Wait for active state + require.Eventually(t, func() bool { + return worker.streamState == active + }, testTimeout, testTickInterval) + + stream1 := fixture.mockClient.getCurrentStream() + require.NotNil(t, stream1) + + // Step 1: Send 1 message on stream1, don't send ack + payload1 := createWorkerTestPayload("message 1") + fixture.inputChan <- payload1 + + // Wait for message to be sent on stream1 + require.Eventually(t, func() bool { + return stream1.getSentBatchCount() == 1 + }, testTimeout, testTickInterval) + + // Step 2 & 3: Advance clock to trigger stream timeout, verify draining state + fixture.mockClock.Add(fixture.streamLifetime + time.Second) + + // Should transition to draining (not connecting) because there's an unacked message + require.Eventually(t, func() bool { + return worker.streamState == draining + }, testTimeout, testTickInterval, "Should transition to draining state with unacked messages") + + // Step 4: Send another 
message, verify it's buffered (NOT sent on stream1) + payload2 := createWorkerTestPayload("message 2") + fixture.inputChan <- payload2 + + // Give time for message to be processed if it was going to be sent + time.Sleep(testShortWait) + + // stream1 should still only have 1 batch sent + require.Equal(t, 1, stream1.getSentBatchCount(), "Message 2 should be buffered, not sent on stream1") + + // Step 5 & 6 & 7: Send ack for batch 1, verify it appears in output + stream1.sendAck(1) + + select { + case output := <-fixture.outputChan: + require.Equal(t, payload1, output, "First message should appear in output") + case <-time.After(testTimeout): + t.Fatal("Message 1 should appear in outputChan after ack") + } + + // Step 8: Verify stream2 is created (draining → connecting → active) + var stream2 *mockLogsStream + require.Eventually(t, func() bool { + stream2 = fixture.mockClient.getCurrentStream() + return stream2 != nil && stream2 != stream1 && worker.streamState == active + }, testTimeout, testTickInterval, "Should complete rotation to new stream after ack received") + + // Step 9: Verify message 2 is sent on stream2 (batch ID resets to 1 after rotation) + require.Eventually(t, func() bool { + return stream2.getSentBatchCount() == 1 + }, testTimeout, testTickInterval, "Buffered message 2 should be sent on new stream") + + // Send ack for batch 1 on stream2 to verify it's the second message + stream2.sendAck(1) + + select { + case output := <-fixture.outputChan: + require.Equal(t, payload2, output, "Second message should appear in output") + case <-time.After(testTimeout): + t.Fatal("Message 2 should appear in outputChan after ack on stream2") + } +} + +// TestStreamWorkerDrainTimeout tests forced rotation when drain timer expires without receiving all acks +func TestStreamWorkerDrainTimeout(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + worker := fixture.createWorker() + worker.start() + + // Wait for active state + require.Eventually(t, 
func() bool { + return worker.streamState == active + }, testTimeout, testTickInterval) + + stream1 := fixture.mockClient.getCurrentStream() + require.NotNil(t, stream1) + + // Step 1: Send message on stream1, don't send ack (stays inflight) + payload := createWorkerTestPayload("message 1") + fixture.inputChan <- payload + + // Wait for message to be sent on stream1 + require.Eventually(t, func() bool { + return stream1.getSentBatchCount() == 1 + }, testTimeout, testTickInterval) + + // Step 2: Advance clock to trigger stream timeout → enter draining + fixture.mockClock.Add(fixture.streamLifetime + time.Second) + + require.Eventually(t, func() bool { + return worker.streamState == draining + }, testTimeout, testTickInterval, "Should transition to draining state") + + // Step 3: Advance clock to trigger drain timeout (without sending ack) → force rotation + fixture.mockClock.Add(drainTimeout + time.Second) + + // Step 4: Verify stream2 is created (draining → connecting → active) + var stream2 *mockLogsStream + require.Eventually(t, func() bool { + stream2 = fixture.mockClient.getCurrentStream() + return stream2 != nil && stream2 != stream1 && worker.streamState == active + }, testTimeout, testTickInterval, "Should complete rotation to new stream after drain timeout") + + // Step 5: Verify batch 1 is re-sent on stream2 (inflight message replayed) + require.Eventually(t, func() bool { + return stream2.getSentBatchCount() == 1 + }, testTimeout, testTickInterval, "Inflight message should be re-sent on new stream") + + // Send ack for batch 1 on stream2 + stream2.sendAck(1) + + // Verify message appears in output + select { + case output := <-fixture.outputChan: + require.Equal(t, payload, output) + case <-time.After(testTimeout): + t.Fatal("Message should appear in outputChan after ack on new stream") + } +} + +// TestStreamWorkerBackoff tests exponential backoff on stream creation failure +func TestStreamWorkerBackoff(t *testing.T) { + fixture := newTestFixture(t) + 
defer fixture.cleanup() + + worker := fixture.createWorker() + + // Configure mock to fail stream creation once, then succeed + testErr := errors.New("simulated stream creation failure") + fixture.mockClient.failNextStreamCreations(1, testErr) + + // Start worker (will attempt to create stream and should fail) + worker.start() + + // Should fail to create stream and enter disconnected state + require.Eventually(t, func() bool { + return worker.streamState == disconnected + }, testTimeout, testTickInterval, "Should transition to disconnected state after stream creation failure") + + // Verify no stream was created + require.Nil(t, fixture.mockClient.getCurrentStream(), "No stream should be created on error") + + // Advance clock gradually to trigger backoff timer and verify stream is established + // For first error, backoff is between 1-2 seconds (base=1s, factor=2, jitter) + var stream *mockLogsStream + require.Eventually(t, func() bool { + fixture.mockClock.Add(500 * time.Millisecond) + stream = fixture.mockClient.getCurrentStream() + return stream != nil && worker.streamState == active + }, testTimeout, testTickInterval, "Should transition to active state after backoff expires") + + // Verify we can send a message on the new stream + payload := createWorkerTestPayload("test message") + fixture.inputChan <- payload + + require.Eventually(t, func() bool { + return stream.getSentBatchCount() == 1 + }, testTimeout, testTickInterval, "Message should be sent on new stream") + + stream.sendAck(1) + + select { + case output := <-fixture.outputChan: + require.Equal(t, payload, output) + case <-time.After(testTimeout): + t.Fatal("Message should appear in outputChan after ack") + } +} + +// TestStreamWorkerBackpressure verifies that inputChan blocks when inflight is full +func TestStreamWorkerBackpressure(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + // Use small inflight capacity for fast test + smallInflight := newInflightTracker(5) + 
worker := fixture.createWorkerWithInflight(smallInflight) + worker.start() + + // Wait for active state + require.Eventually(t, func() bool { + return worker.streamState == active + }, testTimeout, testTickInterval) + + stream := fixture.mockClient.getCurrentStream() + require.NotNil(t, stream) + + // Send 5 messages (don't send acks, so they stay in "sent" state and fill inflight) + for i := 0; i < 5; i++ { + fixture.inputChan <- createWorkerTestPayload("test") + } + + // Wait for inflight to be full + require.Eventually(t, func() bool { + return !worker.inflight.hasSpace() + }, testTimeout, testTickInterval, "Inflight should be full") + + // Verify backpressure: send one more message, it should NOT be consumed + fixture.inputChan <- createWorkerTestPayload("blocked") + time.Sleep(testShortWait) + require.Equal(t, 1, len(fixture.inputChan), "Message should remain in inputChan due to backpressure") + + // Send ack for batch 1 to free up space + stream.sendAck(1) + + // Verify backpressure released: the blocked message should now be consumed + require.Eventually(t, func() bool { + return len(fixture.inputChan) == 0 + }, testTimeout, testTickInterval, "Message should be consumed after ack frees space") +} + +// TestStreamWorkerErrorRecovery tests that Send() and Recv() failures trigger rotation and retry +func TestStreamWorkerErrorRecovery(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + worker := fixture.createWorker() + worker.start() + + // Wait for initial stream to be active + var stream1 *mockLogsStream + require.Eventually(t, func() bool { + stream1 = fixture.mockClient.getCurrentStream() + return stream1 != nil && worker.streamState == active + }, testTimeout, testTickInterval, "Worker should reach active state") + + // Inject send error BEFORE sending message + stream1.setSendError(errors.New("simulated send failure")) + + // Send message - this will trigger Send() failure and rotation + payload := createWorkerTestPayload("test 
message") + fixture.inputChan <- payload + + // Wait for stream rotation (new stream created) + var stream2 *mockLogsStream + require.Eventually(t, func() bool { + stream2 = fixture.mockClient.getCurrentStream() + return stream2 != nil && stream2 != stream1 && worker.streamState == active + }, testTimeout, testTickInterval, "Worker should rotate to new stream after send error") + + // New stream should have retried the message (batch 1) + require.Eventually(t, func() bool { + return stream2.getSentBatchCount() == 1 + }, testTimeout, testTickInterval, "Message should be retried on new stream") + + // Send ack on new stream + stream2.sendAck(1) + + // Verify message appears in outputChan + select { + case output := <-fixture.outputChan: + require.Equal(t, payload, output) + case <-time.After(testTimeout): + t.Fatal("Message should appear in outputChan after rotation and ack") + } + + // Part 2: Test injectRecvError with retriable gRPC error + // Inject recv error (codes.Unavailable falls into default case -> rotation) + stream2.injectRecvError(status.Error(codes.Unavailable, "simulated unavailable error")) + + // Send another message + payload2 := createWorkerTestPayload("test message 2") + fixture.inputChan <- payload2 + + // Wait for stream rotation (new stream created) + var stream3 *mockLogsStream + require.Eventually(t, func() bool { + stream3 = fixture.mockClient.getCurrentStream() + return stream3 != nil && stream3 != stream2 && worker.streamState == active + }, testTimeout, testTickInterval, "Worker should rotate to new stream after recv error") + + // New stream should have retried the message (batch 1 - reset after rotation) + require.Eventually(t, func() bool { + return stream3.getSentBatchCount() == 1 + }, testTimeout, testTickInterval, "Message should be retried on new stream after recv error") + + // Send ack on new stream + stream3.sendAck(1) + + // Verify message appears in outputChan + select { + case output := <-fixture.outputChan: + 
require.Equal(t, payload2, output) + case <-time.After(testTimeout): + t.Fatal("Message should appear in outputChan after recv error rotation and ack") + } +} diff --git a/pkg/logs/sender/grpc/streamstate_string.go b/pkg/logs/sender/grpc/streamstate_string.go new file mode 100644 index 000000000000..6081ed22284e --- /dev/null +++ b/pkg/logs/sender/grpc/streamstate_string.go @@ -0,0 +1,27 @@ +// Code generated by "stringer -type=streamState"; DO NOT EDIT. + +package grpc + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[disconnected-0] + _ = x[connecting-1] + _ = x[active-2] + _ = x[draining-3] +} + +const _streamState_name = "disconnectedconnectingactivedraining" + +var _streamState_index = [...]uint8{0, 12, 22, 28, 36} + +func (i streamState) String() string { + idx := int(i) - 0 + if i < 0 || idx >= len(_streamState_index)-1 { + return "streamState(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _streamState_name[_streamState_index[idx]:_streamState_index[idx+1]] +} diff --git a/pkg/logs/sender/message_buffer.go b/pkg/logs/sender/message_buffer.go index ad2112193eaa..b1185e16ade7 100644 --- a/pkg/logs/sender/message_buffer.go +++ b/pkg/logs/sender/message_buffer.go @@ -28,8 +28,15 @@ func NewMessageBuffer(batchSizeLimit int, contentSizeLimit int) *MessageBuffer { // returns true if the message was added. func (p *MessageBuffer) AddMessage(message *message.Message) bool { contentSize := len(message.GetContent()) + return p.AddMessageWithSize(&message.MessageMetadata, contentSize) +} + +// AddMessageWithSize adds a message to the buffer if there is still some free space, +// returns true if the message was added. +// As input it takes directly metadata and content size, instead of a message. 
+func (p *MessageBuffer) AddMessageWithSize(metadata *message.MessageMetadata, contentSize int) bool { if len(p.messageBuffer) < cap(p.messageBuffer) && p.contentSize+contentSize <= p.contentSizeLimit { - meta := message.MessageMetadata // Copy metadata instead of taking reference + meta := *metadata // Copy metadata instead of taking reference p.messageBuffer = append(p.messageBuffer, &meta) p.contentSize += contentSize return true From a5526c1acf3573c14b0d1c6f0dee1b2f87ae561d Mon Sep 17 00:00:00 2001 From: Joy Zhang Date: Wed, 5 Nov 2025 14:17:04 +0100 Subject: [PATCH 10/16] Handle state management and snapshot transmission --- pkg/logs/message/message.go | 2 + pkg/logs/sender/grpc/batch_strategy.go | 33 +++ pkg/logs/sender/grpc/batch_strategy_test.go | 190 ++++++++++++++ pkg/logs/sender/grpc/inflight.go | 106 +++++++- pkg/logs/sender/grpc/stream_worker.go | 50 +++- pkg/logs/sender/grpc/stream_worker_test.go | 263 ++++++++++++++++++++ 6 files changed, 626 insertions(+), 18 deletions(-) diff --git a/pkg/logs/message/message.go b/pkg/logs/message/message.go index a8f090390aea..448597f24062 100644 --- a/pkg/logs/message/message.go +++ b/pkg/logs/message/message.go @@ -38,6 +38,8 @@ type Payload struct { Encoding string // The size of the unencoded payload UnencodedSize int + // Extra information for Stateful gRPC streaming (batch-level state changes) + StatefulExtra any } // NewPayload creates a new payload with the given message metadata, encoded content, encoding type and unencoded size diff --git a/pkg/logs/sender/grpc/batch_strategy.go b/pkg/logs/sender/grpc/batch_strategy.go index 57b3775ab754..c2636dc0eb5d 100644 --- a/pkg/logs/sender/grpc/batch_strategy.go +++ b/pkg/logs/sender/grpc/batch_strategy.go @@ -24,6 +24,24 @@ var ( tlmDroppedTooLarge = telemetry.NewCounter("logs_sender_grpc_batch_strategy", "dropped_too_large", []string{"pipeline"}, "Number of payloads dropped due to being too large") ) +// StatefulExtra holds state changes (non-Log datums) from a 
batch +// Used by inflight tracker to maintain snapshot state for stream rotation +type StatefulExtra struct { + StateChanges []*Datum +} + +// isStateDatum returns true if the datum represents a state change +// (pattern/dict define/delete operations) +func isStateDatum(datum *Datum) bool { + switch datum.Data.(type) { + case *Datum_PatternDefine, *Datum_PatternDelete, + *Datum_DictEntryDefine, *Datum_DictEntryDelete: + return true + default: + return false + } +} + // batchStrategy contains batching logic for gRPC sender without serializer // It collects Datum objects from StatefulMessages and creates Payload with serialized DatumSequence // Note: Serverless logs are not supported in this PoC implementation @@ -206,6 +224,14 @@ func (s *batchStrategy) sendMessagesWithDatums(messagesMetadata []*message.Messa unencodedSize += msgMeta.RawDataLen } + // Extract all state changes from this batch for snapshot management + var stateChanges []*Datum + for _, datum := range grpcDatums { + if isStateDatum(datum) { + stateChanges = append(stateChanges, datum) + } + } + // Create DatumSequence and marshal to bytes datumSeq := &DatumSequence{ Data: grpcDatums, @@ -232,6 +258,13 @@ func (s *batchStrategy) sendMessagesWithDatums(messagesMetadata []*message.Messa UnencodedSize: unencodedSize, } + // Store batch-level state changes in payload + if len(stateChanges) > 0 { + p.StatefulExtra = &StatefulExtra{ + StateChanges: stateChanges, + } + } + outputChan <- p s.pipelineMonitor.ReportComponentEgress(p, metrics.StrategyTlmName, s.instanceID) s.pipelineMonitor.ReportComponentIngress(p, metrics.SenderTlmName, metrics.SenderTlmInstanceID) diff --git a/pkg/logs/sender/grpc/batch_strategy_test.go b/pkg/logs/sender/grpc/batch_strategy_test.go index 54ab52745cc4..d324bb5f29f4 100644 --- a/pkg/logs/sender/grpc/batch_strategy_test.go +++ b/pkg/logs/sender/grpc/batch_strategy_test.go @@ -467,3 +467,193 @@ func TestBatchStrategyCompression(t *testing.T) { strategy.Stop() } + +// 
TestBatchStrategyStatefulExtra tests that state changes are correctly tracked in StatefulExtra +func TestBatchStrategyStatefulExtra(t *testing.T) { + input := make(chan *message.StatefulMessage) + output := make(chan *message.Payload, 10) // Buffered to prevent blocking + flushChan := make(chan struct{}) + timerInterval := 100 * time.Millisecond + + clk := clock.NewMock() + strategy := newBatchStrategyWithClock( + input, + output, + flushChan, + timerInterval, + 10, // maxBatchSize - large enough to not trigger size-based flush + 1000, + "test", + clk, + compressionfx.NewMockCompressor().NewCompressor(compression.NoneKind, 1), + metrics.NewNoopPipelineMonitor(""), + "test") + strategy.Start() + + // Helper to create state change messages + createPatternDefineMsg := func(id uint64, template string) *message.StatefulMessage { + msg := message.NewMessage([]byte(""), nil, "", 0) + msg.MessageMetadata.RawDataLen = 0 + return &message.StatefulMessage{ + Metadata: &msg.MessageMetadata, + Datum: &Datum{ + Data: &Datum_PatternDefine{ + PatternDefine: &PatternDefine{ + PatternId: id, + Template: template, + }, + }, + }, + } + } + + createDictEntryDefineMsg := func(id uint64, value string) *message.StatefulMessage { + msg := message.NewMessage([]byte(""), nil, "", 0) + msg.MessageMetadata.RawDataLen = 0 + return &message.StatefulMessage{ + Metadata: &msg.MessageMetadata, + Datum: &Datum{ + Data: &Datum_DictEntryDefine{ + DictEntryDefine: &DictEntryDefine{ + Id: id, + Value: value, + }, + }, + }, + } + } + + createPatternDeleteMsg := func(id uint64) *message.StatefulMessage { + msg := message.NewMessage([]byte(""), nil, "", 0) + msg.MessageMetadata.RawDataLen = 0 + return &message.StatefulMessage{ + Metadata: &msg.MessageMetadata, + Datum: &Datum{ + Data: &Datum_PatternDelete{ + PatternDelete: &PatternDelete{ + PatternId: id, + }, + }, + }, + } + } + + createDictEntryDeleteMsg := func(id uint64) *message.StatefulMessage { + msg := message.NewMessage([]byte(""), nil, "", 0) + 
msg.MessageMetadata.RawDataLen = 0 + return &message.StatefulMessage{ + Metadata: &msg.MessageMetadata, + Datum: &Datum{ + Data: &Datum_DictEntryDelete{ + DictEntryDelete: &DictEntryDelete{ + Id: id, + }, + }, + }, + } + } + + createLogMsg := func(content string) *message.StatefulMessage { + msg := message.NewMessage([]byte(content), nil, "", 0) + msg.MessageMetadata.RawDataLen = len(content) + return &message.StatefulMessage{ + Metadata: &msg.MessageMetadata, + Datum: &Datum{ + Data: &Datum_Logs{ + Logs: &Log{ + Timestamp: 12345, + Content: &Log_Raw{ + Raw: content, + }, + }, + }, + }, + } + } + + // Send all 14 events in sequence + // Batch 1 (5 entries): add p1, add d1, log, add p2, add d2 + input <- createPatternDefineMsg(1, "pattern1") + input <- createDictEntryDefineMsg(1, "value1") + input <- createLogMsg("log with p1/d1") + input <- createPatternDefineMsg(2, "pattern2") + input <- createDictEntryDefineMsg(2, "value2") + + // Advance clock to trigger timer-based flush for batch 1 + clk.Add(2 * timerInterval) + + // Receive and verify Batch 1 + payload1 := <-output + require.Equal(t, 5, len(payload1.MessageMetas), "Batch 1 should have 5 messages") + + // Verify StatefulExtra for Batch 1 + require.NotNil(t, payload1.StatefulExtra, "Batch 1 should have StatefulExtra") + extra1, ok := payload1.StatefulExtra.(*StatefulExtra) + require.True(t, ok, "StatefulExtra should be of type *StatefulExtra") + require.Equal(t, 4, len(extra1.StateChanges), "Batch 1 should have 4 state changes") + + // Check specific state changes in Batch 1 + assert.Equal(t, uint64(1), extra1.StateChanges[0].GetPatternDefine().PatternId) + assert.Equal(t, "pattern1", extra1.StateChanges[0].GetPatternDefine().Template) + assert.Equal(t, uint64(1), extra1.StateChanges[1].GetDictEntryDefine().Id) + assert.Equal(t, "value1", extra1.StateChanges[1].GetDictEntryDefine().Value) + assert.Equal(t, uint64(2), extra1.StateChanges[2].GetPatternDefine().PatternId) + assert.Equal(t, "pattern2", 
extra1.StateChanges[2].GetPatternDefine().Template) + assert.Equal(t, uint64(2), extra1.StateChanges[3].GetDictEntryDefine().Id) + assert.Equal(t, "value2", extra1.StateChanges[3].GetDictEntryDefine().Value) + + // Batch 2 (6 entries): log, del p1, del d1, add p3, add d3, log + input <- createLogMsg("log with p2/d2") + input <- createPatternDeleteMsg(1) + input <- createDictEntryDeleteMsg(1) + input <- createPatternDefineMsg(3, "pattern3") + input <- createDictEntryDefineMsg(3, "value3") + input <- createLogMsg("log with p3/d3") + + // Advance clock to trigger timer-based flush for batch 2 + clk.Add(2 * timerInterval) + + // Receive and verify Batch 2 + payload2 := <-output + require.Equal(t, 6, len(payload2.MessageMetas), "Batch 2 should have 6 messages") + + // Verify StatefulExtra for Batch 2 + require.NotNil(t, payload2.StatefulExtra, "Batch 2 should have StatefulExtra") + extra2, ok := payload2.StatefulExtra.(*StatefulExtra) + require.True(t, ok, "StatefulExtra should be of type *StatefulExtra") + require.Equal(t, 4, len(extra2.StateChanges), "Batch 2 should have 4 state changes") + + // Check specific state changes in Batch 2 + assert.Equal(t, uint64(1), extra2.StateChanges[0].GetPatternDelete().PatternId) + assert.Equal(t, uint64(1), extra2.StateChanges[1].GetDictEntryDelete().Id) + assert.Equal(t, uint64(3), extra2.StateChanges[2].GetPatternDefine().PatternId) + assert.Equal(t, "pattern3", extra2.StateChanges[2].GetPatternDefine().Template) + assert.Equal(t, uint64(3), extra2.StateChanges[3].GetDictEntryDefine().Id) + assert.Equal(t, "value3", extra2.StateChanges[3].GetDictEntryDefine().Value) + + // Batch 3 (3 entries): add p4, add d4, log + input <- createPatternDefineMsg(4, "pattern4") + input <- createDictEntryDefineMsg(4, "value4") + input <- createLogMsg("log with p4/d4") + + // Advance clock to trigger timer-based flush for batch 3 + clk.Add(2 * timerInterval) + + // Receive and verify Batch 3 + payload3 := <-output + require.Equal(t, 3, 
len(payload3.MessageMetas), "Batch 3 should have 3 messages") + + // Verify StatefulExtra for Batch 3 + require.NotNil(t, payload3.StatefulExtra, "Batch 3 should have StatefulExtra") + extra3, ok := payload3.StatefulExtra.(*StatefulExtra) + require.True(t, ok, "StatefulExtra should be of type *StatefulExtra") + require.Equal(t, 2, len(extra3.StateChanges), "Batch 3 should have 2 state changes") + + // Check specific state changes in Batch 3 + assert.Equal(t, uint64(4), extra3.StateChanges[0].GetPatternDefine().PatternId) + assert.Equal(t, "pattern4", extra3.StateChanges[0].GetPatternDefine().Template) + assert.Equal(t, uint64(4), extra3.StateChanges[1].GetDictEntryDefine().Id) + assert.Equal(t, "value4", extra3.StateChanges[1].GetDictEntryDefine().Value) + + strategy.Stop() +} diff --git a/pkg/logs/sender/grpc/inflight.go b/pkg/logs/sender/grpc/inflight.go index fc79deaa892e..b42580c4563b 100644 --- a/pkg/logs/sender/grpc/inflight.go +++ b/pkg/logs/sender/grpc/inflight.go @@ -6,6 +6,8 @@ package grpc import ( + "google.golang.org/protobuf/proto" + "github.com/DataDog/datadog-agent/pkg/logs/message" ) @@ -21,22 +23,29 @@ import ( // BatchID tracking: // - Sent payloads have sequential batchIDs: [headBatchID, headBatchID+1, ..., headBatchID+sentSize-1] // - Only tracks headBatchID (oldest sent) and nextBatchID (next to be assigned) +// +// Snapshot State: +// - Maintains accumulated state changes for stream bootstrapping +// - Represents the state "before" the first payload in the queue +// - Updated when payloads are acknowledged (popped) type inflightTracker struct { items []*message.Payload - head int // Index of the oldest sent item (awaiting ack) - sentTail int // Index of the first buffered item that's not yet sent - tail int // Index of the next available slot for new buffered items - cap int // Maximum total capacity of the tracker - headBatchID uint32 // BatchID of the oldest sent payload (at head) - batchIDCounter uint32 // Next batchID to be assigned when 
markSent is called + head int // Index of the oldest sent item (awaiting ack) + sentTail int // Index of the first buffered item that's not yet sent + tail int // Index of the next available slot for new buffered items + cap int // Maximum total capacity of the tracker + headBatchID uint32 // BatchID of the oldest sent payload (at head) + batchIDCounter uint32 // Next batchID to be assigned when markSent is called + snapshot *snapshotState // Accumulated state for new streams } // newInflightTracker creates a new bounded inflight tracker with the given capacity // Allocates capacity+1 slots to implement the "waste one slot" ring buffer pattern func newInflightTracker(capacity int) *inflightTracker { return &inflightTracker{ - items: make([]*message.Payload, capacity+1), - cap: capacity, + items: make([]*message.Payload, capacity+1), + cap: capacity, + snapshot: newSnapshotState(), } } @@ -58,6 +67,7 @@ func (t *inflightTracker) append(payload *message.Payload) bool { // pop removes and returns the oldest sent payload (at head) after receiving an ack // Returns nil if there are no sent payloads +// Also applies any state changes from the payload to the snapshot state func (t *inflightTracker) pop() *message.Payload { if t.head == t.sentTail { return nil @@ -66,6 +76,13 @@ func (t *inflightTracker) pop() *message.Payload { t.items[t.head] = nil // Allow GC t.head = (t.head + 1) % len(t.items) + // Apply state changes from this payload to snapshot + if payload.StatefulExtra != nil { + if extra, ok := payload.StatefulExtra.(*StatefulExtra); ok { + t.snapshot.apply(extra) + } + } + // Advance headBatchID for the next payload if t.head != t.sentTail { t.headBatchID++ @@ -143,3 +160,76 @@ func (t *inflightTracker) resetOnRotation() { t.headBatchID = 1 t.batchIDCounter = 1 } + +// getSnapshot returns the current snapshot state for stream bootstrapping +// Returns serialized bytes (marshaled DatumSequence) or nil if empty +func (t *inflightTracker) getSnapshot() []byte { + 
return t.snapshot.serialize() +} + +// snapshotState maintains the accumulated state changes for stream bootstrapping +// It represents the state "before" the first payload in the inflight queue +type snapshotState struct { + dictMap map[uint64]*DictEntryDefine // Dictionary entries by ID + patternMap map[uint64]*PatternDefine // Patterns by ID +} + +// newSnapshotState creates a new empty snapshot state +func newSnapshotState() *snapshotState { + return &snapshotState{ + dictMap: make(map[uint64]*DictEntryDefine), + patternMap: make(map[uint64]*PatternDefine), + } +} + +// apply updates the snapshot state by processing state changes from a payload +func (s *snapshotState) apply(extra *StatefulExtra) { + if extra == nil { + return + } + + for _, datum := range extra.StateChanges { + switch d := datum.Data.(type) { + case *Datum_PatternDefine: + s.patternMap[d.PatternDefine.PatternId] = d.PatternDefine + case *Datum_PatternDelete: + delete(s.patternMap, d.PatternDelete.PatternId) + case *Datum_DictEntryDefine: + s.dictMap[d.DictEntryDefine.Id] = d.DictEntryDefine + case *Datum_DictEntryDelete: + delete(s.dictMap, d.DictEntryDelete.Id) + } + } +} + +// serialize returns the current snapshot state as serialized bytes +// Returns the marshaled DatumSequence containing all pattern and dictionary definitions +// Used to send snapshot on new stream creation +func (s *snapshotState) serialize() []byte { + // Calculate total datums needed + totalSize := len(s.patternMap) + len(s.dictMap) + + if totalSize == 0 { + return nil + } + + datums := make([]*Datum, 0, totalSize) + + for _, pattern := range s.patternMap { + datums = append(datums, &Datum{ + Data: &Datum_PatternDefine{PatternDefine: pattern}, + }) + } + for _, entry := range s.dictMap { + datums = append(datums, &Datum{ + Data: &Datum_DictEntryDefine{DictEntryDefine: entry}, + }) + } + + datumSeq := &DatumSequence{ + Data: datums, + } + + serialized, _ := proto.Marshal(datumSeq) + return serialized +} diff --git 
a/pkg/logs/sender/grpc/stream_worker.go b/pkg/logs/sender/grpc/stream_worker.go index f5dcdff2f416..e0f344462e78 100644 --- a/pkg/logs/sender/grpc/stream_worker.go +++ b/pkg/logs/sender/grpc/stream_worker.go @@ -291,7 +291,7 @@ func (s *streamWorker) sendPayloads() { } batchID := s.inflight.nextBatchID() - batch := s.payloadToBatch(payload, batchID) + batch := s.createBatch(payload.Encoded, batchID) // TODO Send call can block, by TCP/HTTP2 flow controls if err := s.currentStream.stream.Send(batch); err != nil { @@ -305,6 +305,30 @@ func (s *streamWorker) sendPayloads() { } } +// sendSnapshot sends the snapshot state as batch 0 on a new stream +// Returns true if successful, initiates stream rotation and returns false if failed +func (s *streamWorker) sendSnapshot() bool { + serialized := s.inflight.getSnapshot() + + // Snapshot is empty means no state + if serialized == nil { + return true + } + + // Create batch with batchID 0 (reserved for snapshot) + batch := s.createBatch(serialized, 0) + + // Send snapshot + if err := s.currentStream.stream.Send(batch); err != nil { + log.Warnf("Worker %s: Failed to send snapshot: %v, initiating stream rotation", s.workerID, err) + s.beginStreamRotation() + return false + } + + log.Infof("Worker %s: Sent snapshot (%d bytes)", s.workerID, len(serialized)) + return true +} + // handleBatchAck processes a BatchStatus acknowledgment from the server func (s *streamWorker) handleBatchAck(ack *batchAck) { // Ignore stale acks from old streams @@ -314,6 +338,11 @@ func (s *streamWorker) handleBatchAck(ack *batchAck) { receivedBatchID := uint32(ack.status.BatchId) + // Handle snapshot/state ack (batch 0) - no payload to pop + if receivedBatchID == 0 { + return + } + // The two errors below should never happen if Intake is implemented // correctly, but we are being defensive. 
@@ -463,8 +492,12 @@ func (s *streamWorker) finishStreamRotation(streamInfo *streamInfo) { log.Infof("Worker %s: Stream rotation complete, now active", s.workerID) - // TODO implement: transmit the snapshot state first - // Then send the remaining buffered payloads + // Send snapshot state first (batch 0) + if !s.sendSnapshot() { + return + } + + // Then send the remaining buffered payloads (batch 1, 2, ...) if s.inflight.hasUnSent() { s.sendPayloads() } @@ -659,15 +692,12 @@ func (s *streamWorker) handleIrrecoverableError(_ string, streamInfo *streamInfo s.signalRecvFailure(streamInfo) } -// payloadToBatch converts a message payload to a StatefulBatch -// The payload.Encoded contains serialized DatumSequence (from batch_strategy) -func (s *streamWorker) payloadToBatch(payload *message.Payload, batchID uint32) *StatefulBatch { - batch := &StatefulBatch{ +// createBatch creates a StatefulBatch from serialized data and batch ID +func (s *streamWorker) createBatch(data []byte, batchID uint32) *StatefulBatch { + return &StatefulBatch{ BatchId: batchID, - Data: payload.Encoded, + Data: data, } - - return batch } // createStoppedTimer creates a timer that is stopped and has its channel drained diff --git a/pkg/logs/sender/grpc/stream_worker_test.go b/pkg/logs/sender/grpc/stream_worker_test.go index 3fb08a5a4cb8..fce20f4338d1 100644 --- a/pkg/logs/sender/grpc/stream_worker_test.go +++ b/pkg/logs/sender/grpc/stream_worker_test.go @@ -20,6 +20,7 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" + "google.golang.org/protobuf/proto" "github.com/DataDog/datadog-agent/comp/logs/agent/config" "github.com/DataDog/datadog-agent/pkg/logs/client" @@ -148,6 +149,16 @@ func (m *mockLogsStream) getSentBatchCount() int { return len(m.sentBatches) } +// Helper to get a specific sent batch by index +func (m *mockLogsStream) getSentBatch(index int) *StatefulBatch { + m.mu.Lock() + defer m.mu.Unlock() + if index < 0 || index >= 
len(m.sentBatches) { + return nil + } + return m.sentBatches[index] +} + // mockLogsClient implements StatefulLogsServiceClient for testing type mockLogsClient struct { mu sync.Mutex @@ -787,3 +798,255 @@ func TestStreamWorkerErrorRecovery(t *testing.T) { t.Fatal("Message should appear in outputChan after recv error rotation and ack") } } + +// Helper functions to create Datum objects for testing +func createPatternDefine(id uint64, template string) *Datum { + return &Datum{ + Data: &Datum_PatternDefine{ + PatternDefine: &PatternDefine{ + PatternId: id, + Template: template, + }, + }, + } +} + +func createPatternDelete(id uint64) *Datum { + return &Datum{ + Data: &Datum_PatternDelete{ + PatternDelete: &PatternDelete{ + PatternId: id, + }, + }, + } +} + +func createDictEntryDefine(id uint64, value string) *Datum { + return &Datum{ + Data: &Datum_DictEntryDefine{ + DictEntryDefine: &DictEntryDefine{ + Id: id, + Value: value, + }, + }, + } +} + +func createDictEntryDelete(id uint64) *Datum { + return &Datum{ + Data: &Datum_DictEntryDelete{ + DictEntryDelete: &DictEntryDelete{ + Id: id, + }, + }, + } +} + +// createPayloadWithState creates a payload with state changes in StatefulExtra +func createPayloadWithState(content string, stateChanges []*Datum) *message.Payload { + payload := createWorkerTestPayload(content) + if len(stateChanges) > 0 { + payload.StatefulExtra = &StatefulExtra{ + StateChanges: stateChanges, + } + } + return payload +} + +// verifySnapshotContents checks if a snapshot batch contains the expected state +func verifySnapshotContents(t *testing.T, batch *StatefulBatch, expectedPatterns map[uint64]string, expectedDictEntries map[uint64]string) { + require.NotNil(t, batch) + require.Equal(t, uint32(0), batch.BatchId, "Snapshot should have batch ID 0") + + // Deserialize the snapshot data (it's a DatumSequence) + var datumSeq DatumSequence + err := proto.Unmarshal(batch.Data, &datumSeq) + require.NoError(t, err) + + // Count what we find + foundPatterns 
:= make(map[uint64]string) + foundDictEntries := make(map[uint64]string) + + for _, datum := range datumSeq.Data { + switch d := datum.Data.(type) { + case *Datum_PatternDefine: + foundPatterns[d.PatternDefine.PatternId] = d.PatternDefine.Template + case *Datum_DictEntryDefine: + foundDictEntries[d.DictEntryDefine.Id] = d.DictEntryDefine.Value + default: + t.Fatalf("Snapshot should only contain PatternDefine and DictEntryDefine, got: %T", datum.Data) + } + } + + require.Equal(t, expectedPatterns, foundPatterns, "Snapshot patterns mismatch") + require.Equal(t, expectedDictEntries, foundDictEntries, "Snapshot dict entries mismatch") +} + +// TestStreamWorkerSnapshot tests the snapshot functionality across stream rotations +func TestStreamWorkerSnapshot(t *testing.T) { + fixture := newTestFixture(t) + defer fixture.cleanup() + + // Override stream lifetime for this test + fixture.streamLifetime = time.Second + worker := fixture.createWorker() + worker.start() + + // Wait for initial stream to be ready + var stream1 *mockLogsStream + require.Eventually(t, func() bool { + stream1 = fixture.mockClient.getCurrentStream() + return stream1 != nil && worker.streamState == active + }, testTimeout, testTickInterval, "Initial stream should be established") + + // === Step 1: Send Batch 1 (5 entries) === + batch1StateChanges := []*Datum{ + createPatternDefine(1, "pattern1"), + createDictEntryDefine(1, "value1"), + createPatternDefine(2, "pattern2"), + createDictEntryDefine(2, "value2"), + } + batch1Payload := createPayloadWithState("log with p1/d1", batch1StateChanges) + fixture.inputChan <- batch1Payload + + // Wait for batch 1 to be sent + require.Eventually(t, func() bool { + return stream1.getSentBatchCount() == 1 + }, testTimeout, testTickInterval, "Batch 1 should be sent") + + // === Step 2: Ack Batch 1 === + stream1.sendAck(1) + + // Verify batch 1 appears in outputChan + select { + case output := <-fixture.outputChan: + require.Equal(t, batch1Payload, output) + case 
<-time.After(testTimeout): + t.Fatal("Batch 1 should appear in outputChan") + } + + // === Step 3: Send Batch 2 (6 entries) === + batch2StateChanges := []*Datum{ + createPatternDelete(1), + createDictEntryDelete(1), + createPatternDefine(3, "pattern3"), + createDictEntryDefine(3, "value3"), + } + batch2Payload := createPayloadWithState("log with p2/d2 and p3/d3", batch2StateChanges) + fixture.inputChan <- batch2Payload + + // Wait for batch 2 to be sent + require.Eventually(t, func() bool { + return stream1.getSentBatchCount() == 2 + }, testTimeout, testTickInterval, "Batch 2 should be sent") + + // === Step 4: Cut stream with recv failure (before acking batch 2) === + stream1.injectRecvError(io.EOF) + + // Wait for stream rotation + var stream2 *mockLogsStream + require.Eventually(t, func() bool { + stream2 = fixture.mockClient.getCurrentStream() + return stream2 != nil && stream2 != stream1 && worker.streamState == active + }, testTimeout, testTickInterval, "Stream should rotate after recv failure") + + // === Step 5: Verify snapshot on new stream === + // Snapshot should contain state BEFORE batch 2: {p1, p2, d1, d2} + require.Eventually(t, func() bool { + return stream2.getSentBatchCount() >= 1 + }, testTimeout, testTickInterval, "Snapshot should be sent on new stream") + + snapshotBatch := stream2.getSentBatch(0) + expectedPatterns1 := map[uint64]string{ + 1: "pattern1", + 2: "pattern2", + } + expectedDictEntries1 := map[uint64]string{ + 1: "value1", + 2: "value2", + } + verifySnapshotContents(t, snapshotBatch, expectedPatterns1, expectedDictEntries1) + + // === Step 6: Ack snapshot (batch 0) === + stream2.sendAck(0) + + // === Step 7: Verify Batch 2 is retransmitted === + require.Eventually(t, func() bool { + return stream2.getSentBatchCount() == 2 + }, testTimeout, testTickInterval, "Batch 2 should be retransmitted") + + batch2Retransmitted := stream2.getSentBatch(1) + require.Equal(t, uint32(1), batch2Retransmitted.BatchId) + + // === Step 8: Ack Batch 2 
=== + stream2.sendAck(1) + + // Verify batch 2 appears in outputChan + select { + case output := <-fixture.outputChan: + require.Equal(t, batch2Payload, output) + case <-time.After(testTimeout): + t.Fatal("Batch 2 should appear in outputChan") + } + + // === Step 9: Send Batch 3 (3 entries) === + batch3StateChanges := []*Datum{ + createPatternDefine(4, "pattern4"), + createDictEntryDefine(4, "value4"), + } + batch3Payload := createPayloadWithState("log with p4/d4", batch3StateChanges) + fixture.inputChan <- batch3Payload + + // Wait for batch 3 to be sent + require.Eventually(t, func() bool { + return stream2.getSentBatchCount() == 3 + }, testTimeout, testTickInterval, "Batch 3 should be sent") + + // === Step 10: Stream timer expires === + fixture.mockClock.Add(time.Second) + + // Worker should enter draining state (batch 3 is still inflight) + require.Eventually(t, func() bool { + return worker.streamState == draining + }, testTimeout, testTickInterval, "Worker should enter draining state") + + // === Step 11: Drain timer expires (force rotation) === + fixture.mockClock.Add(5 * time.Second) // drainTimeout is 5 seconds + + // Wait for new stream to be created + var stream3 *mockLogsStream + require.Eventually(t, func() bool { + stream3 = fixture.mockClient.getCurrentStream() + return stream3 != nil && stream3 != stream2 && worker.streamState == active + }, testTimeout, testTickInterval, "Stream should rotate after drain timeout") + + // === Step 12: Verify snapshot on new stream === + // Snapshot should contain state AFTER batch 2, BEFORE batch 3: {p2, p3, d2, d3} + // (p1/d1 were deleted in batch 2) + require.Eventually(t, func() bool { + return stream3.getSentBatchCount() >= 1 + }, testTimeout, testTickInterval, "Snapshot should be sent on new stream") + + snapshotBatch2 := stream3.getSentBatch(0) + expectedPatterns2 := map[uint64]string{ + 2: "pattern2", + 3: "pattern3", + } + expectedDictEntries2 := map[uint64]string{ + 2: "value2", + 3: "value3", + } + 
verifySnapshotContents(t, snapshotBatch2, expectedPatterns2, expectedDictEntries2) + + // Ack snapshot and batch 3 + stream3.sendAck(0) + stream3.sendAck(1) + + // Verify batch 3 appears in outputChan + select { + case output := <-fixture.outputChan: + require.Equal(t, batch3Payload, output) + case <-time.After(testTimeout): + t.Fatal("Batch 3 should appear in outputChan") + } +} From 90e0d97ce62b0c00c6d00731fab6c41d7c18cfa0 Mon Sep 17 00:00:00 2001 From: Jake Saferstein Date: Wed, 5 Nov 2025 18:31:39 +0000 Subject: [PATCH 11/16] move stateful encoding proto to proper location adapted the stateful transport implementation to import the new location --- pkg/logs/message/message.go | 4 +- pkg/logs/sender/grpc/batch_strategy.go | 49 ++-- pkg/logs/sender/grpc/batch_strategy_test.go | 55 ++-- pkg/logs/sender/grpc/inflight.go | 29 +- pkg/logs/sender/grpc/mock_state.go | 9 +- pkg/logs/sender/grpc/sender.go | 5 +- .../sender/grpc/stateful_encoding_grpc.pb.go | 115 -------- pkg/logs/sender/grpc/stream_worker.go | 21 +- pkg/logs/sender/grpc/stream_worker_test.go | 73 ++--- .../datadog/stateful}/stateful_encoding.proto | 7 +- .../pbgo/statefulpb}/stateful_encoding.pb.go | 256 +++++++++++++----- tasks/protobuf.py | 1 + 12 files changed, 310 insertions(+), 314 deletions(-) delete mode 100644 pkg/logs/sender/grpc/stateful_encoding_grpc.pb.go rename pkg/{logs/sender/grpc => proto/datadog/stateful}/stateful_encoding.proto (90%) rename pkg/{logs/sender/grpc => proto/pbgo/statefulpb}/stateful_encoding.pb.go (71%) diff --git a/pkg/logs/message/message.go b/pkg/logs/message/message.go index 448597f24062..652b826ff015 100644 --- a/pkg/logs/message/message.go +++ b/pkg/logs/message/message.go @@ -12,6 +12,7 @@ import ( "time" "github.com/DataDog/datadog-agent/pkg/logs/sources" + "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" "github.com/DataDog/datadog-agent/pkg/util/log" ) @@ -74,9 +75,8 @@ type Message struct { // StatefulMessage represents a log message for gRPC 
stateful streaming // It contains a Datum (from stateful_encoding.proto) and associated metadata -// Datum is stored as `any` to avoid import cycle with sender/grpc package type StatefulMessage struct { - Datum any // Will hold *grpc.Datum + Datum *statefulpb.Datum Metadata *MessageMetadata } diff --git a/pkg/logs/sender/grpc/batch_strategy.go b/pkg/logs/sender/grpc/batch_strategy.go index c2636dc0eb5d..3bac15e1c61b 100644 --- a/pkg/logs/sender/grpc/batch_strategy.go +++ b/pkg/logs/sender/grpc/batch_strategy.go @@ -15,6 +15,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/message" "github.com/DataDog/datadog-agent/pkg/logs/metrics" "github.com/DataDog/datadog-agent/pkg/logs/sender" + "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" "github.com/DataDog/datadog-agent/pkg/telemetry" "github.com/DataDog/datadog-agent/pkg/util/compression" "github.com/DataDog/datadog-agent/pkg/util/log" @@ -27,15 +28,15 @@ var ( // StatefulExtra holds state changes (non-Log datums) from a batch // Used by inflight tracker to maintain snapshot state for stream rotation type StatefulExtra struct { - StateChanges []*Datum + StateChanges []*statefulpb.Datum } // isStateDatum returns true if the datum represents a state change // (pattern/dict define/delete operations) -func isStateDatum(datum *Datum) bool { +func isStateDatum(datum *statefulpb.Datum) bool { switch datum.Data.(type) { - case *Datum_PatternDefine, *Datum_PatternDelete, - *Datum_DictEntryDefine, *Datum_DictEntryDelete: + case *statefulpb.Datum_PatternDefine, *statefulpb.Datum_PatternDelete, + *statefulpb.Datum_DictEntryDefine, *statefulpb.Datum_DictEntryDelete: return true default: return false @@ -57,7 +58,7 @@ type batchStrategy struct { clock clock.Clock // For gRPC: store Datums separately since MessageBuffer only stores metadata - grpcDatums []*Datum + grpcDatums []*statefulpb.Datum // Telemetry pipelineMonitor metrics.PipelineMonitor @@ -103,7 +104,7 @@ func newBatchStrategyWithClock(inputChan chan 
*message.StatefulMessage, stopChan: make(chan struct{}), pipelineName: pipelineName, clock: clock, - grpcDatums: make([]*Datum, 0), + grpcDatums: make([]*statefulpb.Datum, 0), pipelineMonitor: pipelineMonitor, utilization: pipelineMonitor.MakeUtilizationMonitor(metrics.StrategyTlmName, instanceID), instanceID: instanceID, @@ -144,27 +145,23 @@ func (s *batchStrategy) Start() { }() } -func (s *batchStrategy) addMessage(m *message.StatefulMessage) (bool, error) { +func (s *batchStrategy) addMessage(m *message.StatefulMessage) bool { // No utilization tracking here - just trivial slice operations // Real work (proto marshaling) is tracked in sendMessagesWithDatums() - // Validate Datum first + // Defensive check - should never happen with proper message construction if m.Datum == nil { - return false, log.Errorf("StatefulMessage has nil Datum") - } - datum, ok := m.Datum.(*Datum) - if !ok { - return false, log.Errorf("StatefulMessage Datum has wrong type: %T", m.Datum) + return false } // Try to add to buffer if s.buffer.AddMessageWithSize(m.Metadata, m.Metadata.RawDataLen) { - s.grpcDatums = append(s.grpcDatums, datum) - return true, nil + s.grpcDatums = append(s.grpcDatums, m.Datum) + return true } // Buffer full (not an error) - return false, nil + return false } // Mostly copy/pasted from batch.go @@ -174,22 +171,14 @@ func (s *batchStrategy) processMessage(m *message.StatefulMessage, outputChan ch m.Metadata.Origin.LogSource.LatencyStats.Add(m.Metadata.GetLatency()) } - added, err := s.addMessage(m) - if err != nil { - log.Warnf("Invalid message in pipeline=%s: %v - dropping", s.pipelineName, err) - return - } + added := s.addMessage(m) if !added || s.buffer.IsFull() { s.flushBuffer(outputChan) } if !added { // it's possible that the m could not be added because the buffer was full // so we need to retry once again - added, err = s.addMessage(m) - if err != nil { - log.Warnf("Invalid message in pipeline=%s: %v - dropping", s.pipelineName, err) - return - } + 
added = s.addMessage(m) if !added { log.Warnf("Dropped message in pipeline=%s reason=too-large ContentLength=%d ContentSizeLimit=%d", s.pipelineName, m.Metadata.RawDataLen, s.buffer.ContentSizeLimit()) tlmDroppedTooLarge.Inc(s.pipelineName) @@ -211,12 +200,12 @@ func (s *batchStrategy) flushBuffer(outputChan chan *message.Payload) { // Use the collected Datums and clear them grpcDatums := s.grpcDatums - s.grpcDatums = make([]*Datum, 0) + s.grpcDatums = make([]*statefulpb.Datum, 0) s.sendMessagesWithDatums(messagesMetadata, grpcDatums, outputChan) } -func (s *batchStrategy) sendMessagesWithDatums(messagesMetadata []*message.MessageMetadata, grpcDatums []*Datum, outputChan chan *message.Payload) { +func (s *batchStrategy) sendMessagesWithDatums(messagesMetadata []*message.MessageMetadata, grpcDatums []*statefulpb.Datum, outputChan chan *message.Payload) { defer s.utilization.Stop() unencodedSize := 0 @@ -225,7 +214,7 @@ func (s *batchStrategy) sendMessagesWithDatums(messagesMetadata []*message.Messa } // Extract all state changes from this batch for snapshot management - var stateChanges []*Datum + var stateChanges []*statefulpb.Datum for _, datum := range grpcDatums { if isStateDatum(datum) { stateChanges = append(stateChanges, datum) @@ -233,7 +222,7 @@ func (s *batchStrategy) sendMessagesWithDatums(messagesMetadata []*message.Messa } // Create DatumSequence and marshal to bytes - datumSeq := &DatumSequence{ + datumSeq := &statefulpb.DatumSequence{ Data: grpcDatums, } diff --git a/pkg/logs/sender/grpc/batch_strategy_test.go b/pkg/logs/sender/grpc/batch_strategy_test.go index d324bb5f29f4..4a88d2deb216 100644 --- a/pkg/logs/sender/grpc/batch_strategy_test.go +++ b/pkg/logs/sender/grpc/batch_strategy_test.go @@ -19,6 +19,7 @@ import ( compressionfx "github.com/DataDog/datadog-agent/comp/serializer/logscompression/fx-mock" "github.com/DataDog/datadog-agent/pkg/logs/message" "github.com/DataDog/datadog-agent/pkg/logs/metrics" + 
"github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" "github.com/DataDog/datadog-agent/pkg/util/compression" ) @@ -27,11 +28,11 @@ func createTestStatefulMessage(content string) *message.StatefulMessage { msg := message.NewMessage([]byte(content), nil, "", 0) msg.MessageMetadata.RawDataLen = len(content) - datum := &Datum{ - Data: &Datum_Logs{ - Logs: &Log{ + datum := &statefulpb.Datum{ + Data: &statefulpb.Datum_Logs{ + Logs: &statefulpb.Log{ Timestamp: 12345, - Content: &Log_Raw{ + Content: &statefulpb.Log_Raw{ Raw: content, }, }, @@ -77,7 +78,7 @@ func TestBatchStrategySendsPayloadWhenBufferIsFull(t *testing.T) { assert.Equal(t, 2, payload.UnencodedSize) // Verify the payload contains valid DatumSequence - var datumSeq DatumSequence + var datumSeq statefulpb.DatumSequence err := proto.Unmarshal(payload.Encoded, &datumSeq) require.NoError(t, err) assert.Equal(t, 2, len(datumSeq.Data)) @@ -123,7 +124,7 @@ func TestBatchStrategySendsPayloadWhenBufferIsOutdated(t *testing.T) { assert.EqualValues(t, m.Metadata, payload.MessageMetas[0]) // Verify payload contains valid DatumSequence - var datumSeq DatumSequence + var datumSeq statefulpb.DatumSequence err := proto.Unmarshal(payload.Encoded, &datumSeq) require.NoError(t, err) assert.Equal(t, 1, len(datumSeq.Data)) @@ -397,13 +398,7 @@ func TestBatchStrategyInvalidDatum(t *testing.T) { } input <- invalidMsg1 - // Send message with wrong Datum type - msg2 := message.NewMessage([]byte("test"), nil, "", 0) - invalidMsg2 := &message.StatefulMessage{ - Metadata: &msg2.MessageMetadata, - Datum: "wrong type", - } - input <- invalidMsg2 + // Note: With strongly-typed Datum field, wrong type is prevented at compile time // Send a valid message validMsg := createTestStatefulMessage("valid") @@ -457,7 +452,7 @@ func TestBatchStrategyCompression(t *testing.T) { assert.NotEmpty(t, payload.Encoded) // Verify the payload contains valid DatumSequence (identity compression = no compression) - var datumSeq DatumSequence + var 
datumSeq statefulpb.DatumSequence err := proto.Unmarshal(payload.Encoded, &datumSeq) require.NoError(t, err) assert.Equal(t, 5, len(datumSeq.Data)) @@ -496,9 +491,9 @@ func TestBatchStrategyStatefulExtra(t *testing.T) { msg.MessageMetadata.RawDataLen = 0 return &message.StatefulMessage{ Metadata: &msg.MessageMetadata, - Datum: &Datum{ - Data: &Datum_PatternDefine{ - PatternDefine: &PatternDefine{ + Datum: &statefulpb.Datum{ + Data: &statefulpb.Datum_PatternDefine{ + PatternDefine: &statefulpb.PatternDefine{ PatternId: id, Template: template, }, @@ -512,9 +507,9 @@ func TestBatchStrategyStatefulExtra(t *testing.T) { msg.MessageMetadata.RawDataLen = 0 return &message.StatefulMessage{ Metadata: &msg.MessageMetadata, - Datum: &Datum{ - Data: &Datum_DictEntryDefine{ - DictEntryDefine: &DictEntryDefine{ + Datum: &statefulpb.Datum{ + Data: &statefulpb.Datum_DictEntryDefine{ + DictEntryDefine: &statefulpb.DictEntryDefine{ Id: id, Value: value, }, @@ -528,9 +523,9 @@ func TestBatchStrategyStatefulExtra(t *testing.T) { msg.MessageMetadata.RawDataLen = 0 return &message.StatefulMessage{ Metadata: &msg.MessageMetadata, - Datum: &Datum{ - Data: &Datum_PatternDelete{ - PatternDelete: &PatternDelete{ + Datum: &statefulpb.Datum{ + Data: &statefulpb.Datum_PatternDelete{ + PatternDelete: &statefulpb.PatternDelete{ PatternId: id, }, }, @@ -543,9 +538,9 @@ func TestBatchStrategyStatefulExtra(t *testing.T) { msg.MessageMetadata.RawDataLen = 0 return &message.StatefulMessage{ Metadata: &msg.MessageMetadata, - Datum: &Datum{ - Data: &Datum_DictEntryDelete{ - DictEntryDelete: &DictEntryDelete{ + Datum: &statefulpb.Datum{ + Data: &statefulpb.Datum_DictEntryDelete{ + DictEntryDelete: &statefulpb.DictEntryDelete{ Id: id, }, }, @@ -558,11 +553,11 @@ func TestBatchStrategyStatefulExtra(t *testing.T) { msg.MessageMetadata.RawDataLen = len(content) return &message.StatefulMessage{ Metadata: &msg.MessageMetadata, - Datum: &Datum{ - Data: &Datum_Logs{ - Logs: &Log{ + Datum: &statefulpb.Datum{ + 
Data: &statefulpb.Datum_Logs{ + Logs: &statefulpb.Log{ Timestamp: 12345, - Content: &Log_Raw{ + Content: &statefulpb.Log_Raw{ Raw: content, }, }, diff --git a/pkg/logs/sender/grpc/inflight.go b/pkg/logs/sender/grpc/inflight.go index b42580c4563b..07437f5d99b6 100644 --- a/pkg/logs/sender/grpc/inflight.go +++ b/pkg/logs/sender/grpc/inflight.go @@ -9,6 +9,7 @@ import ( "google.golang.org/protobuf/proto" "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" ) // inflightTracker is a bounded FIFO queue that tracks payloads in two regions: @@ -170,15 +171,15 @@ func (t *inflightTracker) getSnapshot() []byte { // snapshotState maintains the accumulated state changes for stream bootstrapping // It represents the state "before" the first payload in the inflight queue type snapshotState struct { - dictMap map[uint64]*DictEntryDefine // Dictionary entries by ID - patternMap map[uint64]*PatternDefine // Patterns by ID + dictMap map[uint64]*statefulpb.DictEntryDefine + patternMap map[uint64]*statefulpb.PatternDefine } // newSnapshotState creates a new empty snapshot state func newSnapshotState() *snapshotState { return &snapshotState{ - dictMap: make(map[uint64]*DictEntryDefine), - patternMap: make(map[uint64]*PatternDefine), + dictMap: make(map[uint64]*statefulpb.DictEntryDefine), + patternMap: make(map[uint64]*statefulpb.PatternDefine), } } @@ -190,13 +191,13 @@ func (s *snapshotState) apply(extra *StatefulExtra) { for _, datum := range extra.StateChanges { switch d := datum.Data.(type) { - case *Datum_PatternDefine: + case *statefulpb.Datum_PatternDefine: s.patternMap[d.PatternDefine.PatternId] = d.PatternDefine - case *Datum_PatternDelete: + case *statefulpb.Datum_PatternDelete: delete(s.patternMap, d.PatternDelete.PatternId) - case *Datum_DictEntryDefine: + case *statefulpb.Datum_DictEntryDefine: s.dictMap[d.DictEntryDefine.Id] = d.DictEntryDefine - case *Datum_DictEntryDelete: + case 
*statefulpb.Datum_DictEntryDelete: delete(s.dictMap, d.DictEntryDelete.Id) } } @@ -213,20 +214,20 @@ func (s *snapshotState) serialize() []byte { return nil } - datums := make([]*Datum, 0, totalSize) + datums := make([]*statefulpb.Datum, 0, totalSize) for _, pattern := range s.patternMap { - datums = append(datums, &Datum{ - Data: &Datum_PatternDefine{PatternDefine: pattern}, + datums = append(datums, &statefulpb.Datum{ + Data: &statefulpb.Datum_PatternDefine{PatternDefine: pattern}, }) } for _, entry := range s.dictMap { - datums = append(datums, &Datum{ - Data: &Datum_DictEntryDefine{DictEntryDefine: entry}, + datums = append(datums, &statefulpb.Datum{ + Data: &statefulpb.Datum_DictEntryDefine{DictEntryDefine: entry}, }) } - datumSeq := &DatumSequence{ + datumSeq := &statefulpb.DatumSequence{ Data: datums, } diff --git a/pkg/logs/sender/grpc/mock_state.go b/pkg/logs/sender/grpc/mock_state.go index a21a5a90cb04..4c8d2713854b 100644 --- a/pkg/logs/sender/grpc/mock_state.go +++ b/pkg/logs/sender/grpc/mock_state.go @@ -11,6 +11,7 @@ import ( "unicode/utf8" "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" ) const nanoToMillis = 1000000 @@ -29,16 +30,16 @@ func StartMessageTranslator(inputChan chan *message.Message, outputChan chan *me } // Create the Log message using stateful_encoding.proto definitions - log := &Log{ + log := &statefulpb.Log{ Timestamp: uint64(ts.UnixNano() / nanoToMillis), - Content: &Log_Raw{ + Content: &statefulpb.Log_Raw{ Raw: toValidUtf8(msg.GetContent()), }, } // Wrap the Log in a Datum - datum := &Datum{ - Data: &Datum_Logs{ + datum := &statefulpb.Datum{ + Data: &statefulpb.Datum_Logs{ Logs: log, }, } diff --git a/pkg/logs/sender/grpc/sender.go b/pkg/logs/sender/grpc/sender.go index 5231e8e7a939..e4dd93c4a1b1 100644 --- a/pkg/logs/sender/grpc/sender.go +++ b/pkg/logs/sender/grpc/sender.go @@ -23,6 +23,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/message" 
"github.com/DataDog/datadog-agent/pkg/logs/metrics" "github.com/DataDog/datadog-agent/pkg/logs/sender" + "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" "github.com/DataDog/datadog-agent/pkg/util/log" "github.com/DataDog/datadog-agent/pkg/version" @@ -87,7 +88,7 @@ type Sender struct { // gRPC connection management (shared across all streams) conn *grpc.ClientConn - client StatefulLogsServiceClient + client statefulpb.StatefulLogsServiceClient } // NewSender creates a new gRPC sender that implements PipelineComponent @@ -209,7 +210,7 @@ func (s *Sender) createConnection() error { } s.conn = conn - s.client = NewStatefulLogsServiceClient(conn) + s.client = statefulpb.NewStatefulLogsServiceClient(conn) log.Infof("Successfully created gRPC connection to %s", address) return nil diff --git a/pkg/logs/sender/grpc/stateful_encoding_grpc.pb.go b/pkg/logs/sender/grpc/stateful_encoding_grpc.pb.go deleted file mode 100644 index dfcdaf5e26f7..000000000000 --- a/pkg/logs/sender/grpc/stateful_encoding_grpc.pb.go +++ /dev/null @@ -1,115 +0,0 @@ -// Code generated by protoc-gen-go-grpc. DO NOT EDIT. -// versions: -// - protoc-gen-go-grpc v1.5.1 -// - protoc v5.29.3 -// source: stateful_encoding.proto - -package grpc - -import ( - context "context" - grpc "google.golang.org/grpc" - codes "google.golang.org/grpc/codes" - status "google.golang.org/grpc/status" -) - -// This is a compile-time assertion to ensure that this generated file -// is compatible with the grpc package it is being compiled against. -// Requires gRPC-Go v1.64.0 or later. -const _ = grpc.SupportPackageIsVersion9 - -const ( - StatefulLogsService_LogsStream_FullMethodName = "/intake.StatefulLogsService/LogsStream" -) - -// StatefulLogsServiceClient is the client API for StatefulLogsService service. -// -// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. 
-type StatefulLogsServiceClient interface { - LogsStream(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[StatefulBatch, BatchStatus], error) -} - -type statefulLogsServiceClient struct { - cc grpc.ClientConnInterface -} - -func NewStatefulLogsServiceClient(cc grpc.ClientConnInterface) StatefulLogsServiceClient { - return &statefulLogsServiceClient{cc} -} - -func (c *statefulLogsServiceClient) LogsStream(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[StatefulBatch, BatchStatus], error) { - cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) - stream, err := c.cc.NewStream(ctx, &StatefulLogsService_ServiceDesc.Streams[0], StatefulLogsService_LogsStream_FullMethodName, cOpts...) - if err != nil { - return nil, err - } - x := &grpc.GenericClientStream[StatefulBatch, BatchStatus]{ClientStream: stream} - return x, nil -} - -// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. -type StatefulLogsService_LogsStreamClient = grpc.BidiStreamingClient[StatefulBatch, BatchStatus] - -// StatefulLogsServiceServer is the server API for StatefulLogsService service. -// All implementations must embed UnimplementedStatefulLogsServiceServer -// for forward compatibility. -type StatefulLogsServiceServer interface { - LogsStream(grpc.BidiStreamingServer[StatefulBatch, BatchStatus]) error - mustEmbedUnimplementedStatefulLogsServiceServer() -} - -// UnimplementedStatefulLogsServiceServer must be embedded to have -// forward compatible implementations. -// -// NOTE: this should be embedded by value instead of pointer to avoid a nil -// pointer dereference when methods are called. 
-type UnimplementedStatefulLogsServiceServer struct{} - -func (UnimplementedStatefulLogsServiceServer) LogsStream(grpc.BidiStreamingServer[StatefulBatch, BatchStatus]) error { - return status.Errorf(codes.Unimplemented, "method LogsStream not implemented") -} -func (UnimplementedStatefulLogsServiceServer) mustEmbedUnimplementedStatefulLogsServiceServer() {} -func (UnimplementedStatefulLogsServiceServer) testEmbeddedByValue() {} - -// UnsafeStatefulLogsServiceServer may be embedded to opt out of forward compatibility for this service. -// Use of this interface is not recommended, as added methods to StatefulLogsServiceServer will -// result in compilation errors. -type UnsafeStatefulLogsServiceServer interface { - mustEmbedUnimplementedStatefulLogsServiceServer() -} - -func RegisterStatefulLogsServiceServer(s grpc.ServiceRegistrar, srv StatefulLogsServiceServer) { - // If the following call pancis, it indicates UnimplementedStatefulLogsServiceServer was - // embedded by pointer and is nil. This will cause panics if an - // unimplemented method is ever invoked, so we test this at initialization - // time to prevent it from happening at runtime later due to I/O. - if t, ok := srv.(interface{ testEmbeddedByValue() }); ok { - t.testEmbeddedByValue() - } - s.RegisterService(&StatefulLogsService_ServiceDesc, srv) -} - -func _StatefulLogsService_LogsStream_Handler(srv interface{}, stream grpc.ServerStream) error { - return srv.(StatefulLogsServiceServer).LogsStream(&grpc.GenericServerStream[StatefulBatch, BatchStatus]{ServerStream: stream}) -} - -// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. -type StatefulLogsService_LogsStreamServer = grpc.BidiStreamingServer[StatefulBatch, BatchStatus] - -// StatefulLogsService_ServiceDesc is the grpc.ServiceDesc for StatefulLogsService service. 
-// It's only intended for direct use with grpc.RegisterService, -// and not to be introspected or modified (even as a copy) -var StatefulLogsService_ServiceDesc = grpc.ServiceDesc{ - ServiceName: "intake.StatefulLogsService", - HandlerType: (*StatefulLogsServiceServer)(nil), - Methods: []grpc.MethodDesc{}, - Streams: []grpc.StreamDesc{ - { - StreamName: "LogsStream", - Handler: _StatefulLogsService_LogsStream_Handler, - ServerStreams: true, - ClientStreams: true, - }, - }, - Metadata: "stateful_encoding.proto", -} diff --git a/pkg/logs/sender/grpc/stream_worker.go b/pkg/logs/sender/grpc/stream_worker.go index e0f344462e78..e7cd3b571f22 100644 --- a/pkg/logs/sender/grpc/stream_worker.go +++ b/pkg/logs/sender/grpc/stream_worker.go @@ -21,6 +21,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/client" "github.com/DataDog/datadog-agent/pkg/logs/message" "github.com/DataDog/datadog-agent/pkg/logs/sender" + "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" "github.com/DataDog/datadog-agent/pkg/util/backoff" "github.com/DataDog/datadog-agent/pkg/util/log" ) @@ -68,7 +69,7 @@ const ( // streamInfo holds all stream-related information type streamInfo struct { - stream StatefulLogsService_LogsStreamClient + stream statefulpb.StatefulLogsService_LogsStreamClient ctx context.Context cancel context.CancelFunc } @@ -82,7 +83,7 @@ type streamCreationResult struct { // batchAck wraps a batch acknowledgment with stream identity to prevent stale signals type batchAck struct { stream *streamInfo - status *BatchStatus + status *statefulpb.BatchStatus } // streamWorker manages a single gRPC bidirectional stream with Master-Slave threading model @@ -99,7 +100,7 @@ type streamWorker struct { // gRPC connection management (shared with other streams) conn *grpc.ClientConn - client StatefulLogsServiceClient + client statefulpb.StatefulLogsServiceClient // Stream management currentStream *streamInfo @@ -131,7 +132,7 @@ func newStreamWorker( inputChan chan *message.Payload, 
destinationsCtx *client.DestinationsContext, conn *grpc.ClientConn, - client StatefulLogsServiceClient, + client statefulpb.StatefulLogsServiceClient, sink sender.Sink, endpoint config.Endpoint, streamLifetime time.Duration, @@ -146,7 +147,7 @@ func newStreamWorkerWithClock( inputChan chan *message.Payload, destinationsCtx *client.DestinationsContext, conn *grpc.ClientConn, - client StatefulLogsServiceClient, + client statefulpb.StatefulLogsServiceClient, sink sender.Sink, endpoint config.Endpoint, streamLifetime time.Duration, @@ -291,7 +292,7 @@ func (s *streamWorker) sendPayloads() { } batchID := s.inflight.nextBatchID() - batch := s.createBatch(payload.Encoded, batchID) + batch := createBatch(payload.Encoded, batchID) // TODO Send call can block, by TCP/HTTP2 flow controls if err := s.currentStream.stream.Send(batch); err != nil { @@ -316,7 +317,7 @@ func (s *streamWorker) sendSnapshot() bool { } // Create batch with batchID 0 (reserved for snapshot) - batch := s.createBatch(serialized, 0) + batch := createBatch(serialized, 0) // Send snapshot if err := s.currentStream.stream.Send(batch); err != nil { @@ -676,7 +677,7 @@ func (s *streamWorker) signalRecvFailure(streamInfo *streamInfo) { // signalBatchAck forwards a batch acknowledgment to the supervisor // If the worker is stopped, returns without delivering (shutdown is in progress anyway) -func (s *streamWorker) signalBatchAck(streamInfo *streamInfo, msg *BatchStatus) { +func (s *streamWorker) signalBatchAck(streamInfo *streamInfo, msg *statefulpb.BatchStatus) { select { case s.batchAckCh <- &batchAck{stream: streamInfo, status: msg}: case <-s.stopChan: @@ -693,8 +694,8 @@ func (s *streamWorker) handleIrrecoverableError(_ string, streamInfo *streamInfo } // createBatch creates a StatefulBatch from serialized data and batch ID -func (s *streamWorker) createBatch(data []byte, batchID uint32) *StatefulBatch { - return &StatefulBatch{ +func createBatch(data []byte, batchID uint32) *statefulpb.StatefulBatch { + 
return &statefulpb.StatefulBatch{ BatchId: batchID, Data: data, } diff --git a/pkg/logs/sender/grpc/stream_worker_test.go b/pkg/logs/sender/grpc/stream_worker_test.go index fce20f4338d1..59c5114646e2 100644 --- a/pkg/logs/sender/grpc/stream_worker_test.go +++ b/pkg/logs/sender/grpc/stream_worker_test.go @@ -25,6 +25,7 @@ import ( "github.com/DataDog/datadog-agent/comp/logs/agent/config" "github.com/DataDog/datadog-agent/pkg/logs/client" "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" ) const ( @@ -55,16 +56,16 @@ type mockLogsStream struct { mu sync.Mutex // Channels for communication - sendCh chan *StatefulBatch // Batches sent by client - recvCh chan *BatchStatus // Acks to send to client - errCh chan error // To inject immediate errors in Recv() + sendCh chan *statefulpb.StatefulBatch // Batches sent by client + recvCh chan *statefulpb.BatchStatus // Acks to send to client + errCh chan error // To inject immediate errors in Recv() // Error control sendErr error // If set, next Send() will return this error recvErr error // If set, next Recv() will return this error // Track sent batches - sentBatches []*StatefulBatch + sentBatches []*statefulpb.StatefulBatch // Context ctx context.Context @@ -72,15 +73,15 @@ type mockLogsStream struct { func newMockLogsStream(ctx context.Context) *mockLogsStream { return &mockLogsStream{ - sendCh: make(chan *StatefulBatch, 100), - recvCh: make(chan *BatchStatus, 100), + sendCh: make(chan *statefulpb.StatefulBatch, 100), + recvCh: make(chan *statefulpb.BatchStatus, 100), errCh: make(chan error, 1), - sentBatches: make([]*StatefulBatch, 0), + sentBatches: make([]*statefulpb.StatefulBatch, 0), ctx: ctx, } } -func (m *mockLogsStream) Send(batch *StatefulBatch) error { +func (m *mockLogsStream) Send(batch *statefulpb.StatefulBatch) error { m.mu.Lock() if m.sendErr != nil { err := m.sendErr @@ -100,7 +101,7 @@ func (m *mockLogsStream) Send(batch *StatefulBatch) error { 
} } -func (m *mockLogsStream) Recv() (*BatchStatus, error) { +func (m *mockLogsStream) Recv() (*statefulpb.BatchStatus, error) { m.mu.Lock() if m.recvErr != nil { err := m.recvErr @@ -132,7 +133,7 @@ func (m *mockLogsStream) setSendError(err error) { // Helper to send an ack to the client func (m *mockLogsStream) sendAck(batchID int32) { - m.recvCh <- &BatchStatus{ + m.recvCh <- &statefulpb.BatchStatus{ BatchId: batchID, } } @@ -150,7 +151,7 @@ func (m *mockLogsStream) getSentBatchCount() int { } // Helper to get a specific sent batch by index -func (m *mockLogsStream) getSentBatch(index int) *StatefulBatch { +func (m *mockLogsStream) getSentBatch(index int) *statefulpb.StatefulBatch { m.mu.Lock() defer m.mu.Unlock() if index < 0 || index >= len(m.sentBatches) { @@ -175,7 +176,7 @@ func newMockLogsClient() *mockLogsClient { return &mockLogsClient{} } -func (m *mockLogsClient) LogsStream(ctx context.Context, _ ...grpc.CallOption) (StatefulLogsService_LogsStreamClient, error) { +func (m *mockLogsClient) LogsStream(ctx context.Context, _ ...grpc.CallOption) (statefulpb.StatefulLogsService_LogsStreamClient, error) { m.mu.Lock() defer m.mu.Unlock() @@ -800,10 +801,10 @@ func TestStreamWorkerErrorRecovery(t *testing.T) { } // Helper functions to create Datum objects for testing -func createPatternDefine(id uint64, template string) *Datum { - return &Datum{ - Data: &Datum_PatternDefine{ - PatternDefine: &PatternDefine{ +func createPatternDefine(id uint64, template string) *statefulpb.Datum { + return &statefulpb.Datum{ + Data: &statefulpb.Datum_PatternDefine{ + PatternDefine: &statefulpb.PatternDefine{ PatternId: id, Template: template, }, @@ -811,20 +812,20 @@ func createPatternDefine(id uint64, template string) *Datum { } } -func createPatternDelete(id uint64) *Datum { - return &Datum{ - Data: &Datum_PatternDelete{ - PatternDelete: &PatternDelete{ +func createPatternDelete(id uint64) *statefulpb.Datum { + return &statefulpb.Datum{ + Data: 
&statefulpb.Datum_PatternDelete{ + PatternDelete: &statefulpb.PatternDelete{ PatternId: id, }, }, } } -func createDictEntryDefine(id uint64, value string) *Datum { - return &Datum{ - Data: &Datum_DictEntryDefine{ - DictEntryDefine: &DictEntryDefine{ +func createDictEntryDefine(id uint64, value string) *statefulpb.Datum { + return &statefulpb.Datum{ + Data: &statefulpb.Datum_DictEntryDefine{ + DictEntryDefine: &statefulpb.DictEntryDefine{ Id: id, Value: value, }, @@ -832,10 +833,10 @@ func createDictEntryDefine(id uint64, value string) *Datum { } } -func createDictEntryDelete(id uint64) *Datum { - return &Datum{ - Data: &Datum_DictEntryDelete{ - DictEntryDelete: &DictEntryDelete{ +func createDictEntryDelete(id uint64) *statefulpb.Datum { + return &statefulpb.Datum{ + Data: &statefulpb.Datum_DictEntryDelete{ + DictEntryDelete: &statefulpb.DictEntryDelete{ Id: id, }, }, @@ -843,7 +844,7 @@ func createDictEntryDelete(id uint64) *Datum { } // createPayloadWithState creates a payload with state changes in StatefulExtra -func createPayloadWithState(content string, stateChanges []*Datum) *message.Payload { +func createPayloadWithState(content string, stateChanges []*statefulpb.Datum) *message.Payload { payload := createWorkerTestPayload(content) if len(stateChanges) > 0 { payload.StatefulExtra = &StatefulExtra{ @@ -854,12 +855,12 @@ func createPayloadWithState(content string, stateChanges []*Datum) *message.Payl } // verifySnapshotContents checks if a snapshot batch contains the expected state -func verifySnapshotContents(t *testing.T, batch *StatefulBatch, expectedPatterns map[uint64]string, expectedDictEntries map[uint64]string) { +func verifySnapshotContents(t *testing.T, batch *statefulpb.StatefulBatch, expectedPatterns map[uint64]string, expectedDictEntries map[uint64]string) { require.NotNil(t, batch) require.Equal(t, uint32(0), batch.BatchId, "Snapshot should have batch ID 0") // Deserialize the snapshot data (it's a DatumSequence) - var datumSeq DatumSequence + var 
datumSeq statefulpb.DatumSequence err := proto.Unmarshal(batch.Data, &datumSeq) require.NoError(t, err) @@ -869,9 +870,9 @@ func verifySnapshotContents(t *testing.T, batch *StatefulBatch, expectedPatterns for _, datum := range datumSeq.Data { switch d := datum.Data.(type) { - case *Datum_PatternDefine: + case *statefulpb.Datum_PatternDefine: foundPatterns[d.PatternDefine.PatternId] = d.PatternDefine.Template - case *Datum_DictEntryDefine: + case *statefulpb.Datum_DictEntryDefine: foundDictEntries[d.DictEntryDefine.Id] = d.DictEntryDefine.Value default: t.Fatalf("Snapshot should only contain PatternDefine and DictEntryDefine, got: %T", datum.Data) @@ -900,7 +901,7 @@ func TestStreamWorkerSnapshot(t *testing.T) { }, testTimeout, testTickInterval, "Initial stream should be established") // === Step 1: Send Batch 1 (5 entries) === - batch1StateChanges := []*Datum{ + batch1StateChanges := []*statefulpb.Datum{ createPatternDefine(1, "pattern1"), createDictEntryDefine(1, "value1"), createPatternDefine(2, "pattern2"), @@ -926,7 +927,7 @@ func TestStreamWorkerSnapshot(t *testing.T) { } // === Step 3: Send Batch 2 (6 entries) === - batch2StateChanges := []*Datum{ + batch2StateChanges := []*statefulpb.Datum{ createPatternDelete(1), createDictEntryDelete(1), createPatternDefine(3, "pattern3"), @@ -990,7 +991,7 @@ func TestStreamWorkerSnapshot(t *testing.T) { } // === Step 9: Send Batch 3 (3 entries) === - batch3StateChanges := []*Datum{ + batch3StateChanges := []*statefulpb.Datum{ createPatternDefine(4, "pattern4"), createDictEntryDefine(4, "value4"), } diff --git a/pkg/logs/sender/grpc/stateful_encoding.proto b/pkg/proto/datadog/stateful/stateful_encoding.proto similarity index 90% rename from pkg/logs/sender/grpc/stateful_encoding.proto rename to pkg/proto/datadog/stateful/stateful_encoding.proto index d0e9c24f147d..4d92b4cae9f4 100644 --- a/pkg/logs/sender/grpc/stateful_encoding.proto +++ b/pkg/proto/datadog/stateful/stateful_encoding.proto @@ -1,7 +1,7 @@ syntax = 
"proto3"; package intake; -option go_package = "github.com/DataDog/datadog-agent/pkg/logs/sender/grpc"; +option go_package = "pkg/proto/pbgo/statefulpb"; // --------------------------------------------------------------------------- // Dictionary-encoded @@ -84,7 +84,10 @@ message DatumSequence { // the ordering is significant, must be processed in order message StatefulBatch { uint32 batch_id = 1; - bytes data = 2; // Contains serialized DatumSequence + + // Bytes of a serialized DatumSequence. Eventually this will also be compressed. + // This allows for Datums to be compressed while they are buffered in memory before being acked by the server. + bytes data = 2; } message BatchStatus { diff --git a/pkg/logs/sender/grpc/stateful_encoding.pb.go b/pkg/proto/pbgo/statefulpb/stateful_encoding.pb.go similarity index 71% rename from pkg/logs/sender/grpc/stateful_encoding.pb.go rename to pkg/proto/pbgo/statefulpb/stateful_encoding.pb.go index bed1be08ec6c..810308ca7fa3 100644 --- a/pkg/logs/sender/grpc/stateful_encoding.pb.go +++ b/pkg/proto/pbgo/statefulpb/stateful_encoding.pb.go @@ -2,11 +2,15 @@ // versions: // protoc-gen-go v1.36.10 // protoc v5.29.3 -// source: stateful_encoding.proto +// source: datadog/stateful/stateful_encoding.proto -package grpc +package statefulpb import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" reflect "reflect" @@ -52,11 +56,11 @@ func (x BatchStatus_Status) String() string { } func (BatchStatus_Status) Descriptor() protoreflect.EnumDescriptor { - return file_stateful_encoding_proto_enumTypes[0].Descriptor() + return file_datadog_stateful_stateful_encoding_proto_enumTypes[0].Descriptor() } func (BatchStatus_Status) Type() protoreflect.EnumType { - return &file_stateful_encoding_proto_enumTypes[0] + return 
&file_datadog_stateful_stateful_encoding_proto_enumTypes[0] } func (x BatchStatus_Status) Number() protoreflect.EnumNumber { @@ -65,7 +69,7 @@ func (x BatchStatus_Status) Number() protoreflect.EnumNumber { // Deprecated: Use BatchStatus_Status.Descriptor instead. func (BatchStatus_Status) EnumDescriptor() ([]byte, []int) { - return file_stateful_encoding_proto_rawDescGZIP(), []int{10, 0} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{10, 0} } type DictEntryDefine struct { @@ -78,7 +82,7 @@ type DictEntryDefine struct { func (x *DictEntryDefine) Reset() { *x = DictEntryDefine{} - mi := &file_stateful_encoding_proto_msgTypes[0] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[0] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -90,7 +94,7 @@ func (x *DictEntryDefine) String() string { func (*DictEntryDefine) ProtoMessage() {} func (x *DictEntryDefine) ProtoReflect() protoreflect.Message { - mi := &file_stateful_encoding_proto_msgTypes[0] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[0] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -103,7 +107,7 @@ func (x *DictEntryDefine) ProtoReflect() protoreflect.Message { // Deprecated: Use DictEntryDefine.ProtoReflect.Descriptor instead. 
func (*DictEntryDefine) Descriptor() ([]byte, []int) { - return file_stateful_encoding_proto_rawDescGZIP(), []int{0} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{0} } func (x *DictEntryDefine) GetId() uint64 { @@ -129,7 +133,7 @@ type DictEntryDelete struct { func (x *DictEntryDelete) Reset() { *x = DictEntryDelete{} - mi := &file_stateful_encoding_proto_msgTypes[1] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[1] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -141,7 +145,7 @@ func (x *DictEntryDelete) String() string { func (*DictEntryDelete) ProtoMessage() {} func (x *DictEntryDelete) ProtoReflect() protoreflect.Message { - mi := &file_stateful_encoding_proto_msgTypes[1] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[1] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -154,7 +158,7 @@ func (x *DictEntryDelete) ProtoReflect() protoreflect.Message { // Deprecated: Use DictEntryDelete.ProtoReflect.Descriptor instead. 
func (*DictEntryDelete) Descriptor() ([]byte, []int) { - return file_stateful_encoding_proto_rawDescGZIP(), []int{1} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{1} } func (x *DictEntryDelete) GetId() uint64 { @@ -178,7 +182,7 @@ type PatternDefine struct { func (x *PatternDefine) Reset() { *x = PatternDefine{} - mi := &file_stateful_encoding_proto_msgTypes[2] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[2] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -190,7 +194,7 @@ func (x *PatternDefine) String() string { func (*PatternDefine) ProtoMessage() {} func (x *PatternDefine) ProtoReflect() protoreflect.Message { - mi := &file_stateful_encoding_proto_msgTypes[2] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[2] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -203,7 +207,7 @@ func (x *PatternDefine) ProtoReflect() protoreflect.Message { // Deprecated: Use PatternDefine.ProtoReflect.Descriptor instead. 
func (*PatternDefine) Descriptor() ([]byte, []int) { - return file_stateful_encoding_proto_rawDescGZIP(), []int{2} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{2} } func (x *PatternDefine) GetPatternId() uint64 { @@ -243,7 +247,7 @@ type PatternDelete struct { func (x *PatternDelete) Reset() { *x = PatternDelete{} - mi := &file_stateful_encoding_proto_msgTypes[3] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[3] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -255,7 +259,7 @@ func (x *PatternDelete) String() string { func (*PatternDelete) ProtoMessage() {} func (x *PatternDelete) ProtoReflect() protoreflect.Message { - mi := &file_stateful_encoding_proto_msgTypes[3] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[3] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -268,7 +272,7 @@ func (x *PatternDelete) ProtoReflect() protoreflect.Message { // Deprecated: Use PatternDelete.ProtoReflect.Descriptor instead. 
func (*PatternDelete) Descriptor() ([]byte, []int) { - return file_stateful_encoding_proto_rawDescGZIP(), []int{3} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{3} } func (x *PatternDelete) GetPatternId() uint64 { @@ -292,7 +296,7 @@ type Log struct { func (x *Log) Reset() { *x = Log{} - mi := &file_stateful_encoding_proto_msgTypes[4] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[4] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -304,7 +308,7 @@ func (x *Log) String() string { func (*Log) ProtoMessage() {} func (x *Log) ProtoReflect() protoreflect.Message { - mi := &file_stateful_encoding_proto_msgTypes[4] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[4] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -317,7 +321,7 @@ func (x *Log) ProtoReflect() protoreflect.Message { // Deprecated: Use Log.ProtoReflect.Descriptor instead. func (*Log) Descriptor() ([]byte, []int) { - return file_stateful_encoding_proto_rawDescGZIP(), []int{4} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{4} } func (x *Log) GetTimestamp() uint64 { @@ -378,7 +382,7 @@ type StructuredLog struct { func (x *StructuredLog) Reset() { *x = StructuredLog{} - mi := &file_stateful_encoding_proto_msgTypes[5] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[5] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -390,7 +394,7 @@ func (x *StructuredLog) String() string { func (*StructuredLog) ProtoMessage() {} func (x *StructuredLog) ProtoReflect() protoreflect.Message { - mi := &file_stateful_encoding_proto_msgTypes[5] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[5] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -403,7 +407,7 @@ func (x *StructuredLog) ProtoReflect() protoreflect.Message { // 
Deprecated: Use StructuredLog.ProtoReflect.Descriptor instead. func (*StructuredLog) Descriptor() ([]byte, []int) { - return file_stateful_encoding_proto_rawDescGZIP(), []int{5} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{5} } func (x *StructuredLog) GetPatternId() uint64 { @@ -436,7 +440,7 @@ type DynamicValue struct { func (x *DynamicValue) Reset() { *x = DynamicValue{} - mi := &file_stateful_encoding_proto_msgTypes[6] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[6] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -448,7 +452,7 @@ func (x *DynamicValue) String() string { func (*DynamicValue) ProtoMessage() {} func (x *DynamicValue) ProtoReflect() protoreflect.Message { - mi := &file_stateful_encoding_proto_msgTypes[6] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[6] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -461,7 +465,7 @@ func (x *DynamicValue) ProtoReflect() protoreflect.Message { // Deprecated: Use DynamicValue.ProtoReflect.Descriptor instead. 
func (*DynamicValue) Descriptor() ([]byte, []int) { - return file_stateful_encoding_proto_rawDescGZIP(), []int{6} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{6} } func (x *DynamicValue) GetValue() isDynamicValue_Value { @@ -551,7 +555,7 @@ type Datum struct { func (x *Datum) Reset() { *x = Datum{} - mi := &file_stateful_encoding_proto_msgTypes[7] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[7] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -563,7 +567,7 @@ func (x *Datum) String() string { func (*Datum) ProtoMessage() {} func (x *Datum) ProtoReflect() protoreflect.Message { - mi := &file_stateful_encoding_proto_msgTypes[7] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[7] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -576,7 +580,7 @@ func (x *Datum) ProtoReflect() protoreflect.Message { // Deprecated: Use Datum.ProtoReflect.Descriptor instead. 
func (*Datum) Descriptor() ([]byte, []int) { - return file_stateful_encoding_proto_rawDescGZIP(), []int{7} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{7} } func (x *Datum) GetData() isDatum_Data { @@ -676,7 +680,7 @@ type DatumSequence struct { func (x *DatumSequence) Reset() { *x = DatumSequence{} - mi := &file_stateful_encoding_proto_msgTypes[8] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[8] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -688,7 +692,7 @@ func (x *DatumSequence) String() string { func (*DatumSequence) ProtoMessage() {} func (x *DatumSequence) ProtoReflect() protoreflect.Message { - mi := &file_stateful_encoding_proto_msgTypes[8] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[8] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -701,7 +705,7 @@ func (x *DatumSequence) ProtoReflect() protoreflect.Message { // Deprecated: Use DatumSequence.ProtoReflect.Descriptor instead. func (*DatumSequence) Descriptor() ([]byte, []int) { - return file_stateful_encoding_proto_rawDescGZIP(), []int{8} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{8} } func (x *DatumSequence) GetData() []*Datum { @@ -714,16 +718,18 @@ func (x *DatumSequence) GetData() []*Datum { // data is sequence of pattern/dictionary changes + logs // the ordering is significant, must be processed in order type StatefulBatch struct { - state protoimpl.MessageState `protogen:"open.v1"` - BatchId uint32 `protobuf:"varint,1,opt,name=batch_id,json=batchId,proto3" json:"batch_id,omitempty"` - Data []byte `protobuf:"bytes,2,opt,name=data,proto3" json:"data,omitempty"` // Contains serialized DatumSequence + state protoimpl.MessageState `protogen:"open.v1"` + BatchId uint32 `protobuf:"varint,1,opt,name=batch_id,json=batchId,proto3" json:"batch_id,omitempty"` + // Bytes of a serialized DatumSequence. 
Eventually this will also be compressed. + // This allows for Datums to be compressed while they are buffered in memory before being acked by the server. + Data []byte `protobuf:"bytes,2,opt,name=data,proto3" json:"data,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } func (x *StatefulBatch) Reset() { *x = StatefulBatch{} - mi := &file_stateful_encoding_proto_msgTypes[9] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[9] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -735,7 +741,7 @@ func (x *StatefulBatch) String() string { func (*StatefulBatch) ProtoMessage() {} func (x *StatefulBatch) ProtoReflect() protoreflect.Message { - mi := &file_stateful_encoding_proto_msgTypes[9] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[9] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -748,7 +754,7 @@ func (x *StatefulBatch) ProtoReflect() protoreflect.Message { // Deprecated: Use StatefulBatch.ProtoReflect.Descriptor instead. 
func (*StatefulBatch) Descriptor() ([]byte, []int) { - return file_stateful_encoding_proto_rawDescGZIP(), []int{9} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{9} } func (x *StatefulBatch) GetBatchId() uint32 { @@ -775,7 +781,7 @@ type BatchStatus struct { func (x *BatchStatus) Reset() { *x = BatchStatus{} - mi := &file_stateful_encoding_proto_msgTypes[10] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[10] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -787,7 +793,7 @@ func (x *BatchStatus) String() string { func (*BatchStatus) ProtoMessage() {} func (x *BatchStatus) ProtoReflect() protoreflect.Message { - mi := &file_stateful_encoding_proto_msgTypes[10] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[10] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -800,7 +806,7 @@ func (x *BatchStatus) ProtoReflect() protoreflect.Message { // Deprecated: Use BatchStatus.ProtoReflect.Descriptor instead. 
func (*BatchStatus) Descriptor() ([]byte, []int) { - return file_stateful_encoding_proto_rawDescGZIP(), []int{10} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{10} } func (x *BatchStatus) GetBatchId() int32 { @@ -817,11 +823,11 @@ func (x *BatchStatus) GetStatus() BatchStatus_Status { return BatchStatus_UNKNOWN } -var File_stateful_encoding_proto protoreflect.FileDescriptor +var File_datadog_stateful_stateful_encoding_proto protoreflect.FileDescriptor -const file_stateful_encoding_proto_rawDesc = "" + +const file_datadog_stateful_stateful_encoding_proto_rawDesc = "" + "\n" + - "\x17stateful_encoding.proto\x12\x06intake\"7\n" + + "(datadog/stateful/stateful_encoding.proto\x12\x06intake\"7\n" + "\x0fDictEntryDefine\x12\x0e\n" + "\x02id\x18\x01 \x01(\x04R\x02id\x12\x14\n" + "\x05value\x18\x02 \x01(\tR\x05value\"!\n" + @@ -876,23 +882,23 @@ const file_stateful_encoding_proto_rawDesc = "" + "\x02OK\x10\x012S\n" + "\x13StatefulLogsService\x12<\n" + "\n" + - "LogsStream\x12\x15.intake.StatefulBatch\x1a\x13.intake.BatchStatus(\x010\x01B7Z5github.com/DataDog/datadog-agent/pkg/logs/sender/grpcb\x06proto3" + "LogsStream\x12\x15.intake.StatefulBatch\x1a\x13.intake.BatchStatus(\x010\x01B\x1bZ\x19pkg/proto/pbgo/statefulpbb\x06proto3" var ( - file_stateful_encoding_proto_rawDescOnce sync.Once - file_stateful_encoding_proto_rawDescData []byte + file_datadog_stateful_stateful_encoding_proto_rawDescOnce sync.Once + file_datadog_stateful_stateful_encoding_proto_rawDescData []byte ) -func file_stateful_encoding_proto_rawDescGZIP() []byte { - file_stateful_encoding_proto_rawDescOnce.Do(func() { - file_stateful_encoding_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_stateful_encoding_proto_rawDesc), len(file_stateful_encoding_proto_rawDesc))) +func file_datadog_stateful_stateful_encoding_proto_rawDescGZIP() []byte { + file_datadog_stateful_stateful_encoding_proto_rawDescOnce.Do(func() { + 
file_datadog_stateful_stateful_encoding_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_datadog_stateful_stateful_encoding_proto_rawDesc), len(file_datadog_stateful_stateful_encoding_proto_rawDesc))) }) - return file_stateful_encoding_proto_rawDescData + return file_datadog_stateful_stateful_encoding_proto_rawDescData } -var file_stateful_encoding_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_stateful_encoding_proto_msgTypes = make([]protoimpl.MessageInfo, 11) -var file_stateful_encoding_proto_goTypes = []any{ +var file_datadog_stateful_stateful_encoding_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_datadog_stateful_stateful_encoding_proto_msgTypes = make([]protoimpl.MessageInfo, 11) +var file_datadog_stateful_stateful_encoding_proto_goTypes = []any{ (BatchStatus_Status)(0), // 0: intake.BatchStatus.Status (*DictEntryDefine)(nil), // 1: intake.DictEntryDefine (*DictEntryDelete)(nil), // 2: intake.DictEntryDelete @@ -906,7 +912,7 @@ var file_stateful_encoding_proto_goTypes = []any{ (*StatefulBatch)(nil), // 10: intake.StatefulBatch (*BatchStatus)(nil), // 11: intake.BatchStatus } -var file_stateful_encoding_proto_depIdxs = []int32{ +var file_datadog_stateful_stateful_encoding_proto_depIdxs = []int32{ 6, // 0: intake.Log.structured:type_name -> intake.StructuredLog 7, // 1: intake.StructuredLog.dynamic_values:type_name -> intake.DynamicValue 3, // 2: intake.Datum.pattern_define:type_name -> intake.PatternDefine @@ -925,22 +931,22 @@ var file_stateful_encoding_proto_depIdxs = []int32{ 0, // [0:9] is the sub-list for field type_name } -func init() { file_stateful_encoding_proto_init() } -func file_stateful_encoding_proto_init() { - if File_stateful_encoding_proto != nil { +func init() { file_datadog_stateful_stateful_encoding_proto_init() } +func file_datadog_stateful_stateful_encoding_proto_init() { + if File_datadog_stateful_stateful_encoding_proto != nil { return } - 
file_stateful_encoding_proto_msgTypes[4].OneofWrappers = []any{ + file_datadog_stateful_stateful_encoding_proto_msgTypes[4].OneofWrappers = []any{ (*Log_Structured)(nil), (*Log_Raw)(nil), } - file_stateful_encoding_proto_msgTypes[6].OneofWrappers = []any{ + file_datadog_stateful_stateful_encoding_proto_msgTypes[6].OneofWrappers = []any{ (*DynamicValue_IntValue)(nil), (*DynamicValue_FloatValue)(nil), (*DynamicValue_StringValue)(nil), (*DynamicValue_DictIndex)(nil), } - file_stateful_encoding_proto_msgTypes[7].OneofWrappers = []any{ + file_datadog_stateful_stateful_encoding_proto_msgTypes[7].OneofWrappers = []any{ (*Datum_PatternDefine)(nil), (*Datum_PatternDelete)(nil), (*Datum_DictEntryDefine)(nil), @@ -951,18 +957,130 @@ func file_stateful_encoding_proto_init() { out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), - RawDescriptor: unsafe.Slice(unsafe.StringData(file_stateful_encoding_proto_rawDesc), len(file_stateful_encoding_proto_rawDesc)), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_datadog_stateful_stateful_encoding_proto_rawDesc), len(file_datadog_stateful_stateful_encoding_proto_rawDesc)), NumEnums: 1, NumMessages: 11, NumExtensions: 0, NumServices: 1, }, - GoTypes: file_stateful_encoding_proto_goTypes, - DependencyIndexes: file_stateful_encoding_proto_depIdxs, - EnumInfos: file_stateful_encoding_proto_enumTypes, - MessageInfos: file_stateful_encoding_proto_msgTypes, + GoTypes: file_datadog_stateful_stateful_encoding_proto_goTypes, + DependencyIndexes: file_datadog_stateful_stateful_encoding_proto_depIdxs, + EnumInfos: file_datadog_stateful_stateful_encoding_proto_enumTypes, + MessageInfos: file_datadog_stateful_stateful_encoding_proto_msgTypes, }.Build() - File_stateful_encoding_proto = out.File - file_stateful_encoding_proto_goTypes = nil - file_stateful_encoding_proto_depIdxs = nil + File_datadog_stateful_stateful_encoding_proto = out.File + file_datadog_stateful_stateful_encoding_proto_goTypes 
= nil + file_datadog_stateful_stateful_encoding_proto_depIdxs = nil +} + +// Reference imports to suppress errors if they are not otherwise used. +var _ context.Context +var _ grpc.ClientConnInterface + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +const _ = grpc.SupportPackageIsVersion6 + +// StatefulLogsServiceClient is the client API for StatefulLogsService service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream. +type StatefulLogsServiceClient interface { + LogsStream(ctx context.Context, opts ...grpc.CallOption) (StatefulLogsService_LogsStreamClient, error) +} + +type statefulLogsServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewStatefulLogsServiceClient(cc grpc.ClientConnInterface) StatefulLogsServiceClient { + return &statefulLogsServiceClient{cc} +} + +func (c *statefulLogsServiceClient) LogsStream(ctx context.Context, opts ...grpc.CallOption) (StatefulLogsService_LogsStreamClient, error) { + stream, err := c.cc.NewStream(ctx, &_StatefulLogsService_serviceDesc.Streams[0], "/intake.StatefulLogsService/LogsStream", opts...) + if err != nil { + return nil, err + } + x := &statefulLogsServiceLogsStreamClient{stream} + return x, nil +} + +type StatefulLogsService_LogsStreamClient interface { + Send(*StatefulBatch) error + Recv() (*BatchStatus, error) + grpc.ClientStream +} + +type statefulLogsServiceLogsStreamClient struct { + grpc.ClientStream +} + +func (x *statefulLogsServiceLogsStreamClient) Send(m *StatefulBatch) error { + return x.ClientStream.SendMsg(m) +} + +func (x *statefulLogsServiceLogsStreamClient) Recv() (*BatchStatus, error) { + m := new(BatchStatus) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +// StatefulLogsServiceServer is the server API for StatefulLogsService service. 
+type StatefulLogsServiceServer interface { + LogsStream(StatefulLogsService_LogsStreamServer) error +} + +// UnimplementedStatefulLogsServiceServer can be embedded to have forward compatible implementations. +type UnimplementedStatefulLogsServiceServer struct { +} + +func (*UnimplementedStatefulLogsServiceServer) LogsStream(StatefulLogsService_LogsStreamServer) error { + return status.Errorf(codes.Unimplemented, "method LogsStream not implemented") +} + +func RegisterStatefulLogsServiceServer(s *grpc.Server, srv StatefulLogsServiceServer) { + s.RegisterService(&_StatefulLogsService_serviceDesc, srv) +} + +func _StatefulLogsService_LogsStream_Handler(srv interface{}, stream grpc.ServerStream) error { + return srv.(StatefulLogsServiceServer).LogsStream(&statefulLogsServiceLogsStreamServer{stream}) +} + +type StatefulLogsService_LogsStreamServer interface { + Send(*BatchStatus) error + Recv() (*StatefulBatch, error) + grpc.ServerStream +} + +type statefulLogsServiceLogsStreamServer struct { + grpc.ServerStream +} + +func (x *statefulLogsServiceLogsStreamServer) Send(m *BatchStatus) error { + return x.ServerStream.SendMsg(m) +} + +func (x *statefulLogsServiceLogsStreamServer) Recv() (*StatefulBatch, error) { + m := new(StatefulBatch) + if err := x.ServerStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +var _StatefulLogsService_serviceDesc = grpc.ServiceDesc{ + ServiceName: "intake.StatefulLogsService", + HandlerType: (*StatefulLogsServiceServer)(nil), + Methods: []grpc.MethodDesc{}, + Streams: []grpc.StreamDesc{ + { + StreamName: "LogsStream", + Handler: _StatefulLogsService_LogsStream_Handler, + ServerStreams: true, + ClientStreams: true, + }, + }, + Metadata: "datadog/stateful/stateful_encoding.proto", } diff --git a/tasks/protobuf.py b/tasks/protobuf.py index 78ef3e412f06..cf1e7a3fa320 100644 --- a/tasks/protobuf.py +++ b/tasks/protobuf.py @@ -21,6 +21,7 @@ 'remoteagent': False, 'autodiscovery': False, 'trace/idx': False, + 'stateful': 
False, } CLI_EXTRAS = { From b26c453b42fbf7f57d7d45387e7a8dcf9aefffda Mon Sep 17 00:00:00 2001 From: yoon nguyen Date: Tue, 11 Nov 2025 17:58:40 -0500 Subject: [PATCH 12/16] lint, naming, new merging and pattern logic for regenetive merging, expose some hard rotation handle logic --- pkg/logs/patterns/automaton/tokenizer.go | 12 +- pkg/logs/patterns/automaton/tokenizer_test.go | 114 ++++- pkg/logs/patterns/automaton/trie.go | 2 +- pkg/logs/patterns/clustering/cluster.go | 437 +++++++--------- .../patterns/clustering/cluster_manager.go | 186 +++---- .../clustering/cluster_manager_test.go | 406 ++++----------- pkg/logs/patterns/clustering/cluster_test.go | 396 +++++++++----- .../patterns/clustering/merging/merging.go | 61 +-- .../clustering/merging/merging_test.go | 72 --- pkg/logs/patterns/clustering/pattern.go | 160 ++++++ pkg/logs/patterns/clustering/pattern_test.go | 481 ++++++++++++++++++ pkg/logs/patterns/token/signature.go | 10 +- pkg/logs/patterns/token/signature_test.go | 40 +- pkg/logs/patterns/token/token.go | 7 +- 14 files changed, 1393 insertions(+), 991 deletions(-) create mode 100644 pkg/logs/patterns/clustering/pattern.go create mode 100644 pkg/logs/patterns/clustering/pattern_test.go diff --git a/pkg/logs/patterns/automaton/tokenizer.go b/pkg/logs/patterns/automaton/tokenizer.go index d092b64477ae..ab9f0b514965 100644 --- a/pkg/logs/patterns/automaton/tokenizer.go +++ b/pkg/logs/patterns/automaton/tokenizer.go @@ -200,10 +200,9 @@ func (t *Tokenizer) handleWhitespaceState(char rune) bool { } // handleSpecialState processes special characters -func (t *Tokenizer) handleSpecialState(char rune) bool { - // Treat each special char as separate token - t.addToBuffer(char) - t.pos++ +func (t *Tokenizer) handleSpecialState(_ rune) bool { + // The special character is already in buffer from handleStartState + // Just create the token and reset state t.createSpecialToken() t.setState(StateStart) return true @@ -212,7 +211,7 @@ func (t *Tokenizer) 
handleSpecialState(char rune) bool { // classifyToken attempts to classify a single token's type using trie and terminal rules. func (t *Tokenizer) classifyToken(value string) (token.TokenType, error) { if len(value) == 0 { - return token.TokenUnknown, fmt.Errorf("cannot classify empty srting token value") + return token.TokenUnknown, fmt.Errorf("cannot classify empty string token value") } return globalTrie.Match(value), nil } @@ -295,7 +294,8 @@ func (t *Tokenizer) createNumericToken() { } func (t *Tokenizer) createWhitespaceToken() { - value := t.bufferToString() + // Normalize all whitespace (tabs, spaces, newlines, multiple spaces) to single space + value := " " // Whitespace never becomes wildcard tok := token.NewToken(token.TokenWhitespace, value, token.NotWildcard) t.tokens = append(t.tokens, tok) diff --git a/pkg/logs/patterns/automaton/tokenizer_test.go b/pkg/logs/patterns/automaton/tokenizer_test.go index 284b9200a92d..5fdf716836bb 100644 --- a/pkg/logs/patterns/automaton/tokenizer_test.go +++ b/pkg/logs/patterns/automaton/tokenizer_test.go @@ -297,9 +297,10 @@ func TestComplexLogScenarios(t *testing.T) { expected: []token.TokenType{ token.TokenWord, // Price token.TokenWhitespace, // space - token.TokenWord, // @ (with trailing space) - token.TokenWord, // $1 - token.TokenNumeric, // 0 + token.TokenWord, // @ + token.TokenWhitespace, // space + token.TokenWord, // $ + token.TokenNumeric, // 10 token.TokenWhitespace, // space token.TokenWord, // each }, @@ -316,7 +317,8 @@ func TestComplexLogScenarios(t *testing.T) { token.TokenWhitespace, // space token.TokenNumeric, // 2 token.TokenWhitespace, // space - token.TokenWord, // = (with trailing space) + token.TokenWord, // = + token.TokenWhitespace, // space token.TokenNumeric, // 5 }, }, @@ -324,9 +326,10 @@ func TestComplexLogScenarios(t *testing.T) { name: "False Positive - Phone number is not Date", input: "Phone: 123-456-7890", expected: []token.TokenType{ - token.TokenWord, // Phone - 
token.TokenWord, // : (with trailing space) - token.TokenNumeric, // 123-456-7890 stays numeric, not date + token.TokenWord, // Phone + token.TokenWord, // : + token.TokenWhitespace, // space + token.TokenNumeric, // 123-456-7890 stays numeric, not date }, }, } @@ -348,6 +351,103 @@ func TestComplexLogScenarios(t *testing.T) { } } +// TestWhitespaceNormalization tests that all whitespace types are normalized to single space +func TestWhitespaceNormalization(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "Single space", + input: "Error: message", + expected: " ", + }, + { + name: "Tab character", + input: "Error:\tmessage", + expected: " ", + }, + { + name: "Multiple spaces", + input: "Error: message", + expected: " ", + }, + { + name: "Multiple tabs", + input: "Error:\t\tmessage", + expected: " ", + }, + { + name: "Mixed tabs and spaces", + input: "Error: \t message", + expected: " ", + }, + { + name: "Newline", + input: "Error:\nmessage", + expected: " ", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + tokenList := TokenizeString(test.input) + + // Find whitespace token + var whitespaceToken *token.Token + for i := range tokenList.Tokens { + if tokenList.Tokens[i].Type == token.TokenWhitespace { + whitespaceToken = &tokenList.Tokens[i] + break + } + } + + assert.NotNil(t, whitespaceToken, "Expected to find whitespace token") + + if whitespaceToken != nil { + assert.Equal(t, test.expected, whitespaceToken.Value, + "Whitespace should be normalized to single space") + assert.Equal(t, token.NotWildcard, whitespaceToken.Wildcard, + "Whitespace should be NotWildcard") + } + }) + } +} + +// TestWhitespaceNormalization_Signature tests if whitespace normalization would allows logs with different whitespace to merge into the same pattern +func TestWhitespaceNormalization_Signature(t *testing.T) { + // These logs differ only in whitespace - they should tokenize identically + log1 := 
"Error: connection failed" // single space + log2 := "Error:\tconnection failed" // tab + log3 := "Error: connection failed" // double space + + tl1 := TokenizeString(log1) + tl2 := TokenizeString(log2) + tl3 := TokenizeString(log3) + + // All should have same token count + assert.Equal(t, tl1.Length(), tl2.Length(), "Token counts should match") + assert.Equal(t, tl1.Length(), tl3.Length(), "Token counts should match") + + // All whitespace tokens should be normalized to single space + for i := 0; i < tl1.Length(); i++ { + if tl1.Tokens[i].Type == token.TokenWhitespace { + assert.Equal(t, " ", tl1.Tokens[i].Value, "Whitespace in log1 should be normalized") + assert.Equal(t, " ", tl2.Tokens[i].Value, "Whitespace in log2 should be normalized") + assert.Equal(t, " ", tl3.Tokens[i].Value, "Whitespace in log3 should be normalized") + } + } + + // Signatures should be identical + sig1 := token.NewSignature(tl1) + sig2 := token.NewSignature(tl2) + sig3 := token.NewSignature(tl3) + + assert.True(t, sig1.Equals(sig2), "Signatures should be equal after normalization") + assert.True(t, sig1.Equals(sig3), "Signatures should be equal after normalization") +} + // =============================== // Helper functions // =============================== diff --git a/pkg/logs/patterns/automaton/trie.go b/pkg/logs/patterns/automaton/trie.go index 01b7f0a9fcb2..540b73a588f3 100644 --- a/pkg/logs/patterns/automaton/trie.go +++ b/pkg/logs/patterns/automaton/trie.go @@ -32,7 +32,7 @@ var globalRuleManager *RuleManager var globalTrie *Trie // init initializes the global trie and rule manager -// todo: componentilize this eventually +// todo: componentize this eventually func init() { globalTrie = NewTrie() globalRuleManager = NewRuleManager() diff --git a/pkg/logs/patterns/clustering/cluster.go b/pkg/logs/patterns/clustering/cluster.go index 12be88b1ac89..f037daec8a9d 100644 --- a/pkg/logs/patterns/clustering/cluster.go +++ b/pkg/logs/patterns/clustering/cluster.go @@ -16,335 +16,264 @@ 
import ( ) // Cluster represents a group of TokenLists with identical signatures. +// A cluster may contain multiple patterns if token lists with the same signature cannot be merged since structural Fidelity is Valuable. +// Examples: +// "Status: OK" → HTTP response format +// "Status; OK" → CSV-like format +// "Status OK" → Plain text format +// These are different log formats, even if semantically similar → we need to keep them separate. type Cluster struct { - Signature token.Signature - TokenLists []*token.TokenList - Pattern *token.TokenList - WildcardMap map[int]bool - PatternID uint64 - - // Timestamp tracking for stateful encoding - CreatedAt time.Time // When pattern was first created - UpdatedAt time.Time // When pattern was last modified - LastSentAt time.Time // When we last sent this pattern to gRPC + Signature token.Signature + Patterns []*Pattern // Multiple patterns per cluster + + // Timestamp tracking for the cluster itself + CreatedAt time.Time // When cluster was first created + UpdatedAt time.Time // When cluster was last modified (any pattern changed) } // NewCluster creates a new cluster. func NewCluster(signature token.Signature, tokenList *token.TokenList) *Cluster { now := time.Now() return &Cluster{ - Signature: signature, - TokenLists: []*token.TokenList{tokenList}, - Pattern: nil, - WildcardMap: make(map[int]bool), - PatternID: 0, // Will be assigned when pattern is generated - CreatedAt: now, - UpdatedAt: now, - LastSentAt: time.Time{}, // Zero time - never sent - } -} - -// Add adds a TokenList to this cluster if it has a matching signature. 
-func (c *Cluster) Add(tokenList *token.TokenList) bool { - signature := token.NewSignature(tokenList) - - if !c.Signature.Equals(signature) { - return false + Signature: signature, + Patterns: nil, // Will be generated when needed + CreatedAt: now, + UpdatedAt: now, } - - c.TokenLists = append(c.TokenLists, tokenList) - - c.Pattern = nil - c.WildcardMap = make(map[int]bool) - c.UpdatedAt = time.Now() // Pattern will change when regenerated z - - return true -} - -// Size returns the number of TokenLists in this cluster. -func (c *Cluster) Size() int { - return len(c.TokenLists) } -// GeneratePattern analyzes all TokenLists in the cluster to identify wildcard positions. -// Uses intelligent mergeability logic to determine which positions can be wildcarded. -// If the cluster contains heterogeneous TokenLists that can't merge, uses the largest -// mergeable group for pattern generation. -func (c *Cluster) GeneratePattern() *token.TokenList { - if c.Pattern != nil { - return c.Pattern +// ============================================================================= +// Core Clustering Logic +// ============================================================================= + +// AddTokenListToPatterns adds a TokenList to the appropriate pattern in the cluster. +// If no matching pattern exists, creates a new one. 
+func (c *Cluster) AddTokenListToPatterns(tokenList *token.TokenList) *Pattern { + // Ensure patterns are generated + if len(c.Patterns) == 0 { + // No patterns yet, create first one + patternID := generatePatternID() + pattern := newPattern(tokenList, patternID) + + c.Patterns = []*Pattern{pattern} + // Update the cluster's new pattern at timestamp + c.UpdatedAt = time.Now() + return pattern } - if len(c.TokenLists) == 0 { - return nil - } - - if len(c.TokenLists) == 1 { - c.Pattern = c.TokenLists[0] - return c.Pattern - } - - // Check if cluster is heterogeneous - contains unmergeable sub-groups - groups := merging.FindMergeableGroups(c.TokenLists) - - // If we have multiple groups, the cluster is heterogeneous - // Use the largest group for pattern generation - var primaryGroup []*token.TokenList - if len(groups) > 1 { - // Find the largest group - maxSize := 0 - for _, group := range groups { - if len(group) > maxSize { - maxSize = len(group) - primaryGroup = group - } + // Try to find a matching pattern + for _, p := range c.Patterns { + // Check if this TokenList can merge with this pattern's sample + if p.Sample != nil && merging.CanMergeTokenLists(tokenList, p.Sample) { + // Merge into existing pattern (same PatternID is preserved) + p.LogCount++ + p.UpdatedAt = time.Now() + c.UpdatedAt = time.Now() + + // Incrementally merge the new token list into the pattern template + c.regeneratePattern(p, tokenList) + return p // Return existing pattern with updated template } - // TODO: Need to handle semantic mergeability of different patterns in the group - } else { - primaryGroup = groups[0] } - // Now generate pattern from the primary group using merging logic - template := primaryGroup[0] - if template.Length() == 0 { - return nil - } - - // Start with the template - pattern := template + // No matching pattern found, create a new one + patternID := generatePatternID() + pattern := newPattern(tokenList, patternID) + c.Patterns = append(c.Patterns, pattern) + 
c.UpdatedAt = time.Now() + return pattern +} - // Progressively merge with each TokenList in the group - for i := 1; i < len(primaryGroup); i++ { - merged := merging.MergeTokenLists(pattern, primaryGroup[i]) - if merged != nil { - pattern = merged - } - // If merge fails (shouldn't happen since FindMergeableGroups verified it), keep current pattern +// regeneratePattern incrementally merges a new token list into the pattern. +func (c *Cluster) regeneratePattern(p *Pattern, newTokenList *token.TokenList) { + if p.Template == nil { + return } - // Build wildcard map and handle special path patterns - c.WildcardMap = make(map[int]bool) - patternTokens := make([]token.Token, pattern.Length()) + // Incremental merge: merge new log with existing template + merged := merging.MergeTokenLists(p.Template, newTokenList) + if merged == nil { + // Merge failed (shouldn't happen since CanMergeTokenLists passed), keep current template + return + } - for i := 0; i < pattern.Length(); i++ { - tok := pattern.Tokens[i] + p.Template = merged + p.Positions = make([]int, 0, merged.Length()) + // Build wildcard positions list + for i := 0; i < merged.Length(); i++ { + tok := merged.Tokens[i] if tok.Wildcard == token.IsWildcard { - c.WildcardMap[i] = true + p.Positions = append(p.Positions, i) // Special handling for path wildcards - if tok.Type == token.TokenAbsolutePath && len(primaryGroup) > 0 { - firstPath := primaryGroup[0].Tokens[i].Value - tok.Value = getPathPattern(firstPath) + if tok.Type == token.TokenAbsolutePath && p.Sample != nil && i < p.Sample.Length() { + firstPath := p.Sample.Tokens[i].Value + merged.Tokens[i].Value = getPathPattern(firstPath) } } - - patternTokens[i] = tok } - c.Pattern = token.NewTokenListWithTokens(patternTokens) - return c.Pattern + p.UpdatedAt = time.Now() } -// GetWildcardPositions returns wildcard token positions (indices in token array). 
-func (c *Cluster) GetWildcardPositions() []int { - if c.Pattern == nil { - c.GeneratePattern() - } - - var positions []int - for pos := range c.WildcardMap { - positions = append(positions, pos) - } - - return positions -} +// ============================================================================= +// Pattern Access Methods +// ============================================================================= -// GetWildcardCharPositions returns character positions where wildcards appear in the pattern string. -// This is used for stateful encoding where the intake needs to know where to insert dynamic values. -func (c *Cluster) GetWildcardCharPositions() []int { - if c.Pattern == nil { - c.GeneratePattern() +// FindMatchingPattern finds the Pattern that matches the given TokenList. +// Returns the matching Pattern, or nil if no match found. +func (c *Cluster) FindMatchingPattern(tokenList *token.TokenList) *Pattern { + // Ensure patterns are generated + if len(c.Patterns) == 0 { + return nil } - var charPositions []int - currentPos := 0 - - for _, tok := range c.Pattern.Tokens { - // Clean the token value for proper length calculation - cleaned := sanitizeForTemplate(tok.Value) - - if tok.Wildcard == token.IsWildcard { - // Record the current character position for this wildcard - charPositions = append(charPositions, currentPos) - // Wildcard is represented as "*" (1 character) - currentPos += 1 - } else if cleaned != "" { - // Add the length of the cleaned token value - currentPos += len(cleaned) + // Try to find a Pattern where the TokenList can merge + for _, p := range c.Patterns { + // Check if this TokenList can merge with the pattern's sample + if p.Sample != nil && merging.CanMergeTokenLists(tokenList, p.Sample) { + return p } } - return charPositions + // Fallback: return most common pattern (largest group) + return c.GetMostCommonPattern() } -// HasWildcards returns true if this cluster contains wildcard positions. 
-func (c *Cluster) HasWildcards() bool { - if c.Pattern == nil { - c.GeneratePattern() +// GetPatternString returns a string representation of the most common pattern. +// For backward compatibility. +func (c *Cluster) GetPatternString() string { + primary := c.GetMostCommonPattern() + if primary == nil { + return "" } - - return len(c.WildcardMap) > 0 + return primary.getPatternString() } -// GetWildcardValues extracts the actual values from the most recent token list that correspond to wildcard positions -func (c *Cluster) GetWildcardValues() []string { - if c.Pattern == nil { - c.GeneratePattern() - } - - // Get the most recent token list - if len(c.TokenLists) == 0 { +// GetMostCommonPattern returns the pattern with the highest log count in this cluster. +// When a cluster contains multiple patterns (due to structural differences like special characters), +// this returns the most frequently occurring pattern, which is typically the most representative. +func (c *Cluster) GetMostCommonPattern() *Pattern { + if len(c.Patterns) == 0 { return nil } - lastTokenList := c.TokenLists[len(c.TokenLists)-1] - // Extract values at wildcard positions - var values []string - for i, tok := range c.Pattern.Tokens { - if tok.Wildcard == token.IsWildcard { - if i < len(lastTokenList.Tokens) { - values = append(values, lastTokenList.Tokens[i].Value) - } + mostCommonIdx := 0 + maxLogCount := c.Patterns[0].LogCount + for idx, p := range c.Patterns { + if p.LogCount > maxLogCount { + maxLogCount = p.LogCount + mostCommonIdx = idx } } + return c.Patterns[mostCommonIdx] +} - return values +// GetAllPatterns returns all Patterns in this cluster. 
+func (c *Cluster) GetAllPatterns() []*Pattern { + return c.Patterns } -// ExtractWildcardValues extracts the wildcard values from a specific TokenList -func (c *Cluster) ExtractWildcardValues(tokenList *token.TokenList) []string { - if c.Pattern == nil { - c.GeneratePattern() +// GetPatternID returns the pattern ID for the most common pattern. +// For backward compatibility. +func (c *Cluster) GetPatternID() uint64 { + primary := c.GetMostCommonPattern() + if primary == nil { + return 0 } + return primary.PatternID +} - if len(c.WildcardMap) == 0 { - return []string{} - } +// ============================================================================= +// Wildcard Methods +// ============================================================================= - var wildcardValues []string - for i := 0; i < tokenList.Length(); i++ { - if c.WildcardMap[i] { - wildcardValues = append(wildcardValues, tokenList.Tokens[i].Value) +// HasWildcards returns true if any pattern in this cluster contains wildcard positions. +func (c *Cluster) HasWildcards() bool { + for _, p := range c.Patterns { + if p.hasWildcards() { + return true } } - - return wildcardValues + return false } -// GetPatternString returns a string representation of the pattern -func (c *Cluster) GetPatternString() string { - if c.Pattern == nil { - c.GeneratePattern() +// GetWildcardPositions returns wildcard token positions for the most common pattern. +// For backward compatibility. +func (c *Cluster) GetWildcardPositions() []int { + primary := c.GetMostCommonPattern() + if primary == nil { + return nil } + return primary.getWildcardPositions() +} - if c.Pattern == nil { - return "" +// GetWildcardCharPositions returns character positions where wildcards appear in the most common pattern string. +// For backward compatibility. 
+func (c *Cluster) GetWildcardCharPositions() []int { + primary := c.GetMostCommonPattern() + if primary == nil { + return nil } + return primary.getWildcardCharPositions() +} - var parts []string - for _, tok := range c.Pattern.Tokens { - // Use "*" for wildcard positions, actual value otherwise - if tok.Wildcard == token.IsWildcard { - parts = append(parts, "*") - } else { - // Only use printable ASCII/UTF-8 characters in the template - cleaned := sanitizeForTemplate(tok.Value) - if cleaned != "" { - parts = append(parts, cleaned) - } - } +// GetWildcardValues extracts the actual values from the most recent token list in the most common pattern. +// For backward compatibility. +func (c *Cluster) GetWildcardValues() []string { + primary := c.GetMostCommonPattern() + if primary == nil { + return nil } - return strings.Join(parts, "") + return primary.getWildcardValues() } -// sanitizeForTemplate removes non-printable characters from template strings -func sanitizeForTemplate(s string) string { - runes := []rune(s) - result := make([]rune, 0, len(runes)) - for _, r := range runes { - // Keep only printable characters (space and above, excluding DEL) - if r >= ' ' && r != 0x7F && r < 0xFFFD { - result = append(result, r) - } +// ExtractWildcardValues extracts the wildcard values from a specific TokenList. +// Uses the matching Pattern to determine wildcard positions. 
+func (c *Cluster) ExtractWildcardValues(tokenList *token.TokenList) []string { + // Find the matching pattern for this TokenList + p := c.FindMatchingPattern(tokenList) + if p == nil { + return []string{} } - return string(result) + return p.extractWildcardValues(tokenList) } -// GetPatternID returns the pattern ID for this cluster -func (c *Cluster) GetPatternID() uint64 { - return c.PatternID -} +// ============================================================================= +// State Management & Metadata +// ============================================================================= -// SetPatternID sets the pattern ID for this cluster -func (c *Cluster) SetPatternID(id uint64) { - c.PatternID = id +// Size returns the total number of TokenLists across all patterns in this cluster. +func (c *Cluster) Size() int { + total := 0 + for _, p := range c.Patterns { + total += p.size() + } + return total } -// MarkAsSent updates the LastSentAt timestamp to indicate this pattern was sent to gRPC +// MarkAsSent updates the LastSentAt timestamp for all patterns. func (c *Cluster) MarkAsSent() { - c.LastSentAt = time.Now() + for _, p := range c.Patterns { + p.markAsSent() + } } -// NeedsSending returns true if this pattern has never been sent or has been updated since last sent +// NeedsSending returns true if any pattern has never been sent or has been updated since last sent. func (c *Cluster) NeedsSending() bool { - return c.LastSentAt.IsZero() || c.UpdatedAt.After(c.LastSentAt) -} - -// IsNewPattern returns true if this pattern has never been sent -func (c *Cluster) IsNewPattern() bool { - return c.LastSentAt.IsZero() -} - -// WasUpdatedSinceLastSent returns true if pattern was updated since last sent -func (c *Cluster) WasUpdatedSinceLastSent() bool { - return !c.LastSentAt.IsZero() && c.UpdatedAt.After(c.LastSentAt) -} - -// MergeTokensIfFits attempts to merge this cluster with another cluster. 
-// This is used for batch consolidation where clusters with the same signature -// might be further consolidated based on semantic mergeability. -func (c *Cluster) MergeTokensIfFits(other *Cluster) bool { - // Check if clusters have the same structure - if c.Signature.Position != other.Signature.Position || c.Signature.Length != other.Signature.Length { - return false - } - - // Check if tokens can be merged at each position - if len(c.TokenLists) == 0 || len(other.TokenLists) == 0 { - return false - } - - // Use the first TokenList from each cluster for comparison - tokenList1 := c.TokenLists[0] - tokenList2 := other.TokenLists[0] - - // Delegate to merging package for semantic mergeability check - if !merging.CanMergeTokenLists(tokenList1, tokenList2) { - return false + for _, p := range c.Patterns { + if p.needsSending() { + return true + } } - - // Merge is possible - add other cluster's TokenLists to this cluster - c.TokenLists = append(c.TokenLists, other.TokenLists...) - - // Invalidate pattern cache since cluster has changed - c.Pattern = nil - c.WildcardMap = make(map[int]bool) - c.UpdatedAt = time.Now() - - return true + return false } +// ============================================================================= +// Helper Functions +// ============================================================================= + // getPathPattern converts a path to hierarchical wildcard pattern func getPathPattern(path string) string { if path == "/" { diff --git a/pkg/logs/patterns/clustering/cluster_manager.go b/pkg/logs/patterns/clustering/cluster_manager.go index 96ab52c9e5a3..30ea4f2d1893 100644 --- a/pkg/logs/patterns/clustering/cluster_manager.go +++ b/pkg/logs/patterns/clustering/cluster_manager.go @@ -10,6 +10,7 @@ package clustering import ( "crypto/rand" "encoding/binary" + "sync" "time" "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" @@ -29,64 +30,95 @@ const ( // ClusterManager manages the clustering of TokenLists using hash-based 
bucketing. type ClusterManager struct { - hashBuckets map[uint64][]*Cluster - totalTokenLists int - totalClusters int + mu sync.RWMutex + hashBuckets map[uint64][]*Cluster } // NewClusterManager creates a new ClusterManager. func NewClusterManager() *ClusterManager { return &ClusterManager{ - hashBuckets: make(map[uint64][]*Cluster), - totalTokenLists: 0, - totalClusters: 0, + hashBuckets: make(map[uint64][]*Cluster), } } // Add processes a TokenList and adds it to the appropriate cluster. -// Returns the cluster and a PatternChangeType indicating what changed. -func (cm *ClusterManager) Add(tokenList *token.TokenList) (*Cluster, PatternChangeType) { +// Returns the pattern that was created/updated and a PatternChangeType indicating what changed. +func (cm *ClusterManager) Add(tokenList *token.TokenList) (*Pattern, PatternChangeType) { if tokenList == nil || tokenList.IsEmpty() { return nil, PatternNoChange } + cm.mu.Lock() + defer cm.mu.Unlock() + + // Create new signature and hash it signature := token.NewSignature(tokenList) hash := signature.Hash + // Get hash bucket clusters := cm.hashBuckets[hash] + // Look for existing cluster with matching signature for _, cluster := range clusters { if cluster.Signature.Equals(signature) { - // Check if pattern will be updated - // If cluster already has a pattern and we're adding more token lists, - // the pattern might gain new wildcards - willUpdate := cluster.Size() > 1 && cluster.Pattern != nil - - cluster.Add(tokenList) - cm.totalTokenLists++ + // Track the state before adding + hadPatterns := len(cluster.Patterns) > 0 + oldPatternCount := len(cluster.Patterns) + + // Track if patterns had wildcards before + hadWildcards := false + if hadPatterns { + for _, p := range cluster.Patterns { + if p.hasWildcards() { + hadWildcards = true + break + } + } + } - if willUpdate { - return cluster, PatternUpdated + // Add to appropriate pattern within the cluster + pattern := cluster.AddTokenListToPatterns(tokenList) + + // 
Determine if this created a new pattern or updated an existing one + if pattern != nil { + newPatternCount := len(cluster.Patterns) + if newPatternCount > oldPatternCount { + // New pattern was created within the cluster (multi-pattern scenario) + return pattern, PatternNew + } + + // Check if wildcards were added to an existing pattern + if hadPatterns && pattern.hasWildcards() && !hadWildcards { + // Pattern gained wildcards + return pattern, PatternUpdated + } + + // If pattern already had wildcards and got more, it's also an update + if hadPatterns && hadWildcards && pattern.size() > 2 { + // Pattern structure may have changed (more wildcards) + return pattern, PatternUpdated + } } - return cluster, PatternNoChange + return pattern, PatternNoChange } } // Creating a new cluster means a new pattern newCluster := NewCluster(signature, tokenList) - newCluster.SetPatternID(generatePatternID()) + // Add the token list to create the first pattern + pattern := newCluster.AddTokenListToPatterns(tokenList) cm.hashBuckets[hash] = append(clusters, newCluster) - cm.totalTokenLists++ - cm.totalClusters++ - - return newCluster, PatternNew + return pattern, PatternNew } // GetCluster retrieves the cluster with the given signature. func (cm *ClusterManager) GetCluster(signature token.Signature) *Cluster { hash := signature.Hash + cm.mu.RLock() + defer cm.mu.RUnlock() + clusters, exists := cm.hashBuckets[hash] if !exists { return nil @@ -101,108 +133,32 @@ func (cm *ClusterManager) GetCluster(signature token.Signature) *Cluster { return nil } -// GetClustersWithPatterns returns all clusters that have patterns defined. -// This is useful for re-sending pattern state after stream rotation. -func (cm *ClusterManager) GetClustersWithPatterns() []*Cluster { - var clustersWithPatterns []*Cluster +// GetAllPatterns returns all patterns across all clusters. +// This is useful for re-sending pattern state after stream hard rotation or shutdown. 
+// Patterns are returned in no particular order since we are resending all patterns. +// Quite expensive for now, might need to be optimized later. +func (cm *ClusterManager) GetAllPatterns() []*Pattern { + var allPatterns []*Pattern + + cm.mu.RLock() + defer cm.mu.RUnlock() + // Iterate through all clusters in all hash buckets for _, clusters := range cm.hashBuckets { for _, cluster := range clusters { - // Only include clusters with actual patterns - if cluster.Pattern != nil { - clustersWithPatterns = append(clustersWithPatterns, cluster) - } + // Collect all patterns from this cluster + allPatterns = append(allPatterns, cluster.Patterns...) } } - return clustersWithPatterns + return allPatterns } -// Clear removes all clusters and resets statistics. +// Clear removes all clusters. func (cm *ClusterManager) Clear() { + cm.mu.Lock() + defer cm.mu.Unlock() cm.hashBuckets = make(map[uint64][]*Cluster) - cm.totalTokenLists = 0 - cm.totalClusters = 0 -} - -// GetAllClusters returns all clusters in the manager. -func (cm *ClusterManager) GetAllClusters() []*Cluster { - var allClusters []*Cluster - - for _, clusters := range cm.hashBuckets { - allClusters = append(allClusters, clusters...) - } - - return allClusters -} - -// GetClustersByLength returns clusters by length. -func (cm *ClusterManager) GetClustersByLength(length int) []*Cluster { - var result []*Cluster - - for _, clusters := range cm.hashBuckets { - for _, cluster := range clusters { - if cluster.Signature.Length == length { - result = append(result, cluster) - } - } - } - - return result -} - -// GetClustersByHash returns clusters by hash. -func (cm *ClusterManager) GetClustersByHash(hash uint64) []*Cluster { - if clusters, exists := cm.hashBuckets[hash]; exists { - result := make([]*Cluster, len(clusters)) - copy(result, clusters) - return result - } - - return []*Cluster{} -} - -// Stats returns statistics about the clustering. 
-type ClusterStats struct { - TotalTokenLists int - TotalClusters int - HashBuckets int - AverageClusterSize float64 -} - -// GetStats returns current clustering statistics. -func (cm *ClusterManager) GetStats() ClusterStats { - avgSize := 0.0 - if cm.totalClusters > 0 { - avgSize = float64(cm.totalTokenLists) / float64(cm.totalClusters) - } - - return ClusterStats{ - TotalTokenLists: cm.totalTokenLists, - TotalClusters: cm.totalClusters, - HashBuckets: len(cm.hashBuckets), - AverageClusterSize: avgSize, - } -} - -// GetLargestClusters returns the N largest clusters. -func (cm *ClusterManager) GetLargestClusters(n int) []*Cluster { - allClusters := cm.GetAllClusters() - - // Simple bubble sort for small N - for i := 0; i < len(allClusters)-1; i++ { - for j := 0; j < len(allClusters)-i-1; j++ { - if allClusters[j].Size() < allClusters[j+1].Size() { - allClusters[j], allClusters[j+1] = allClusters[j+1], allClusters[j] - } - } - } - - if n > len(allClusters) { - n = len(allClusters) - } - - return allClusters[:n] } // generatePatternID generates a unique pattern ID diff --git a/pkg/logs/patterns/clustering/cluster_manager_test.go b/pkg/logs/patterns/clustering/cluster_manager_test.go index 0ade17a247a6..60016e92f7d9 100644 --- a/pkg/logs/patterns/clustering/cluster_manager_test.go +++ b/pkg/logs/patterns/clustering/cluster_manager_test.go @@ -8,20 +8,15 @@ package clustering import ( "testing" + "github.com/stretchr/testify/assert" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" ) func TestClusterManager_NewClusterManager(t *testing.T) { cm := NewClusterManager() - if cm == nil { - t.Fatal("ClusterManager should not be nil") - } - - stats := cm.GetStats() - if stats.TotalTokenLists != 0 || stats.TotalClusters != 0 || stats.HashBuckets != 0 { - t.Error("New ClusterManager should have zero stats") - } + assert.NotNil(t, cm, "ClusterManager should not be nil") } func TestClusterManager_Add_NewCluster(t *testing.T) { @@ -29,31 +24,17 @@ func 
TestClusterManager_Add_NewCluster(t *testing.T) { // Create TokenList tokens := []token.Token{ - {Value: "GET", Type: token.TokenHttpMethod}, + {Value: "GET", Type: token.TokenHTTPMethod}, {Value: " ", Type: token.TokenWhitespace}, {Value: "/api", Type: token.TokenAbsolutePath}, } tokenList := token.NewTokenListWithTokens(tokens) - cluster, changeType := cm.Add(tokenList) + pattern, changeType := cm.Add(tokenList) - if cluster == nil { - t.Fatal("Should return a cluster") - } - - if cluster.Size() != 1 { - t.Errorf("Cluster should have size 1, got %d", cluster.Size()) - } - - if changeType != PatternNew { - t.Errorf("Expected PatternNew for first add, got %v", changeType) - } - - stats := cm.GetStats() - if stats.TotalTokenLists != 1 || stats.TotalClusters != 1 { - t.Errorf("Expected 1 TokenList and 1 cluster, got %d TokenLists and %d clusters", - stats.TotalTokenLists, stats.TotalClusters) - } + assert.NotNil(t, pattern, "Should return a pattern") + assert.Equal(t, 1, pattern.LogCount, "Pattern should have log count 1") + assert.Equal(t, PatternNew, changeType, "Expected PatternNew for first add") } func TestClusterManager_Add_ExistingCluster(t *testing.T) { @@ -61,12 +42,12 @@ func TestClusterManager_Add_ExistingCluster(t *testing.T) { // Create two TokenLists with same signature tokens1 := []token.Token{ - {Value: "GET", Type: token.TokenHttpMethod}, + {Value: "GET", Type: token.TokenHTTPMethod}, {Value: " ", Type: token.TokenWhitespace}, {Value: "/api", Type: token.TokenAbsolutePath}, } tokens2 := []token.Token{ - {Value: "POST", Type: token.TokenHttpMethod}, // Different value, same type + {Value: "POST", Type: token.TokenHTTPMethod}, // Different value, same type {Value: " ", Type: token.TokenWhitespace}, {Value: "/users", Type: token.TokenAbsolutePath}, // Different value, same type } @@ -74,31 +55,16 @@ func TestClusterManager_Add_ExistingCluster(t *testing.T) { tokenList1 := token.NewTokenListWithTokens(tokens1) tokenList2 := 
token.NewTokenListWithTokens(tokens2) - cluster1, changeType1 := cm.Add(tokenList1) - cluster2, changeType2 := cm.Add(tokenList2) - - // Should be the same cluster - if cluster1 != cluster2 { - t.Error("TokenLists with same signature should go to same cluster") - } - - if cluster1.Size() != 2 { - t.Errorf("Cluster should have size 2, got %d", cluster1.Size()) - } - - if changeType1 != PatternNew { - t.Errorf("Expected PatternNew for first add, got %v", changeType1) - } + pattern1, changeType1 := cm.Add(tokenList1) + pattern2, changeType2 := cm.Add(tokenList2) - if changeType2 != PatternNoChange { - t.Errorf("Expected PatternNoChange for second add to same cluster, got %v", changeType2) - } + // Should be the same pattern (same cluster, merged together) + assert.Equal(t, pattern1.PatternID, pattern2.PatternID, "TokenLists with same signature should merge into same pattern") + assert.Equal(t, 2, pattern2.LogCount, "Pattern should have log count 2") + assert.Equal(t, PatternNew, changeType1, "Expected PatternNew for first add") - stats := cm.GetStats() - if stats.TotalTokenLists != 2 || stats.TotalClusters != 1 { - t.Errorf("Expected 2 TokenLists and 1 cluster, got %d TokenLists and %d clusters", - stats.TotalTokenLists, stats.TotalClusters) - } + // With eager pattern generation, adding the second token list creates wildcards (pattern update) + assert.Equal(t, PatternUpdated, changeType2, "Expected PatternUpdated for second add (creates wildcards)") } func TestClusterManager_Add_DifferentSignatures(t *testing.T) { @@ -106,7 +72,7 @@ func TestClusterManager_Add_DifferentSignatures(t *testing.T) { // Create TokenLists with different signatures tokens1 := []token.Token{ - {Value: "GET", Type: token.TokenHttpMethod}, + {Value: "GET", Type: token.TokenHTTPMethod}, {Value: " ", Type: token.TokenWhitespace}, {Value: "/api", Type: token.TokenAbsolutePath}, } @@ -119,19 +85,11 @@ func TestClusterManager_Add_DifferentSignatures(t *testing.T) { tokenList1 := 
token.NewTokenListWithTokens(tokens1) tokenList2 := token.NewTokenListWithTokens(tokens2) - cluster1, _ := cm.Add(tokenList1) - cluster2, _ := cm.Add(tokenList2) + pattern1, _ := cm.Add(tokenList1) + pattern2, _ := cm.Add(tokenList2) - // Should be different clusters - if cluster1 == cluster2 { - t.Error("TokenLists with different signatures should go to different clusters") - } - - stats := cm.GetStats() - if stats.TotalTokenLists != 2 || stats.TotalClusters != 2 { - t.Errorf("Expected 2 TokenLists and 2 clusters, got %d TokenLists and %d clusters", - stats.TotalTokenLists, stats.TotalClusters) - } + // Should be different patterns (different clusters) + assert.NotEqual(t, pattern1.PatternID, pattern2.PatternID, "TokenLists with different signatures should create different patterns") } func TestClusterManager_GetCluster(t *testing.T) { @@ -139,21 +97,21 @@ func TestClusterManager_GetCluster(t *testing.T) { // Create and add TokenList tokens := []token.Token{ - {Value: "GET", Type: token.TokenHttpMethod}, + {Value: "GET", Type: token.TokenHTTPMethod}, {Value: " ", Type: token.TokenWhitespace}, {Value: "/api", Type: token.TokenAbsolutePath}, } tokenList := token.NewTokenListWithTokens(tokens) signature := token.NewSignature(tokenList) - addedCluster, _ := cm.Add(tokenList) + addedPattern, _ := cm.Add(tokenList) // Retrieve cluster by signature retrievedCluster := cm.GetCluster(signature) - if retrievedCluster != addedCluster { - t.Error("Retrieved cluster should be the same as added cluster") - } + assert.NotNil(t, retrievedCluster, "Should retrieve cluster by signature") + assert.Equal(t, 1, len(retrievedCluster.Patterns), "Cluster should have 1 pattern") + assert.Equal(t, addedPattern.PatternID, retrievedCluster.Patterns[0].PatternID, "Pattern IDs should match") // Try to get non-existent cluster differentTokens := []token.Token{ @@ -165,158 +123,7 @@ func TestClusterManager_GetCluster(t *testing.T) { differentSignature := token.NewSignature(differentTokenList) 
nonExistentCluster := cm.GetCluster(differentSignature) - if nonExistentCluster != nil { - t.Error("Should return nil for non-existent cluster") - } -} - -func TestClusterManager_GetAllClusters(t *testing.T) { - cm := NewClusterManager() - - // Add multiple clusters - tokens1 := []token.Token{ - {Value: "GET", Type: token.TokenHttpMethod}, - {Value: " ", Type: token.TokenWhitespace}, - {Value: "/api", Type: token.TokenAbsolutePath}, - } - tokens2 := []token.Token{ - {Value: "ERROR", Type: token.TokenSeverityLevel}, - {Value: " ", Type: token.TokenWhitespace}, - {Value: "failed", Type: token.TokenWord}, - } - tokens3 := []token.Token{ - {Value: "192.168.1.1", Type: token.TokenIPv4}, - {Value: " ", Type: token.TokenWhitespace}, - {Value: "connected", Type: token.TokenWord}, - } - - tokenList1 := token.NewTokenListWithTokens(tokens1) - tokenList2 := token.NewTokenListWithTokens(tokens2) - tokenList3 := token.NewTokenListWithTokens(tokens3) - - cm.Add(tokenList1) - cm.Add(tokenList2) - cm.Add(tokenList3) - - allClusters := cm.GetAllClusters() - - if len(allClusters) != 3 { - t.Errorf("Expected 3 clusters, got %d", len(allClusters)) - } -} - -func TestClusterManager_GetClustersByLength(t *testing.T) { - cm := NewClusterManager() - - // Add TokenLists of different lengths with different signatures - tokens1 := []token.Token{ - {Value: "GET", Type: token.TokenHttpMethod}, - {Value: " ", Type: token.TokenWhitespace}, - } // Length 2 - - tokens2 := []token.Token{ - {Value: "ERROR", Type: token.TokenSeverityLevel}, - {Value: " ", Type: token.TokenWhitespace}, - {Value: "failed", Type: token.TokenWord}, - } // Length 3 - - tokens3 := []token.Token{ - {Value: "192.168.1.1", Type: token.TokenIPv4}, - {Value: " ", Type: token.TokenWhitespace}, - } // Length 2 (different signature than tokens1) - - tokenList1 := token.NewTokenListWithTokens(tokens1) - tokenList2 := token.NewTokenListWithTokens(tokens2) - tokenList3 := token.NewTokenListWithTokens(tokens3) - - cm.Add(tokenList1) - 
cm.Add(tokenList2) - cm.Add(tokenList3) - - // Get clusters of length 2 - should have 2 different clusters - length2Clusters := cm.GetClustersByLength(2) - if len(length2Clusters) != 2 { - t.Errorf("Expected 2 clusters of length 2, got %d", len(length2Clusters)) - } - - // Get clusters of length 3 - length3Clusters := cm.GetClustersByLength(3) - if len(length3Clusters) != 1 { - t.Errorf("Expected 1 cluster of length 3, got %d", len(length3Clusters)) - } - - // Get clusters of non-existent length - length5Clusters := cm.GetClustersByLength(5) - if len(length5Clusters) != 0 { - t.Errorf("Expected 0 clusters of length 5, got %d", len(length5Clusters)) - } -} - -func TestClusterManager_GetLargestClusters(t *testing.T) { - cm := NewClusterManager() - - // Create clusters of different sizes - // Cluster 1: size 3 - tokens1 := []token.Token{ - {Value: "GET", Type: token.TokenHttpMethod}, - {Value: " ", Type: token.TokenWhitespace}, - {Value: "/api", Type: token.TokenAbsolutePath}, - } - tokenList1a := token.NewTokenListWithTokens(tokens1) - tokenList1b := token.NewTokenListWithTokens([]token.Token{ - {Value: "POST", Type: token.TokenHttpMethod}, - {Value: " ", Type: token.TokenWhitespace}, - {Value: "/users", Type: token.TokenAbsolutePath}, - }) - tokenList1c := token.NewTokenListWithTokens([]token.Token{ - {Value: "PUT", Type: token.TokenHttpMethod}, - {Value: " ", Type: token.TokenWhitespace}, - {Value: "/items", Type: token.TokenAbsolutePath}, - }) - - // Cluster 2: size 1 - tokens2 := []token.Token{ - {Value: "ERROR", Type: token.TokenSeverityLevel}, - {Value: " ", Type: token.TokenWhitespace}, - {Value: "failed", Type: token.TokenWord}, - } - tokenList2 := token.NewTokenListWithTokens(tokens2) - - // Cluster 3: size 2 - tokens3 := []token.Token{ - {Value: "192.168.1.1", Type: token.TokenIPv4}, - {Value: " ", Type: token.TokenWhitespace}, - {Value: "connected", Type: token.TokenWord}, - } - tokenList3a := token.NewTokenListWithTokens(tokens3) - tokenList3b := 
token.NewTokenListWithTokens([]token.Token{ - {Value: "10.0.0.1", Type: token.TokenIPv4}, - {Value: " ", Type: token.TokenWhitespace}, - {Value: "disconnected", Type: token.TokenWord}, - }) - - cm.Add(tokenList1a) - cm.Add(tokenList1b) - cm.Add(tokenList1c) - cm.Add(tokenList2) - cm.Add(tokenList3a) - cm.Add(tokenList3b) - - // Get top 2 largest clusters - largest := cm.GetLargestClusters(2) - - if len(largest) != 2 { - t.Errorf("Expected 2 largest clusters, got %d", len(largest)) - } - - // Should be ordered by size (largest first) - if largest[0].Size() != 3 { - t.Errorf("Largest cluster should have size 3, got %d", largest[0].Size()) - } - - if largest[1].Size() != 2 { - t.Errorf("Second largest cluster should have size 2, got %d", largest[1].Size()) - } + assert.Nil(t, nonExistentCluster, "Should return nil for non-existent cluster") } func TestClusterManager_Clear(t *testing.T) { @@ -324,80 +131,70 @@ func TestClusterManager_Clear(t *testing.T) { // Add some data tokens := []token.Token{ - {Value: "GET", Type: token.TokenHttpMethod}, + {Value: "GET", Type: token.TokenHTTPMethod}, {Value: " ", Type: token.TokenWhitespace}, {Value: "/api", Type: token.TokenAbsolutePath}, } tokenList := token.NewTokenListWithTokens(tokens) + signature := token.NewSignature(tokenList) + cm.Add(tokenList) // Verify data exists - stats := cm.GetStats() - if stats.TotalTokenLists == 0 || stats.TotalClusters == 0 { - t.Error("Should have data before clear") - } + assert.NotNil(t, cm.GetCluster(signature), "Should have cluster before clear") // Clear cm.Clear() // Verify data is gone - stats = cm.GetStats() - if stats.TotalTokenLists != 0 || stats.TotalClusters != 0 || stats.HashBuckets != 0 { - t.Error("Should have no data after clear") - } - - allClusters := cm.GetAllClusters() - if len(allClusters) != 0 { - t.Error("Should have no clusters after clear") - } + assert.Nil(t, cm.GetCluster(signature), "Should have no cluster after clear") } -func TestClusterManager_Stats(t *testing.T) 
{ +func TestClusterManager_GetAllPatterns(t *testing.T) { cm := NewClusterManager() - // Add TokenLists to create clusters of different sizes + // Initially empty + patterns := cm.GetAllPatterns() + assert.Equal(t, 0, len(patterns), "Should have no patterns initially") + + // Add pattern 1 (signature 1) tokens1 := []token.Token{ - {Value: "GET", Type: token.TokenHttpMethod}, + {Value: "GET", Type: token.TokenHTTPMethod}, {Value: " ", Type: token.TokenWhitespace}, {Value: "/api", Type: token.TokenAbsolutePath}, } + pattern1, _ := cm.Add(token.NewTokenListWithTokens(tokens1)) + + // Add pattern 2 (same signature, should merge into pattern 1) tokens2 := []token.Token{ - {Value: "POST", Type: token.TokenHttpMethod}, + {Value: "POST", Type: token.TokenHTTPMethod}, {Value: " ", Type: token.TokenWhitespace}, {Value: "/users", Type: token.TokenAbsolutePath}, } + pattern2, _ := cm.Add(token.NewTokenListWithTokens(tokens2)) + + // Add pattern 3 (different signature) tokens3 := []token.Token{ {Value: "ERROR", Type: token.TokenSeverityLevel}, {Value: " ", Type: token.TokenWhitespace}, {Value: "failed", Type: token.TokenWord}, } + pattern3, _ := cm.Add(token.NewTokenListWithTokens(tokens3)) - tokenList1 := token.NewTokenListWithTokens(tokens1) - tokenList2 := token.NewTokenListWithTokens(tokens2) - tokenList3 := token.NewTokenListWithTokens(tokens3) - - cm.Add(tokenList1) - cm.Add(tokenList2) // Same cluster as tokenList1 - cm.Add(tokenList3) // Different cluster - - stats := cm.GetStats() - - if stats.TotalTokenLists != 3 { - t.Errorf("Expected 3 total TokenLists, got %d", stats.TotalTokenLists) - } - - if stats.TotalClusters != 2 { - t.Errorf("Expected 2 total clusters, got %d", stats.TotalClusters) - } + // Get all patterns + allPatterns := cm.GetAllPatterns() - expectedAvg := 3.0 / 2.0 // 3 TokenLists / 2 clusters - if stats.AverageClusterSize != expectedAvg { - t.Errorf("Expected average cluster size %.2f, got %.2f", expectedAvg, stats.AverageClusterSize) - } + // Should 
have 2 patterns: pattern1 (merged with pattern2) and pattern3 + assert.Equal(t, 2, len(allPatterns), "Should have 2 patterns total") - if stats.HashBuckets == 0 { - t.Error("Should have at least one hash bucket") + // Verify we have both pattern IDs + patternIDs := make(map[uint64]bool) + for _, p := range allPatterns { + patternIDs[p.PatternID] = true } + assert.True(t, patternIDs[pattern1.PatternID], "Should include pattern 1") + assert.True(t, patternIDs[pattern3.PatternID], "Should include pattern 3") + assert.Equal(t, pattern1.PatternID, pattern2.PatternID, "Pattern 1 and 2 should be the same (merged)") } func TestClusterManager_PatternChangeType(t *testing.T) { @@ -405,22 +202,22 @@ func TestClusterManager_PatternChangeType(t *testing.T) { // Create token lists with same signature (HTTP method, space, path) tokens1 := []token.Token{ - {Value: "GET", Type: token.TokenHttpMethod}, + {Value: "GET", Type: token.TokenHTTPMethod}, {Value: " ", Type: token.TokenWhitespace}, {Value: "/api/users", Type: token.TokenAbsolutePath}, } tokens2 := []token.Token{ - {Value: "POST", Type: token.TokenHttpMethod}, + {Value: "POST", Type: token.TokenHTTPMethod}, {Value: " ", Type: token.TokenWhitespace}, {Value: "/api/orders", Type: token.TokenAbsolutePath}, } tokens3 := []token.Token{ - {Value: "PUT", Type: token.TokenHttpMethod}, + {Value: "PUT", Type: token.TokenHTTPMethod}, {Value: " ", Type: token.TokenWhitespace}, {Value: "/api/items", Type: token.TokenAbsolutePath}, } tokens4 := []token.Token{ - {Value: "DELETE", Type: token.TokenHttpMethod}, + {Value: "DELETE", Type: token.TokenHTTPMethod}, {Value: " ", Type: token.TokenWhitespace}, {Value: "/api/products", Type: token.TokenAbsolutePath}, } @@ -431,59 +228,32 @@ func TestClusterManager_PatternChangeType(t *testing.T) { tokenList4 := token.NewTokenListWithTokens(tokens4) // First add - should create a new pattern - cluster1, changeType1 := cm.Add(tokenList1) - if changeType1 != PatternNew { - t.Errorf("Expected PatternNew 
for first add, got %v", changeType1) - } - t.Logf("āœ… Add #1: PatternNew (created cluster with PatternID=%d)", cluster1.GetPatternID()) - - // Second add - same signature, but pattern not yet generated, so no change - cluster2, changeType2 := cm.Add(tokenList2) - if changeType2 != PatternNoChange { - t.Errorf("Expected PatternNoChange for second add, got %v", changeType2) - } - if cluster1 != cluster2 { - t.Error("Should return same cluster for same signature") - } - t.Logf("āœ… Add #2: PatternNoChange (added to existing cluster, size=%d)", cluster2.Size()) - - // Generate pattern to set up for PatternUpdated - pattern := cluster2.GeneratePattern() - if pattern == nil { - t.Fatal("Pattern should be generated") - } - t.Logf(" Pattern after 2 logs: '%s'", cluster2.GetPatternString()) - - // Third add - pattern exists, so it will be updated - cluster3, changeType3 := cm.Add(tokenList3) - if changeType3 != PatternUpdated { - t.Errorf("Expected PatternUpdated for third add (pattern exists), got %v", changeType3) - } - if cluster1 != cluster3 { - t.Error("Should return same cluster for same signature") - } - t.Logf("āœ… Add #3: PatternUpdated (pattern will change, size=%d)", cluster3.Size()) - - // Regenerate pattern to see the change - newPattern := cluster3.GeneratePattern() - if newPattern == nil { - t.Fatal("Pattern should be regenerated") - } - t.Logf(" Pattern after 3 logs: '%s'", cluster3.GetPatternString()) + pattern1, changeType1 := cm.Add(tokenList1) + assert.Equal(t, PatternNew, changeType1, "Expected PatternNew for first add") + t.Logf("āœ… Add #1: PatternNew (created pattern with PatternID=%d)", pattern1.PatternID) + + // Second add - same signature, adding to existing pattern creates wildcards (pattern update) + pattern2, changeType2 := cm.Add(tokenList2) + assert.Equal(t, PatternUpdated, changeType2, "Expected PatternUpdated for second add (creates wildcards)") + assert.Equal(t, pattern1.PatternID, pattern2.PatternID, "Should return same pattern for same 
signature") + t.Logf("āœ… Add #2: PatternUpdated (wildcards created, logCount=%d)", pattern2.LogCount) + t.Logf(" Pattern after 2 logs: '%s'", pattern2.getPatternString()) + + // Third add - pattern exists and will gain more wildcards + pattern3, changeType3 := cm.Add(tokenList3) + assert.Equal(t, PatternUpdated, changeType3, "Expected PatternUpdated for third add") + assert.Equal(t, pattern1.PatternID, pattern3.PatternID, "Should return same pattern for same signature") + t.Logf("āœ… Add #3: PatternUpdated (pattern updated, logCount=%d)", pattern3.LogCount) + t.Logf(" Pattern after 3 logs: '%s'", pattern3.getPatternString()) // Fourth add - pattern exists, so updated again - cluster4, changeType4 := cm.Add(tokenList4) - if changeType4 != PatternUpdated { - t.Errorf("Expected PatternUpdated for fourth add (pattern exists), got %v", changeType4) - } - t.Logf("āœ… Add #4: PatternUpdated (pattern will change, size=%d)", cluster4.Size()) + pattern4, changeType4 := cm.Add(tokenList4) + assert.Equal(t, PatternUpdated, changeType4, "Expected PatternUpdated for fourth add (pattern exists)") + t.Logf("āœ… Add #4: PatternUpdated (pattern will change, logCount=%d)", pattern4.LogCount) - // Final pattern - cluster4.GeneratePattern() - t.Logf(" Final pattern after 4 logs: '%s'", cluster4.GetPatternString()) + // Final pattern (eagerly generated by Add) + t.Logf(" Final pattern after 4 logs: '%s'", pattern4.getPatternString()) - // Verify all returned the same cluster - if cluster1.Size() != 4 { - t.Errorf("Expected cluster size 4, got %d", cluster1.Size()) - } + // Verify all returned the same pattern + assert.Equal(t, 4, pattern4.LogCount, "Expected pattern log count 4") } diff --git a/pkg/logs/patterns/clustering/cluster_test.go b/pkg/logs/patterns/clustering/cluster_test.go index f6152bc46067..79323f7fab6e 100644 --- a/pkg/logs/patterns/clustering/cluster_test.go +++ b/pkg/logs/patterns/clustering/cluster_test.go @@ -8,13 +8,15 @@ package clustering import ( "testing" + 
"github.com/stretchr/testify/assert" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" ) func TestCluster_NewCluster(t *testing.T) { // Create a simple TokenList tokens := []token.Token{ - {Value: "GET", Type: token.TokenHttpMethod}, + {Value: "GET", Type: token.TokenHTTPMethod}, {Value: " ", Type: token.TokenWhitespace}, {Value: "/api", Type: token.TokenAbsolutePath}, } @@ -23,23 +25,15 @@ func TestCluster_NewCluster(t *testing.T) { cluster := NewCluster(signature, tokenList) - if cluster.Size() != 1 { - t.Errorf("Expected cluster size 1, got %d", cluster.Size()) - } - - if !cluster.Signature.Equals(signature) { - t.Error("Cluster signature doesn't match expected signature") - } - - if cluster.Pattern != nil { - t.Error("Pattern should be nil initially (computed lazily)") - } + assert.Equal(t, 0, cluster.Size(), "Expected cluster size 0 initially") + assert.True(t, cluster.Signature.Equals(signature), "Cluster signature doesn't match expected signature") + assert.Empty(t, cluster.Patterns, "Patterns should be empty initially (computed lazily)") } -func TestCluster_Add(t *testing.T) { +func TestCluster_AddTokenListToPatterns(t *testing.T) { // Create first TokenList tokens1 := []token.Token{ - {Value: "GET", Type: token.TokenHttpMethod}, + {Value: "GET", Type: token.TokenHTTPMethod}, {Value: " ", Type: token.TokenWhitespace}, {Value: "/api", Type: token.TokenAbsolutePath}, } @@ -47,210 +41,330 @@ func TestCluster_Add(t *testing.T) { signature1 := token.NewSignature(tokenList1) cluster := NewCluster(signature1, tokenList1) + cluster.AddTokenListToPatterns(tokenList1) + + assert.Equal(t, 1, cluster.Size(), "Expected initial cluster size 1") // Create second TokenList with same signature but different values tokens2 := []token.Token{ - {Value: "POST", Type: token.TokenHttpMethod}, + {Value: "POST", Type: token.TokenHTTPMethod}, {Value: " ", Type: token.TokenWhitespace}, {Value: "/users", Type: token.TokenAbsolutePath}, } tokenList2 := 
token.NewTokenListWithTokens(tokens2) - // Should add successfully (same signature) - if !cluster.Add(tokenList2) { - t.Error("Failed to add TokenList with matching signature") - } + // Add tokenList with matching signature + cluster.AddTokenListToPatterns(tokenList2) - if cluster.Size() != 2 { - t.Errorf("Expected cluster size 2, got %d", cluster.Size()) - } + assert.Equal(t, 2, cluster.Size(), "Expected cluster size 2 after adding") + assert.NotEmpty(t, cluster.Patterns, "Expected patterns to exist after adding TokenLists") +} - // Create third TokenList with different signature - tokens3 := []token.Token{ +func TestCluster_SinglePattern_SingleLog(t *testing.T) { + // When a cluster has only one log, it creates one pattern with no wildcards + tokens := []token.Token{ {Value: "ERROR", Type: token.TokenSeverityLevel}, {Value: " ", Type: token.TokenWhitespace}, {Value: "failed", Type: token.TokenWord}, } - tokenList3 := token.NewTokenListWithTokens(tokens3) + tokenList := token.NewTokenListWithTokens(tokens) + signature := token.NewSignature(tokenList) - // Should fail to add (different signature) - if cluster.Add(tokenList3) { - t.Error("Should not add TokenList with different signature") - } + cluster := NewCluster(signature, tokenList) + cluster.AddTokenListToPatterns(tokenList) + + // Should have exactly one pattern (which is also the primary) + assert.Equal(t, 1, len(cluster.Patterns), "Should have exactly one pattern") + + mostCommon := cluster.GetMostCommonPattern() + assert.NotNil(t, mostCommon, "Most common pattern should not be nil") + + pattern := mostCommon.Template + assert.NotNil(t, pattern, "Pattern template should not be nil") + assert.False(t, cluster.HasWildcards(), "Single log should not have wildcards") + assert.Equal(t, tokenList.Length(), pattern.Length(), "Pattern length should match original TokenList") - if cluster.Size() != 2 { - t.Errorf("Expected cluster size to remain 2, got %d", cluster.Size()) + for i, tok := range pattern.Tokens { + 
assert.Equal(t, tokenList.Tokens[i].Value, tok.Value, + "Pattern token %d value mismatch", i) } } -func TestCluster_GeneratePattern_NoWildcards(t *testing.T) { - // Create cluster with identical TokenLists - tokens := []token.Token{ - {Value: "GET", Type: token.TokenHttpMethod}, +func TestCluster_MultiplePatterns_SpecialCharVariation(t *testing.T) { + // This is the key test for multi-pattern clusters! + // TokenLists with same signature but different special characters should create multiple patterns + // Note: Whitespace variations now merge (normalized to single space) + + signature := token.Signature{ + Position: "Error|Word|Whitespace|Word|Word|Word", + Length: 6, + Hash: 1234, + } + + cluster := NewCluster(signature, nil) + + // Create TokenLists with different special characters (cannot merge - structural difference) + tokens1 := []token.Token{ + {Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard}, // Protected first word + {Value: ":", Type: token.TokenWord, Wildcard: token.NotWildcard}, // Colon {Value: " ", Type: token.TokenWhitespace}, - {Value: "/api", Type: token.TokenAbsolutePath}, + {Value: "connection", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "failed", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + } + tokens2 := []token.Token{ + {Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: ";", Type: token.TokenWord, Wildcard: token.NotWildcard}, // Semicolon - DIFFERENT! 
+ {Value: " ", Type: token.TokenWhitespace}, + {Value: "connection", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "timeout", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, } - tokenList1 := token.NewTokenListWithTokens(tokens) - tokenList2 := token.NewTokenListWithTokens(tokens) // Identical + tokens3 := []token.Token{ + {Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: ":", Type: token.TokenWord, Wildcard: token.NotWildcard}, // Colon - matches tokens1 + {Value: " ", Type: token.TokenWhitespace}, + {Value: "database", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "error", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + } + + tokenList1 := token.NewTokenListWithTokens(tokens1) + tokenList2 := token.NewTokenListWithTokens(tokens2) + tokenList3 := token.NewTokenListWithTokens(tokens3) - cluster := NewCluster(token.NewSignature(tokenList1), tokenList1) - cluster.Add(tokenList2) + cluster.AddTokenListToPatterns(tokenList1) + cluster.AddTokenListToPatterns(tokenList2) // Different special char - new pattern + cluster.AddTokenListToPatterns(tokenList3) // Same special char as tokens1 - same pattern - pattern := cluster.GeneratePattern() + // Should have 2 patterns (one for colon, one for semicolon) + assert.Len(t, cluster.Patterns, 2, "Expected 2 patterns due to special character variation") - if pattern == nil { - t.Fatal("Pattern should not be nil") - } + // Verify pattern sizes + pattern1Size := cluster.Patterns[0].size() + pattern2Size := cluster.Patterns[1].size() - // Should have no wildcards since all values are identical - if cluster.HasWildcards() { - t.Error("Should not have wildcards for identical TokenLists") - } + // One pattern should have 2 token lists, the other should have 1 + validSizes := (pattern1Size == 2 && pattern2Size == 1) || (pattern1Size == 1 && 
pattern2Size == 2) + assert.True(t, validSizes, "Expected pattern sizes [2, 1], got [%d, %d]", pattern1Size, pattern2Size) - // Pattern should match original tokens - if pattern.Length() != 3 { - t.Errorf("Expected pattern length 3, got %d", pattern.Length()) - } + t.Logf("āœ… Multi-pattern cluster created: %d patterns", len(cluster.Patterns)) + t.Logf(" Pattern 1: %d token lists", cluster.Patterns[0].size()) + t.Logf(" Pattern 2: %d token lists", cluster.Patterns[1].size()) +} - if pattern.Tokens[0].Value != "GET" { - t.Errorf("Expected first token 'GET', got '%s'", pattern.Tokens[0].Value) +func TestCluster_FindMatchingPattern(t *testing.T) { + signature := token.Signature{ + Position: "Error|Word|Whitespace|Word", + Length: 4, + Hash: 5678, } -} -func TestCluster_GeneratePattern_WithWildcards(t *testing.T) { - // Create cluster with different values at some positions + cluster := NewCluster(signature, nil) + tokens1 := []token.Token{ - {Value: "GET", Type: token.TokenHttpMethod}, + {Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: ":", Type: token.TokenWord}, {Value: " ", Type: token.TokenWhitespace}, - {Value: "/api", Type: token.TokenAbsolutePath}, + {Value: "failed", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, } tokens2 := []token.Token{ - {Value: "POST", Type: token.TokenHttpMethod}, // Different value - {Value: " ", Type: token.TokenWhitespace}, // Same value - {Value: "/users", Type: token.TokenAbsolutePath}, // Different value + {Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: ":", Type: token.TokenWord}, + {Value: " ", Type: token.TokenWhitespace}, // Different whitespace + {Value: "timeout", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, } tokenList1 := token.NewTokenListWithTokens(tokens1) tokenList2 := token.NewTokenListWithTokens(tokens2) - cluster := NewCluster(token.NewSignature(tokenList1), tokenList1) - cluster.Add(tokenList2) - - pattern := 
cluster.GeneratePattern() + pattern1 := cluster.AddTokenListToPatterns(tokenList1) + pattern2 := cluster.AddTokenListToPatterns(tokenList2) - if pattern == nil { - t.Fatal("Pattern should not be nil") - } + // Should return different patterns + assert.NotEqual(t, pattern1, pattern2, "Should create different patterns for different whitespace") - // Should have wildcards at positions 0 and 2 - if !cluster.HasWildcards() { - t.Error("Should have wildcards for different values") - } + // FindMatchingPattern should return the correct pattern for each token list + found1 := cluster.FindMatchingPattern(tokenList1) + found2 := cluster.FindMatchingPattern(tokenList2) - wildcardPositions := cluster.GetWildcardPositions() - expectedPositions := map[int]bool{0: true, 2: true} + assert.Equal(t, pattern1, found1, "Should find the first pattern for tokenList1") + assert.Equal(t, pattern2, found2, "Should find the second pattern for tokenList2") +} - if len(wildcardPositions) != 2 { - t.Errorf("Expected 2 wildcard positions, got %d", len(wildcardPositions)) +func TestCluster_GetMostCommonPattern(t *testing.T) { + signature := token.Signature{ + Position: "Word|Whitespace|Word", + Length: 3, + Hash: 9999, } - for _, pos := range wildcardPositions { - if !expectedPositions[pos] { - t.Errorf("Unexpected wildcard position: %d", pos) + cluster := NewCluster(signature, nil) + + // Add multiple token lists that split into different patterns + // Pattern 1: 3 logs (should be most common) + for i := 0; i < 3; i++ { + tokens := []token.Token{ + {Value: "Service", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "started", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, } + tokenList := token.NewTokenListWithTokens(tokens) + cluster.AddTokenListToPatterns(tokenList) } - // Check pattern tokens - // Position 0: Wildcard token (empty value, Wildcard field indicates status) - if pattern.Tokens[0].Wildcard != token.IsWildcard 
{ - t.Error("Position 0 should be a wildcard") + // Pattern 2: 1 log (less common) + tokens2 := []token.Token{ + {Value: "Service", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: " ", Type: token.TokenWhitespace}, // Different whitespace + {Value: "stopped", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, } + tokenList2 := token.NewTokenListWithTokens(tokens2) + cluster.AddTokenListToPatterns(tokenList2) - if pattern.Tokens[1].Value != " " || pattern.Tokens[1].Wildcard == token.IsWildcard { - t.Error("Position 1 should not be a wildcard") + mostCommon := cluster.GetMostCommonPattern() + assert.NotNil(t, mostCommon, "Most common pattern should not be nil") + assert.Equal(t, 3, mostCommon.LogCount, "Most common pattern should have 3 logs") +} + +func TestCluster_GetAllPatterns(t *testing.T) { + signature := token.Signature{ + Position: "Word|Whitespace|Numeric", + Length: 3, + Hash: 1111, } - // Position 2: Path wildcard (special case - value is set to path pattern) - if pattern.Tokens[2].Value != "/*" || pattern.Tokens[2].Wildcard != token.IsWildcard { - t.Error("Position 2 should be a wildcard with path pattern") + cluster := NewCluster(signature, nil) + + // Create 3 different patterns via whitespace variation + tokens1 := []token.Token{ + {Value: "Count", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "42", Type: token.TokenNumeric}, + } + tokens2 := []token.Token{ + {Value: "Count", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: " ", Type: token.TokenWhitespace}, // Different + {Value: "100", Type: token.TokenNumeric}, + } + tokens3 := []token.Token{ + {Value: "Count", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: " ", Type: token.TokenWhitespace}, // Different + {Value: "200", Type: token.TokenNumeric}, } + + cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens1)) + 
cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens2)) + cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens3)) + + allPatterns := cluster.GetAllPatterns() + assert.Len(t, allPatterns, 3, "Expected 3 patterns") } -func TestCluster_GeneratePattern_SingleTokenList(t *testing.T) { - // Create cluster with single TokenList - tokens := []token.Token{ - {Value: "ERROR", Type: token.TokenSeverityLevel}, +func TestCluster_ExtractWildcardValues_MultiPattern(t *testing.T) { + signature := token.Signature{ + Position: "Error|Word|Whitespace|Word", + Length: 4, + Hash: 2222, + } + + cluster := NewCluster(signature, nil) + + tokens1 := []token.Token{ + {Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: ":", Type: token.TokenWord}, {Value: " ", Type: token.TokenWhitespace}, - {Value: "failed", Type: token.TokenWord}, + {Value: "connection", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + } + tokens2 := []token.Token{ + {Value: "Error", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: ":", Type: token.TokenWord}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "timeout", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, } - tokenList := token.NewTokenListWithTokens(tokens) - cluster := NewCluster(token.NewSignature(tokenList), tokenList) - pattern := cluster.GeneratePattern() + tokenList1 := token.NewTokenListWithTokens(tokens1) + tokenList2 := token.NewTokenListWithTokens(tokens2) + + cluster.AddTokenListToPatterns(tokenList1) + cluster.AddTokenListToPatterns(tokenList2) + + // Both should merge into same pattern + // Extract wildcard values from tokenList2 + values := cluster.ExtractWildcardValues(tokenList2) - if pattern == nil { - t.Fatal("Pattern should not be nil") + // Should have one wildcard value for the last word token + assert.Len(t, values, 1, "Expected 1 wildcard value") + if len(values) > 0 { + assert.Equal(t, "timeout", values[0], "Expected wildcard value 
'timeout'") } +} - // Single TokenList should have no wildcards - if cluster.HasWildcards() { - t.Error("Single TokenList should not have wildcards") +func TestCluster_Size_MultiPattern(t *testing.T) { + signature := token.Signature{ + Position: "Word|Whitespace|Word", + Length: 3, + Hash: 3333, } - // Pattern should be identical to original - if pattern.Length() != tokenList.Length() { - t.Error("Pattern length should match original TokenList") + cluster := NewCluster(signature, nil) + + // Add 2 token lists to pattern 1 + for i := 0; i < 2; i++ { + tokens := []token.Token{ + {Value: "Test", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: " ", Type: token.TokenWhitespace}, + {Value: "passed", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, + } + cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens)) } - for i, tok := range pattern.Tokens { - if tok.Value != tokenList.Tokens[i].Value { - t.Errorf("Pattern token %d value mismatch: expected '%s', got '%s'", - i, tokenList.Tokens[i].Value, tok.Value) + // Add 3 token lists to pattern 2 (different whitespace) + for i := 0; i < 3; i++ { + tokens := []token.Token{ + {Value: "Test", Type: token.TokenWord, Wildcard: token.NotWildcard}, + {Value: " ", Type: token.TokenWhitespace}, // Different + {Value: "failed", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, } + cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens)) } + + // Total size should be 5 (2 + 3) + assert.Equal(t, 5, cluster.Size(), "Expected cluster size 5 (2 + 3)") } -func TestCluster_GeneratePattern_Caching(t *testing.T) { - // Create cluster +func TestCluster_BackwardCompatibility(t *testing.T) { + // Test that old API methods still work (GetPatternString, GetWildcardPositions, etc.) 
+ signature := token.Signature{ + Position: "Word|Whitespace|Word", + Length: 3, + Hash: 4444, + } + + cluster := NewCluster(signature, nil) + tokens1 := []token.Token{ - {Value: "GET", Type: token.TokenHttpMethod}, + {Value: "Service", Type: token.TokenWord, Wildcard: token.NotWildcard}, {Value: " ", Type: token.TokenWhitespace}, - {Value: "/api", Type: token.TokenAbsolutePath}, + {Value: "started", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, } tokens2 := []token.Token{ - {Value: "POST", Type: token.TokenHttpMethod}, + {Value: "Service", Type: token.TokenWord, Wildcard: token.NotWildcard}, {Value: " ", Type: token.TokenWhitespace}, - {Value: "/users", Type: token.TokenAbsolutePath}, + {Value: "stopped", Type: token.TokenWord, Wildcard: token.PotentialWildcard}, } - tokenList1 := token.NewTokenListWithTokens(tokens1) - tokenList2 := token.NewTokenListWithTokens(tokens2) - - cluster := NewCluster(token.NewSignature(tokenList1), tokenList1) - cluster.Add(tokenList2) - - // Generate pattern twice - pattern1 := cluster.GeneratePattern() - pattern2 := cluster.GeneratePattern() + cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens1)) + cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens2)) - // Should return the same cached instance - if pattern1 != pattern2 { - t.Error("Pattern should be cached and return same instance") - } + patternString := cluster.GetPatternString() + assert.NotEmpty(t, patternString, "GetPatternString should return a valid pattern string") - // Add another TokenList - should invalidate cache - tokens3 := []token.Token{ - {Value: "PUT", Type: token.TokenHttpMethod}, - {Value: " ", Type: token.TokenWhitespace}, - {Value: "/items", Type: token.TokenAbsolutePath}, - } - tokenList3 := token.NewTokenListWithTokens(tokens3) - cluster.Add(tokenList3) + hasWildcards := cluster.HasWildcards() + assert.True(t, hasWildcards, "Should have wildcards") - pattern3 := cluster.GeneratePattern() + wildcardPositions := 
cluster.GetWildcardPositions() + assert.NotEmpty(t, wildcardPositions, "Should have wildcard positions") - // Should be a new instance (cache was invalidated) - if pattern1 == pattern3 { - t.Error("Pattern cache should be invalidated after adding new TokenList") - } + t.Logf("āœ… Backward compatibility: pattern='%s', wildcards=%v", patternString, wildcardPositions) } diff --git a/pkg/logs/patterns/clustering/merging/merging.go b/pkg/logs/patterns/clustering/merging/merging.go index 20c5b9cf42f7..fa208b0ab708 100644 --- a/pkg/logs/patterns/clustering/merging/merging.go +++ b/pkg/logs/patterns/clustering/merging/merging.go @@ -14,14 +14,10 @@ import ( // shouldProtectPosition determines if a the token is the first word token and should be wildcarded. func shouldProtectPosition(position int, tokenType token.TokenType) bool { - if position == 0 && tokenType == token.TokenWord { - return true - } - - return false + return position == 0 && tokenType == token.TokenWord } -// CanMergeTokenLists checks if two TokenLists can be merged into a unified pattern. +// CanMergeTokenLists checks if incoming log (tl2) can merge with existing pattern's sample (tl1). // Returns true only if all token positions are either identical or mergeable according // to their comparison results and protection rules. func CanMergeTokenLists(tl1, tl2 *token.TokenList) bool { @@ -45,7 +41,7 @@ func CanMergeTokenLists(tl1, tl2 *token.TokenList) bool { continue } - // For wildcard result, check protection rules + // For wildcard result, check first word protection rule if result == token.Wildcard && shouldProtectPosition(i, tok1.Type) { return false } @@ -89,54 +85,3 @@ func MergeTokenLists(tl1, tl2 *token.TokenList) *token.TokenList { return merged } - -// FindMergeableGroups analyzes a list of TokenLists and groups them by mergeability. -// This is used to detect heterogeneous clusters that should be split into multiple patterns. 
-// Returns a list of groups where each group contains mutually mergeable TokenLists. -func FindMergeableGroups(tokenLists []*token.TokenList) [][]*token.TokenList { - if len(tokenLists) == 0 { - return nil - } - - if len(tokenLists) == 1 { - return [][]*token.TokenList{tokenLists} - } - - var groups [][]*token.TokenList - processed := make(map[int]bool) - - for i := 0; i < len(tokenLists); i++ { - if processed[i] { - continue - } - - // Start a new group with this TokenList - group := []*token.TokenList{tokenLists[i]} - processed[i] = true - - // Find all TokenLists that can merge with this one - for j := i + 1; j < len(tokenLists); j++ { - if processed[j] { - continue - } - - // Check if this TokenList can merge with all members of the current group - canMergeWithGroup := true - for _, groupMember := range group { - if !CanMergeTokenLists(tokenLists[j], groupMember) { - canMergeWithGroup = false - break - } - } - - if canMergeWithGroup { - group = append(group, tokenLists[j]) - processed[j] = true - } - } - - groups = append(groups, group) - } - - return groups -} diff --git a/pkg/logs/patterns/clustering/merging/merging_test.go b/pkg/logs/patterns/clustering/merging/merging_test.go index ef436455318e..2d1f5230120b 100644 --- a/pkg/logs/patterns/clustering/merging/merging_test.go +++ b/pkg/logs/patterns/clustering/merging/merging_test.go @@ -175,78 +175,6 @@ func TestMergeTokenLists_UnmergeableReturnsNil(t *testing.T) { assert.Nil(t, merged, "Unmergeable TokenLists should return nil") } -func TestFindMergeableGroups_SingleGroup(t *testing.T) { - tl1 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged", token.NotWildcard), - token.NewToken(token.TokenWord, "user123", token.PotentialWildcard), - }) - - tl2 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged", token.NotWildcard), - token.NewToken(token.TokenWord, "admin456", token.PotentialWildcard), - }) - - tl3 := 
token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged", token.NotWildcard), - token.NewToken(token.TokenWord, "guest789", token.PotentialWildcard), - }) - - groups := FindMergeableGroups([]*token.TokenList{tl1, tl2, tl3}) - assert.Equal(t, 1, len(groups), "All mergeable TokenLists should be in one group") - assert.Equal(t, 3, len(groups[0]), "Group should contain all three TokenLists") -} - -func TestFindMergeableGroups_MultipleGroups(t *testing.T) { - // Group 1: mergeable user logs - tl1 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged", token.NotWildcard), - token.NewToken(token.TokenWord, "user123", token.PotentialWildcard), - }) - - tl2 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged", token.NotWildcard), - token.NewToken(token.TokenWord, "admin456", token.PotentialWildcard), - }) - - // Group 2: unmergeable generic words - tl3 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged", token.NotWildcard), - token.NewToken(token.TokenWord, "cat", token.NotWildcard), - }) - - tl4 := token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "logged", token.NotWildcard), - token.NewToken(token.TokenWord, "dog", token.NotWildcard), - }) - - groups := FindMergeableGroups([]*token.TokenList{tl1, tl2, tl3, tl4}) - assert.Equal(t, 3, len(groups), "Should have 3 groups: user group + 2 separate generic word entries") - - // Find the largest group (should be the user group with 2 members) - maxSize := 0 - for _, group := range groups { - if len(group) > maxSize { - maxSize = len(group) - } - } - assert.Equal(t, 2, maxSize, "Largest group should have 2 TokenLists") -} - -func TestFindMergeableGroups_EmptyInput(t *testing.T) { - groups := FindMergeableGroups([]*token.TokenList{}) - assert.Nil(t, groups) -} - -func TestFindMergeableGroups_SingleTokenList(t *testing.T) { - tl1 := 
token.NewTokenListWithTokens([]token.Token{ - token.NewToken(token.TokenWord, "hello", token.NotWildcard), - }) - - groups := FindMergeableGroups([]*token.TokenList{tl1}) - assert.Equal(t, 1, len(groups)) - assert.Equal(t, 1, len(groups[0])) -} - func TestMergeTokenLists_ProtectionRulesEnforced(t *testing.T) { // Try to merge when first token is a word but differs tl1 := token.NewTokenListWithTokens([]token.Token{ diff --git a/pkg/logs/patterns/clustering/pattern.go b/pkg/logs/patterns/clustering/pattern.go new file mode 100644 index 000000000000..908e8cb10942 --- /dev/null +++ b/pkg/logs/patterns/clustering/pattern.go @@ -0,0 +1,160 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package clustering provides clustering functionality for grouping similar TokenLists +// and identifying wildcard positions for pattern extraction. +package clustering + +import ( + "strings" + "time" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +// Pattern represents a single pattern within a cluster. +// A cluster with the same signature may contain multiple incompatible patterns +// (e.g., different non-identical special characters that cannot merge). 
+type Pattern struct { + Template *token.TokenList // The pattern template with wildcards (matches proto "template") + Positions []int // Token indices that are wildcards (matches proto "pos_list") + PatternID uint64 // Unique pattern ID (matches proto "pattern_id") + Sample *token.TokenList // First log sample (for multi-pattern matching) + LogCount int // Total number of logs that matched this pattern + + // Timestamp tracking for stateful encoding + CreatedAt time.Time // When pattern was first created + UpdatedAt time.Time // When pattern was last modified + LastSentAt time.Time // When we last sent this pattern to gRPC +} + +// newPattern creates a new pattern from a single token list. +func newPattern(tokenList *token.TokenList, patternID uint64) *Pattern { + now := time.Now() + return &Pattern{ + Template: tokenList, // First log becomes initial template + Positions: []int{}, // No wildcards yet + PatternID: patternID, + Sample: tokenList, // Store first log as sample + LogCount: 1, // First log + CreatedAt: now, + UpdatedAt: now, + LastSentAt: time.Time{}, // Zero time - never sent + } +} + +// size returns the number of logs in this pattern. +func (p *Pattern) size() int { + return p.LogCount +} + +// getPatternString returns a string representation of the pattern. +func (p *Pattern) getPatternString() string { + if p.Template == nil { + return "" + } + + var parts []string + for _, tok := range p.Template.Tokens { + // Use "*" for wildcard positions, actual value otherwise + if tok.Wildcard == token.IsWildcard { + parts = append(parts, "*") + } else { + // Only use printable ASCII/UTF-8 characters in the template + cleaned := sanitizeForTemplate(tok.Value) + if cleaned != "" { + parts = append(parts, cleaned) + } + } + } + return strings.Join(parts, "") +} + +// hasWildcards returns true if this pattern contains wildcard positions. 
+func (p *Pattern) hasWildcards() bool { + return len(p.Positions) > 0 +} + +// getWildcardPositions returns wildcard token positions (indices in token array). +func (p *Pattern) getWildcardPositions() []int { + return p.Positions +} + +// getWildcardCharPositions returns character indices where wildcards appear in the pattern string. +func (p *Pattern) getWildcardCharPositions() []int { + if p.Template == nil { + return nil + } + + var charPositions []int + currentPos := 0 + + for _, tok := range p.Template.Tokens { + // Clean the token value for proper length calculation + cleaned := sanitizeForTemplate(tok.Value) + + if tok.Wildcard == token.IsWildcard { + // Record the current character position for this wildcard + charPositions = append(charPositions, currentPos) + // Wildcard is represented as "*" (1 character) + currentPos += 1 + } else if cleaned != "" { + // Add the length of the cleaned token value + currentPos += len(cleaned) + } + } + + return charPositions +} + +// getWildcardValues extracts wildcard values from the sample log. +// Note: In practice, wildcard values are extracted from incoming logs, not stored ones. +func (p *Pattern) getWildcardValues() []string { + if p.Template == nil || p.Sample == nil { + return nil + } + + // Extract values from sample at wildcard positions + return p.extractWildcardValues(p.Sample) +} + +// extractWildcardValues extracts the wildcard values from a specific TokenList. +func (p *Pattern) extractWildcardValues(tokenList *token.TokenList) []string { + if p.Template == nil || len(p.Positions) == 0 { + return []string{} + } + + wildcardValues := make([]string, 0, len(p.Positions)) + for _, pos := range p.Positions { + if pos < tokenList.Length() { + wildcardValues = append(wildcardValues, tokenList.Tokens[pos].Value) + } + } + + return wildcardValues +} + +// markAsSent updates the LastSentAt timestamp to indicate this pattern was sent to gRPC. 
+func (p *Pattern) markAsSent() { + p.LastSentAt = time.Now() +} + +// needsSending returns true if this pattern has never been sent or has been updated since last sent. +func (p *Pattern) needsSending() bool { + return p.LastSentAt.IsZero() || p.UpdatedAt.After(p.LastSentAt) +} + +// sanitizeForTemplate removes non-printable characters from template strings +func sanitizeForTemplate(s string) string { + runes := []rune(s) + result := make([]rune, 0, len(runes)) + for _, r := range runes { + // Keep only printable characters (space and above, excluding DEL) + if r >= ' ' && r != 0x7F && r < 0xFFFD { + result = append(result, r) + } + } + return string(result) +} diff --git a/pkg/logs/patterns/clustering/pattern_test.go b/pkg/logs/patterns/clustering/pattern_test.go new file mode 100644 index 000000000000..8c3e92d49f2d --- /dev/null +++ b/pkg/logs/patterns/clustering/pattern_test.go @@ -0,0 +1,481 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. 
+ +package clustering + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +func TestNewPattern(t *testing.T) { + // Create a simple token list + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) + + patternID := uint64(12345) + pattern := newPattern(tl, patternID) + + assert.NotNil(t, pattern) + assert.Equal(t, patternID, pattern.PatternID) + assert.Equal(t, tl, pattern.Template, "Template should be the initial token list") + assert.Equal(t, tl, pattern.Sample, "Sample should be the initial token list") + assert.Equal(t, 1, pattern.LogCount, "LogCount should be 1 for first log") + assert.Equal(t, 0, len(pattern.Positions), "No wildcards initially") + assert.False(t, pattern.CreatedAt.IsZero(), "CreatedAt should be set") + assert.False(t, pattern.UpdatedAt.IsZero(), "UpdatedAt should be set") + assert.True(t, pattern.LastSentAt.IsZero(), "LastSentAt should be zero initially") +} + +func TestAddTokenList(t *testing.T) { + // Note: addTokenList() was inlined into Cluster.AddTokenListToPatterns() + // This test now verifies that LogCount and UpdatedAt can be modified directly + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + initialLogCount := pattern.LogCount + initialUpdatedAt := pattern.UpdatedAt + + // Simulate what cluster does when adding to existing pattern + time.Sleep(1 * time.Millisecond) // Ensure time difference + pattern.LogCount++ + pattern.UpdatedAt = time.Now() + + assert.Equal(t, initialLogCount+1, pattern.LogCount, "LogCount should increment") + assert.True(t, pattern.UpdatedAt.After(initialUpdatedAt), "UpdatedAt should be updated") +} + +func TestSize(t *testing.T) { + tl 
:= token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + assert.Equal(t, 1, pattern.size()) + + // Simulate adding more logs (what cluster does) + pattern.LogCount++ + assert.Equal(t, 2, pattern.size()) + + pattern.LogCount++ + assert.Equal(t, 3, pattern.size()) +} + +func TestGetPatternString_NoWildcards(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + result := pattern.getPatternString() + + assert.Equal(t, "Service started", result) +} + +func TestGetPatternString_WithWildcards(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + + pattern := newPattern(tl, 12345) + pattern.Positions = []int{2} + result := pattern.getPatternString() + + assert.Equal(t, "Service *", result) +} + +func TestGetPatternString_NilTemplate(t *testing.T) { + pattern := &Pattern{ + Template: nil, + } + result := pattern.getPatternString() + + assert.Equal(t, "", result) +} + +func TestHasWildcards(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + + // No wildcards initially + assert.False(t, pattern.hasWildcards()) + + // Add wildcard positions + pattern.Positions = []int{1, 3} + assert.True(t, pattern.hasWildcards()) +} + +func TestGetWildcardPositions(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + pattern.Positions = []int{1, 3, 5} + + positions 
:= pattern.getWildcardPositions() + assert.Equal(t, []int{1, 3, 5}, positions) +} + +// getParamCount returns the number of parameters/wildcards in a pattern. +func getParamCount(p *Pattern) int { + return len(p.Positions) +} + +func TestGetParamCount(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + + // No wildcards + assert.Equal(t, 0, getParamCount(pattern)) + + // Add wildcard positions + pattern.Positions = []int{1, 3, 5} + assert.Equal(t, 3, getParamCount(pattern)) +} + +func TestGetWildcardCharPositions(t *testing.T) { + // Create pattern: "Service *" + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + + pattern := newPattern(tl, 12345) + pattern.Positions = []int{2} + + charPositions := pattern.getWildcardCharPositions() + // "Service" (7 chars) + " " (1 char) = 8, wildcard at position 8 + assert.Equal(t, []int{8}, charPositions) +} + +func TestGetWildcardCharPositions_MultipleWildcards(t *testing.T) { + // Create pattern: "Error * in *" + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Error", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "code", token.IsWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "in", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "module", token.IsWildcard)) + + pattern := newPattern(tl, 12345) + pattern.Positions = []int{2, 6} + + charPositions := pattern.getWildcardCharPositions() + // "Error " = 6 chars, wildcard at position 6 + // "Error * in " = 6 + 1 (wildcard) + 4 (" in ") = 11, wildcard 
at position 11 + assert.Equal(t, []int{6, 11}, charPositions) +} + +func TestGetWildcardCharPositions_NilTemplate(t *testing.T) { + pattern := &Pattern{ + Template: nil, + } + + charPositions := pattern.getWildcardCharPositions() + assert.Nil(t, charPositions) +} + +func TestGetWildcardValues(t *testing.T) { + // Create sample log: "Service started" + sample := token.NewTokenList() + sample.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + sample.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + sample.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) + + // Create template with wildcard: "Service *" + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + + pattern := newPattern(sample, 12345) + pattern.Template = tl + pattern.Positions = []int{2} + + values := pattern.getWildcardValues() + assert.Equal(t, []string{"started"}, values) +} + +func TestGetWildcardValues_NilTemplate(t *testing.T) { + sample := token.NewTokenList() + sample.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) + + pattern := newPattern(sample, 12345) + pattern.Template = nil + + values := pattern.getWildcardValues() + assert.Nil(t, values) +} + +func TestGetWildcardValues_NilSample(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.IsWildcard)) + + pattern := newPattern(tl, 12345) + pattern.Sample = nil + pattern.Positions = []int{0} + + values := pattern.getWildcardValues() + assert.Nil(t, values) +} + +func TestExtractWildcardValues(t *testing.T) { + // Create template: "Service *" + template := token.NewTokenList() + template.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + 
template.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + + pattern := newPattern(template, 12345) + pattern.Template = template + pattern.Positions = []int{2} + + // Create incoming log: "Service crashed" + incoming := token.NewTokenList() + incoming.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + incoming.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + incoming.Add(token.NewToken(token.TokenWord, "crashed", token.PotentialWildcard)) + + values := pattern.extractWildcardValues(incoming) + assert.Equal(t, []string{"crashed"}, values) +} + +func TestExtractWildcardValues_MultipleWildcards(t *testing.T) { + // Create template: "* in * at *" + template := token.NewTokenList() + template.Add(token.NewToken(token.TokenWord, "value1", token.IsWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "in", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "value2", token.IsWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "at", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "value3", token.IsWildcard)) + + pattern := newPattern(template, 12345) + pattern.Template = template + pattern.Positions = []int{0, 4, 8} + + // Create incoming log: "Error in module at line" + incoming := token.NewTokenList() + incoming.Add(token.NewToken(token.TokenWord, "Error", token.PotentialWildcard)) + incoming.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + incoming.Add(token.NewToken(token.TokenWord, "in", token.NotWildcard)) + incoming.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + incoming.Add(token.NewToken(token.TokenWord, "module", token.PotentialWildcard)) + 
incoming.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + incoming.Add(token.NewToken(token.TokenWord, "at", token.NotWildcard)) + incoming.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + incoming.Add(token.NewToken(token.TokenWord, "line", token.PotentialWildcard)) + + values := pattern.extractWildcardValues(incoming) + assert.Equal(t, []string{"Error", "module", "line"}, values) +} + +func TestExtractWildcardValues_NilTemplate(t *testing.T) { + pattern := &Pattern{ + Template: nil, + Positions: []int{0}, + } + + incoming := token.NewTokenList() + incoming.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) + + values := pattern.extractWildcardValues(incoming) + assert.Equal(t, []string{}, values) +} + +func TestExtractWildcardValues_NoPositions(t *testing.T) { + template := token.NewTokenList() + template.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) + + pattern := newPattern(template, 12345) + pattern.Positions = []int{} // No wildcards + + incoming := token.NewTokenList() + incoming.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) + + values := pattern.extractWildcardValues(incoming) + assert.Equal(t, []string{}, values) +} + +func TestExtractWildcardValues_PositionOutOfBounds(t *testing.T) { + template := token.NewTokenList() + template.Add(token.NewToken(token.TokenWord, "Test", token.IsWildcard)) + + pattern := newPattern(template, 12345) + pattern.Positions = []int{0, 5} // Position 5 is out of bounds + + incoming := token.NewTokenList() + incoming.Add(token.NewToken(token.TokenWord, "Value", token.PotentialWildcard)) + + values := pattern.extractWildcardValues(incoming) + assert.Equal(t, []string{"Value"}, values, "Should only extract valid positions") +} + +func TestMarkAsSent(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + assert.True(t, pattern.LastSentAt.IsZero(), 
"LastSentAt should be zero initially") + + pattern.markAsSent() + assert.False(t, pattern.LastSentAt.IsZero(), "LastSentAt should be set after marking") +} + +func TestNeedsSending_NeverSent(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + assert.True(t, pattern.needsSending(), "Should need sending if never sent") +} + +func TestNeedsSending_AlreadySent_NotUpdated(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + time.Sleep(1 * time.Millisecond) + pattern.markAsSent() + + assert.False(t, pattern.needsSending(), "Should not need sending if sent and not updated") +} + +func TestNeedsSending_UpdatedAfterSent(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + pattern.markAsSent() + + // Update pattern + time.Sleep(1 * time.Millisecond) + pattern.UpdatedAt = time.Now() + + assert.True(t, pattern.needsSending(), "Should need sending if updated after last sent") +} + +func TestSanitizeForTemplate_PrintableChars(t *testing.T) { + input := "Hello World 123" + result := sanitizeForTemplate(input) + assert.Equal(t, "Hello World 123", result) +} + +func TestSanitizeForTemplate_NonPrintableChars(t *testing.T) { + // Include null byte, bell, backspace + input := "Hello\x00\x07\x08World" + result := sanitizeForTemplate(input) + assert.Equal(t, "HelloWorld", result, "Non-printable characters should be removed") +} + +func TestSanitizeForTemplate_DELCharacter(t *testing.T) { + input := "Hello\x7FWorld" + result := sanitizeForTemplate(input) + assert.Equal(t, "HelloWorld", result, "DEL character should be removed") +} + +func TestSanitizeForTemplate_SpecialChars(t *testing.T) { + input := "Service: Error! 
@user #tag" + result := sanitizeForTemplate(input) + assert.Equal(t, "Service: Error! @user #tag", result, "Special chars should be kept") +} + +func TestSanitizeForTemplate_EmptyString(t *testing.T) { + input := "" + result := sanitizeForTemplate(input) + assert.Equal(t, "", result) +} + +func TestSanitizeForTemplate_UnicodeChars(t *testing.T) { + input := "Hello 世界 🌍" + result := sanitizeForTemplate(input) + // Emoji (🌍) is above 0xFFFD and gets filtered out by sanitizeForTemplate + // CJK characters (世界) are within the acceptable range + assert.Equal(t, "Hello 世界 ", result, "CJK chars preserved, emoji filtered") +} + +func TestPattern_IntegrationScenario(t *testing.T) { + // Simulate a realistic pattern lifecycle + + // 1. First log arrives + log1 := token.NewTokenList() + log1.Add(token.NewToken(token.TokenWord, "ERROR", token.NotWildcard)) + log1.Add(token.NewToken(token.TokenWord, ":", token.NotWildcard)) + log1.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + log1.Add(token.NewToken(token.TokenWord, "Database", token.PotentialWildcard)) + log1.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + log1.Add(token.NewToken(token.TokenWord, "connection", token.PotentialWildcard)) + log1.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + log1.Add(token.NewToken(token.TokenWord, "failed", token.PotentialWildcard)) + + pattern := newPattern(log1, 9999) + + assert.Equal(t, 1, pattern.LogCount) + assert.False(t, pattern.hasWildcards()) + assert.Equal(t, "ERROR: Database connection failed", pattern.getPatternString()) + + // 2.
Pattern updated with wildcards (simulated) + template := token.NewTokenList() + template.Add(token.NewToken(token.TokenWord, "ERROR", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, ":", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + + pattern.Template = template + pattern.Positions = []int{3, 5, 7} + pattern.LogCount++ // Simulate second log being added + pattern.UpdatedAt = time.Now() + + assert.Equal(t, 2, pattern.LogCount) + assert.True(t, pattern.hasWildcards()) + assert.Equal(t, 3, getParamCount(pattern)) + assert.Equal(t, "ERROR: * * *", pattern.getPatternString()) + + // 3. Extract wildcard values from new log + log2 := token.NewTokenList() + log2.Add(token.NewToken(token.TokenWord, "ERROR", token.NotWildcard)) + log2.Add(token.NewToken(token.TokenWord, ":", token.NotWildcard)) + log2.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + log2.Add(token.NewToken(token.TokenWord, "Network", token.PotentialWildcard)) + log2.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + log2.Add(token.NewToken(token.TokenWord, "timeout", token.PotentialWildcard)) + log2.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + log2.Add(token.NewToken(token.TokenWord, "reached", token.PotentialWildcard)) + + values := pattern.extractWildcardValues(log2) + assert.Equal(t, []string{"Network", "timeout", "reached"}, values) + + // 4. Check sending status + assert.True(t, pattern.needsSending()) + pattern.markAsSent() + assert.False(t, pattern.needsSending()) + + // 5. 
Update and check needs sending again + time.Sleep(1 * time.Millisecond) + pattern.LogCount++ + pattern.UpdatedAt = time.Now() + assert.True(t, pattern.needsSending()) +} diff --git a/pkg/logs/patterns/token/signature.go b/pkg/logs/patterns/token/signature.go index d513de9f6d84..7f8411d39b56 100644 --- a/pkg/logs/patterns/token/signature.go +++ b/pkg/logs/patterns/token/signature.go @@ -30,8 +30,16 @@ func NewSignature(tl *TokenList) Signature { } position := positionSignature(tl) - hash := computeHash(position) + // Include first word token value in signature if it exists + // This prevents messages with different first words but similar signature from being in the same cluster + // eg: I love burger vs You love burger + if len(tl.Tokens) > 0 && tl.Tokens[0].Type == TokenWord { + firstWordValue := tl.Tokens[0].Value + position = firstWordValue + position + } + + hash := computeHash(position) return Signature{ Position: position, Length: len(tl.Tokens), diff --git a/pkg/logs/patterns/token/signature_test.go b/pkg/logs/patterns/token/signature_test.go index bb4bbd4c0234..885062fa90c4 100644 --- a/pkg/logs/patterns/token/signature_test.go +++ b/pkg/logs/patterns/token/signature_test.go @@ -43,40 +43,52 @@ func TestNewSignature(t *testing.T) { } func TestSignature_Equals(t *testing.T) { + // Test 1: Same structure, SAME first word, different other values → EQUAL signatures tokens1 := []Token{ {Type: TokenWord, Value: "hello"}, {Type: TokenWhitespace, Value: " "}, {Type: TokenWord, Value: "world"}, } tokens2 := []Token{ - {Type: TokenWord, Value: "goodbye"}, + {Type: TokenWord, Value: "hello"}, // Same first word! 
{Type: TokenWhitespace, Value: " "}, - {Type: TokenWord, Value: "world"}, - } - tokens3 := []Token{ - {Type: TokenWord, Value: "hello"}, - {Type: TokenNumeric, Value: "123"}, // Different type + {Type: TokenWord, Value: "universe"}, // Different second word } - tl1 := NewTokenListWithTokens(tokens1) tl2 := NewTokenListWithTokens(tokens2) - tl3 := NewTokenListWithTokens(tokens3) - sig1 := NewSignature(tl1) sig2 := NewSignature(tl2) - sig3 := NewSignature(tl3) - // Same structure, different values - should be equal if !sig1.Equals(sig2) { - t.Error("TokenLists with same structure should have equal signatures") + t.Error("TokenLists with same first word and structure should have equal signatures") + } + + // Test 2: Same structure, DIFFERENT first word → DIFFERENT signatures + tokens3 := []Token{ + {Type: TokenWord, Value: "goodbye"}, // Different first word + {Type: TokenWhitespace, Value: " "}, + {Type: TokenWord, Value: "world"}, } + tl3 := NewTokenListWithTokens(tokens3) + sig3 := NewSignature(tl3) - // Different structure - should not be equal if sig1.Equals(sig3) { + t.Error("TokenLists with different first word should NOT have equal signatures") + } + + // Test 3: Different structure (different types) → DIFFERENT signatures + tokens4 := []Token{ + {Type: TokenWord, Value: "hello"}, + {Type: TokenNumeric, Value: "123"}, // Different type + } + tl4 := NewTokenListWithTokens(tokens4) + sig4 := NewSignature(tl4) + + if sig1.Equals(sig4) { t.Error("TokenLists with different structure should not have equal signatures") } - // Test signature equality with itself + // Test 4: Signature equality with itself if !sig1.Equals(sig1) { t.Error("Signature should equal itself") } diff --git a/pkg/logs/patterns/token/token.go b/pkg/logs/patterns/token/token.go index 2f40c36ab8d4..89b0dcdea904 100644 --- a/pkg/logs/patterns/token/token.go +++ b/pkg/logs/patterns/token/token.go @@ -41,7 +41,7 @@ const ( TokenDate // TokenDate is the date token type ) -// WildcardStatus describes 
a token's relationship to wildcards +// WildcardStatus describes a token's potential to become a wildcard type WildcardStatus int const ( @@ -95,14 +95,14 @@ func (t *Token) String() string { return fmt.Sprintf("%s(%s)", t.Type, t.Value) } -// Compare checks if two tokens can merge and returns the result +// Compare checks if two tokens can merge func (t *Token) Compare(t2 *Token) MergeResult { // Different types cannot merge if t.Type != t2.Type { return Conflict } - // Identical value + // Same type same value - check this first before type-specific logic if t.Value == t2.Value { return Identical } @@ -112,7 +112,6 @@ func (t *Token) Compare(t2 *Token) MergeResult { return Identical } - // Different values - check if they can merge into wildcard // Whitespace never wildcards (structural) if t.Type == TokenWhitespace { return Conflict From eb26d04994a6f29c77ef21d1ce83a1ff1bb0d38e Mon Sep 17 00:00:00 2001 From: yoon nguyen Date: Fri, 14 Nov 2025 15:06:59 -0500 Subject: [PATCH 13/16] index on AGNTLOG-334-full-intake-integration: 6dbedbbefd Merge joy.zhang/stateful-encoding-grpc-v2 into AGNTLOG-334-full-intake-integration --- comp/logs/agent/config/config.go | 2 - pkg/logs/message/message.go | 6 + pkg/logs/patterns/clustering/cluster.go | 160 +-- .../patterns/clustering/cluster_manager.go | 42 - .../clustering/cluster_manager_test.go | 59 +- pkg/logs/patterns/clustering/cluster_test.go | 120 +- pkg/logs/patterns/clustering/pattern.go | 65 +- .../clustering/pattern_state_tracker.go | 57 + .../clustering/pattern_state_tracker_test.go | 380 ++++++ pkg/logs/patterns/clustering/pattern_test.go | 123 +- pkg/logs/processor/processor.go | 34 + pkg/logs/sender/dumb_strategy.go | 278 ----- pkg/logs/sender/grpc/grpc_sender.go | 287 ----- pkg/logs/sender/grpc/grpc_sender_test.go | 642 ----------- pkg/logs/sender/grpc/stateful_encoding.pb.go | 1014 ----------------- .../sender/grpc/stateful_encoding_grpc.pb.go | 115 -- 16 files changed, 728 insertions(+), 2656 deletions(-) create 
mode 100644 pkg/logs/patterns/clustering/pattern_state_tracker.go create mode 100644 pkg/logs/patterns/clustering/pattern_state_tracker_test.go delete mode 100644 pkg/logs/sender/dumb_strategy.go delete mode 100644 pkg/logs/sender/grpc/grpc_sender.go delete mode 100644 pkg/logs/sender/grpc/grpc_sender_test.go delete mode 100644 pkg/logs/sender/grpc/stateful_encoding.pb.go delete mode 100644 pkg/logs/sender/grpc/stateful_encoding_grpc.pb.go diff --git a/comp/logs/agent/config/config.go b/comp/logs/agent/config/config.go index 9986b3b453df..69e932c3f49b 100644 --- a/comp/logs/agent/config/config.go +++ b/comp/logs/agent/config/config.go @@ -119,8 +119,6 @@ func BuildEndpointsWithConfig(coreConfig pkgconfigmodel.Reader, logsConfig *Logs "please use '%s' and '%s' instead", logsConfig.getConfigKey("logs_dd_url"), logsConfig.getConfigKey("logs_no_ssl")) } - mrfEnabled := coreConfig.GetBool("multi_region_failover.enabled") - // logs_config.logs_dd_url might specify a HTTP(S) proxy. Never fall back to TCP in this case. 
haveHTTPProxy := false if logsDDURL, defined := logsConfig.logsDDURL(); defined { diff --git a/pkg/logs/message/message.go b/pkg/logs/message/message.go index 5a4757702cf6..e56b8b4afbcf 100644 --- a/pkg/logs/message/message.go +++ b/pkg/logs/message/message.go @@ -11,6 +11,7 @@ import ( "fmt" "time" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering" "github.com/DataDog/datadog-agent/pkg/logs/sources" "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" "github.com/DataDog/datadog-agent/pkg/util/log" @@ -71,6 +72,11 @@ func (m *Payload) Size() int64 { type Message struct { MessageContent MessageMetadata + + // Pattern extraction + Pattern *clustering.Pattern // The pattern this log matched + WildcardValues []string // This log's specific wildcard values + PatternTemplateState clustering.PatternTemplateStatus // Whether a new/updated pattern template needs sending or if the pattern template stay unchanged (New/Update/None) } // StatefulMessage represents a log message for gRPC stateful streaming diff --git a/pkg/logs/patterns/clustering/cluster.go b/pkg/logs/patterns/clustering/cluster.go index f037daec8a9d..a870a11fa52e 100644 --- a/pkg/logs/patterns/clustering/cluster.go +++ b/pkg/logs/patterns/clustering/cluster.go @@ -93,14 +93,13 @@ func (c *Cluster) regeneratePattern(p *Pattern, newTokenList *token.TokenList) { // Incremental merge: merge new log with existing template merged := merging.MergeTokenLists(p.Template, newTokenList) if merged == nil { - // Merge failed (shouldn't happen since CanMergeTokenLists passed), keep current template return } p.Template = merged p.Positions = make([]int, 0, merged.Length()) - // Build wildcard positions list + // Build wildcard positions list when 2 tokenlists are mergable. 
for i := 0; i < merged.Length(); i++ { tok := merged.Tokens[i] if tok.Wildcard == token.IsWildcard { @@ -117,163 +116,6 @@ func (c *Cluster) regeneratePattern(p *Pattern, newTokenList *token.TokenList) { p.UpdatedAt = time.Now() } -// ============================================================================= -// Pattern Access Methods -// ============================================================================= - -// FindMatchingPattern finds the Pattern that matches the given TokenList. -// Returns the matching Pattern, or nil if no match found. -func (c *Cluster) FindMatchingPattern(tokenList *token.TokenList) *Pattern { - // Ensure patterns are generated - if len(c.Patterns) == 0 { - return nil - } - - // Try to find a Pattern where the TokenList can merge - for _, p := range c.Patterns { - // Check if this TokenList can merge with the pattern's sample - if p.Sample != nil && merging.CanMergeTokenLists(tokenList, p.Sample) { - return p - } - } - - // Fallback: return most common pattern (largest group) - return c.GetMostCommonPattern() -} - -// GetPatternString returns a string representation of the most common pattern. -// For backward compatibility. -func (c *Cluster) GetPatternString() string { - primary := c.GetMostCommonPattern() - if primary == nil { - return "" - } - return primary.getPatternString() -} - -// GetMostCommonPattern returns the pattern with the highest log count in this cluster. -// When a cluster contains multiple patterns (due to structural differences like special characters), -// this returns the most frequently occurring pattern, which is typically the most representative. 
-func (c *Cluster) GetMostCommonPattern() *Pattern { - if len(c.Patterns) == 0 { - return nil - } - - mostCommonIdx := 0 - maxLogCount := c.Patterns[0].LogCount - for idx, p := range c.Patterns { - if p.LogCount > maxLogCount { - maxLogCount = p.LogCount - mostCommonIdx = idx - } - } - return c.Patterns[mostCommonIdx] -} - -// GetAllPatterns returns all Patterns in this cluster. -func (c *Cluster) GetAllPatterns() []*Pattern { - return c.Patterns -} - -// GetPatternID returns the pattern ID for the most common pattern. -// For backward compatibility. -func (c *Cluster) GetPatternID() uint64 { - primary := c.GetMostCommonPattern() - if primary == nil { - return 0 - } - return primary.PatternID -} - -// ============================================================================= -// Wildcard Methods -// ============================================================================= - -// HasWildcards returns true if any pattern in this cluster contains wildcard positions. -func (c *Cluster) HasWildcards() bool { - for _, p := range c.Patterns { - if p.hasWildcards() { - return true - } - } - return false -} - -// GetWildcardPositions returns wildcard token positions for the most common pattern. -// For backward compatibility. -func (c *Cluster) GetWildcardPositions() []int { - primary := c.GetMostCommonPattern() - if primary == nil { - return nil - } - return primary.getWildcardPositions() -} - -// GetWildcardCharPositions returns character positions where wildcards appear in the most common pattern string. -// For backward compatibility. -func (c *Cluster) GetWildcardCharPositions() []int { - primary := c.GetMostCommonPattern() - if primary == nil { - return nil - } - return primary.getWildcardCharPositions() -} - -// GetWildcardValues extracts the actual values from the most recent token list in the most common pattern. -// For backward compatibility. 
-func (c *Cluster) GetWildcardValues() []string { - primary := c.GetMostCommonPattern() - if primary == nil { - return nil - } - return primary.getWildcardValues() -} - -// ExtractWildcardValues extracts the wildcard values from a specific TokenList. -// Uses the matching Pattern to determine wildcard positions. -func (c *Cluster) ExtractWildcardValues(tokenList *token.TokenList) []string { - // Find the matching pattern for this TokenList - p := c.FindMatchingPattern(tokenList) - if p == nil { - return []string{} - } - return p.extractWildcardValues(tokenList) -} - -// ============================================================================= -// State Management & Metadata -// ============================================================================= - -// Size returns the total number of TokenLists across all patterns in this cluster. -func (c *Cluster) Size() int { - total := 0 - for _, p := range c.Patterns { - total += p.size() - } - return total -} - -// MarkAsSent updates the LastSentAt timestamp for all patterns. -func (c *Cluster) MarkAsSent() { - for _, p := range c.Patterns { - p.markAsSent() - } -} - -// NeedsSending returns true if any pattern has never been sent or has been updated since last sent. 
-func (c *Cluster) NeedsSending() bool { - for _, p := range c.Patterns { - if p.needsSending() { - return true - } - } - return false -} - -// ============================================================================= -// Helper Functions -// ============================================================================= - // getPathPattern converts a path to hierarchical wildcard pattern func getPathPattern(path string) string { if path == "/" { diff --git a/pkg/logs/patterns/clustering/cluster_manager.go b/pkg/logs/patterns/clustering/cluster_manager.go index 30ea4f2d1893..864047159d18 100644 --- a/pkg/logs/patterns/clustering/cluster_manager.go +++ b/pkg/logs/patterns/clustering/cluster_manager.go @@ -112,48 +112,6 @@ func (cm *ClusterManager) Add(tokenList *token.TokenList) (*Pattern, PatternChan return pattern, PatternNew } -// GetCluster retrieves the cluster with the given signature. -func (cm *ClusterManager) GetCluster(signature token.Signature) *Cluster { - hash := signature.Hash - - cm.mu.RLock() - defer cm.mu.RUnlock() - - clusters, exists := cm.hashBuckets[hash] - if !exists { - return nil - } - - for _, cluster := range clusters { - if cluster.Signature.Equals(signature) { - return cluster - } - } - - return nil -} - -// GetAllPatterns returns all patterns across all clusters. -// This is useful for re-sending pattern state after stream hard rotation or shutdown. -// Patterns are returned in no particular order since we are resending all patterns. -// Quite expensive for now, might need to be optimized later. -func (cm *ClusterManager) GetAllPatterns() []*Pattern { - var allPatterns []*Pattern - - cm.mu.RLock() - defer cm.mu.RUnlock() - - // Iterate through all clusters in all hash buckets - for _, clusters := range cm.hashBuckets { - for _, cluster := range clusters { - // Collect all patterns from this cluster - allPatterns = append(allPatterns, cluster.Patterns...) - } - } - - return allPatterns -} - // Clear removes all clusters. 
func (cm *ClusterManager) Clear() { cm.mu.Lock() diff --git a/pkg/logs/patterns/clustering/cluster_manager_test.go b/pkg/logs/patterns/clustering/cluster_manager_test.go index 60016e92f7d9..76be758f7276 100644 --- a/pkg/logs/patterns/clustering/cluster_manager_test.go +++ b/pkg/logs/patterns/clustering/cluster_manager_test.go @@ -13,6 +13,47 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" ) +// Test-only helper functions + +// getCluster retrieves the cluster with the given signature. +func getCluster(cm *ClusterManager, signature token.Signature) *Cluster { + hash := signature.Hash + + cm.mu.RLock() + defer cm.mu.RUnlock() + + clusters, exists := cm.hashBuckets[hash] + if !exists { + return nil + } + + for _, cluster := range clusters { + if cluster.Signature.Equals(signature) { + return cluster + } + } + + return nil +} + +// getAllPatterns returns all patterns across all clusters. +func getAllPatterns(cm *ClusterManager) []*Pattern { + var allPatterns []*Pattern + + cm.mu.RLock() + defer cm.mu.RUnlock() + + // Iterate through all clusters in all hash buckets + for _, clusters := range cm.hashBuckets { + for _, cluster := range clusters { + // Collect all patterns from this cluster + allPatterns = append(allPatterns, cluster.Patterns...) 
+ } + } + + return allPatterns +} + func TestClusterManager_NewClusterManager(t *testing.T) { cm := NewClusterManager() @@ -107,7 +148,7 @@ func TestClusterManager_GetCluster(t *testing.T) { addedPattern, _ := cm.Add(tokenList) // Retrieve cluster by signature - retrievedCluster := cm.GetCluster(signature) + retrievedCluster := getCluster(cm, signature) assert.NotNil(t, retrievedCluster, "Should retrieve cluster by signature") assert.Equal(t, 1, len(retrievedCluster.Patterns), "Cluster should have 1 pattern") @@ -122,7 +163,7 @@ func TestClusterManager_GetCluster(t *testing.T) { differentTokenList := token.NewTokenListWithTokens(differentTokens) differentSignature := token.NewSignature(differentTokenList) - nonExistentCluster := cm.GetCluster(differentSignature) + nonExistentCluster := getCluster(cm, differentSignature) assert.Nil(t, nonExistentCluster, "Should return nil for non-existent cluster") } @@ -141,20 +182,20 @@ func TestClusterManager_Clear(t *testing.T) { cm.Add(tokenList) // Verify data exists - assert.NotNil(t, cm.GetCluster(signature), "Should have cluster before clear") + assert.NotNil(t, getCluster(cm, signature), "Should have cluster before clear") // Clear cm.Clear() // Verify data is gone - assert.Nil(t, cm.GetCluster(signature), "Should have no cluster after clear") + assert.Nil(t, getCluster(cm, signature), "Should have no cluster after clear") } func TestClusterManager_GetAllPatterns(t *testing.T) { cm := NewClusterManager() // Initially empty - patterns := cm.GetAllPatterns() + patterns := getAllPatterns(cm) assert.Equal(t, 0, len(patterns), "Should have no patterns initially") // Add pattern 1 (signature 1) @@ -182,7 +223,7 @@ func TestClusterManager_GetAllPatterns(t *testing.T) { pattern3, _ := cm.Add(token.NewTokenListWithTokens(tokens3)) // Get all patterns - allPatterns := cm.GetAllPatterns() + allPatterns := getAllPatterns(cm) // Should have 2 patterns: pattern1 (merged with pattern2) and pattern3 assert.Equal(t, 2, len(allPatterns), 
"Should have 2 patterns total") @@ -237,14 +278,14 @@ func TestClusterManager_PatternChangeType(t *testing.T) { assert.Equal(t, PatternUpdated, changeType2, "Expected PatternUpdated for second add (creates wildcards)") assert.Equal(t, pattern1.PatternID, pattern2.PatternID, "Should return same pattern for same signature") t.Logf("✅ Add #2: PatternUpdated (wildcards created, logCount=%d)", pattern2.LogCount) - t.Logf(" Pattern after 2 logs: '%s'", pattern2.getPatternString()) + t.Logf(" Pattern after 2 logs: '%s'", pattern2.GetPatternString()) // Third add - pattern exists and will gain more wildcards pattern3, changeType3 := cm.Add(tokenList3) assert.Equal(t, PatternUpdated, changeType3, "Expected PatternUpdated for third add") assert.Equal(t, pattern1.PatternID, pattern3.PatternID, "Should return same pattern for same signature") t.Logf("✅ Add #3: PatternUpdated (pattern updated, logCount=%d)", pattern3.LogCount) - t.Logf(" Pattern after 3 logs: '%s'", pattern3.getPatternString()) + t.Logf(" Pattern after 3 logs: '%s'", pattern3.GetPatternString()) // Fourth add - pattern exists, so updated again pattern4, changeType4 := cm.Add(tokenList4) @@ -252,7 +293,7 @@ func TestClusterManager_PatternChangeType(t *testing.T) { t.Logf("✅ Add #4: PatternUpdated (pattern will change, logCount=%d)", pattern4.LogCount) // Final pattern (eagerly generated by Add) - t.Logf(" Final pattern after 4 logs: '%s'", pattern4.getPatternString()) + t.Logf(" Final pattern after 4 logs: '%s'", pattern4.GetPatternString()) // Verify all returned the same pattern assert.Equal(t, 4, pattern4.LogCount, "Expected pattern log count 4") diff --git a/pkg/logs/patterns/clustering/cluster_test.go b/pkg/logs/patterns/clustering/cluster_test.go index 79323f7fab6e..350ec8be5a0d 100644 --- a/pkg/logs/patterns/clustering/cluster_test.go +++ b/pkg/logs/patterns/clustering/cluster_test.go @@ -10,6 +10,7 @@ import ( "github.com/stretchr/testify/assert" +
"github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering/merging" "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" ) @@ -25,7 +26,7 @@ func TestCluster_NewCluster(t *testing.T) { cluster := NewCluster(signature, tokenList) - assert.Equal(t, 0, cluster.Size(), "Expected cluster size 0 initially") + assert.Equal(t, 0, clusterSize(cluster), "Expected cluster size 0 initially") assert.True(t, cluster.Signature.Equals(signature), "Cluster signature doesn't match expected signature") assert.Empty(t, cluster.Patterns, "Patterns should be empty initially (computed lazily)") } @@ -43,7 +44,7 @@ func TestCluster_AddTokenListToPatterns(t *testing.T) { cluster := NewCluster(signature1, tokenList1) cluster.AddTokenListToPatterns(tokenList1) - assert.Equal(t, 1, cluster.Size(), "Expected initial cluster size 1") + assert.Equal(t, 1, clusterSize(cluster), "Expected initial cluster size 1") // Create second TokenList with same signature but different values tokens2 := []token.Token{ @@ -56,7 +57,7 @@ func TestCluster_AddTokenListToPatterns(t *testing.T) { // Add tokenList with matching signature cluster.AddTokenListToPatterns(tokenList2) - assert.Equal(t, 2, cluster.Size(), "Expected cluster size 2 after adding") + assert.Equal(t, 2, clusterSize(cluster), "Expected cluster size 2 after adding") assert.NotEmpty(t, cluster.Patterns, "Expected patterns to exist after adding TokenLists") } @@ -76,12 +77,12 @@ func TestCluster_SinglePattern_SingleLog(t *testing.T) { // Should have exactly one pattern (which is also the primary) assert.Equal(t, 1, len(cluster.Patterns), "Should have exactly one pattern") - mostCommon := cluster.GetMostCommonPattern() + mostCommon := getMostCommonPattern(cluster) assert.NotNil(t, mostCommon, "Most common pattern should not be nil") pattern := mostCommon.Template assert.NotNil(t, pattern, "Pattern template should not be nil") - assert.False(t, cluster.HasWildcards(), "Single log should not have wildcards") + assert.False(t, 
hasWildcards(cluster), "Single log should not have wildcards") assert.Equal(t, tokenList.Length(), pattern.Length(), "Pattern length should match original TokenList") for i, tok := range pattern.Tokens { @@ -184,9 +185,9 @@ func TestCluster_FindMatchingPattern(t *testing.T) { // Should return different patterns assert.NotEqual(t, pattern1, pattern2, "Should create different patterns for different whitespace") - // FindMatchingPattern should return the correct pattern for each token list - found1 := cluster.FindMatchingPattern(tokenList1) - found2 := cluster.FindMatchingPattern(tokenList2) + // findMatchingPattern should return the correct pattern for each token list + found1 := findMatchingPattern(cluster, tokenList1) + found2 := findMatchingPattern(cluster, tokenList2) assert.Equal(t, pattern1, found1, "Should find the first pattern for tokenList1") assert.Equal(t, pattern2, found2, "Should find the second pattern for tokenList2") @@ -222,7 +223,7 @@ func TestCluster_GetMostCommonPattern(t *testing.T) { tokenList2 := token.NewTokenListWithTokens(tokens2) cluster.AddTokenListToPatterns(tokenList2) - mostCommon := cluster.GetMostCommonPattern() + mostCommon := getMostCommonPattern(cluster) assert.NotNil(t, mostCommon, "Most common pattern should not be nil") assert.Equal(t, 3, mostCommon.LogCount, "Most common pattern should have 3 logs") } @@ -257,7 +258,7 @@ func TestCluster_GetAllPatterns(t *testing.T) { cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens2)) cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens3)) - allPatterns := cluster.GetAllPatterns() + allPatterns := cluster.Patterns assert.Len(t, allPatterns, 3, "Expected 3 patterns") } @@ -291,7 +292,7 @@ func TestCluster_ExtractWildcardValues_MultiPattern(t *testing.T) { // Both should merge into same pattern // Extract wildcard values from tokenList2 - values := cluster.ExtractWildcardValues(tokenList2) + values := extractWildcardValues(cluster, tokenList2) // Should have one 
wildcard value for the last word token assert.Len(t, values, 1, "Expected 1 wildcard value") @@ -330,7 +331,7 @@ func TestCluster_Size_MultiPattern(t *testing.T) { } // Total size should be 5 (2 + 3) - assert.Equal(t, 5, cluster.Size(), "Expected cluster size 5 (2 + 3)") + assert.Equal(t, 5, clusterSize(cluster), "Expected cluster size 5 (2 + 3)") } func TestCluster_BackwardCompatibility(t *testing.T) { @@ -357,14 +358,99 @@ func TestCluster_BackwardCompatibility(t *testing.T) { cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens1)) cluster.AddTokenListToPatterns(token.NewTokenListWithTokens(tokens2)) - patternString := cluster.GetPatternString() - assert.NotEmpty(t, patternString, "GetPatternString should return a valid pattern string") + patternString := getPatternString(cluster) + assert.NotEmpty(t, patternString, "getPatternString should return a valid pattern string") - hasWildcards := cluster.HasWildcards() - assert.True(t, hasWildcards, "Should have wildcards") + wildcards := hasWildcards(cluster) + assert.True(t, wildcards, "Should have wildcards") - wildcardPositions := cluster.GetWildcardPositions() + wildcardPositions := getWildcardPositions(cluster) assert.NotEmpty(t, wildcardPositions, "Should have wildcard positions") t.Logf("āœ… Backward compatibility: pattern='%s', wildcards=%v", patternString, wildcardPositions) } + +// ============================================================================= +// Test Helper Functions +// ============================================================================= + +// getMostCommonPattern returns the pattern with the highest log count in the cluster. 
+func getMostCommonPattern(c *Cluster) *Pattern { + if len(c.Patterns) == 0 { + return nil + } + + mostCommonIdx := 0 + maxLogCount := c.Patterns[0].LogCount + for idx, p := range c.Patterns { + if p.LogCount > maxLogCount { + maxLogCount = p.LogCount + mostCommonIdx = idx + } + } + return c.Patterns[mostCommonIdx] +} + +// getPatternString returns a string representation of the most common pattern. +func getPatternString(c *Cluster) string { + mostCommon := getMostCommonPattern(c) + if mostCommon == nil { + return "" + } + return mostCommon.GetPatternString() +} + +// getWildcardPositions returns wildcard token positions for the most common pattern. +func getWildcardPositions(c *Cluster) []int { + mostCommon := getMostCommonPattern(c) + if mostCommon == nil { + return nil + } + return mostCommon.Positions +} + +// hasWildcards returns true if any pattern in this cluster contains wildcard positions. +func hasWildcards(c *Cluster) bool { + for _, p := range c.Patterns { + if len(p.Positions) > 0 { + return true + } + } + return false +} + +// extractWildcardValues extracts wildcard values from a TokenList using the matching pattern. +func extractWildcardValues(c *Cluster, tokenList *token.TokenList) []string { + p := findMatchingPattern(c, tokenList) + if p == nil { + return []string{} + } + return p.GetWildcardValues(tokenList) +} + +// findMatchingPattern finds the Pattern that matches the given TokenList. +func findMatchingPattern(c *Cluster, tokenList *token.TokenList) *Pattern { + if len(c.Patterns) == 0 { + return nil + } + + // Try to find a Pattern where the TokenList can merge + for _, p := range c.Patterns { + // Check if this TokenList can merge with the pattern's sample + if p.Sample != nil && merging.CanMergeTokenLists(tokenList, p.Sample) { + return p + } + } + + // No matching pattern found + return nil +} + +// clusterSize returns the total number of logs across all patterns in the cluster. 
+func clusterSize(c *Cluster) int { + total := 0 + for _, p := range c.Patterns { + total += p.LogCount + } + return total +} diff --git a/pkg/logs/patterns/clustering/pattern.go b/pkg/logs/patterns/clustering/pattern.go index 908e8cb10942..417d8c6a7cca 100644 --- a/pkg/logs/patterns/clustering/pattern.go +++ b/pkg/logs/patterns/clustering/pattern.go @@ -28,6 +28,9 @@ type Pattern struct { CreatedAt time.Time // When pattern was first created UpdatedAt time.Time // When pattern was last modified LastSentAt time.Time // When we last sent this pattern to gRPC + + // State tracking for pattern messages + SentTemplate string // The template string that was last sent (for detecting changes) } // newPattern creates a new pattern from a single token list. @@ -50,8 +53,8 @@ func (p *Pattern) size() int { return p.LogCount } -// getPatternString returns a string representation of the pattern. -func (p *Pattern) getPatternString() string { +// GetPatternString returns the pattern template as a string with wildcards marked as "*" +func (p *Pattern) GetPatternString() string { if p.Template == nil { return "" } @@ -77,51 +80,9 @@ func (p *Pattern) hasWildcards() bool { return len(p.Positions) > 0 } -// getWildcardPositions returns wildcard token positions (indices in token array). -func (p *Pattern) getWildcardPositions() []int { - return p.Positions -} - -// getWildcardCharPositions returns character indices where wildcards appear in the pattern string. 
-func (p *Pattern) getWildcardCharPositions() []int { - if p.Template == nil { - return nil - } - - var charPositions []int - currentPos := 0 - - for _, tok := range p.Template.Tokens { - // Clean the token value for proper length calculation - cleaned := sanitizeForTemplate(tok.Value) - - if tok.Wildcard == token.IsWildcard { - // Record the current character position for this wildcard - charPositions = append(charPositions, currentPos) - // Wildcard is represented as "*" (1 character) - currentPos += 1 - } else if cleaned != "" { - // Add the length of the cleaned token value - currentPos += len(cleaned) - } - } - - return charPositions -} - -// getWildcardValues extracts wildcard values from the sample log. -// Note: In practice, wildcard values are extracted from incoming logs, not stored ones. -func (p *Pattern) getWildcardValues() []string { - if p.Template == nil || p.Sample == nil { - return nil - } - - // Extract values from sample at wildcard positions - return p.extractWildcardValues(p.Sample) -} - -// extractWildcardValues extracts the wildcard values from a specific TokenList. -func (p *Pattern) extractWildcardValues(tokenList *token.TokenList) []string { +// GetWildcardValues extracts the wildcard values from a specific TokenList. +// This is called per-log to get that log's specific wildcard parameter values. +func (p *Pattern) GetWildcardValues(tokenList *token.TokenList) []string { if p.Template == nil || len(p.Positions) == 0 { return []string{} } @@ -136,16 +97,6 @@ func (p *Pattern) extractWildcardValues(tokenList *token.TokenList) []string { return wildcardValues } -// markAsSent updates the LastSentAt timestamp to indicate this pattern was sent to gRPC. -func (p *Pattern) markAsSent() { - p.LastSentAt = time.Now() -} - -// needsSending returns true if this pattern has never been sent or has been updated since last sent. 
-func (p *Pattern) needsSending() bool { - return p.LastSentAt.IsZero() || p.UpdatedAt.After(p.LastSentAt) -} - // sanitizeForTemplate removes non-printable characters from template strings func sanitizeForTemplate(s string) string { runes := []rune(s) diff --git a/pkg/logs/patterns/clustering/pattern_state_tracker.go b/pkg/logs/patterns/clustering/pattern_state_tracker.go new file mode 100644 index 000000000000..301b229bf680 --- /dev/null +++ b/pkg/logs/patterns/clustering/pattern_state_tracker.go @@ -0,0 +1,57 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +// Package clustering provides clustering functionality for grouping similar TokenLists +// and identifying wildcard positions for pattern extraction. +package clustering + +import "time" + +// PatternTemplateStatus indicates whether the pattern template needs to be sent +type PatternTemplateStatus int + +const ( + // TemplateNotNeeded indicates template is already synced, no action needed + TemplateNotNeeded PatternTemplateStatus = iota + // TemplateIsNew indicates template has never been sent, needs PatternDefine + TemplateIsNew + // TemplateChanged indicates template changed since last send, needs PatternDelete + PatternDefine + TemplateChanged +) + +// NeedsResend determines if a pattern template needs to be sent and its status. +// Returns (needsSend, templateStatus): +// - (false, TemplateNotNeeded) if template was already sent and hasn't changed +// - (true, TemplateIsNew) if template has never been sent +// - (true, TemplateChanged) if template changed since last send +func (p *Pattern) NeedsResend() (bool, PatternTemplateStatus) { + if p == nil { + return false, TemplateNotNeeded + } + + // Never sent? 
Need to send as new template + if p.LastSentAt.IsZero() { + return true, TemplateIsNew + } + + // Check if template changed since last send + currentTemplate := p.GetPatternString() + if p.SentTemplate != currentTemplate { + return true, TemplateChanged + } + + // Already sent and unchanged + return false, TemplateNotNeeded +} + +// MarkAsSent records that this pattern was successfully sent. +// It updates both the LastSentAt timestamp and stores the sent template. +func (p *Pattern) MarkAsSent() { + if p == nil { + return + } + p.LastSentAt = time.Now() + p.SentTemplate = p.GetPatternString() +} diff --git a/pkg/logs/patterns/clustering/pattern_state_tracker_test.go b/pkg/logs/patterns/clustering/pattern_state_tracker_test.go new file mode 100644 index 000000000000..54b67349777a --- /dev/null +++ b/pkg/logs/patterns/clustering/pattern_state_tracker_test.go @@ -0,0 +1,380 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +package clustering + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" +) + +// Test-only helper functions + +// wasSent returns true if this pattern has been sent at least once. +func wasSent(p *Pattern) bool { + if p == nil { + return false + } + return !p.LastSentAt.IsZero() +} + +// templateChanged returns true if the template has changed since last send. +func templateChanged(p *Pattern) bool { + if p == nil { + return false + } + if p.LastSentAt.IsZero() { + return false // Never sent, so no baseline to compare + } + currentTemplate := p.GetPatternString() + return p.SentTemplate != currentTemplate +} + +// getSentTemplate returns the template that was last sent. +// Returns empty string if never sent. 
+func getSentTemplate(p *Pattern) string { + if p == nil { + return "" + } + return p.SentTemplate +} + +// TestNeedsResend_NeverSent tests that a pattern that has never been sent needs sending as PatternDefine +func TestNeedsResend_NeverSent(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + + needsSend, templateState := pattern.NeedsResend() + assert.True(t, needsSend, "Pattern should need sending") + assert.Equal(t, TemplateIsNew, templateState, "Should be TemplateIsNew for first send") +} + +// TestNeedsResend_AlreadySent_NoChange tests that a sent pattern with no changes doesn't need resending +func TestNeedsResend_AlreadySent_NoChange(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + pattern.MarkAsSent() + + needsSend, templateState := pattern.NeedsResend() + assert.False(t, needsSend, "Pattern should not need sending") + assert.Equal(t, TemplateNotNeeded, templateState, "Should be TemplateNotNeeded") +} + +// TestNeedsResend_TemplateChanged tests that a pattern with changed template needs sending as PatternUpdate +func TestNeedsResend_TemplateChanged(t *testing.T) { + // Create initial pattern + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + pattern.MarkAsSent() + + // Simulate template evolution (add wildcard) + template := 
token.NewTokenList() + template.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + + pattern.Template = template + pattern.Positions = []int{2} + pattern.UpdatedAt = time.Now() + + needsSend, templateState := pattern.NeedsResend() + assert.True(t, needsSend, "Pattern should need sending after template change") + assert.Equal(t, TemplateChanged, templateState, "Should be TemplateChanged for template change") +} + +// TestNeedsResend_NilPattern tests that a nil pattern doesn't need sending +func TestNeedsResend_NilPattern(t *testing.T) { + var pattern *Pattern = nil + + needsSend, templateState := pattern.NeedsResend() + assert.False(t, needsSend, "Nil pattern should not need sending") + assert.Equal(t, TemplateNotNeeded, templateState, "Should be TemplateNotNeeded for nil") +} + +// TestMarkAsSent_UpdatesTimestampAndTemplate tests that MarkAsSent properly records state +func TestMarkAsSent_UpdatesTimestampAndTemplate(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + assert.True(t, pattern.LastSentAt.IsZero(), "LastSentAt should be zero initially") + assert.Equal(t, "", pattern.SentTemplate, "SentTemplate should be empty initially") + + pattern.MarkAsSent() + + assert.False(t, pattern.LastSentAt.IsZero(), "LastSentAt should be set") + assert.Equal(t, "Service started", pattern.SentTemplate, "SentTemplate should match current template") +} + +// TestMarkAsSent_NilPattern tests that MarkAsSent handles nil gracefully +func TestMarkAsSent_NilPattern(t *testing.T) { + var pattern *Pattern = nil + // Should not panic + pattern.MarkAsSent() +} + +// 
TestWasSent_NeverSent tests that WasSent returns false for unsent patterns +func TestWasSent_NeverSent(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) + + pattern := newPattern(tl, 12345) + assert.False(t, wasSent(pattern), "wasSent should return false for new pattern") +} + +// TestWasSent_AfterSend tests that WasSent returns true after sending +func TestWasSent_AfterSend(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) + + pattern := newPattern(tl, 12345) + pattern.MarkAsSent() + + assert.True(t, wasSent(pattern), "wasSent should return true after MarkAsSent") +} + +// TestWasSent_NilPattern tests that WasSent handles nil gracefully +func TestWasSent_NilPattern(t *testing.T) { + var pattern *Pattern = nil + assert.False(t, wasSent(pattern), "wasSent should return false for nil pattern") +} + +// TestTemplateChanged_NeverSent tests that TemplateChanged returns false for unsent patterns +func TestTemplateChanged_NeverSent(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) + + pattern := newPattern(tl, 12345) + assert.False(t, templateChanged(pattern), "templateChanged should return false if never sent") +} + +// TestTemplateChanged_NoChange tests that TemplateChanged returns false when template hasn't changed +func TestTemplateChanged_NoChange(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) + + pattern := newPattern(tl, 12345) + pattern.MarkAsSent() + + assert.False(t, templateChanged(pattern), "templateChanged should return false when unchanged") +} + +// TestTemplateChanged_Changed tests that TemplateChanged returns true when template changed +func TestTemplateChanged_Changed(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + 
tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + pattern.MarkAsSent() + + // Change template + template := token.NewTokenList() + template.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + pattern.Template = template + pattern.Positions = []int{2} + + assert.True(t, templateChanged(pattern), "templateChanged should return true when template changed") +} + +// TestTemplateChanged_NilPattern tests that TemplateChanged handles nil gracefully +func TestTemplateChanged_NilPattern(t *testing.T) { + var pattern *Pattern = nil + assert.False(t, templateChanged(pattern), "templateChanged should return false for nil pattern") +} + +// TestGetSentTemplate_NeverSent tests that GetSentTemplate returns empty for unsent patterns +func TestGetSentTemplate_NeverSent(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) + + pattern := newPattern(tl, 12345) + assert.Equal(t, "", getSentTemplate(pattern), "getSentTemplate should return empty for new pattern") +} + +// TestGetSentTemplate_AfterSend tests that GetSentTemplate returns the sent template +func TestGetSentTemplate_AfterSend(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + pattern.MarkAsSent() + + assert.Equal(t, "Service started", getSentTemplate(pattern), "getSentTemplate should return sent template") +} + +// TestGetSentTemplate_NilPattern tests that GetSentTemplate handles nil gracefully +func 
TestGetSentTemplate_NilPattern(t *testing.T) { + var pattern *Pattern = nil + assert.Equal(t, "", getSentTemplate(pattern), "getSentTemplate should return empty for nil pattern") +} + +// TestPatternLifecycle_FullFlow tests the complete pattern lifecycle +func TestPatternLifecycle_FullFlow(t *testing.T) { + // 1. Create new pattern + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + + // 2. Check initial state - needs Define + needsSend, templateState := pattern.NeedsResend() + assert.True(t, needsSend) + assert.Equal(t, TemplateIsNew, templateState) + assert.False(t, wasSent(pattern)) + assert.Equal(t, "", getSentTemplate(pattern)) + + // 3. Mark as sent + pattern.MarkAsSent() + assert.True(t, wasSent(pattern)) + assert.Equal(t, "Service started", getSentTemplate(pattern)) + + // 4. Check no resend needed + needsSend, templateState = pattern.NeedsResend() + assert.False(t, needsSend) + assert.Equal(t, TemplateNotNeeded, templateState) + + // 5. Simulate template evolution + time.Sleep(1 * time.Millisecond) + template := token.NewTokenList() + template.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + pattern.Template = template + pattern.Positions = []int{2} + pattern.UpdatedAt = time.Now() + + // 6. Check needs Update + assert.True(t, templateChanged(pattern)) + needsSend, templateState = pattern.NeedsResend() + assert.True(t, needsSend) + assert.Equal(t, TemplateChanged, templateState) + + // 7. Mark as sent again + pattern.MarkAsSent() + assert.Equal(t, "Service *", getSentTemplate(pattern)) + + // 8. 
Check no resend needed again + needsSend, templateState = pattern.NeedsResend() + assert.False(t, needsSend) + assert.Equal(t, TemplateNotNeeded, templateState) +} + +// TestPatternLifecycle_MultipleUpdates tests multiple template updates +func TestPatternLifecycle_MultipleUpdates(t *testing.T) { + // Initial pattern: "Service started" + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) + + pattern := newPattern(tl, 12345) + pattern.MarkAsSent() + + // First update: "Service *" + time.Sleep(1 * time.Millisecond) + template1 := token.NewTokenList() + template1.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) + template1.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template1.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + pattern.Template = template1 + pattern.Positions = []int{2} + pattern.UpdatedAt = time.Now() + + needsSend, templateState := pattern.NeedsResend() + assert.True(t, needsSend) + assert.Equal(t, TemplateChanged, templateState) + pattern.MarkAsSent() + + // Second update: "* *" + time.Sleep(1 * time.Millisecond) + template2 := token.NewTokenList() + template2.Add(token.NewToken(token.TokenWord, "value1", token.IsWildcard)) + template2.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template2.Add(token.NewToken(token.TokenWord, "value2", token.IsWildcard)) + pattern.Template = template2 + pattern.Positions = []int{0, 2} + pattern.UpdatedAt = time.Now() + + needsSend, templateState = pattern.NeedsResend() + assert.True(t, needsSend) + assert.Equal(t, TemplateChanged, templateState) + pattern.MarkAsSent() + + assert.Equal(t, "* *", getSentTemplate(pattern)) +} + +// TestPatternStateTracker_EdgeCases tests various edge cases +func TestPatternStateTracker_EdgeCases(t *testing.T) { + 
t.Run("EmptyTemplate", func(t *testing.T) { + tl := token.NewTokenList() + pattern := newPattern(tl, 12345) + + needsSend, templateState := pattern.NeedsResend() + assert.True(t, needsSend) + assert.Equal(t, TemplateIsNew, templateState) + + pattern.MarkAsSent() + assert.Equal(t, "", getSentTemplate(pattern)) + + needsSend, templateState = pattern.NeedsResend() + assert.False(t, needsSend) + assert.Equal(t, TemplateNotNeeded, templateState) + }) + + t.Run("OnlyWildcards", func(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "value1", token.IsWildcard)) + tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + tl.Add(token.NewToken(token.TokenWord, "value2", token.IsWildcard)) + + pattern := newPattern(tl, 12345) + pattern.Positions = []int{0, 2} + pattern.MarkAsSent() + + assert.Equal(t, "* *", getSentTemplate(pattern)) + assert.False(t, templateChanged(pattern)) + }) + + t.Run("TemplateBecomesNil", func(t *testing.T) { + tl := token.NewTokenList() + tl.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) + + pattern := newPattern(tl, 12345) + pattern.MarkAsSent() + assert.Equal(t, "Test", getSentTemplate(pattern)) + + // Simulate template becoming nil (edge case) + pattern.Template = nil + needsSend, templateState := pattern.NeedsResend() + assert.True(t, needsSend) + assert.Equal(t, TemplateChanged, templateState) + }) +} diff --git a/pkg/logs/patterns/clustering/pattern_test.go b/pkg/logs/patterns/clustering/pattern_test.go index 8c3e92d49f2d..1b67852cbf4c 100644 --- a/pkg/logs/patterns/clustering/pattern_test.go +++ b/pkg/logs/patterns/clustering/pattern_test.go @@ -14,6 +14,35 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" ) +// Test-only helper functions + +// getWildcardCharPositions returns character indices where wildcards appear in the pattern string. 
+func getWildcardCharPositions(p *Pattern) []int { + if p.Template == nil { + return nil + } + + var charPositions []int + currentPos := 0 + + for _, tok := range p.Template.Tokens { + // Clean the token value for proper length calculation + cleaned := sanitizeForTemplate(tok.Value) + + if tok.Wildcard == token.IsWildcard { + // Record the current character position for this wildcard + charPositions = append(charPositions, currentPos) + // Wildcard is represented as "*" (1 character) + currentPos += 1 + } else if cleaned != "" { + // Add the length of the cleaned token value + currentPos += len(cleaned) + } + } + + return charPositions +} + func TestNewPattern(t *testing.T) { // Create a simple token list tl := token.NewTokenList() @@ -76,7 +105,7 @@ func TestGetPatternString_NoWildcards(t *testing.T) { tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) pattern := newPattern(tl, 12345) - result := pattern.getPatternString() + result := pattern.GetPatternString() assert.Equal(t, "Service started", result) } @@ -89,7 +118,7 @@ func TestGetPatternString_WithWildcards(t *testing.T) { pattern := newPattern(tl, 12345) pattern.Positions = []int{2} - result := pattern.getPatternString() + result := pattern.GetPatternString() assert.Equal(t, "Service *", result) } @@ -98,7 +127,7 @@ func TestGetPatternString_NilTemplate(t *testing.T) { pattern := &Pattern{ Template: nil, } - result := pattern.getPatternString() + result := pattern.GetPatternString() assert.Equal(t, "", result) } @@ -124,8 +153,7 @@ func TestGetWildcardPositions(t *testing.T) { pattern := newPattern(tl, 12345) pattern.Positions = []int{1, 3, 5} - positions := pattern.getWildcardPositions() - assert.Equal(t, []int{1, 3, 5}, positions) + assert.Equal(t, []int{1, 3, 5}, pattern.Positions) } // getParamCount returns the number of parameters/wildcards in a pattern. 
@@ -157,7 +185,7 @@ func TestGetWildcardCharPositions(t *testing.T) { pattern := newPattern(tl, 12345) pattern.Positions = []int{2} - charPositions := pattern.getWildcardCharPositions() + charPositions := getWildcardCharPositions(pattern) // "Service" (7 chars) + " " (1 char) = 8, wildcard at position 8 assert.Equal(t, []int{8}, charPositions) } @@ -176,7 +204,7 @@ func TestGetWildcardCharPositions_MultipleWildcards(t *testing.T) { pattern := newPattern(tl, 12345) pattern.Positions = []int{2, 6} - charPositions := pattern.getWildcardCharPositions() + charPositions := getWildcardCharPositions(pattern) // "Error " = 6 chars, wildcard at position 6 // "Error * in " = 6 + 1 (wildcard) + 4 (" in ") = 11, wildcard at position 11 assert.Equal(t, []int{6, 11}, charPositions) @@ -187,7 +215,7 @@ func TestGetWildcardCharPositions_NilTemplate(t *testing.T) { Template: nil, } - charPositions := pattern.getWildcardCharPositions() + charPositions := getWildcardCharPositions(pattern) assert.Nil(t, charPositions) } @@ -208,7 +236,7 @@ func TestGetWildcardValues(t *testing.T) { pattern.Template = tl pattern.Positions = []int{2} - values := pattern.getWildcardValues() + values := pattern.GetWildcardValues(sample) assert.Equal(t, []string{"started"}, values) } @@ -219,8 +247,8 @@ func TestGetWildcardValues_NilTemplate(t *testing.T) { pattern := newPattern(sample, 12345) pattern.Template = nil - values := pattern.getWildcardValues() - assert.Nil(t, values) + values := pattern.GetWildcardValues(sample) + assert.Empty(t, values) } func TestGetWildcardValues_NilSample(t *testing.T) { @@ -231,8 +259,9 @@ func TestGetWildcardValues_NilSample(t *testing.T) { pattern.Sample = nil pattern.Positions = []int{0} - values := pattern.getWildcardValues() - assert.Nil(t, values) + // Test with the template itself since sample is nil + values := pattern.GetWildcardValues(tl) + assert.Equal(t, []string{"Test"}, values) } func TestExtractWildcardValues(t *testing.T) { @@ -252,7 +281,7 @@ func 
TestExtractWildcardValues(t *testing.T) { incoming.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) incoming.Add(token.NewToken(token.TokenWord, "crashed", token.PotentialWildcard)) - values := pattern.extractWildcardValues(incoming) + values := pattern.GetWildcardValues(incoming) assert.Equal(t, []string{"crashed"}, values) } @@ -285,7 +314,7 @@ func TestExtractWildcardValues_MultipleWildcards(t *testing.T) { incoming.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) incoming.Add(token.NewToken(token.TokenWord, "line", token.PotentialWildcard)) - values := pattern.extractWildcardValues(incoming) + values := pattern.GetWildcardValues(incoming) assert.Equal(t, []string{"Error", "module", "line"}, values) } @@ -298,7 +327,7 @@ func TestExtractWildcardValues_NilTemplate(t *testing.T) { incoming := token.NewTokenList() incoming.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) - values := pattern.extractWildcardValues(incoming) + values := pattern.GetWildcardValues(incoming) assert.Equal(t, []string{}, values) } @@ -312,7 +341,7 @@ func TestExtractWildcardValues_NoPositions(t *testing.T) { incoming := token.NewTokenList() incoming.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) - values := pattern.extractWildcardValues(incoming) + values := pattern.GetWildcardValues(incoming) assert.Equal(t, []string{}, values) } @@ -326,7 +355,7 @@ func TestExtractWildcardValues_PositionOutOfBounds(t *testing.T) { incoming := token.NewTokenList() incoming.Add(token.NewToken(token.TokenWord, "Value", token.PotentialWildcard)) - values := pattern.extractWildcardValues(incoming) + values := pattern.GetWildcardValues(incoming) assert.Equal(t, []string{"Value"}, values, "Should only extract valid positions") } @@ -337,8 +366,9 @@ func TestMarkAsSent(t *testing.T) { pattern := newPattern(tl, 12345) assert.True(t, pattern.LastSentAt.IsZero(), "LastSentAt should be zero initially") - pattern.markAsSent() + pattern.MarkAsSent() 
assert.False(t, pattern.LastSentAt.IsZero(), "LastSentAt should be set after marking") + assert.Equal(t, "Test", pattern.SentTemplate, "SentTemplate should be set") } func TestNeedsSending_NeverSent(t *testing.T) { @@ -346,7 +376,9 @@ func TestNeedsSending_NeverSent(t *testing.T) { tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) pattern := newPattern(tl, 12345) - assert.True(t, pattern.needsSending(), "Should need sending if never sent") + needsSend, templateState := pattern.NeedsResend() + assert.True(t, needsSend, "Should need sending if never sent") + assert.Equal(t, TemplateIsNew, templateState, "Should be TemplateIsNew for first send") } func TestNeedsSending_AlreadySent_NotUpdated(t *testing.T) { @@ -355,9 +387,11 @@ func TestNeedsSending_AlreadySent_NotUpdated(t *testing.T) { pattern := newPattern(tl, 12345) time.Sleep(1 * time.Millisecond) - pattern.markAsSent() + pattern.MarkAsSent() - assert.False(t, pattern.needsSending(), "Should not need sending if sent and not updated") + needsSend, templateState := pattern.NeedsResend() + assert.False(t, needsSend, "Should not need sending if sent and not updated") + assert.Equal(t, TemplateNotNeeded, templateState, "Should be TemplateNotNeeded") } func TestNeedsSending_UpdatedAfterSent(t *testing.T) { @@ -365,13 +399,19 @@ func TestNeedsSending_UpdatedAfterSent(t *testing.T) { tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) pattern := newPattern(tl, 12345) - pattern.markAsSent() + pattern.MarkAsSent() - // Update pattern + // Update pattern template (not just timestamp) time.Sleep(1 * time.Millisecond) + template := token.NewTokenList() + template.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + pattern.Template = template + pattern.Positions = []int{0} pattern.UpdatedAt = time.Now() - assert.True(t, pattern.needsSending(), "Should need sending if updated after last sent") + needsSend, templateState := pattern.NeedsResend() + assert.True(t, 
needsSend, "Should need sending if template changed after last sent") + assert.Equal(t, TemplateChanged, templateState, "Should be TemplateChanged for template change") } func TestSanitizeForTemplate_PrintableChars(t *testing.T) { @@ -431,7 +471,7 @@ func TestPattern_IntegrationScenario(t *testing.T) { assert.Equal(t, 1, pattern.LogCount) assert.False(t, pattern.hasWildcards()) - assert.Equal(t, "ERROR: Database connection failed", pattern.getPatternString()) + assert.Equal(t, "ERROR: Database connection failed", pattern.GetPatternString()) // 2. Pattern updated with wildcards (simulated) template := token.NewTokenList() @@ -452,7 +492,7 @@ func TestPattern_IntegrationScenario(t *testing.T) { assert.Equal(t, 2, pattern.LogCount) assert.True(t, pattern.hasWildcards()) assert.Equal(t, 3, getParamCount(pattern)) - assert.Equal(t, "ERROR: * * *", pattern.getPatternString()) + assert.Equal(t, "ERROR: * * *", pattern.GetPatternString()) // 3. Extract wildcard values from new log log2 := token.NewTokenList() @@ -465,17 +505,32 @@ func TestPattern_IntegrationScenario(t *testing.T) { log2.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) log2.Add(token.NewToken(token.TokenWord, "reached", token.PotentialWildcard)) - values := pattern.extractWildcardValues(log2) + values := pattern.GetWildcardValues(log2) assert.Equal(t, []string{"Network", "timeout", "reached"}, values) // 4. Check sending status - assert.True(t, pattern.needsSending()) - pattern.markAsSent() - assert.False(t, pattern.needsSending()) - - // 5. Update and check needs sending again + needsSend, templateState := pattern.NeedsResend() + assert.True(t, needsSend) + assert.Equal(t, TemplateIsNew, templateState) + pattern.MarkAsSent() + needsSend, templateState = pattern.NeedsResend() + assert.False(t, needsSend) + assert.Equal(t, TemplateNotNeeded, templateState) + + // 5. 
Update pattern (change template, not just log count) time.Sleep(1 * time.Millisecond) + // Evolve template to add more wildcards + template2 := token.NewTokenList() + template2.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + template2.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template2.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + template2.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) + template2.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) + pattern.Template = template2 + pattern.Positions = []int{0, 2, 4} pattern.LogCount++ pattern.UpdatedAt = time.Now() - assert.True(t, pattern.needsSending()) + needsSend, templateState = pattern.NeedsResend() + assert.True(t, needsSend) + assert.Equal(t, TemplateChanged, templateState) } diff --git a/pkg/logs/processor/processor.go b/pkg/logs/processor/processor.go index 6bff04fd564a..a4ebb654206b 100644 --- a/pkg/logs/processor/processor.go +++ b/pkg/logs/processor/processor.go @@ -18,6 +18,8 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/diagnostic" "github.com/DataDog/datadog-agent/pkg/logs/message" "github.com/DataDog/datadog-agent/pkg/logs/metrics" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/automaton" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering" "github.com/DataDog/datadog-agent/pkg/util/log" ) @@ -51,6 +53,9 @@ type Processor struct { configChan chan failoverConfig failoverConfig failoverConfig + // Pattern extraction components + clusterManager *clustering.ClusterManager + // Telemetry pipelineMonitor metrics.PipelineMonitor utilization metrics.UtilizationMonitor @@ -75,6 +80,10 @@ func New(config pkgconfigmodel.Reader, inputChan, outputChan chan *message.Messa pipelineMonitor: pipelineMonitor, utilization: pipelineMonitor.MakeUtilizationMonitor(metrics.ProcessorTlmName, instanceID), instanceID: instanceID, + + // Initialize pattern extraction components, + /// Will lock behind feature 
flag later + clusterManager: clustering.NewClusterManager(), } // Initialize cached failover config @@ -204,6 +213,31 @@ func (p *Processor) processMessage(msg *message.Message) { // report this message to diagnostic receivers (e.g. `stream-logs` command) p.diagnosticMessageReceiver.HandleMessage(msg, rendered, "") + // Extract pattern from the rendered message content + if p.clusterManager != nil { + // Tokenize the rendered content + tokenizer := automaton.NewTokenizer(string(rendered)) + tokenList := tokenizer.Tokenize() + + // Add to cluster manager and get the pattern + pattern, _ := p.clusterManager.Add(tokenList) + + if pattern != nil { + // Store pattern reference and per-log wildcard values + msg.Pattern = pattern + msg.WildcardValues = pattern.GetWildcardValues(tokenList) + + // Determine if pattern template needs sending + needsSend, templateState := pattern.NeedsResend() + msg.PatternTemplateState = templateState + + // Mark as sent if template was sent + if needsSend { + pattern.MarkAsSent() + } + } + } + if p.failoverConfig.isFailoverActive { p.filterMRFMessages(msg) } diff --git a/pkg/logs/sender/dumb_strategy.go b/pkg/logs/sender/dumb_strategy.go deleted file mode 100644 index c2d8781ee724..000000000000 --- a/pkg/logs/sender/dumb_strategy.go +++ /dev/null @@ -1,278 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2016-present Datadog, Inc. 
- -// Package sender provides log message sending functionality -package sender - -import ( - "bytes" - "encoding/gob" - "unsafe" - - "github.com/DataDog/datadog-agent/pkg/logs/message" - "github.com/DataDog/datadog-agent/pkg/logs/patterns/automaton" - "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering" - "github.com/DataDog/datadog-agent/pkg/util/compression" - "github.com/DataDog/datadog-agent/pkg/util/log" -) - -// dumbStrategy is a minimal batching strategy that forwards one message per payload. -type dumbStrategy struct { - inputChan chan *message.Message - clusterManager *clustering.ClusterManager - outputChan chan *message.Payload - flushChan chan struct{} - serializer Serializer - compression compression.Compressor - pipelineName string - - maxContentSize int - - stopChan chan struct{} - buffer []*message.Message -} - -// PatternData is a simple intermediate format for patterns (avoids import cycles with grpc package) -// stream_worker will convert this to protobuf PatternDefine or PatternUpdate -type PatternData struct { - PatternID uint64 - Template string - ParamCount uint32 - PosList []uint32 - IsUpdate bool // true for PatternUpdate, false for PatternDefine -} - -// LogData represents a log with pattern reference and wildcard values -type LogData struct { - PatternID uint64 - WildcardValues []string - Timestamp uint64 -} - -// NewDumbStrategy returns a strategy that sends one message per payload using the -// provided serializer and compressor. Messages larger than maxContentSize are -// dropped to mimic batch strategy behaviour. 
-func NewDumbStrategy( - inputChan chan *message.Message, - outputChan chan *message.Payload, - flushChan chan struct{}, - serializer Serializer, - maxContentSize int, - pipelineName string, - compression compression.Compressor, -) Strategy { - return &dumbStrategy{ - inputChan: inputChan, - outputChan: outputChan, - flushChan: flushChan, - serializer: serializer, - compression: compression, - pipelineName: pipelineName, - maxContentSize: maxContentSize, - clusterManager: clustering.NewClusterManager(), - stopChan: make(chan struct{}), - buffer: make([]*message.Message, 0, 1), - } -} - -// Start begins processing messages from the input channel. -func (s *dumbStrategy) Start() { - go func() { - defer close(s.stopChan) - for { - select { - case msg, ok := <-s.inputChan: - if !ok { - s.flushBuffer() - return - } - s.bufferMessage(msg) - s.flushBuffer() - case <-s.flushChan: - s.flushBuffer() - } - } - }() -} - -// Stop stops the strategy and waits for the processing goroutine to exit. -func (s *dumbStrategy) Stop() { - close(s.inputChan) - <-s.stopChan -} - -func (s *dumbStrategy) bufferMessage(m *message.Message) { - if m == nil { - return - } - - if s.maxContentSize > 0 && len(m.GetContent()) > s.maxContentSize { - log.Warnf("Dropped message in pipeline=%s reason=too-large ContentLength=%d ContentSizeLimit=%d", s.pipelineName, len(m.GetContent()), s.maxContentSize) - tlmDroppedTooLarge.Inc(s.pipelineName) - return - } - - s.buffer = append(s.buffer, m) -} - -func (s *dumbStrategy) flushBuffer() { - if len(s.buffer) > 0 { - s.processMessage(s.buffer[0]) - s.buffer = s.buffer[:0] - } -} - -func (s *dumbStrategy) processMessage(m *message.Message) { - // Use rendered content for pattern extraction (plain text), not encoded content (binary) - content := m.GetRenderedContent() - if len(content) == 0 { - return - } - - // Debug: Check content - previewLen := 100 - if len(content) < previewLen { - previewLen = len(content) - } - log.Infof("šŸ” Tokenizing rendered content 
(first %d chars): %q", previewLen, content[:previewLen]) - - contentStr := bytesToString(content) - - // Simple pattern extraction for POC - tokenList := automaton.TokenizeString(contentStr) - if tokenList != nil && !tokenList.IsEmpty() { - cluster, changeType := s.clusterManager.Add(tokenList) - if cluster != nil { - cluster.GeneratePattern() - - // Log pattern changes - switch changeType { - case clustering.PatternNew: - log.Infof("šŸ“ NEW pattern discovered: PatternID=%d, Template='%s', Size=%d", - cluster.GetPatternID(), cluster.GetPatternString(), cluster.Size()) - case clustering.PatternUpdated: - log.Infof("šŸ”„ Pattern UPDATED: PatternID=%d, Template='%s', Size=%d", - cluster.GetPatternID(), cluster.GetPatternString(), cluster.Size()) - case clustering.PatternNoChange: - log.Debugf("Pattern matched: PatternID=%d", cluster.GetPatternID()) - } - - // Step 1: Send pattern change (define/update) if needed - if changeType == clustering.PatternNew || changeType == clustering.PatternUpdated { - patternPayload, err := s.buildPatternChangePayload(m, cluster, changeType) - if err != nil { - log.Warn("Failed to build pattern change payload", err) - return - } - log.Debugf("ā« Queuing pattern payload (changeType=%v, patternID=%d) to outputChan", changeType, cluster.GetPatternID()) - s.outputChan <- patternPayload - log.Debugf("āœ… Pattern payload queued successfully") - } - - // Step 2: Send log with pattern reference + wildcard values - logPayload, err := s.buildLogPayload(m, cluster) - if err != nil { - log.Warn("Failed to build log payload", err) - return - } - log.Debugf("ā« Queuing log payload (patternID=%d) to outputChan", cluster.GetPatternID()) - s.outputChan <- logPayload - log.Debugf("āœ… Log payload queued successfully") - } - } -} - -// buildPatternChangePayload creates a payload for PatternDefine or PatternUpdate -func (s *dumbStrategy) buildPatternChangePayload(m *message.Message, cluster *clustering.Cluster, changeType clustering.PatternChangeType) 
(*message.Payload, error) { - // Get character positions where wildcards appear in the template string - charPos := cluster.GetWildcardCharPositions() - posList := make([]uint32, len(charPos)) - for i, pos := range charPos { - posList[i] = uint32(pos) - } - - // Create pattern data - patternData := &PatternData{ - PatternID: cluster.GetPatternID(), - Template: cluster.GetPatternString(), - ParamCount: uint32(len(charPos)), - PosList: posList, - IsUpdate: changeType == clustering.PatternUpdated, - } - - // Serialize to binary format - return s.serializePattern(patternData, m) -} - -// buildLogPayload creates a payload for Log with StructuredLog (pattern_id + wildcard values) -func (s *dumbStrategy) buildLogPayload(m *message.Message, cluster *clustering.Cluster) (*message.Payload, error) { - // Extract wildcard values from the cluster - wildcardValues := cluster.GetWildcardValues() - - // Create log data - logData := &LogData{ - PatternID: cluster.GetPatternID(), - WildcardValues: wildcardValues, - Timestamp: uint64(m.IngestionTimestamp), - } - - // Serialize to binary format - return s.serializeLog(logData, m) -} - -// serializePattern serializes pattern data using gob encoding -func (s *dumbStrategy) serializePattern(pattern *PatternData, m *message.Message) (*message.Payload, error) { - var buf bytes.Buffer - encoder := gob.NewEncoder(&buf) - - if err := encoder.Encode(pattern); err != nil { - return nil, err - } - - // Create payload with original message metadata - metaCopy := m.MessageMetadata - // Add pattern change indicator to processing tags - if pattern.IsUpdate { - metaCopy.ProcessingTags = append(metaCopy.ProcessingTags, "data_type:pattern_update") - } else { - metaCopy.ProcessingTags = append(metaCopy.ProcessingTags, "data_type:pattern_define") - } - - return message.NewPayload( - []*message.MessageMetadata{&metaCopy}, // original message metadata with pattern tag - buf.Bytes(), // gob-encoded pattern data - "", // no content encoding - gRPC handles 
compression - buf.Len(), // gob size - ), nil -} - -// serializeLog serializes log data using gob encoding -func (s *dumbStrategy) serializeLog(logData *LogData, m *message.Message) (*message.Payload, error) { - var buf bytes.Buffer - encoder := gob.NewEncoder(&buf) - - if err := encoder.Encode(logData); err != nil { - return nil, err - } - - // Create payload with original message metadata - metaCopy := m.MessageMetadata - // Add log with pattern reference indicator - metaCopy.ProcessingTags = append(metaCopy.ProcessingTags, "data_type:log_with_pattern") - - return message.NewPayload( - []*message.MessageMetadata{&metaCopy}, // original message metadata with log tag - buf.Bytes(), // gob-encoded log data - "", // no content encoding - gRPC handles compression - buf.Len(), // gob size - ), nil -} - -func bytesToString(b []byte) string { - if len(b) == 0 { - return "" - } - return unsafe.String(&b[0], len(b)) -} diff --git a/pkg/logs/sender/grpc/grpc_sender.go b/pkg/logs/sender/grpc/grpc_sender.go deleted file mode 100644 index 58afffa5f61b..000000000000 --- a/pkg/logs/sender/grpc/grpc_sender.go +++ /dev/null @@ -1,287 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2016-present Datadog, Inc. 
- -// Package grpc implements gRPC-based log sender -package grpc - -import ( - "context" - "crypto/tls" - "fmt" - "time" - "unsafe" - - "google.golang.org/grpc" - "google.golang.org/grpc/credentials" - "google.golang.org/grpc/credentials/insecure" - "google.golang.org/grpc/keepalive" - - "github.com/DataDog/datadog-agent/comp/logs/agent/config" - pkgconfigmodel "github.com/DataDog/datadog-agent/pkg/config/model" - "github.com/DataDog/datadog-agent/pkg/logs/client" - "github.com/DataDog/datadog-agent/pkg/logs/message" - "github.com/DataDog/datadog-agent/pkg/logs/metrics" - "github.com/DataDog/datadog-agent/pkg/logs/sender" - "github.com/DataDog/datadog-agent/pkg/util/log" - "github.com/DataDog/datadog-agent/pkg/version" - - "go.uber.org/atomic" -) - -// headerCredentials implements credentials.PerRPCCredentials to add headers to RPC calls -type headerCredentials struct { - endpoint config.Endpoint -} - -// GetRequestMetadata adds required headers to each RPC call -func (h *headerCredentials) GetRequestMetadata(ctx context.Context, uri ...string) (map[string]string, error) { - headers := map[string]string{ - "dd-api-key": h.endpoint.GetAPIKey(), - } - - // Add protocol header if specified - if h.endpoint.Protocol != "" { - headers["dd-protocol"] = string(h.endpoint.Protocol) - } - - // Add origin headers if specified - if h.endpoint.Origin != "" { - headers["dd-evp-origin"] = string(h.endpoint.Origin) - headers["dd-evp-origin-version"] = version.AgentVersion - } - - return headers, nil -} - -// RequireTransportSecurity indicates whether the credentials require transport security -func (h *headerCredentials) RequireTransportSecurity() bool { - return false // We handle TLS separately via WithTransportCredentials -} - -// GRPCSender implements PipelineComponent interface for gRPC log transmission. -// It manages multiple StreamWorker instances (one per pipeline) using round-robin distribution. 
-// It is similar to Sender/Worker architecture -type GRPCSender struct { - // Configuration - endpoint config.Endpoint - destinationsContext *client.DestinationsContext - cfg pkgconfigmodel.Reader - numberOfWorkers int - - // Pipeline integration - pipelineMonitor metrics.PipelineMonitor - - // Stream management (similar to Sender's workers and queues) - workers []*StreamWorker - queues []chan *message.Payload - idx *atomic.Uint32 - - // Auditor integration - sink sender.Sink - - // Stream rotation signaling - maps input channels to StreamWorker instances for 1:1 worker mapping - channelToWorkerMap map[chan *message.Payload]*StreamWorker - - // Global batch ID counter shared by all workers to ensure uniqueness - globalBatchIDCounter *atomic.Uint32 - - // gRPC connection management (shared across all streams) - conn *grpc.ClientConn - client StatefulLogsServiceClient -} - -// NewGRPCSender creates a new gRPC sender that implements PipelineComponent -// numberOfPipelines determines how many StreamWorker to create (same as number of pipelines) -func NewGRPCSender( - cfg pkgconfigmodel.Reader, - sink sender.Sink, - endpoints *config.Endpoints, - destinationsCtx *client.DestinationsContext, - pipelineMonitor metrics.PipelineMonitor, -) *GRPCSender { - - // For now, use the first reliable endpoint - // TODO: Support multiple endpoints with failover - var endpoint config.Endpoint - if len(endpoints.GetReliableEndpoints()) > 0 { - endpoint = endpoints.GetReliableEndpoints()[0] - } else { - log.Error("No reliable gRPC endpoints configured") - return nil - } - - // Get number of pipelines from config (same pattern as other senders) - numberOfWorkers := cfg.GetInt("logs_config.pipelines") - if numberOfWorkers <= 0 { - numberOfWorkers = 1 // Default to 1 // TODO: probably not good - } - - // Get stream lifetime from config - streamLifetime := config.StreamLifetime(cfg) - - sender := &GRPCSender{ - endpoint: endpoint, - destinationsContext: destinationsCtx, - cfg: cfg, - 
numberOfWorkers: numberOfWorkers, - pipelineMonitor: pipelineMonitor, - workers: make([]*StreamWorker, 0, numberOfWorkers), - queues: make([]chan *message.Payload, numberOfWorkers), - idx: &atomic.Uint32{}, - sink: sink, - channelToWorkerMap: make(map[chan *message.Payload]*StreamWorker), - globalBatchIDCounter: &atomic.Uint32{}, - } - - // Note: outputChan will be set in each StreamWorker's Start() method when sink.Channel() is available - - // Create gRPC connection (shared by all streams inside StreamWorkers) - if err := sender.createConnection(); err != nil { - log.Errorf("Failed to create gRPC connection: %v", err) - return nil - } - - // Create multiple StreamWorker instances (like Sender creates Workers) - for i := 0; i < numberOfWorkers; i++ { - workerID := fmt.Sprintf("worker-%d", i) - - // Create input queue for this worker (like Sender creates queues) - sender.queues[i] = make(chan *message.Payload, 100) - - // Create StreamWorker instance - worker := NewStreamWorker( - workerID, - destinationsCtx, - sender.client, - sender.sink, // Pass sink, outputChan will be set in Start() - streamLifetime, - sender.globalBatchIDCounter, // Pass shared counter for globally unique batch IDs - ) - - // Override the worker's input channel to use our queue - worker.inputChan = sender.queues[i] - - // Map input channel to worker for 1:1 worker-processor connection - sender.channelToWorkerMap[sender.queues[i]] = worker - - sender.workers = append(sender.workers, worker) - } - - log.Infof("Created gRPC sender with %d streams for endpoint %s:%d", - numberOfWorkers, endpoint.Host, endpoint.Port) - return sender -} - -// createConnection establishes the shared gRPC connection -func (s *GRPCSender) createConnection() error { - log.Infof("Creating gRPC connection to %s:%d", s.endpoint.Host, s.endpoint.Port) - - // Build connection options - var opts []grpc.DialOption - - // Configure TLS - if s.endpoint.UseSSL() { - tlsConfig := &tls.Config{ - ServerName: s.endpoint.Host, - } - 
opts = append(opts, grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig))) - } else { - opts = append(opts, grpc.WithTransportCredentials(insecure.NewCredentials())) - } - - // Configure keepalive - // Note: Increased Time from 30s to 5min to avoid "too_many_pings" errors from intake - keepaliveParams := keepalive.ClientParameters{ - Time: 5 * time.Minute, // Send ping every 5 minutes (was 30s) - Timeout: 10 * time.Second, // Wait 10 seconds for response (was 5s) - PermitWithoutStream: true, - } - opts = append(opts, grpc.WithKeepaliveParams(keepaliveParams)) - - // Add user agent - userAgent := fmt.Sprintf("datadog-agent/%s", version.AgentVersion) - opts = append(opts, grpc.WithUserAgent(userAgent)) - - // Add headers via per-RPC credentials - headerCreds := &headerCredentials{endpoint: s.endpoint} - opts = append(opts, grpc.WithPerRPCCredentials(headerCreds)) - - // Create connection - address := fmt.Sprintf("%s:%d", s.endpoint.Host, s.endpoint.Port) - conn, err := grpc.NewClient(address, opts...) - if err != nil { - return fmt.Errorf("failed to create gRPC connection: %w", err) - } - - s.conn = conn - s.client = NewStatefulLogsServiceClient(conn) - - log.Infof("Successfully created gRPC connection to %s", address) - return nil -} - -// PipelineComponent interface implementation - -// In returns the input channel using round-robin distribution (same as Sender.In()) -func (s *GRPCSender) In() chan *message.Payload { - idx := s.idx.Inc() % uint32(len(s.queues)) - return s.queues[idx] -} - -// PipelineMonitor returns the pipeline monitor -func (s *GRPCSender) PipelineMonitor() metrics.PipelineMonitor { - return s.pipelineMonitor -} - -// GetSignalChannelForInputChannel returns the stream rotation signal channel for the worker -// that owns the given input channel. This enables 1:1 mapping between processors and workers. -// This is ugly and temporary, until we have a proper way to link worker's signal channel to -// the processor. 
-func (s *GRPCSender) GetSignalChannelForInputChannel(inputChan chan *message.Payload) chan any { - // Find the worker that owns this input channel - worker := s.channelToWorkerMap[inputChan] - if worker == nil { - return nil - } - - // Convert the typed channel to chan any using unsafe conversion - // This is safe because both channels have the same underlying type - return *(*chan any)(unsafe.Pointer(&worker.signalStreamRotate)) -} - -// Start starts all StreamWorker instances (same pattern as Sender.Start()) -func (s *GRPCSender) Start() { - log.Infof("Starting gRPC sender with %d workers", len(s.workers)) - - for _, worker := range s.workers { - worker.Start() - } - - log.Info("All StreamWorkers started") -} - -// Stop stops all StreamWorker instances and closes the connection -func (s *GRPCSender) Stop() { - log.Info("Stopping gRPC sender") - - // Stop all workers (same pattern as Sender.Stop()) - for _, worker := range s.workers { - worker.Stop() - } - - // Close all queues - for _, queue := range s.queues { - close(queue) - } - - // Close the shared connection - if s.conn != nil { - if err := s.conn.Close(); err != nil { - log.Warnf("Error closing gRPC connection: %v", err) - } - } - - log.Info("gRPC sender stopped") -} diff --git a/pkg/logs/sender/grpc/grpc_sender_test.go b/pkg/logs/sender/grpc/grpc_sender_test.go deleted file mode 100644 index afd044febef1..000000000000 --- a/pkg/logs/sender/grpc/grpc_sender_test.go +++ /dev/null @@ -1,642 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2025-present Datadog, Inc. 
- -package grpc - -import ( - "fmt" - "net" - "strconv" - "sync" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "google.golang.org/grpc" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" - - "github.com/DataDog/datadog-agent/comp/logs/agent/config" - configmock "github.com/DataDog/datadog-agent/pkg/config/mock" - "github.com/DataDog/datadog-agent/pkg/logs/client" - "github.com/DataDog/datadog-agent/pkg/logs/message" - "github.com/DataDog/datadog-agent/pkg/logs/metrics" -) - -// MockGRPCServer that implements StatefulLogsServiceServer -type MockGRPCServer struct { - UnimplementedStatefulLogsServiceServer - - // Control behavior - shouldFailSend bool - shouldFailRecv bool - shouldDisconnect bool - responseDelay time.Duration - batchResponses map[int32]BatchStatus_Status - mu sync.RWMutex - - // Track what was received - receivedBatches []*StatefulBatch - activeStreams []StatefulLogsService_LogsStreamServer - streamsMu sync.RWMutex -} - -func NewMockGRPCServer() *MockGRPCServer { - return &MockGRPCServer{ - batchResponses: make(map[int32]BatchStatus_Status), - receivedBatches: make([]*StatefulBatch, 0), - activeStreams: make([]StatefulLogsService_LogsStreamServer, 0), - } -} - -func (s *MockGRPCServer) LogsStream(stream StatefulLogsService_LogsStreamServer) error { - s.streamsMu.Lock() - s.activeStreams = append(s.activeStreams, stream) - streamIndex := len(s.activeStreams) - 1 - s.streamsMu.Unlock() - - defer func() { - s.streamsMu.Lock() - if streamIndex < len(s.activeStreams) { - s.activeStreams = append(s.activeStreams[:streamIndex], s.activeStreams[streamIndex+1:]...) 
- } - s.streamsMu.Unlock() - }() - - for { - // Receive batch from client first - batch, err := stream.Recv() - if err != nil { - return err - } - - s.mu.RLock() - shouldFail := s.shouldFailRecv - shouldDisconnect := s.shouldDisconnect - delay := s.responseDelay - s.mu.RUnlock() - - // Store the received batch (so tests can verify it was received) - s.mu.Lock() - s.receivedBatches = append(s.receivedBatches, batch) - - // Determine response status - responseStatus := BatchStatus_OK - if status, exists := s.batchResponses[int32(batch.BatchId)]; exists { - responseStatus = status - } - s.mu.Unlock() - - // Check for failures AFTER receiving but BEFORE responding - if shouldDisconnect { - // Disconnect after receiving batch but before sending response - // This simulates server dying mid-processing - return status.Error(codes.Unavailable, "server disconnected") - } - - if shouldFail { - // Fail after receiving batch but before sending response - return status.Error(codes.Internal, "simulated recv failure") - } - - // Add delay if configured - if delay > 0 { - time.Sleep(delay) - } - - // Send response back - response := &BatchStatus{ - BatchId: int32(batch.BatchId), - Status: responseStatus, - } - - if err := stream.Send(response); err != nil { - return err - } - } -} - -// Control methods for testing -func (s *MockGRPCServer) SetShouldFailSend(fail bool) { - s.mu.Lock() - defer s.mu.Unlock() - s.shouldFailSend = fail -} - -func (s *MockGRPCServer) SetShouldFailRecv(fail bool) { - s.mu.Lock() - defer s.mu.Unlock() - s.shouldFailRecv = fail -} - -func (s *MockGRPCServer) SetShouldDisconnect(disconnect bool) { - s.mu.Lock() - defer s.mu.Unlock() - s.shouldDisconnect = disconnect -} - -func (s *MockGRPCServer) SetResponseDelay(delay time.Duration) { - s.mu.Lock() - defer s.mu.Unlock() - s.responseDelay = delay -} - -func (s *MockGRPCServer) SetBatchResponse(batchID int32, status BatchStatus_Status) { - s.mu.Lock() - defer s.mu.Unlock() - s.batchResponses[batchID] = 
status -} - -func (s *MockGRPCServer) GetReceivedBatches() []*StatefulBatch { - s.mu.RLock() - defer s.mu.RUnlock() - result := make([]*StatefulBatch, len(s.receivedBatches)) - copy(result, s.receivedBatches) - return result -} - -func (s *MockGRPCServer) ClearReceivedBatches() { - s.mu.Lock() - defer s.mu.Unlock() - s.receivedBatches = s.receivedBatches[:0] -} - -func (s *MockGRPCServer) DisconnectAllStreams() { - s.streamsMu.Lock() - defer s.streamsMu.Unlock() - s.shouldDisconnect = true -} - -// Test helper to start mock gRPC server -func startMockGRPCServer(t *testing.T) (*MockGRPCServer, string, func()) { - listener, err := net.Listen("tcp", "localhost:0") - require.NoError(t, err) - - mockServer := NewMockGRPCServer() - grpcServer := grpc.NewServer() - RegisterStatefulLogsServiceServer(grpcServer, mockServer) - - go func() { - if err := grpcServer.Serve(listener); err != nil { - t.Logf("gRPC server error: %v", err) - } - }() - - address := listener.Addr().String() - - cleanup := func() { - grpcServer.Stop() - listener.Close() - } - - // Server is ready immediately after starting - - return mockServer, address, cleanup -} - -// MockSink for testing -type MockSink struct { - outputChan chan *message.Payload -} - -func (s *MockSink) Channel() chan *message.Payload { - return s.outputChan -} - -// Helper to create GRPCSender with mock server -func createTestGRPCSender(t *testing.T, address string) (*GRPCSender, *MockSink) { - cfg := configmock.New(t) - cfg.SetWithoutSource("logs_config.batch_wait", 100) // Short batch wait for testing - cfg.SetWithoutSource("logs_config.pipelines", 1) // Single pipeline - - // Parse host and port from address (e.g., "127.0.0.1:53662") - host, portStr, err := net.SplitHostPort(address) - require.NoError(t, err) - port, err := strconv.Atoi(portStr) - require.NoError(t, err) - - // Create endpoint using the constructor pattern from existing tests - endpoint := config.NewMockEndpointWithOptions(map[string]interface{}{ - "host": host, 
- "port": port, - "is_reliable": true, - "use_grpc": true, - "use_ssl": false, - }) - - endpoints := &config.Endpoints{ - UseGRPC: true, - Main: endpoint, - Endpoints: []config.Endpoint{endpoint}, - } - - sink := &MockSink{outputChan: make(chan *message.Payload, 100)} - destinationsCtx := client.NewDestinationsContext() - destinationsCtx.Start() - t.Cleanup(func() { destinationsCtx.Stop() }) - - pipelineMonitor := metrics.NewNoopPipelineMonitor("test") - - sender := NewGRPCSender(cfg, sink, endpoints, destinationsCtx, pipelineMonitor) - require.NotNil(t, sender) - - return sender, sink -} - -// Test end-to-end payload flow through GRPCSender -func TestGRPCSenderEndToEndFlow(t *testing.T) { - mockServer, address, cleanup := startMockGRPCServer(t) - defer cleanup() - - sender, sink := createTestGRPCSender(t, address) - - sender.Start() - defer sender.Stop() - - // Create test payload - msg := message.NewMessage([]byte("test message"), nil, "", 0) - payload := &message.Payload{ - MessageMetas: []*message.MessageMetadata{&msg.MessageMetadata}, - Encoded: []byte("test message"), - Encoding: "identity", - UnencodedSize: 12, - IsSnapshot: false, - } - - // Send payload through GRPCSender input channel - inputChan := sender.In() - select { - case inputChan <- payload: - case <-time.After(1 * time.Second): - t.Fatal("Failed to send payload to GRPCSender") - } - - // Wait for server to actually receive the batch (event-driven, not time-based) - require.Eventually(t, func() bool { - batches := mockServer.GetReceivedBatches() - return len(batches) >= 1 - }, 3*time.Second, 50*time.Millisecond, "Server should receive batch") - - // Verify server received the batch - batches := mockServer.GetReceivedBatches() - require.Len(t, batches, 1, "Server should have received one batch") - - batch := batches[0] - assert.Equal(t, uint32(1), batch.BatchId) - require.Len(t, batch.Data, 1) - assert.Equal(t, "test message", batch.Data[0].GetLogs().GetRaw()) - - // Verify payload was 
acknowledged to auditor - select { - case ackPayload := <-sink.outputChan: - assert.Equal(t, payload, ackPayload) - case <-time.After(1 * time.Second): - t.Fatal("Expected payload acknowledgment from auditor") - } -} - -// Test GRPCSender stream failure and recovery -func TestGRPCSenderFailureRecovery(t *testing.T) { - mockServer, address, cleanup := startMockGRPCServer(t) - defer cleanup() - - sender, sink := createTestGRPCSender(t, address) - - sender.Start() - defer sender.Stop() - - // Connection will be established on first send - - // Send first payload (should succeed) - msg1 := message.NewMessage([]byte("message 1"), nil, "", 0) - payload1 := &message.Payload{ - MessageMetas: []*message.MessageMetadata{&msg1.MessageMetadata}, - Encoded: []byte("message 1"), - Encoding: "identity", - UnencodedSize: 9, - IsSnapshot: false, - } - - inputChan := sender.In() - select { - case inputChan <- payload1: - case <-time.After(1 * time.Second): - t.Fatal("Failed to send first payload") - } - - // Wait for server to receive the batch - require.Eventually(t, func() bool { - return len(mockServer.GetReceivedBatches()) >= 1 - }, 2*time.Second, 50*time.Millisecond) - - // Verify first payload succeeded - batches := mockServer.GetReceivedBatches() - require.Len(t, batches, 1) - - select { - case ackPayload := <-sink.outputChan: - assert.Equal(t, payload1, ackPayload) - case <-time.After(1 * time.Second): - t.Fatal("Expected first payload acknowledgment") - } - - // Get initial generation from the single worker (since we have 1 pipeline) - require.Len(t, sender.workers, 1, "Should have exactly 1 worker for single pipeline") - initialGeneration := sender.workers[0].generationID - - // Simulate server failure - mockServer.SetShouldDisconnect(true) - - // Send second payload (should trigger failure and rotation) - msg2 := message.NewMessage([]byte("message 2"), nil, "", 0) - payload2 := &message.Payload{ - MessageMetas: []*message.MessageMetadata{&msg2.MessageMetadata}, - Encoded: 
[]byte("message 2"), - Encoding: "identity", - UnencodedSize: 9, - IsSnapshot: false, - } - - select { - case inputChan <- payload2: - case <-time.After(1 * time.Second): - t.Fatal("Failed to send second payload") - } - - // Wait for failure to be detected and rotation to begin - require.Eventually(t, func() bool { - return sender.workers[0].generationID > initialGeneration - }, 3*time.Second, 100*time.Millisecond) - - // Verify generation incremented due to failure - currentGeneration := sender.workers[0].generationID - assert.Greater(t, currentGeneration, initialGeneration, "Generation should increment after failure") - - // Re-enable server (simulate recovery) - mockServer.SetShouldDisconnect(false) - mockServer.ClearReceivedBatches() - - // Server is now available for new connections - - // Send snapshot to complete rotation - msgSnapshot := message.NewMessage([]byte("snapshot"), nil, "", 0) - payloadSnapshot := &message.Payload{ - MessageMetas: []*message.MessageMetadata{&msgSnapshot.MessageMetadata}, - Encoded: []byte("snapshot"), - Encoding: "identity", - UnencodedSize: 8, - IsSnapshot: true, - } - - select { - case inputChan <- payloadSnapshot: - case <-time.After(1 * time.Second): - t.Fatal("Failed to send snapshot payload") - } - - // Wait for snapshot to be received on new stream - require.Eventually(t, func() bool { - return len(mockServer.GetReceivedBatches()) >= 1 - }, 3*time.Second, 100*time.Millisecond) - - // Verify snapshot was sent on new stream - newBatches := mockServer.GetReceivedBatches() - require.GreaterOrEqual(t, len(newBatches), 1, "Should have received snapshot on new stream") - - // Find snapshot batch - var snapshotBatch *StatefulBatch - for _, batch := range newBatches { - if len(batch.Data) > 0 && batch.Data[0].GetLogs().GetRaw() == "snapshot" { - snapshotBatch = batch - break - } - } - require.NotNil(t, snapshotBatch, "Should have received snapshot batch") - - // Send another payload to verify traffic continues - msg3 := 
message.NewMessage([]byte("message 3"), nil, "", 0) - payload3 := &message.Payload{ - MessageMetas: []*message.MessageMetadata{&msg3.MessageMetadata}, - Encoded: []byte("message 3"), - Encoding: "identity", - UnencodedSize: 9, - IsSnapshot: false, - } - - select { - case inputChan <- payload3: - case <-time.After(1 * time.Second): - t.Fatal("Failed to send third payload") - } - - // Payload sent, acknowledgments will be collected below - - // Collect all acknowledgments we receive (may include message 2, snapshot, message 3) - var receivedPayloads []*message.Payload - timeout := time.After(3 * time.Second) - - // Collect acknowledgments for up to 3 seconds - for { - select { - case ackPayload := <-sink.outputChan: - receivedPayloads = append(receivedPayloads, ackPayload) - case <-timeout: - goto done - } - } - -done: - require.GreaterOrEqual(t, len(receivedPayloads), 2, "Should have received at least 2 acknowledgments") - - // Verify we got the expected payloads (snapshot and message 3 at minimum) - payloadContents := make([]string, len(receivedPayloads)) - for i, p := range receivedPayloads { - payloadContents[i] = string(p.Encoded) - } - assert.Contains(t, payloadContents, "snapshot", "Should have received snapshot") - assert.Contains(t, payloadContents, "message 3", "Should have received message 3") -} - -// Test multiple consecutive failures with GRPCSender -func TestGRPCSenderMultipleFailures(t *testing.T) { - mockServer, address, cleanup := startMockGRPCServer(t) - defer cleanup() - - sender, sink := createTestGRPCSender(t, address) - - sender.Start() - defer sender.Stop() - - // Get initial generation from the single worker - require.Len(t, sender.workers, 1, "Should have exactly 1 worker for single pipeline") - initialGeneration := sender.workers[0].generationID - - inputChan := sender.In() - - mockServer.ClearReceivedBatches() - - // Cause failure - mockServer.SetShouldDisconnect(true) - - // Send payload to trigger failure - msg := 
message.NewMessage([]byte("trigger"), nil, "", 0) - payload := &message.Payload{ - MessageMetas: []*message.MessageMetadata{&msg.MessageMetadata}, - Encoded: []byte("trigger"), - Encoding: "identity", - UnencodedSize: len("trigger"), - IsSnapshot: false, - } - - select { - case inputChan <- payload: - case <-time.After(1 * time.Second): - t.Fatal("Failed to send trigger payload") - } - - // Wait for failure detection (generation increment) - require.Eventually(t, func() bool { - return sender.workers[0].generationID == initialGeneration+1 - }, 2*time.Second, 100*time.Millisecond) - - // Send snapshot to complete rotation - // but this message should trigger another rotation - msgSnapshot := message.NewMessage([]byte("snapshot"), nil, "", 0) - payloadSnapshot := &message.Payload{ - MessageMetas: []*message.MessageMetadata{&msgSnapshot.MessageMetadata}, - Encoded: []byte("snapshot"), - Encoding: "identity", - UnencodedSize: len("snapshot"), - IsSnapshot: true, - } - - select { - case inputChan <- payloadSnapshot: - case <-time.After(1 * time.Second): - t.Fatal("Failed to send snapshot") - } - - // Verify generation incremented (at least 2 times) - require.Eventually(t, func() bool { - return sender.workers[0].generationID == initialGeneration+2 - }, 2*time.Second, 100*time.Millisecond) - - mockServer.SetShouldDisconnect(false) - - // Send final payload to verify system is still working - msgFinal := message.NewMessage([]byte("final test"), nil, "", 0) - payloadFinal := &message.Payload{ - MessageMetas: []*message.MessageMetadata{&msgFinal.MessageMetadata}, - Encoded: []byte("final test"), - Encoding: "identity", - UnencodedSize: 10, - IsSnapshot: true, - } - - select { - case inputChan <- payloadFinal: - case <-time.After(1 * time.Second): - t.Fatal("Failed to send final payload") - } - - // Payload sent, wait for acknowledgment - - // Verify we get at least one acknowledgment (system is working) - // Due to async nature and multiple failures, we may have many 
pending acks - timeout := time.After(2 * time.Second) - var gotAck bool - for !gotAck { - select { - case <-sink.outputChan: - gotAck = true - case <-timeout: - t.Fatal("Expected at least one payload acknowledgment") - } - } - - // Verify system is still functional by checking no more failures - assert.True(t, gotAck, "System should still be processing payloads") -} - -// Test GRPCSender signal channel mapping functionality -func TestGRPCSenderSignalChannelMapping(t *testing.T) { - _, address, cleanup := startMockGRPCServer(t) - defer cleanup() - - sender, _ := createTestGRPCSender(t, address) - - sender.Start() - defer sender.Stop() - - // Test GetSignalChannelForInputChannel functionality - inputChan := sender.In() - signalChan := sender.GetSignalChannelForInputChannel(inputChan) - - require.NotNil(t, signalChan, "Should have signal channel for input channel") - - // Verify the mapping is correct - worker, exists := sender.channelToWorkerMap[inputChan] - require.True(t, exists, "Input channel should be mapped to a worker") - - // The signal channel should be the same underlying channel, even though types differ - // GetSignalChannelForInputChannel returns chan any (via unsafe conversion) - // while worker.signalStreamRotate is chan StreamRotateSignal - // We can verify they're the same by checking the channel addresses - assert.NotNil(t, signalChan, "Signal channel should not be nil") - assert.NotNil(t, worker.signalStreamRotate, "Worker signal channel should not be nil") -} - -// Test GRPCSender graceful shutdown -func TestGRPCSenderGracefulShutdown(t *testing.T) { - _, address, cleanup := startMockGRPCServer(t) - defer cleanup() - - sender, sink := createTestGRPCSender(t, address) - - sender.Start() - - // Send some payloads - inputChan := sender.In() - for i := 0; i < 3; i++ { - msg := message.NewMessage([]byte(fmt.Sprintf("message %d", i)), nil, "", 0) - payload := &message.Payload{ - MessageMetas: []*message.MessageMetadata{&msg.MessageMetadata}, - Encoded: 
[]byte(fmt.Sprintf("message %d", i)), - Encoding: "identity", - UnencodedSize: len(fmt.Sprintf("message %d", i)), - IsSnapshot: false, - } - - select { - case inputChan <- payload: - case <-time.After(1 * time.Second): - t.Fatalf("Failed to send payload %d", i) - } - } - - // Processing will start immediately - - // Stop sender gracefully - sender.Stop() - - // Shutdown is synchronous - - // Verify some acknowledgments came through (system processed what it could) - var ackCount int - timeout := time.After(1 * time.Second) - for { - select { - case <-sink.outputChan: - ackCount++ - case <-timeout: - goto done - } - } - -done: - // Should have processed at least some payloads before shutdown - assert.GreaterOrEqual(t, ackCount, 0, "Should have processed some payloads during graceful shutdown") -} diff --git a/pkg/logs/sender/grpc/stateful_encoding.pb.go b/pkg/logs/sender/grpc/stateful_encoding.pb.go deleted file mode 100644 index 860c5bc6f01e..000000000000 --- a/pkg/logs/sender/grpc/stateful_encoding.pb.go +++ /dev/null @@ -1,1014 +0,0 @@ -// Code generated by protoc-gen-go. DO NOT EDIT. -// versions: -// protoc-gen-go v1.36.6 -// protoc v4.24.3 -// source: pkg/logs/sender/grpc/stateful_encoding.proto - -package grpc - -import ( - protoreflect "google.golang.org/protobuf/reflect/protoreflect" - protoimpl "google.golang.org/protobuf/runtime/protoimpl" - reflect "reflect" - sync "sync" - unsafe "unsafe" -) - -const ( - // Verify that this generated code is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) - // Verify that runtime/protoimpl is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) -) - -// See Status Code Mappings section below for more details -type BatchStatus_Status int32 - -const ( - BatchStatus_UNKNOWN BatchStatus_Status = 0 - BatchStatus_OK BatchStatus_Status = 1 -) - -// Enum value maps for BatchStatus_Status. 
-var ( - BatchStatus_Status_name = map[int32]string{ - 0: "UNKNOWN", - 1: "OK", - } - BatchStatus_Status_value = map[string]int32{ - "UNKNOWN": 0, - "OK": 1, - } -) - -func (x BatchStatus_Status) Enum() *BatchStatus_Status { - p := new(BatchStatus_Status) - *p = x - return p -} - -func (x BatchStatus_Status) String() string { - return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) -} - -func (BatchStatus_Status) Descriptor() protoreflect.EnumDescriptor { - return file_pkg_logs_sender_grpc_stateful_encoding_proto_enumTypes[0].Descriptor() -} - -func (BatchStatus_Status) Type() protoreflect.EnumType { - return &file_pkg_logs_sender_grpc_stateful_encoding_proto_enumTypes[0] -} - -func (x BatchStatus_Status) Number() protoreflect.EnumNumber { - return protoreflect.EnumNumber(x) -} - -// Deprecated: Use BatchStatus_Status.Descriptor instead. -func (BatchStatus_Status) EnumDescriptor() ([]byte, []int) { - return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{10, 0} -} - -type DictEntryDefine struct { - state protoimpl.MessageState `protogen:"open.v1"` - Id uint64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"` - Value string `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache -} - -func (x *DictEntryDefine) Reset() { - *x = DictEntryDefine{} - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) -} - -func (x *DictEntryDefine) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*DictEntryDefine) ProtoMessage() {} - -func (x *DictEntryDefine) ProtoReflect() protoreflect.Message { - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[0] - if x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return 
mi.MessageOf(x) -} - -// Deprecated: Use DictEntryDefine.ProtoReflect.Descriptor instead. -func (*DictEntryDefine) Descriptor() ([]byte, []int) { - return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{0} -} - -func (x *DictEntryDefine) GetId() uint64 { - if x != nil { - return x.Id - } - return 0 -} - -func (x *DictEntryDefine) GetValue() string { - if x != nil { - return x.Value - } - return "" -} - -type DictEntryDelete struct { - state protoimpl.MessageState `protogen:"open.v1"` - Id uint64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache -} - -func (x *DictEntryDelete) Reset() { - *x = DictEntryDelete{} - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[1] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) -} - -func (x *DictEntryDelete) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*DictEntryDelete) ProtoMessage() {} - -func (x *DictEntryDelete) ProtoReflect() protoreflect.Message { - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[1] - if x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use DictEntryDelete.ProtoReflect.Descriptor instead. 
-func (*DictEntryDelete) Descriptor() ([]byte, []int) { - return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{1} -} - -func (x *DictEntryDelete) GetId() uint64 { - if x != nil { - return x.Id - } - return 0 -} - -// pos_list is used to indicate where dynamic values should be inserted -// it's more accurate than a marker -type PatternDefine struct { - state protoimpl.MessageState `protogen:"open.v1"` - PatternId uint64 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"` - Template string `protobuf:"bytes,2,opt,name=template,proto3" json:"template,omitempty"` - ParamCount uint32 `protobuf:"varint,3,opt,name=param_count,json=paramCount,proto3" json:"param_count,omitempty"` - PosList []uint32 `protobuf:"varint,4,rep,packed,name=pos_list,json=posList,proto3" json:"pos_list,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache -} - -func (x *PatternDefine) Reset() { - *x = PatternDefine{} - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[2] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) -} - -func (x *PatternDefine) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*PatternDefine) ProtoMessage() {} - -func (x *PatternDefine) ProtoReflect() protoreflect.Message { - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[2] - if x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use PatternDefine.ProtoReflect.Descriptor instead. 
-func (*PatternDefine) Descriptor() ([]byte, []int) { - return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{2} -} - -func (x *PatternDefine) GetPatternId() uint64 { - if x != nil { - return x.PatternId - } - return 0 -} - -func (x *PatternDefine) GetTemplate() string { - if x != nil { - return x.Template - } - return "" -} - -func (x *PatternDefine) GetParamCount() uint32 { - if x != nil { - return x.ParamCount - } - return 0 -} - -func (x *PatternDefine) GetPosList() []uint32 { - if x != nil { - return x.PosList - } - return nil -} - -type PatternUpdate struct { - state protoimpl.MessageState `protogen:"open.v1"` - PatternId uint64 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"` - NewTemplate string `protobuf:"bytes,2,opt,name=new_template,json=newTemplate,proto3" json:"new_template,omitempty"` - ParamCount uint32 `protobuf:"varint,3,opt,name=param_count,json=paramCount,proto3" json:"param_count,omitempty"` - PosList []uint32 `protobuf:"varint,4,rep,packed,name=pos_list,json=posList,proto3" json:"pos_list,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache -} - -func (x *PatternUpdate) Reset() { - *x = PatternUpdate{} - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[3] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) -} - -func (x *PatternUpdate) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*PatternUpdate) ProtoMessage() {} - -func (x *PatternUpdate) ProtoReflect() protoreflect.Message { - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[3] - if x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use PatternUpdate.ProtoReflect.Descriptor instead. 
-func (*PatternUpdate) Descriptor() ([]byte, []int) { - return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{3} -} - -func (x *PatternUpdate) GetPatternId() uint64 { - if x != nil { - return x.PatternId - } - return 0 -} - -func (x *PatternUpdate) GetNewTemplate() string { - if x != nil { - return x.NewTemplate - } - return "" -} - -func (x *PatternUpdate) GetParamCount() uint32 { - if x != nil { - return x.ParamCount - } - return 0 -} - -func (x *PatternUpdate) GetPosList() []uint32 { - if x != nil { - return x.PosList - } - return nil -} - -type PatternDelete struct { - state protoimpl.MessageState `protogen:"open.v1"` - PatternId uint64 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache -} - -func (x *PatternDelete) Reset() { - *x = PatternDelete{} - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[4] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) -} - -func (x *PatternDelete) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*PatternDelete) ProtoMessage() {} - -func (x *PatternDelete) ProtoReflect() protoreflect.Message { - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[4] - if x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use PatternDelete.ProtoReflect.Descriptor instead. 
-func (*PatternDelete) Descriptor() ([]byte, []int) { - return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{4} -} - -func (x *PatternDelete) GetPatternId() uint64 { - if x != nil { - return x.PatternId - } - return 0 -} - -type Log struct { - state protoimpl.MessageState `protogen:"open.v1"` - Timestamp uint64 `protobuf:"varint,1,opt,name=timestamp,proto3" json:"timestamp,omitempty"` - // Types that are valid to be assigned to Content: - // - // *Log_Structured - // *Log_Raw - Content isLog_Content `protobuf_oneof:"content"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache -} - -func (x *Log) Reset() { - *x = Log{} - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[5] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) -} - -func (x *Log) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*Log) ProtoMessage() {} - -func (x *Log) ProtoReflect() protoreflect.Message { - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[5] - if x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use Log.ProtoReflect.Descriptor instead. 
-func (*Log) Descriptor() ([]byte, []int) { - return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{5} -} - -func (x *Log) GetTimestamp() uint64 { - if x != nil { - return x.Timestamp - } - return 0 -} - -func (x *Log) GetContent() isLog_Content { - if x != nil { - return x.Content - } - return nil -} - -func (x *Log) GetStructured() *StructuredLog { - if x != nil { - if x, ok := x.Content.(*Log_Structured); ok { - return x.Structured - } - } - return nil -} - -func (x *Log) GetRaw() string { - if x != nil { - if x, ok := x.Content.(*Log_Raw); ok { - return x.Raw - } - } - return "" -} - -type isLog_Content interface { - isLog_Content() -} - -type Log_Structured struct { - Structured *StructuredLog `protobuf:"bytes,2,opt,name=structured,proto3,oneof"` -} - -type Log_Raw struct { - Raw string `protobuf:"bytes,3,opt,name=raw,proto3,oneof"` -} - -func (*Log_Structured) isLog_Content() {} - -func (*Log_Raw) isLog_Content() {} - -type StructuredLog struct { - state protoimpl.MessageState `protogen:"open.v1"` - PatternId uint64 `protobuf:"varint,1,opt,name=pattern_id,json=patternId,proto3" json:"pattern_id,omitempty"` - DynamicValues []*DynamicValue `protobuf:"bytes,2,rep,name=dynamic_values,json=dynamicValues,proto3" json:"dynamic_values,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache -} - -func (x *StructuredLog) Reset() { - *x = StructuredLog{} - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[6] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) -} - -func (x *StructuredLog) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*StructuredLog) ProtoMessage() {} - -func (x *StructuredLog) ProtoReflect() protoreflect.Message { - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[6] - if x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - 
} - return mi.MessageOf(x) -} - -// Deprecated: Use StructuredLog.ProtoReflect.Descriptor instead. -func (*StructuredLog) Descriptor() ([]byte, []int) { - return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{6} -} - -func (x *StructuredLog) GetPatternId() uint64 { - if x != nil { - return x.PatternId - } - return 0 -} - -func (x *StructuredLog) GetDynamicValues() []*DynamicValue { - if x != nil { - return x.DynamicValues - } - return nil -} - -// TODO not sure we need numeric type -type DynamicValue struct { - state protoimpl.MessageState `protogen:"open.v1"` - // Types that are valid to be assigned to Value: - // - // *DynamicValue_IntValue - // *DynamicValue_FloatValue - // *DynamicValue_StringValue - // *DynamicValue_DictIndex - Value isDynamicValue_Value `protobuf_oneof:"value"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache -} - -func (x *DynamicValue) Reset() { - *x = DynamicValue{} - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[7] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) -} - -func (x *DynamicValue) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*DynamicValue) ProtoMessage() {} - -func (x *DynamicValue) ProtoReflect() protoreflect.Message { - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[7] - if x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use DynamicValue.ProtoReflect.Descriptor instead. 
-func (*DynamicValue) Descriptor() ([]byte, []int) { - return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{7} -} - -func (x *DynamicValue) GetValue() isDynamicValue_Value { - if x != nil { - return x.Value - } - return nil -} - -func (x *DynamicValue) GetIntValue() int64 { - if x != nil { - if x, ok := x.Value.(*DynamicValue_IntValue); ok { - return x.IntValue - } - } - return 0 -} - -func (x *DynamicValue) GetFloatValue() float64 { - if x != nil { - if x, ok := x.Value.(*DynamicValue_FloatValue); ok { - return x.FloatValue - } - } - return 0 -} - -func (x *DynamicValue) GetStringValue() string { - if x != nil { - if x, ok := x.Value.(*DynamicValue_StringValue); ok { - return x.StringValue - } - } - return "" -} - -func (x *DynamicValue) GetDictIndex() uint64 { - if x != nil { - if x, ok := x.Value.(*DynamicValue_DictIndex); ok { - return x.DictIndex - } - } - return 0 -} - -type isDynamicValue_Value interface { - isDynamicValue_Value() -} - -type DynamicValue_IntValue struct { - IntValue int64 `protobuf:"varint,1,opt,name=int_value,json=intValue,proto3,oneof"` -} - -type DynamicValue_FloatValue struct { - FloatValue float64 `protobuf:"fixed64,2,opt,name=float_value,json=floatValue,proto3,oneof"` -} - -type DynamicValue_StringValue struct { - StringValue string `protobuf:"bytes,3,opt,name=string_value,json=stringValue,proto3,oneof"` -} - -type DynamicValue_DictIndex struct { - DictIndex uint64 `protobuf:"varint,4,opt,name=dict_index,json=dictIndex,proto3,oneof"` -} - -func (*DynamicValue_IntValue) isDynamicValue_Value() {} - -func (*DynamicValue_FloatValue) isDynamicValue_Value() {} - -func (*DynamicValue_StringValue) isDynamicValue_Value() {} - -func (*DynamicValue_DictIndex) isDynamicValue_Value() {} - -type Datum struct { - state protoimpl.MessageState `protogen:"open.v1"` - // Types that are valid to be assigned to Data: - // - // *Datum_PatternDefine - // *Datum_PatternUpdate - // *Datum_PatternDelete - // *Datum_DictEntryDefine - // 
*Datum_DictEntryDelete - // *Datum_Logs - Data isDatum_Data `protobuf_oneof:"data"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache -} - -func (x *Datum) Reset() { - *x = Datum{} - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[8] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) -} - -func (x *Datum) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*Datum) ProtoMessage() {} - -func (x *Datum) ProtoReflect() protoreflect.Message { - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[8] - if x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use Datum.ProtoReflect.Descriptor instead. -func (*Datum) Descriptor() ([]byte, []int) { - return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{8} -} - -func (x *Datum) GetData() isDatum_Data { - if x != nil { - return x.Data - } - return nil -} - -func (x *Datum) GetPatternDefine() *PatternDefine { - if x != nil { - if x, ok := x.Data.(*Datum_PatternDefine); ok { - return x.PatternDefine - } - } - return nil -} - -func (x *Datum) GetPatternUpdate() *PatternUpdate { - if x != nil { - if x, ok := x.Data.(*Datum_PatternUpdate); ok { - return x.PatternUpdate - } - } - return nil -} - -func (x *Datum) GetPatternDelete() *PatternDelete { - if x != nil { - if x, ok := x.Data.(*Datum_PatternDelete); ok { - return x.PatternDelete - } - } - return nil -} - -func (x *Datum) GetDictEntryDefine() *DictEntryDefine { - if x != nil { - if x, ok := x.Data.(*Datum_DictEntryDefine); ok { - return x.DictEntryDefine - } - } - return nil -} - -func (x *Datum) GetDictEntryDelete() *DictEntryDelete { - if x != nil { - if x, ok := x.Data.(*Datum_DictEntryDelete); ok { - return x.DictEntryDelete - } - } - return nil -} - -func (x *Datum) GetLogs() *Log { - if x != nil { - if x, 
ok := x.Data.(*Datum_Logs); ok { - return x.Logs - } - } - return nil -} - -type isDatum_Data interface { - isDatum_Data() -} - -type Datum_PatternDefine struct { - PatternDefine *PatternDefine `protobuf:"bytes,1,opt,name=pattern_define,json=patternDefine,proto3,oneof"` -} - -type Datum_PatternUpdate struct { - PatternUpdate *PatternUpdate `protobuf:"bytes,2,opt,name=pattern_update,json=patternUpdate,proto3,oneof"` -} - -type Datum_PatternDelete struct { - PatternDelete *PatternDelete `protobuf:"bytes,3,opt,name=pattern_delete,json=patternDelete,proto3,oneof"` -} - -type Datum_DictEntryDefine struct { - DictEntryDefine *DictEntryDefine `protobuf:"bytes,4,opt,name=dict_entry_define,json=dictEntryDefine,proto3,oneof"` -} - -type Datum_DictEntryDelete struct { - DictEntryDelete *DictEntryDelete `protobuf:"bytes,5,opt,name=dict_entry_delete,json=dictEntryDelete,proto3,oneof"` -} - -type Datum_Logs struct { - Logs *Log `protobuf:"bytes,6,opt,name=logs,proto3,oneof"` -} - -func (*Datum_PatternDefine) isDatum_Data() {} - -func (*Datum_PatternUpdate) isDatum_Data() {} - -func (*Datum_PatternDelete) isDatum_Data() {} - -func (*Datum_DictEntryDefine) isDatum_Data() {} - -func (*Datum_DictEntryDelete) isDatum_Data() {} - -func (*Datum_Logs) isDatum_Data() {} - -// data is sequence of pattern/dictionary changes + logs -// the ordering is significant, must be processed in order -type StatefulBatch struct { - state protoimpl.MessageState `protogen:"open.v1"` - BatchId uint32 `protobuf:"varint,1,opt,name=batch_id,json=batchId,proto3" json:"batch_id,omitempty"` - Data []*Datum `protobuf:"bytes,2,rep,name=data,proto3" json:"data,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache -} - -func (x *StatefulBatch) Reset() { - *x = StatefulBatch{} - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[9] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) -} - -func (x *StatefulBatch) String() string { - return 
protoimpl.X.MessageStringOf(x) -} - -func (*StatefulBatch) ProtoMessage() {} - -func (x *StatefulBatch) ProtoReflect() protoreflect.Message { - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[9] - if x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use StatefulBatch.ProtoReflect.Descriptor instead. -func (*StatefulBatch) Descriptor() ([]byte, []int) { - return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{9} -} - -func (x *StatefulBatch) GetBatchId() uint32 { - if x != nil { - return x.BatchId - } - return 0 -} - -func (x *StatefulBatch) GetData() []*Datum { - if x != nil { - return x.Data - } - return nil -} - -type BatchStatus struct { - state protoimpl.MessageState `protogen:"open.v1"` - BatchId int32 `protobuf:"varint,1,opt,name=batch_id,json=batchId,proto3" json:"batch_id,omitempty"` - Status BatchStatus_Status `protobuf:"varint,2,opt,name=status,proto3,enum=intake.BatchStatus_Status" json:"status,omitempty"` - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache -} - -func (x *BatchStatus) Reset() { - *x = BatchStatus{} - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[10] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) -} - -func (x *BatchStatus) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*BatchStatus) ProtoMessage() {} - -func (x *BatchStatus) ProtoReflect() protoreflect.Message { - mi := &file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[10] - if x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use BatchStatus.ProtoReflect.Descriptor instead. 
-func (*BatchStatus) Descriptor() ([]byte, []int) { - return file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP(), []int{10} -} - -func (x *BatchStatus) GetBatchId() int32 { - if x != nil { - return x.BatchId - } - return 0 -} - -func (x *BatchStatus) GetStatus() BatchStatus_Status { - if x != nil { - return x.Status - } - return BatchStatus_UNKNOWN -} - -var File_pkg_logs_sender_grpc_stateful_encoding_proto protoreflect.FileDescriptor - -const file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDesc = "" + - "\n" + - ",pkg/logs/sender/grpc/stateful_encoding.proto\x12\x06intake\"7\n" + - "\x0fDictEntryDefine\x12\x0e\n" + - "\x02id\x18\x01 \x01(\x04R\x02id\x12\x14\n" + - "\x05value\x18\x02 \x01(\tR\x05value\"!\n" + - "\x0fDictEntryDelete\x12\x0e\n" + - "\x02id\x18\x01 \x01(\x04R\x02id\"\x86\x01\n" + - "\rPatternDefine\x12\x1d\n" + - "\n" + - "pattern_id\x18\x01 \x01(\x04R\tpatternId\x12\x1a\n" + - "\btemplate\x18\x02 \x01(\tR\btemplate\x12\x1f\n" + - "\vparam_count\x18\x03 \x01(\rR\n" + - "paramCount\x12\x19\n" + - "\bpos_list\x18\x04 \x03(\rR\aposList\"\x8d\x01\n" + - "\rPatternUpdate\x12\x1d\n" + - "\n" + - "pattern_id\x18\x01 \x01(\x04R\tpatternId\x12!\n" + - "\fnew_template\x18\x02 \x01(\tR\vnewTemplate\x12\x1f\n" + - "\vparam_count\x18\x03 \x01(\rR\n" + - "paramCount\x12\x19\n" + - "\bpos_list\x18\x04 \x03(\rR\aposList\".\n" + - "\rPatternDelete\x12\x1d\n" + - "\n" + - "pattern_id\x18\x01 \x01(\x04R\tpatternId\"{\n" + - "\x03Log\x12\x1c\n" + - "\ttimestamp\x18\x01 \x01(\x04R\ttimestamp\x127\n" + - "\n" + - "structured\x18\x02 \x01(\v2\x15.intake.StructuredLogH\x00R\n" + - "structured\x12\x12\n" + - "\x03raw\x18\x03 \x01(\tH\x00R\x03rawB\t\n" + - "\acontent\"k\n" + - "\rStructuredLog\x12\x1d\n" + - "\n" + - "pattern_id\x18\x01 \x01(\x04R\tpatternId\x12;\n" + - "\x0edynamic_values\x18\x02 \x03(\v2\x14.intake.DynamicValueR\rdynamicValues\"\x9f\x01\n" + - "\fDynamicValue\x12\x1d\n" + - "\tint_value\x18\x01 \x01(\x03H\x00R\bintValue\x12!\n" + - 
"\vfloat_value\x18\x02 \x01(\x01H\x00R\n" + - "floatValue\x12#\n" + - "\fstring_value\x18\x03 \x01(\tH\x00R\vstringValue\x12\x1f\n" + - "\n" + - "dict_index\x18\x04 \x01(\x04H\x00R\tdictIndexB\a\n" + - "\x05value\"\x80\x03\n" + - "\x05Datum\x12>\n" + - "\x0epattern_define\x18\x01 \x01(\v2\x15.intake.PatternDefineH\x00R\rpatternDefine\x12>\n" + - "\x0epattern_update\x18\x02 \x01(\v2\x15.intake.PatternUpdateH\x00R\rpatternUpdate\x12>\n" + - "\x0epattern_delete\x18\x03 \x01(\v2\x15.intake.PatternDeleteH\x00R\rpatternDelete\x12E\n" + - "\x11dict_entry_define\x18\x04 \x01(\v2\x17.intake.DictEntryDefineH\x00R\x0fdictEntryDefine\x12E\n" + - "\x11dict_entry_delete\x18\x05 \x01(\v2\x17.intake.DictEntryDeleteH\x00R\x0fdictEntryDelete\x12!\n" + - "\x04logs\x18\x06 \x01(\v2\v.intake.LogH\x00R\x04logsB\x06\n" + - "\x04data\"M\n" + - "\rStatefulBatch\x12\x19\n" + - "\bbatch_id\x18\x01 \x01(\rR\abatchId\x12!\n" + - "\x04data\x18\x02 \x03(\v2\r.intake.DatumR\x04data\"{\n" + - "\vBatchStatus\x12\x19\n" + - "\bbatch_id\x18\x01 \x01(\x05R\abatchId\x122\n" + - "\x06status\x18\x02 \x01(\x0e2\x1a.intake.BatchStatus.StatusR\x06status\"\x1d\n" + - "\x06Status\x12\v\n" + - "\aUNKNOWN\x10\x00\x12\x06\n" + - "\x02OK\x10\x012S\n" + - "\x13StatefulLogsService\x12<\n" + - "\n" + - "LogsStream\x12\x15.intake.StatefulBatch\x1a\x13.intake.BatchStatus(\x010\x01B7Z5github.com/DataDog/datadog-agent/pkg/logs/sender/grpcb\x06proto3" - -var ( - file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescOnce sync.Once - file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescData []byte -) - -func file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescGZIP() []byte { - file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescOnce.Do(func() { - file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDesc), len(file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDesc))) - }) - return 
file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDescData -} - -var file_pkg_logs_sender_grpc_stateful_encoding_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes = make([]protoimpl.MessageInfo, 11) -var file_pkg_logs_sender_grpc_stateful_encoding_proto_goTypes = []any{ - (BatchStatus_Status)(0), // 0: intake.BatchStatus.Status - (*DictEntryDefine)(nil), // 1: intake.DictEntryDefine - (*DictEntryDelete)(nil), // 2: intake.DictEntryDelete - (*PatternDefine)(nil), // 3: intake.PatternDefine - (*PatternUpdate)(nil), // 4: intake.PatternUpdate - (*PatternDelete)(nil), // 5: intake.PatternDelete - (*Log)(nil), // 6: intake.Log - (*StructuredLog)(nil), // 7: intake.StructuredLog - (*DynamicValue)(nil), // 8: intake.DynamicValue - (*Datum)(nil), // 9: intake.Datum - (*StatefulBatch)(nil), // 10: intake.StatefulBatch - (*BatchStatus)(nil), // 11: intake.BatchStatus -} -var file_pkg_logs_sender_grpc_stateful_encoding_proto_depIdxs = []int32{ - 7, // 0: intake.Log.structured:type_name -> intake.StructuredLog - 8, // 1: intake.StructuredLog.dynamic_values:type_name -> intake.DynamicValue - 3, // 2: intake.Datum.pattern_define:type_name -> intake.PatternDefine - 4, // 3: intake.Datum.pattern_update:type_name -> intake.PatternUpdate - 5, // 4: intake.Datum.pattern_delete:type_name -> intake.PatternDelete - 1, // 5: intake.Datum.dict_entry_define:type_name -> intake.DictEntryDefine - 2, // 6: intake.Datum.dict_entry_delete:type_name -> intake.DictEntryDelete - 6, // 7: intake.Datum.logs:type_name -> intake.Log - 9, // 8: intake.StatefulBatch.data:type_name -> intake.Datum - 0, // 9: intake.BatchStatus.status:type_name -> intake.BatchStatus.Status - 10, // 10: intake.StatefulLogsService.LogsStream:input_type -> intake.StatefulBatch - 11, // 11: intake.StatefulLogsService.LogsStream:output_type -> intake.BatchStatus - 11, // [11:12] is the sub-list for method output_type - 10, // [10:11] is the sub-list for method 
input_type - 10, // [10:10] is the sub-list for extension type_name - 10, // [10:10] is the sub-list for extension extendee - 0, // [0:10] is the sub-list for field type_name -} - -func init() { file_pkg_logs_sender_grpc_stateful_encoding_proto_init() } -func file_pkg_logs_sender_grpc_stateful_encoding_proto_init() { - if File_pkg_logs_sender_grpc_stateful_encoding_proto != nil { - return - } - file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[5].OneofWrappers = []any{ - (*Log_Structured)(nil), - (*Log_Raw)(nil), - } - file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[7].OneofWrappers = []any{ - (*DynamicValue_IntValue)(nil), - (*DynamicValue_FloatValue)(nil), - (*DynamicValue_StringValue)(nil), - (*DynamicValue_DictIndex)(nil), - } - file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes[8].OneofWrappers = []any{ - (*Datum_PatternDefine)(nil), - (*Datum_PatternUpdate)(nil), - (*Datum_PatternDelete)(nil), - (*Datum_DictEntryDefine)(nil), - (*Datum_DictEntryDelete)(nil), - (*Datum_Logs)(nil), - } - type x struct{} - out := protoimpl.TypeBuilder{ - File: protoimpl.DescBuilder{ - GoPackagePath: reflect.TypeOf(x{}).PkgPath(), - RawDescriptor: unsafe.Slice(unsafe.StringData(file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDesc), len(file_pkg_logs_sender_grpc_stateful_encoding_proto_rawDesc)), - NumEnums: 1, - NumMessages: 11, - NumExtensions: 0, - NumServices: 1, - }, - GoTypes: file_pkg_logs_sender_grpc_stateful_encoding_proto_goTypes, - DependencyIndexes: file_pkg_logs_sender_grpc_stateful_encoding_proto_depIdxs, - EnumInfos: file_pkg_logs_sender_grpc_stateful_encoding_proto_enumTypes, - MessageInfos: file_pkg_logs_sender_grpc_stateful_encoding_proto_msgTypes, - }.Build() - File_pkg_logs_sender_grpc_stateful_encoding_proto = out.File - file_pkg_logs_sender_grpc_stateful_encoding_proto_goTypes = nil - file_pkg_logs_sender_grpc_stateful_encoding_proto_depIdxs = nil -} diff --git a/pkg/logs/sender/grpc/stateful_encoding_grpc.pb.go 
b/pkg/logs/sender/grpc/stateful_encoding_grpc.pb.go deleted file mode 100644 index 36b0dde717f1..000000000000 --- a/pkg/logs/sender/grpc/stateful_encoding_grpc.pb.go +++ /dev/null @@ -1,115 +0,0 @@ -// Code generated by protoc-gen-go-grpc. DO NOT EDIT. -// versions: -// - protoc-gen-go-grpc v1.5.1 -// - protoc v4.24.3 -// source: pkg/logs/sender/grpc/stateful_encoding.proto - -package grpc - -import ( - context "context" - grpc "google.golang.org/grpc" - codes "google.golang.org/grpc/codes" - status "google.golang.org/grpc/status" -) - -// This is a compile-time assertion to ensure that this generated file -// is compatible with the grpc package it is being compiled against. -// Requires gRPC-Go v1.64.0 or later. -const _ = grpc.SupportPackageIsVersion9 - -const ( - StatefulLogsService_LogsStream_FullMethodName = "/intake.StatefulLogsService/LogsStream" -) - -// StatefulLogsServiceClient is the client API for StatefulLogsService service. -// -// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. -type StatefulLogsServiceClient interface { - LogsStream(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[StatefulBatch, BatchStatus], error) -} - -type statefulLogsServiceClient struct { - cc grpc.ClientConnInterface -} - -func NewStatefulLogsServiceClient(cc grpc.ClientConnInterface) StatefulLogsServiceClient { - return &statefulLogsServiceClient{cc} -} - -func (c *statefulLogsServiceClient) LogsStream(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[StatefulBatch, BatchStatus], error) { - cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) - stream, err := c.cc.NewStream(ctx, &StatefulLogsService_ServiceDesc.Streams[0], StatefulLogsService_LogsStream_FullMethodName, cOpts...) 
- if err != nil { - return nil, err - } - x := &grpc.GenericClientStream[StatefulBatch, BatchStatus]{ClientStream: stream} - return x, nil -} - -// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. -type StatefulLogsService_LogsStreamClient = grpc.BidiStreamingClient[StatefulBatch, BatchStatus] - -// StatefulLogsServiceServer is the server API for StatefulLogsService service. -// All implementations must embed UnimplementedStatefulLogsServiceServer -// for forward compatibility. -type StatefulLogsServiceServer interface { - LogsStream(grpc.BidiStreamingServer[StatefulBatch, BatchStatus]) error - mustEmbedUnimplementedStatefulLogsServiceServer() -} - -// UnimplementedStatefulLogsServiceServer must be embedded to have -// forward compatible implementations. -// -// NOTE: this should be embedded by value instead of pointer to avoid a nil -// pointer dereference when methods are called. -type UnimplementedStatefulLogsServiceServer struct{} - -func (UnimplementedStatefulLogsServiceServer) LogsStream(grpc.BidiStreamingServer[StatefulBatch, BatchStatus]) error { - return status.Errorf(codes.Unimplemented, "method LogsStream not implemented") -} -func (UnimplementedStatefulLogsServiceServer) mustEmbedUnimplementedStatefulLogsServiceServer() {} -func (UnimplementedStatefulLogsServiceServer) testEmbeddedByValue() {} - -// UnsafeStatefulLogsServiceServer may be embedded to opt out of forward compatibility for this service. -// Use of this interface is not recommended, as added methods to StatefulLogsServiceServer will -// result in compilation errors. -type UnsafeStatefulLogsServiceServer interface { - mustEmbedUnimplementedStatefulLogsServiceServer() -} - -func RegisterStatefulLogsServiceServer(s grpc.ServiceRegistrar, srv StatefulLogsServiceServer) { - // If the following call pancis, it indicates UnimplementedStatefulLogsServiceServer was - // embedded by pointer and is nil. 
This will cause panics if an - // unimplemented method is ever invoked, so we test this at initialization - // time to prevent it from happening at runtime later due to I/O. - if t, ok := srv.(interface{ testEmbeddedByValue() }); ok { - t.testEmbeddedByValue() - } - s.RegisterService(&StatefulLogsService_ServiceDesc, srv) -} - -func _StatefulLogsService_LogsStream_Handler(srv interface{}, stream grpc.ServerStream) error { - return srv.(StatefulLogsServiceServer).LogsStream(&grpc.GenericServerStream[StatefulBatch, BatchStatus]{ServerStream: stream}) -} - -// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name. -type StatefulLogsService_LogsStreamServer = grpc.BidiStreamingServer[StatefulBatch, BatchStatus] - -// StatefulLogsService_ServiceDesc is the grpc.ServiceDesc for StatefulLogsService service. -// It's only intended for direct use with grpc.RegisterService, -// and not to be introspected or modified (even as a copy) -var StatefulLogsService_ServiceDesc = grpc.ServiceDesc{ - ServiceName: "intake.StatefulLogsService", - HandlerType: (*StatefulLogsServiceServer)(nil), - Methods: []grpc.MethodDesc{}, - Streams: []grpc.StreamDesc{ - { - StreamName: "LogsStream", - Handler: _StatefulLogsService_LogsStream_Handler, - ServerStreams: true, - ClientStreams: true, - }, - }, - Metadata: "pkg/logs/sender/grpc/stateful_encoding.proto", -} From b8e412c274c620e8f4364cc986e4ad16765fa651 Mon Sep 17 00:00:00 2001 From: yoon nguyen Date: Tue, 18 Nov 2025 18:37:15 -0500 Subject: [PATCH 14/16] fixed the bugs but its filled w loggings. 
--- pkg/logs/message/message.go | 15 - pkg/logs/patterns/clustering/cluster.go | 35 +- .../patterns/clustering/cluster_manager.go | 38 +- .../patterns/clustering/merging/merging.go | 25 +- .../clustering/merging/merging_test.go | 136 +++++- pkg/logs/patterns/clustering/pattern.go | 71 ++- .../clustering/pattern_state_tracker.go | 57 --- .../clustering/pattern_state_tracker_test.go | 380 --------------- pkg/logs/patterns/clustering/pattern_test.go | 86 +--- pkg/logs/pipeline/pipeline.go | 13 +- pkg/logs/processor/processor.go | 30 -- pkg/logs/sender/grpc/mock_state.go | 437 ++++++++++++++++-- 12 files changed, 649 insertions(+), 674 deletions(-) delete mode 100644 pkg/logs/patterns/clustering/pattern_state_tracker.go delete mode 100644 pkg/logs/patterns/clustering/pattern_state_tracker_test.go diff --git a/pkg/logs/message/message.go b/pkg/logs/message/message.go index e56b8b4afbcf..ec40a34361b6 100644 --- a/pkg/logs/message/message.go +++ b/pkg/logs/message/message.go @@ -11,7 +11,6 @@ import ( "fmt" "time" - "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering" "github.com/DataDog/datadog-agent/pkg/logs/sources" "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" "github.com/DataDog/datadog-agent/pkg/util/log" @@ -72,11 +71,6 @@ func (m *Payload) Size() int64 { type Message struct { MessageContent MessageMetadata - - // Pattern extraction - Pattern *clustering.Pattern // The pattern this log matched - WildcardValues []string // This log's specific wildcard values - PatternTemplateState clustering.PatternTemplateStatus // Whether a new/updated pattern template needs sending or if the pattern template stay unchanged (New/Update/None) } // StatefulMessage represents a log message for gRPC stateful streaming @@ -141,8 +135,6 @@ type MessageContent struct { //nolint:revive content []byte // structured content structuredContent StructuredContent - // rendered content preserved for pattern extraction (before encoding overwrites content) - 
renderedContent []byte State MessageContentState } @@ -205,7 +197,6 @@ func (m *MessageContent) SetContent(content []byte) { // SetRendered sets the content for the MessageContent and sets MessageContent state to rendered. func (m *MessageContent) SetRendered(content []byte) { m.content = content - m.renderedContent = content // Preserve for pattern extraction m.State = StateRendered } @@ -215,12 +206,6 @@ func (m *MessageContent) SetEncoded(content []byte) { m.State = StateEncoded } -// GetRenderedContent returns the preserved rendered content (before encoding). -// This is used for pattern extraction which needs plain text, not encoded binary. -func (m *MessageContent) GetRenderedContent() []byte { - return m.renderedContent -} - // ParsingExtra ships extra information parsers want to make available // to the rest of the pipeline. // E.g. Timestamp is used by the docker parsers to transmit a tailing offset. diff --git a/pkg/logs/patterns/clustering/cluster.go b/pkg/logs/patterns/clustering/cluster.go index a870a11fa52e..7cb36795a741 100644 --- a/pkg/logs/patterns/clustering/cluster.go +++ b/pkg/logs/patterns/clustering/cluster.go @@ -65,14 +65,36 @@ func (c *Cluster) AddTokenListToPatterns(tokenList *token.TokenList) *Pattern { for _, p := range c.Patterns { // Check if this TokenList can merge with this pattern's sample if p.Sample != nil && merging.CanMergeTokenLists(tokenList, p.Sample) { + // CRITICAL: Also verify it can merge with the template + // If template has evolved differently, regeneratePattern will fail + // and we should create a new pattern instead + // Note: CanMergeTokenLists is not symmetric, so check both directions + if p.Template != nil { + templateCompatible1 := merging.CanMergeTokenLists(p.Template, tokenList) + templateCompatible2 := merging.CanMergeTokenLists(tokenList, p.Template) + templateCompatible := templateCompatible1 || templateCompatible2 + if !templateCompatible { + // Log matches sample but not template - template has evolved 
incompatibly + // Skip this pattern and continue searching or create new one + // This will create a new pattern instead + continue + } + } + // Merge into existing pattern (same PatternID is preserved) p.LogCount++ p.UpdatedAt = time.Now() c.UpdatedAt = time.Now() // Incrementally merge the new token list into the pattern template - c.regeneratePattern(p, tokenList) - return p // Return existing pattern with updated template + // regeneratePattern will update template if merge succeeds + if c.regeneratePattern(p, tokenList) { + return p // Return existing pattern with updated template + } + // regeneratePattern failed - template couldn't merge with tokenList + // This shouldn't happen if we checked above, but handle it gracefully + // Create a new pattern instead + break } } @@ -85,15 +107,17 @@ func (c *Cluster) AddTokenListToPatterns(tokenList *token.TokenList) *Pattern { } // regeneratePattern incrementally merges a new token list into the pattern. -func (c *Cluster) regeneratePattern(p *Pattern, newTokenList *token.TokenList) { +// Returns true if merge succeeded, false if merge failed. 
+func (c *Cluster) regeneratePattern(p *Pattern, newTokenList *token.TokenList) bool { if p.Template == nil { - return + return false } // Incremental merge: merge new log with existing template merged := merging.MergeTokenLists(p.Template, newTokenList) if merged == nil { - return + // Merge failed - template and newTokenList are incompatible + return false } p.Template = merged @@ -114,6 +138,7 @@ func (c *Cluster) regeneratePattern(p *Pattern, newTokenList *token.TokenList) { } p.UpdatedAt = time.Now() + return true } // getPathPattern converts a path to hierarchical wildcard pattern diff --git a/pkg/logs/patterns/clustering/cluster_manager.go b/pkg/logs/patterns/clustering/cluster_manager.go index 864047159d18..f3c54f741e95 100644 --- a/pkg/logs/patterns/clustering/cluster_manager.go +++ b/pkg/logs/patterns/clustering/cluster_manager.go @@ -13,7 +13,9 @@ import ( "sync" "time" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering/merging" "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" + "github.com/DataDog/datadog-agent/pkg/util/log" ) // PatternChangeType indicates what changed when adding a TokenList to the cluster manager @@ -67,10 +69,18 @@ func (cm *ClusterManager) Add(tokenList *token.TokenList) (*Pattern, PatternChan // Track if patterns had wildcards before hadWildcards := false + var matchedPattern *Pattern + oldWildcardCount := 0 + if hadPatterns { + // Find which pattern this tokenList will match (before adding it) for _, p := range cluster.Patterns { - if p.hasWildcards() { - hadWildcards = true + if p.Sample != nil && merging.CanMergeTokenLists(tokenList, p.Sample) { + matchedPattern = p + oldWildcardCount = p.GetWildcardCount() + if p.hasWildcards() { + hadWildcards = true + } break } } @@ -84,19 +94,31 @@ func (cm *ClusterManager) Add(tokenList *token.TokenList) (*Pattern, PatternChan newPatternCount := len(cluster.Patterns) if newPatternCount > oldPatternCount { // New pattern was created within the cluster (multi-pattern 
scenario) + log.Debugf("[PATTERN_CHANGE] PatternNew: pattern_id=%d (new pattern in existing cluster)", pattern.PatternID) return pattern, PatternNew } - // Check if wildcards were added to an existing pattern + // Check if wildcards were added to an existing pattern (0 → N) if hadPatterns && pattern.hasWildcards() && !hadWildcards { - // Pattern gained wildcards + // Pattern gained its first wildcards + newWildcardCount := pattern.GetWildcardCount() + log.Infof("[PATTERN_CHANGE] PatternUpdated: pattern_id=%d gained first wildcards (0 → %d)", pattern.PatternID, newWildcardCount) return pattern, PatternUpdated } - // If pattern already had wildcards and got more, it's also an update - if hadPatterns && hadWildcards && pattern.size() > 2 { - // Pattern structure may have changed (more wildcards) - return pattern, PatternUpdated + // Check if wildcard count changed for existing pattern (N → M where N != M) + if matchedPattern != nil && matchedPattern.PatternID == pattern.PatternID { + newWildcardCount := pattern.GetWildcardCount() + if newWildcardCount != oldWildcardCount { + // Pattern wildcard count changed + log.Infof("[PATTERN_CHANGE] PatternUpdated: pattern_id=%d wildcard count changed (%d → %d)", pattern.PatternID, oldWildcardCount, newWildcardCount) + return pattern, PatternUpdated + } + // Wildcard count unchanged - this is the normal case for stable patterns + log.Debugf("[PATTERN_CHANGE] PatternNoChange: pattern_id=%d wildcard count unchanged (%d)", pattern.PatternID, oldWildcardCount) + } else { + // No matched pattern or different pattern ID (shouldn't happen, but log it) + log.Debugf("[PATTERN_CHANGE] PatternNoChange: pattern_id=%d (no matched pattern or ID mismatch)", pattern.PatternID) } } return pattern, PatternNoChange diff --git a/pkg/logs/patterns/clustering/merging/merging.go b/pkg/logs/patterns/clustering/merging/merging.go index fa208b0ab708..a778cb0d8f03 100644 --- a/pkg/logs/patterns/clustering/merging/merging.go +++ 
b/pkg/logs/patterns/clustering/merging/merging.go @@ -12,9 +12,24 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" ) -// shouldProtectPosition determines if a the token is the first word token and should be wildcarded. -func shouldProtectPosition(position int, tokenType token.TokenType) bool { - return position == 0 && tokenType == token.TokenWord +// shouldProtectPosition determines if the token at this position is the first word token. +// The first word token is protected from wildcarding to preserve semantic meaning, +// regardless of what position it appears at (e.g., after timestamps/dates). +func shouldProtectPosition(position int, tokenType token.TokenType, tl *token.TokenList) bool { + // Only word tokens can be protected + if tokenType != token.TokenWord { + return false + } + + // Check if any word token appears before this position + for i := 0; i < position; i++ { + if tl.Tokens[i].Type == token.TokenWord { + return false // Not the first word token + } + } + + // This is the first word token + return true } // CanMergeTokenLists checks if incoming log (tl2) can merge with existing pattern's sample (tl1). 
@@ -42,7 +57,7 @@ func CanMergeTokenLists(tl1, tl2 *token.TokenList) bool { } // For wildcard result, check first word protection rule - if result == token.Wildcard && shouldProtectPosition(i, tok1.Type) { + if result == token.Wildcard && shouldProtectPosition(i, tok1.Type, tl1) { return false } } @@ -75,7 +90,7 @@ func MergeTokenLists(tl1, tl2 *token.TokenList) *token.TokenList { case token.Wildcard: // Check protection rules before wildcarding - if shouldProtectPosition(i, tok1.Type) { + if shouldProtectPosition(i, tok1.Type, tl1) { return nil } // Create wildcard, preserving the first token's value as representative diff --git a/pkg/logs/patterns/clustering/merging/merging_test.go b/pkg/logs/patterns/clustering/merging/merging_test.go index 2d1f5230120b..510fdc169c12 100644 --- a/pkg/logs/patterns/clustering/merging/merging_test.go +++ b/pkg/logs/patterns/clustering/merging/merging_test.go @@ -15,40 +15,68 @@ import ( func TestShouldProtectPosition(t *testing.T) { tests := []struct { - name string - position int - tokenType token.TokenType - expected bool + name string + tokens []token.Token + position int + expected bool }{ { - name: "First word should be protected", - position: 0, - tokenType: token.TokenWord, - expected: true, + name: "First word at position 0 should be protected", + tokens: []token.Token{ + token.NewToken(token.TokenWord, "ERROR", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "failed", token.NotWildcard), + }, + position: 0, + expected: true, }, { - name: "First numeric should not be protected", - position: 0, - tokenType: token.TokenNumeric, - expected: false, + name: "First numeric at position 0 should not be protected", + tokens: []token.Token{ + token.NewToken(token.TokenNumeric, "2025", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "ERROR", token.NotWildcard), + }, + position: 0, + expected: 
false, }, { - name: "Second word should not be protected", - position: 1, - tokenType: token.TokenWord, - expected: false, + name: "Second word should not be protected", + tokens: []token.Token{ + token.NewToken(token.TokenWord, "ERROR", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "failed", token.NotWildcard), + }, + position: 2, + expected: false, }, { - name: "First whitespace should not be protected", - position: 0, - tokenType: token.TokenWhitespace, - expected: false, + name: "First word after timestamp should be protected", + tokens: []token.Token{ + token.NewToken(token.TokenNumeric, "2025-11-16", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenNumeric, "07:03:09", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "ERROR", token.NotWildcard), + }, + position: 4, + expected: true, + }, + { + name: "Whitespace should not be protected", + tokens: []token.Token{ + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "ERROR", token.NotWildcard), + }, + position: 0, + expected: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result := shouldProtectPosition(tt.position, tt.tokenType) + tl := token.NewTokenListWithTokens(tt.tokens) + result := shouldProtectPosition(tt.position, tt.tokens[tt.position].Type, tl) assert.Equal(t, tt.expected, result) }) } @@ -194,6 +222,72 @@ func TestMergeTokenLists_ProtectionRulesEnforced(t *testing.T) { assert.Nil(t, merged, "Should not merge when first word differs (protected)") } +func TestCanMergeTokenLists_TimestampPrefixedLogs(t *testing.T) { + // Test that first WORD (not severity level) after timestamp is protected + // Severity levels CAN wildcard, but the first actual word is protected + tl1 := token.NewTokenListWithTokens([]token.Token{ + 
token.NewToken(token.TokenNumeric, "2025-11-16", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenNumeric, "07:03:09", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenSeverityLevel, "ERROR", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "Failed", token.NotWildcard), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenNumeric, "2025-11-16", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenNumeric, "07:03:11", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenSeverityLevel, "WARN", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "Memory", token.NotWildcard), + }) + + // Should NOT merge because first word (Failed vs Memory) differs and is protected + // Note: Severity levels (ERROR vs WARN) CAN wildcard - they're not the "first word" + assert.False(t, CanMergeTokenLists(tl1, tl2), "First word token (after severity) should be protected") +} + +func TestMergeTokenLists_TimestampPrefixedLogsSameFirstWord(t *testing.T) { + // Test that logs with same first word can merge, even with different timestamps and severity levels + // Pattern: * * * Failed * + tl1 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenNumeric, "2025-11-15", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenNumeric, "07:03:09", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenSeverityLevel, "ERROR", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", 
token.NotWildcard), + token.NewToken(token.TokenWord, "Failed", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "user123", token.PotentialWildcard), + }) + + tl2 := token.NewTokenListWithTokens([]token.Token{ + token.NewToken(token.TokenNumeric, "2025-11-16", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenNumeric, "07:03:11", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenSeverityLevel, "WARN", token.PotentialWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "Failed", token.NotWildcard), + token.NewToken(token.TokenWhitespace, " ", token.NotWildcard), + token.NewToken(token.TokenWord, "admin456", token.PotentialWildcard), + }) + + // Should merge - timestamps wildcard, severity wildcard, "Failed" is protected but identical, last word wildcards + merged := MergeTokenLists(tl1, tl2) + assert.NotNil(t, merged, "Should merge when first word matches") + assert.Equal(t, token.IsWildcard, merged.Tokens[0].Wildcard, "Date should be wildcarded") + assert.Equal(t, token.IsWildcard, merged.Tokens[2].Wildcard, "Time should be wildcarded") + assert.Equal(t, token.IsWildcard, merged.Tokens[4].Wildcard, "Severity level should be wildcarded") + assert.Equal(t, "Failed", merged.Tokens[6].Value, "Failed (first word) should be preserved") + assert.Equal(t, token.NotWildcard, merged.Tokens[6].Wildcard, "Failed should not be wildcarded (protected)") + assert.Equal(t, token.IsWildcard, merged.Tokens[8].Wildcard, "Last word should be wildcarded") +} + func TestMergeTokenLists_ProgressiveMerging(t *testing.T) { // Test merging multiple TokenLists progressively tl1 := token.NewTokenListWithTokens([]token.Token{ diff --git a/pkg/logs/patterns/clustering/pattern.go b/pkg/logs/patterns/clustering/pattern.go index 
3e3312a86de6..9e1552871cb4 100644 --- a/pkg/logs/patterns/clustering/pattern.go +++ b/pkg/logs/patterns/clustering/pattern.go @@ -11,6 +11,7 @@ import ( "strings" "time" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering/merging" "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" ) @@ -25,26 +26,21 @@ type Pattern struct { LogCount int // Total number of logs that matched this pattern // Timestamp tracking for stateful encoding - CreatedAt time.Time // When pattern was first created - UpdatedAt time.Time // When pattern was last modified - LastSentAt time.Time // When we last sent this pattern to gRPC - - // State tracking for pattern messages - SentTemplate string // The template string that was last sent (for detecting changes) + CreatedAt time.Time // When pattern was first created + UpdatedAt time.Time // When pattern was last modified } // newPattern creates a new pattern from a single token list. func newPattern(tokenList *token.TokenList, patternID uint64) *Pattern { now := time.Now() return &Pattern{ - Template: tokenList, // First log becomes initial template - Positions: []int{}, // No wildcards yet - PatternID: patternID, - Sample: tokenList, // Store first log as sample - LogCount: 1, // First log - CreatedAt: now, - UpdatedAt: now, - LastSentAt: time.Time{}, // Zero time - never sent + Template: tokenList, // First log becomes initial template + Positions: []int{}, // No wildcards yet + PatternID: patternID, + Sample: tokenList, // Store first log as sample + LogCount: 1, // First log + CreatedAt: now, + UpdatedAt: now, } } @@ -80,7 +76,15 @@ func (p *Pattern) hasWildcards() bool { return len(p.Positions) > 0 } +// GetWildcardCount returns the number of wildcard positions in this pattern. +// This matches the ParamCount that will be sent in PatternDefine. +func (p *Pattern) GetWildcardCount() int { + return len(p.Positions) +} + // GetWildcardCharPositions returns character indices where wildcards appear in the pattern string. 
+// This matches the PosList that will be sent in PatternDefine. +// Example: "User * logged in from *" returns [7, 12] func (p *Pattern) GetWildcardCharPositions() []int { if p.Template == nil { return nil @@ -109,15 +113,46 @@ func (p *Pattern) GetWildcardCharPositions() []int { // GetWildcardValues extracts the wildcard values from a specific TokenList. // This is called per-log to get that log's specific wildcard parameter values. +// +// NOTE: AddTokenListToPatterns now verifies that tokenList matches p.Template before +// assigning it to a pattern, so this function should only be called when structures match. +// However, we keep the defensive check below as a safety measure. func (p *Pattern) GetWildcardValues(tokenList *token.TokenList) []string { if p.Template == nil || len(p.Positions) == 0 { return []string{} } - wildcardValues := make([]string, 0, len(p.Positions)) - for _, pos := range p.Positions { - if pos < tokenList.Length() { - wildcardValues = append(wildcardValues, tokenList.Tokens[pos].Value) + // CRITICAL CHECK: Verify tokenList matches p.Template structure + // Note: CanMergeTokenLists is not symmetric - template (with IsWildcard) vs tokenList (with PotentialWildcard) + // works one way but not the other. Since AddTokenListToPatterns already verified compatibility, + // we check both directions here as a safety measure. 
+ templateMatches := merging.CanMergeTokenLists(p.Template, tokenList) || merging.CanMergeTokenLists(tokenList, p.Template) + if !templateMatches { + // tokenList doesn't match p.Template structure in either direction + // This shouldn't happen if AddTokenListToPatterns worked correctly, but handle gracefully + // Return nil slice (not empty slice) to signal mismatch - caller should send raw log + return nil + } + + // Ensure lengths match (CanMergeTokenLists already checks this, but be safe) + if tokenList.Length() != p.Template.Length() { + // Length mismatch - return nil to signal error + return nil + } + + // Preallocate slice with exact size to ensure count matches ParamCount + wildcardValues := make([]string, len(p.Positions)) + + // p.Positions are token indices in p.Template where wildcards are + // Since tokenList matches p.Template structure (verified above), + // we can use the same indices to extract values from tokenList + for i, templatePos := range p.Positions { + if templatePos < tokenList.Length() { + wildcardValues[i] = tokenList.Tokens[templatePos].Value + } else { + // Position out of bounds - use empty string to maintain count + // This shouldn't happen if structure matches correctly + wildcardValues[i] = "" } } diff --git a/pkg/logs/patterns/clustering/pattern_state_tracker.go b/pkg/logs/patterns/clustering/pattern_state_tracker.go deleted file mode 100644 index 301b229bf680..000000000000 --- a/pkg/logs/patterns/clustering/pattern_state_tracker.go +++ /dev/null @@ -1,57 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2016-present Datadog, Inc. - -// Package clustering provides clustering functionality for grouping similar TokenLists -// and identifying wildcard positions for pattern extraction. 
-package clustering - -import "time" - -// PatternTemplateStatus indicates whether the pattern template needs to be sent -type PatternTemplateStatus int - -const ( - // TemplateNotNeeded indicates template is already synced, no action needed - TemplateNotNeeded PatternTemplateStatus = iota - // TemplateIsNew indicates template has never been sent, needs PatternDefine - TemplateIsNew - // TemplateChanged indicates template changed since last send, needs PatternDelete + PatternDefine - TemplateChanged -) - -// NeedsResend determines if a pattern template needs to be sent and its status. -// Returns (needsSend, templateStatus): -// - (false, TemplateNotNeeded) if template was already sent and hasn't changed -// - (true, TemplateIsNew) if template has never been sent -// - (true, TemplateChanged) if template changed since last send -func (p *Pattern) NeedsResend() (bool, PatternTemplateStatus) { - if p == nil { - return false, TemplateNotNeeded - } - - // Never sent? Need to send as new template - if p.LastSentAt.IsZero() { - return true, TemplateIsNew - } - - // Check if template changed since last send - currentTemplate := p.GetPatternString() - if p.SentTemplate != currentTemplate { - return true, TemplateChanged - } - - // Already sent and unchanged - return false, TemplateNotNeeded -} - -// MarkAsSent records that this pattern was successfully sent. -// It updates both the LastSentAt timestamp and stores the sent template. -func (p *Pattern) MarkAsSent() { - if p == nil { - return - } - p.LastSentAt = time.Now() - p.SentTemplate = p.GetPatternString() -} diff --git a/pkg/logs/patterns/clustering/pattern_state_tracker_test.go b/pkg/logs/patterns/clustering/pattern_state_tracker_test.go deleted file mode 100644 index 54b67349777a..000000000000 --- a/pkg/logs/patterns/clustering/pattern_state_tracker_test.go +++ /dev/null @@ -1,380 +0,0 @@ -// Unless explicitly stated otherwise all files in this repository are licensed -// under the Apache License Version 2.0. 
-// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2016-present Datadog, Inc. - -package clustering - -import ( - "testing" - "time" - - "github.com/stretchr/testify/assert" - - "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" -) - -// Test-only helper functions - -// wasSent returns true if this pattern has been sent at least once. -func wasSent(p *Pattern) bool { - if p == nil { - return false - } - return !p.LastSentAt.IsZero() -} - -// templateChanged returns true if the template has changed since last send. -func templateChanged(p *Pattern) bool { - if p == nil { - return false - } - if p.LastSentAt.IsZero() { - return false // Never sent, so no baseline to compare - } - currentTemplate := p.GetPatternString() - return p.SentTemplate != currentTemplate -} - -// getSentTemplate returns the template that was last sent. -// Returns empty string if never sent. -func getSentTemplate(p *Pattern) string { - if p == nil { - return "" - } - return p.SentTemplate -} - -// TestNeedsResend_NeverSent tests that a pattern that has never been sent needs sending as PatternDefine -func TestNeedsResend_NeverSent(t *testing.T) { - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) - - pattern := newPattern(tl, 12345) - - needsSend, templateState := pattern.NeedsResend() - assert.True(t, needsSend, "Pattern should need sending") - assert.Equal(t, TemplateIsNew, templateState, "Should be TemplateIsNew for first send") -} - -// TestNeedsResend_AlreadySent_NoChange tests that a sent pattern with no changes doesn't need resending -func TestNeedsResend_AlreadySent_NoChange(t *testing.T) { - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, " ", 
token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) - - pattern := newPattern(tl, 12345) - pattern.MarkAsSent() - - needsSend, templateState := pattern.NeedsResend() - assert.False(t, needsSend, "Pattern should not need sending") - assert.Equal(t, TemplateNotNeeded, templateState, "Should be TemplateNotNeeded") -} - -// TestNeedsResend_TemplateChanged tests that a pattern with changed template needs sending as PatternUpdate -func TestNeedsResend_TemplateChanged(t *testing.T) { - // Create initial pattern - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) - - pattern := newPattern(tl, 12345) - pattern.MarkAsSent() - - // Simulate template evolution (add wildcard) - template := token.NewTokenList() - template.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) - template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) - template.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) - - pattern.Template = template - pattern.Positions = []int{2} - pattern.UpdatedAt = time.Now() - - needsSend, templateState := pattern.NeedsResend() - assert.True(t, needsSend, "Pattern should need sending after template change") - assert.Equal(t, TemplateChanged, templateState, "Should be TemplateChanged for template change") -} - -// TestNeedsResend_NilPattern tests that a nil pattern doesn't need sending -func TestNeedsResend_NilPattern(t *testing.T) { - var pattern *Pattern = nil - - needsSend, templateState := pattern.NeedsResend() - assert.False(t, needsSend, "Nil pattern should not need sending") - assert.Equal(t, TemplateNotNeeded, templateState, "Should be TemplateNotNeeded for nil") -} - -// TestMarkAsSent_UpdatesTimestampAndTemplate tests that MarkAsSent properly records state -func 
TestMarkAsSent_UpdatesTimestampAndTemplate(t *testing.T) { - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) - - pattern := newPattern(tl, 12345) - assert.True(t, pattern.LastSentAt.IsZero(), "LastSentAt should be zero initially") - assert.Equal(t, "", pattern.SentTemplate, "SentTemplate should be empty initially") - - pattern.MarkAsSent() - - assert.False(t, pattern.LastSentAt.IsZero(), "LastSentAt should be set") - assert.Equal(t, "Service started", pattern.SentTemplate, "SentTemplate should match current template") -} - -// TestMarkAsSent_NilPattern tests that MarkAsSent handles nil gracefully -func TestMarkAsSent_NilPattern(t *testing.T) { - var pattern *Pattern = nil - // Should not panic - pattern.MarkAsSent() -} - -// TestWasSent_NeverSent tests that WasSent returns false for unsent patterns -func TestWasSent_NeverSent(t *testing.T) { - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) - - pattern := newPattern(tl, 12345) - assert.False(t, wasSent(pattern), "wasSent should return false for new pattern") -} - -// TestWasSent_AfterSend tests that WasSent returns true after sending -func TestWasSent_AfterSend(t *testing.T) { - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) - - pattern := newPattern(tl, 12345) - pattern.MarkAsSent() - - assert.True(t, wasSent(pattern), "wasSent should return true after MarkAsSent") -} - -// TestWasSent_NilPattern tests that WasSent handles nil gracefully -func TestWasSent_NilPattern(t *testing.T) { - var pattern *Pattern = nil - assert.False(t, wasSent(pattern), "wasSent should return false for nil pattern") -} - -// TestTemplateChanged_NeverSent tests that TemplateChanged returns false for unsent patterns -func TestTemplateChanged_NeverSent(t 
*testing.T) { - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) - - pattern := newPattern(tl, 12345) - assert.False(t, templateChanged(pattern), "templateChanged should return false if never sent") -} - -// TestTemplateChanged_NoChange tests that TemplateChanged returns false when template hasn't changed -func TestTemplateChanged_NoChange(t *testing.T) { - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) - - pattern := newPattern(tl, 12345) - pattern.MarkAsSent() - - assert.False(t, templateChanged(pattern), "templateChanged should return false when unchanged") -} - -// TestTemplateChanged_Changed tests that TemplateChanged returns true when template changed -func TestTemplateChanged_Changed(t *testing.T) { - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) - - pattern := newPattern(tl, 12345) - pattern.MarkAsSent() - - // Change template - template := token.NewTokenList() - template.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) - template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) - template.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) - pattern.Template = template - pattern.Positions = []int{2} - - assert.True(t, templateChanged(pattern), "templateChanged should return true when template changed") -} - -// TestTemplateChanged_NilPattern tests that TemplateChanged handles nil gracefully -func TestTemplateChanged_NilPattern(t *testing.T) { - var pattern *Pattern = nil - assert.False(t, templateChanged(pattern), "templateChanged should return false for nil pattern") -} - -// TestGetSentTemplate_NeverSent tests that GetSentTemplate returns empty for unsent patterns -func TestGetSentTemplate_NeverSent(t *testing.T) { - tl := 
token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) - - pattern := newPattern(tl, 12345) - assert.Equal(t, "", getSentTemplate(pattern), "getSentTemplate should return empty for new pattern") -} - -// TestGetSentTemplate_AfterSend tests that GetSentTemplate returns the sent template -func TestGetSentTemplate_AfterSend(t *testing.T) { - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) - - pattern := newPattern(tl, 12345) - pattern.MarkAsSent() - - assert.Equal(t, "Service started", getSentTemplate(pattern), "getSentTemplate should return sent template") -} - -// TestGetSentTemplate_NilPattern tests that GetSentTemplate handles nil gracefully -func TestGetSentTemplate_NilPattern(t *testing.T) { - var pattern *Pattern = nil - assert.Equal(t, "", getSentTemplate(pattern), "getSentTemplate should return empty for nil pattern") -} - -// TestPatternLifecycle_FullFlow tests the complete pattern lifecycle -func TestPatternLifecycle_FullFlow(t *testing.T) { - // 1. Create new pattern - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) - - pattern := newPattern(tl, 12345) - - // 2. Check initial state - needs Define - needsSend, templateState := pattern.NeedsResend() - assert.True(t, needsSend) - assert.Equal(t, TemplateIsNew, templateState) - assert.False(t, wasSent(pattern)) - assert.Equal(t, "", getSentTemplate(pattern)) - - // 3. Mark as sent - pattern.MarkAsSent() - assert.True(t, wasSent(pattern)) - assert.Equal(t, "Service started", getSentTemplate(pattern)) - - // 4. 
Check no resend needed - needsSend, templateState = pattern.NeedsResend() - assert.False(t, needsSend) - assert.Equal(t, TemplateNotNeeded, templateState) - - // 5. Simulate template evolution - time.Sleep(1 * time.Millisecond) - template := token.NewTokenList() - template.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) - template.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) - template.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) - pattern.Template = template - pattern.Positions = []int{2} - pattern.UpdatedAt = time.Now() - - // 6. Check needs Update - assert.True(t, templateChanged(pattern)) - needsSend, templateState = pattern.NeedsResend() - assert.True(t, needsSend) - assert.Equal(t, TemplateChanged, templateState) - - // 7. Mark as sent again - pattern.MarkAsSent() - assert.Equal(t, "Service *", getSentTemplate(pattern)) - - // 8. Check no resend needed again - needsSend, templateState = pattern.NeedsResend() - assert.False(t, needsSend) - assert.Equal(t, TemplateNotNeeded, templateState) -} - -// TestPatternLifecycle_MultipleUpdates tests multiple template updates -func TestPatternLifecycle_MultipleUpdates(t *testing.T) { - // Initial pattern: "Service started" - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, "started", token.PotentialWildcard)) - - pattern := newPattern(tl, 12345) - pattern.MarkAsSent() - - // First update: "Service *" - time.Sleep(1 * time.Millisecond) - template1 := token.NewTokenList() - template1.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) - template1.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) - template1.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) - pattern.Template = template1 - pattern.Positions = []int{2} - pattern.UpdatedAt = time.Now() - - needsSend, 
templateState := pattern.NeedsResend() - assert.True(t, needsSend) - assert.Equal(t, TemplateChanged, templateState) - pattern.MarkAsSent() - - // Second update: "* *" - time.Sleep(1 * time.Millisecond) - template2 := token.NewTokenList() - template2.Add(token.NewToken(token.TokenWord, "value1", token.IsWildcard)) - template2.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) - template2.Add(token.NewToken(token.TokenWord, "value2", token.IsWildcard)) - pattern.Template = template2 - pattern.Positions = []int{0, 2} - pattern.UpdatedAt = time.Now() - - needsSend, templateState = pattern.NeedsResend() - assert.True(t, needsSend) - assert.Equal(t, TemplateChanged, templateState) - pattern.MarkAsSent() - - assert.Equal(t, "* *", getSentTemplate(pattern)) -} - -// TestPatternStateTracker_EdgeCases tests various edge cases -func TestPatternStateTracker_EdgeCases(t *testing.T) { - t.Run("EmptyTemplate", func(t *testing.T) { - tl := token.NewTokenList() - pattern := newPattern(tl, 12345) - - needsSend, templateState := pattern.NeedsResend() - assert.True(t, needsSend) - assert.Equal(t, TemplateIsNew, templateState) - - pattern.MarkAsSent() - assert.Equal(t, "", getSentTemplate(pattern)) - - needsSend, templateState = pattern.NeedsResend() - assert.False(t, needsSend) - assert.Equal(t, TemplateNotNeeded, templateState) - }) - - t.Run("OnlyWildcards", func(t *testing.T) { - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "value1", token.IsWildcard)) - tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) - tl.Add(token.NewToken(token.TokenWord, "value2", token.IsWildcard)) - - pattern := newPattern(tl, 12345) - pattern.Positions = []int{0, 2} - pattern.MarkAsSent() - - assert.Equal(t, "* *", getSentTemplate(pattern)) - assert.False(t, templateChanged(pattern)) - }) - - t.Run("TemplateBecomesNil", func(t *testing.T) { - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Test", token.NotWildcard)) - - pattern := 
newPattern(tl, 12345) - pattern.MarkAsSent() - assert.Equal(t, "Test", getSentTemplate(pattern)) - - // Simulate template becoming nil (edge case) - pattern.Template = nil - needsSend, templateState := pattern.NeedsResend() - assert.True(t, needsSend) - assert.Equal(t, TemplateChanged, templateState) - }) -} diff --git a/pkg/logs/patterns/clustering/pattern_test.go b/pkg/logs/patterns/clustering/pattern_test.go index f922fc553d2f..eeddca77c624 100644 --- a/pkg/logs/patterns/clustering/pattern_test.go +++ b/pkg/logs/patterns/clustering/pattern_test.go @@ -33,7 +33,6 @@ func TestNewPattern(t *testing.T) { assert.Equal(t, 0, len(pattern.Positions), "No wildcards initially") assert.False(t, pattern.CreatedAt.IsZero(), "CreatedAt should be set") assert.False(t, pattern.UpdatedAt.IsZero(), "UpdatedAt should be set") - assert.True(t, pattern.LastSentAt.IsZero(), "LastSentAt should be zero initially") } func TestAddTokenList(t *testing.T) { @@ -329,62 +328,9 @@ func TestExtractWildcardValues_PositionOutOfBounds(t *testing.T) { incoming.Add(token.NewToken(token.TokenWord, "Value", token.PotentialWildcard)) values := pattern.GetWildcardValues(incoming) - assert.Equal(t, []string{"Value"}, values, "Should only extract valid positions") -} - -func TestMarkAsSent(t *testing.T) { - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) - - pattern := newPattern(tl, 12345) - assert.True(t, pattern.LastSentAt.IsZero(), "LastSentAt should be zero initially") - - pattern.MarkAsSent() - assert.False(t, pattern.LastSentAt.IsZero(), "LastSentAt should be set after marking") - assert.Equal(t, "Test", pattern.SentTemplate, "SentTemplate should be set") -} - -func TestNeedsSending_NeverSent(t *testing.T) { - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) - - pattern := newPattern(tl, 12345) - needsSend, templateState := pattern.NeedsResend() - assert.True(t, needsSend, "Should need 
sending if never sent") - assert.Equal(t, TemplateIsNew, templateState, "Should be TemplateIsNew for first send") -} - -func TestNeedsSending_AlreadySent_NotUpdated(t *testing.T) { - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) - - pattern := newPattern(tl, 12345) - time.Sleep(1 * time.Millisecond) - pattern.MarkAsSent() - - needsSend, templateState := pattern.NeedsResend() - assert.False(t, needsSend, "Should not need sending if sent and not updated") - assert.Equal(t, TemplateNotNeeded, templateState, "Should be TemplateNotNeeded") -} - -func TestNeedsSending_UpdatedAfterSent(t *testing.T) { - tl := token.NewTokenList() - tl.Add(token.NewToken(token.TokenWord, "Test", token.PotentialWildcard)) - - pattern := newPattern(tl, 12345) - pattern.MarkAsSent() - - // Update pattern template (not just timestamp) - time.Sleep(1 * time.Millisecond) - template := token.NewTokenList() - template.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) - pattern.Template = template - pattern.Positions = []int{0} - pattern.UpdatedAt = time.Now() - - needsSend, templateState := pattern.NeedsResend() - assert.True(t, needsSend, "Should need sending if template changed after last sent") - assert.Equal(t, TemplateChanged, templateState, "Should be TemplateChanged for template change") + // CRITICAL: Must return same length as Positions to match ParamCount + // Out-of-bounds positions are filled with empty strings + assert.Equal(t, []string{"Value", ""}, values, "Should maintain Positions length with empty strings for out-of-bounds") } func TestSanitizeForTemplate_PrintableChars(t *testing.T) { @@ -480,30 +426,4 @@ func TestPattern_IntegrationScenario(t *testing.T) { values := pattern.GetWildcardValues(log2) assert.Equal(t, []string{"Network", "timeout", "reached"}, values) - - // 4. 
Check sending status - needsSend, templateState := pattern.NeedsResend() - assert.True(t, needsSend) - assert.Equal(t, TemplateIsNew, templateState) - pattern.MarkAsSent() - needsSend, templateState = pattern.NeedsResend() - assert.False(t, needsSend) - assert.Equal(t, TemplateNotNeeded, templateState) - - // 5. Update pattern (change template, not just log count) - time.Sleep(1 * time.Millisecond) - // Evolve template to add more wildcards - template2 := token.NewTokenList() - template2.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) - template2.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) - template2.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) - template2.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) - template2.Add(token.NewToken(token.TokenWord, "value", token.IsWildcard)) - pattern.Template = template2 - pattern.Positions = []int{0, 2, 4} - pattern.LogCount++ - pattern.UpdatedAt = time.Now() - needsSend, templateState = pattern.NeedsResend() - assert.True(t, needsSend) - assert.Equal(t, TemplateChanged, templateState) } diff --git a/pkg/logs/pipeline/pipeline.go b/pkg/logs/pipeline/pipeline.go index 018225562af6..8b83929c76c3 100644 --- a/pkg/logs/pipeline/pipeline.go +++ b/pkg/logs/pipeline/pipeline.go @@ -118,14 +118,11 @@ func getStrategy( encoder = compressor.NewCompressor(endpoints.Main.CompressionKind, endpoints.Main.CompressionLevel) } if endpoints.UseGRPC { - // Throwaway code to test with existing pipelines - // TODO: Remove this once we have a real State component - - // The interface of stateful transport layer is input channel to the GRPCBatchStrategy - // The input type is StatefulMessage, which should be emitted by the State component - // Here is the temporary translation from Message to StatefulMessage - statefulInputChan := make(chan *message.StatefulMessage, pkgconfigsetup.Datadog().GetInt("logs_config.message_channel_size")) - grpcsender.StartMessageTranslator(inputChan, 
statefulInputChan) + translator := grpcsender.NewMessageTranslator(nil) // nil creates a new cluster manager per pipeline + // TODO: Consider sharing cluster manager across pipelines for better pattern clustering: + // sharedClusterManager := getSharedClusterManager() // would need to be passed in or singleton + // translator := grpcsender.NewMessageTranslator(sharedClusterManager) + statefulInputChan := translator.Start(inputChan, pkgconfigsetup.Datadog().GetInt("logs_config.message_channel_size")) return grpcsender.NewBatchStrategy(statefulInputChan, outputChan, flushChan, endpoints.BatchWait, endpoints.BatchMaxSize, endpoints.BatchMaxContentSize, "logs", encoder, pipelineMonitor, instanceID) } diff --git a/pkg/logs/processor/processor.go b/pkg/logs/processor/processor.go index a4ebb654206b..a4583b491aae 100644 --- a/pkg/logs/processor/processor.go +++ b/pkg/logs/processor/processor.go @@ -18,7 +18,6 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/diagnostic" "github.com/DataDog/datadog-agent/pkg/logs/message" "github.com/DataDog/datadog-agent/pkg/logs/metrics" - "github.com/DataDog/datadog-agent/pkg/logs/patterns/automaton" "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering" "github.com/DataDog/datadog-agent/pkg/util/log" ) @@ -80,10 +79,6 @@ func New(config pkgconfigmodel.Reader, inputChan, outputChan chan *message.Messa pipelineMonitor: pipelineMonitor, utilization: pipelineMonitor.MakeUtilizationMonitor(metrics.ProcessorTlmName, instanceID), instanceID: instanceID, - - // Initialize pattern extraction components, - /// Will lock behind feature flag later - clusterManager: clustering.NewClusterManager(), } // Initialize cached failover config @@ -213,31 +208,6 @@ func (p *Processor) processMessage(msg *message.Message) { // report this message to diagnostic receivers (e.g. 
`stream-logs` command) p.diagnosticMessageReceiver.HandleMessage(msg, rendered, "") - // Extract pattern from the rendered message content - if p.clusterManager != nil { - // Tokenize the rendered content - tokenizer := automaton.NewTokenizer(string(rendered)) - tokenList := tokenizer.Tokenize() - - // Add to cluster manager and get the pattern - pattern, _ := p.clusterManager.Add(tokenList) - - if pattern != nil { - // Store pattern reference and per-log wildcard values - msg.Pattern = pattern - msg.WildcardValues = pattern.GetWildcardValues(tokenList) - - // Determine if pattern template needs sending - needsSend, templateState := pattern.NeedsResend() - msg.PatternTemplateState = templateState - - // Mark as sent if template was sent - if needsSend { - pattern.MarkAsSent() - } - } - } - if p.failoverConfig.isFailoverActive { p.filterMRFMessages(msg) } diff --git a/pkg/logs/sender/grpc/mock_state.go b/pkg/logs/sender/grpc/mock_state.go index 9cfae7562542..219fdd6dcfd3 100644 --- a/pkg/logs/sender/grpc/mock_state.go +++ b/pkg/logs/sender/grpc/mock_state.go @@ -11,84 +11,419 @@ import ( "unicode/utf8" "github.com/DataDog/datadog-agent/pkg/logs/message" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/automaton" "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering" + "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" + "github.com/DataDog/datadog-agent/pkg/util/log" ) const nanoToMillis = 1000000 -// StartMessageTranslator starts a goroutine that translates message.Message to message.StatefulMessage -// It handles pattern extraction by sending PatternDefine for new patterns, or PatternDelete+PatternDefine for updates, -// then sends the StructuredLog or raw log. 
-func StartMessageTranslator(inputChan chan *message.Message, outputChan chan *message.StatefulMessage) { +// MessageTranslator handles translation of message.Message to message.StatefulMessage +// It manages pattern extraction, clustering, and stateful message creation +type MessageTranslator struct { + clusterManager *clustering.ClusterManager +} + +// NewMessageTranslator creates a new MessageTranslator instance +// If clusterManager is nil, a new one will be created +func NewMessageTranslator(clusterManager *clustering.ClusterManager) *MessageTranslator { + return &MessageTranslator{ + clusterManager: clustering.NewClusterManager(), + } + + // Would be shared cluster manager instead across pipelines when implemented. + // if clusterManager == nil { + // clusterManager = clustering.NewClusterManager() + // } + // return &MessageTranslator{ + // clusterManager: clusterManager, + // } +} + +// Start starts a goroutine that translates message.Message to message.StatefulMessage +// It handles pattern extraction by: +// 1. Tokenizing the message content +// 2. Using ClusterManager to create/update patterns +// 3. Sending PatternDefine for new patterns, or PatternDelete+PatternDefine for updates +// 4. 
Sending StructuredLog with wildcard values +// Returns the output channel for StatefulMessages +func (mt *MessageTranslator) Start(inputChan chan *message.Message, bufferSize int) chan *message.StatefulMessage { + outputChan := make(chan *message.StatefulMessage, bufferSize) go func() { defer close(outputChan) for msg := range inputChan { - // Get timestamp - prefer message timestamp if available - ts := time.Now().UTC() - if !msg.ServerlessExtra.Timestamp.IsZero() { - ts = msg.ServerlessExtra.Timestamp + mt.processMessage(msg, outputChan) + } + }() + return outputChan +} + +// StartMessageTranslator is a convenience function that creates a MessageTranslator with a cluster manager +// Returns the output channel for StatefulMessages +func StartMessageTranslator(inputChan chan *message.Message, bufferSize int) chan *message.StatefulMessage { + // Use a shared cluster manager for all pipelines (patterns shared across pipelines) + translator := NewMessageTranslator(nil) + return translator.Start(inputChan, bufferSize) +} + +// processMessage handles a single message: tokenizes, creates patterns, and sends appropriate datums +func (mt *MessageTranslator) processMessage(msg *message.Message, outputChan chan *message.StatefulMessage) { + var patternDefineSent bool + var patternDefineParamCount uint32 + + ts := getMessageTimestamp(msg) + + // Get message content + content := msg.GetContent() + if len(content) == 0 { + log.Debugf("[MSG_TRANSLATOR] Skipping empty message") + return + } + + // Tokenize the message content + contentStr := string(content) + tokenList := tokenizeMessage(contentStr) + + // Nil check but shouldn't happen at all + if tokenList == nil || tokenList.IsEmpty() { + log.Debugf("[MSG_TRANSLATOR] Skipping message with empty token list") + mt.sendRawLog(outputChan, msg, contentStr, ts) + return + } + + // Process tokenized log through cluster manager to get/create pattern + pattern, changeType := mt.clusterManager.Add(tokenList) + if pattern == nil { + 
log.Debugf("[MSG_TRANSLATOR] No pattern created, sending as raw log") + mt.sendRawLog(outputChan, msg, contentStr, ts) + return + } + + // CRITICAL RACE CONDITION DETECTION: Capture pattern state IMMEDIATELY after Add() returns + // The pattern pointer is shared and can be modified by other goroutines after ClusterManager's lock is released. + // Log the state at multiple points to detect if it changes (proving race condition) + patternID := pattern.PatternID + capturedParamCount := uint32(pattern.GetWildcardCount()) + capturedTemplateLen := 0 + capturedPositionsLen := 0 + if pattern.Template != nil { + capturedTemplateLen = pattern.Template.Length() + capturedPositionsLen = len(pattern.Positions) + } + + log.Infof("[RACE_DETECT] Step 1 - After Add(): pattern_id=%d paramCount=%d templateLen=%d positionsLen=%d", + patternID, capturedParamCount, capturedTemplateLen, capturedPositionsLen) + + // Extract wildcard values NOW, using the current pattern state + // This must happen before handlePatternChange() which might send PatternDefine + // If pattern is modified by another goroutine between now and then, we'll have inconsistent state + wildcardValues := mt.extractAndValidateWildcardValues(pattern, tokenList, capturedParamCount) + + log.Infof("[RACE_DETECT] Step 2 - After extract: pattern_id=%d wildcardValuesCount=%d", + patternID, len(wildcardValues)) + + // If wildcardValues is nil, it means tokenList doesn't match template structure + // Send as raw log instead of StructuredLog to avoid intake errors + if wildcardValues == nil { + log.Warnf("[MSG_TRANSLATOR] Pattern mismatch detected for pattern_id=%d - tokenList doesn't match template structure. Sending as raw log instead.", + patternID) + mt.sendRawLog(outputChan, msg, contentStr, ts) + return + } + + // Always use pattern-based encoding (PatternDefine + StructuredLog) + // - Patterns without wildcards: param_count=0, dynamic_values=[] + // - Patterns with wildcards: param_count>0, dynamic_values=[...] 
+ // This ensures consistent behavior and proper pattern evolution tracking + + // Handle pattern state changes (send PatternDefine/PatternDelete as needed) + // WARNING: Pattern may have been modified by another goroutine by now! + // But we've already captured wildcardValues and paramCount, so we're consistent + + // Read pattern state BEFORE handlePatternChange to detect races + beforeHandleParamCount := uint32(pattern.GetWildcardCount()) + beforeHandleTemplateLen := 0 + if pattern.Template != nil { + beforeHandleTemplateLen = pattern.Template.Length() + } + + log.Infof("[RACE_DETECT] Step 3 - Before handlePatternChange: pattern_id=%d paramCount=%d templateLen=%d", + patternID, beforeHandleParamCount, beforeHandleTemplateLen) + + // Detect if pattern changed between capture and now + if beforeHandleParamCount != capturedParamCount { + log.Errorf("[RACE_DETECTED!!!] Pattern modified between Add() and handlePatternChange! pattern_id=%d captured=%d now=%d", + patternID, capturedParamCount, beforeHandleParamCount) + } + if beforeHandleTemplateLen != capturedTemplateLen { + log.Errorf("[RACE_DETECTED!!!] Template length changed! 
pattern_id=%d captured=%d now=%d", + patternID, capturedTemplateLen, beforeHandleTemplateLen) + } + + mt.handlePatternChange(pattern, changeType, msg, outputChan, &patternDefineSent, &patternDefineParamCount) + + // Read pattern state AFTER handlePatternChange to detect races + afterHandleParamCount := uint32(pattern.GetWildcardCount()) + afterHandleTemplateLen := 0 + if pattern.Template != nil { + afterHandleTemplateLen = pattern.Template.Length() + } + + log.Infof("[RACE_DETECT] Step 4 - After handlePatternChange: pattern_id=%d paramCount=%d templateLen=%d patternDefineSent=%v patternDefineParamCount=%d", + patternID, afterHandleParamCount, afterHandleTemplateLen, patternDefineSent, patternDefineParamCount) + + // Detect if pattern changed during handlePatternChange + if afterHandleParamCount != beforeHandleParamCount { + log.Errorf("[RACE_DETECTED!!!] Pattern modified DURING handlePatternChange! pattern_id=%d before=%d after=%d", + patternID, beforeHandleParamCount, afterHandleParamCount) + } + + // Use the captured paramCount to ensure consistency with wildcardValues we extracted + // If PatternDefine was sent, it might have used updated pattern state, so validate + if patternDefineSent { + // PatternDefine was sent - validate that its paramCount matches what we captured + if patternDefineParamCount != capturedParamCount { + log.Warnf("[MSG_TRANSLATOR] Pattern paramCount changed during processing! pattern_id=%d captured=%d PatternDefine=%d. Using PatternDefine value.", + patternID, capturedParamCount, patternDefineParamCount) + // Use PatternDefine's paramCount, but validate wildcardValues matches + if uint32(len(wildcardValues)) != patternDefineParamCount { + log.Errorf("CRITICAL: Race condition detected! 
pattern_id=%d wildcardValuesCount=%d PatternDefineParamCount=%d | This WILL cause intake error!", + patternID, len(wildcardValues), patternDefineParamCount) + // Adjust wildcardValues to match PatternDefine + wildcardValues = adjustWildcardValuesCount(wildcardValues, int(patternDefineParamCount)) } + } + } else { + // No PatternDefine sent - use captured paramCount + patternDefineParamCount = capturedParamCount + } + + // Always send StructuredLog with pattern_id + dynamic values + mt.sendStructuredLog(outputChan, msg, pattern, wildcardValues, ts, patternDefineSent, patternDefineParamCount) +} + +// getMessageTimestamp returns the timestamp for the message, preferring ServerlessExtra.Timestamp +func getMessageTimestamp(msg *message.Message) time.Time { + ts := time.Now().UTC() + if !msg.ServerlessExtra.Timestamp.IsZero() { + ts = msg.ServerlessExtra.Timestamp + } + return ts +} + +// tokenizeMessage tokenizes the message content string +func tokenizeMessage(contentStr string) *token.TokenList { + tokenizer := automaton.NewTokenizer(contentStr) + return tokenizer.Tokenize() +} + +// handlePatternChange handles pattern changes based on PatternChangeType from cluster manager +// Uses the change type to determine if we need to send PatternDefine/PatternDelete +// The snapshot mechanism in inflight.go tracks what's been sent for stream recovery +func (mt *MessageTranslator) handlePatternChange(pattern *clustering.Pattern, changeType clustering.PatternChangeType, msg *message.Message, outputChan chan *message.StatefulMessage, patternDefineSent *bool, patternDefineParamCount *uint32) { + switch changeType { + case clustering.PatternNew: + // New pattern - send PatternDefine (may have 0 wildcards initially) + mt.sendPatternDefine(pattern, msg, outputChan, "PatternNew", patternDefineSent, patternDefineParamCount) + + case clustering.PatternUpdated: + // Pattern structure changed (e.g., 0→N wildcards, or N→M wildcards) + // Since PatternDefine was sent before, we need to 
delete and redefine + mt.sendPatternDelete(pattern.PatternID, msg, outputChan) + mt.sendPatternDefine(pattern, msg, outputChan, "PatternUpdated", patternDefineSent, patternDefineParamCount) - // If pattern template needs to be sent, emit template definition message(s) first - if msg.PatternTemplateState != clustering.TemplateNotNeeded { - switch msg.PatternTemplateState { - case clustering.TemplateIsNew: - // New template - send PatternDefine - patternDatum := buildPatternDefine(msg.Pattern) - outputChan <- &message.StatefulMessage{ - Datum: patternDatum, - Metadata: &msg.MessageMetadata, - } - - case clustering.TemplateChanged: - // Updated template - send PatternDelete + PatternDefine to replace it - // This ensures intake has the updated template definition - deleteDatum := buildPatternDelete(msg.Pattern.PatternID) - outputChan <- &message.StatefulMessage{ - Datum: deleteDatum, - Metadata: &msg.MessageMetadata, - } - - defineDatum := buildPatternDefine(msg.Pattern) - outputChan <- &message.StatefulMessage{ - Datum: defineDatum, - Metadata: &msg.MessageMetadata, - } + case clustering.PatternNoChange: + // Pattern unchanged - no need to send PatternDefine + // The snapshot already has the current pattern state + log.Debugf("[MSG_TRANSLATOR] Pattern unchanged for pattern_id=%d, skipping PatternDefine", pattern.PatternID) + } +} + +// sendPatternDefine creates and sends a PatternDefine datum +func (mt *MessageTranslator) sendPatternDefine(pattern *clustering.Pattern, msg *message.Message, outputChan chan *message.StatefulMessage, reason string, patternDefineSent *bool, patternDefineParamCount *uint32) { + patternDatum := buildPatternDefine(pattern) + if pd := patternDatum.GetPatternDefine(); pd != nil { + *patternDefineParamCount = pd.ParamCount + log.Infof("[MSG_TRANSLATOR] Sending PatternDefine: pattern_id=%d paramCount=%d template=%q (%s)", + pattern.PatternID, *patternDefineParamCount, pd.Template, reason) + } + outputChan <- &message.StatefulMessage{ + Datum: 
patternDatum, + Metadata: &msg.MessageMetadata, + } + *patternDefineSent = true +} + +// sendPatternDelete creates and sends a PatternDelete datum +func (mt *MessageTranslator) sendPatternDelete(patternID uint64, msg *message.Message, outputChan chan *message.StatefulMessage) { + deleteDatum := buildPatternDelete(patternID) + log.Infof("[MSG_TRANSLATOR] Sending PatternDelete: pattern_id=%d", patternID) + outputChan <- &message.StatefulMessage{ + Datum: deleteDatum, + Metadata: &msg.MessageMetadata, + } +} + +// extractAndValidateWildcardValues extracts wildcard values and validates the count +// Returns nil if tokenList doesn't match template structure (caller should send raw log) +func (mt *MessageTranslator) extractAndValidateWildcardValues(pattern *clustering.Pattern, tokenList *token.TokenList, patternDefineParamCount uint32) []string { + wildcardValues := pattern.GetWildcardValues(tokenList) + + // nil indicates structure mismatch - return early + if wildcardValues == nil { + log.Warnf("[MSG_TRANSLATOR] GetWildcardValues returned nil for pattern_id=%d - structure mismatch detected", + pattern.PatternID) + return nil + } + + currentWildcardCount := pattern.GetWildcardCount() + + // Adjust count if mismatch + if len(wildcardValues) != currentWildcardCount { + log.Warnf("[MSG_TRANSLATOR] Wildcard values count mismatch for pattern_id=%d: got %d, expected %d", + pattern.PatternID, len(wildcardValues), currentWildcardCount) + wildcardValues = adjustWildcardValuesCount(wildcardValues, currentWildcardCount) + } + + // Validate against PatternDefine paramCount if it was sent + mt.validateWildcardValuesCount(pattern, wildcardValues, currentWildcardCount, patternDefineParamCount) + + return wildcardValues +} + +// adjustWildcardValuesCount adjusts the wildcard values slice to match expected count +func adjustWildcardValuesCount(wildcardValues []string, expectedCount int) []string { + if len(wildcardValues) < expectedCount { + // Pad with empty strings + for 
len(wildcardValues) < expectedCount { + wildcardValues = append(wildcardValues, "") + } + } else if len(wildcardValues) > expectedCount { + // Truncate (shouldn't happen, but be safe) + wildcardValues = wildcardValues[:expectedCount] + } + return wildcardValues +} + +// validateWildcardValuesCount validates that wildcard values count matches PatternDefine paramCount +func (mt *MessageTranslator) validateWildcardValuesCount(pattern *clustering.Pattern, wildcardValues []string, currentWildcardCount int, patternDefineParamCount uint32) { + if patternDefineParamCount > 0 { + // If PatternDefine was sent in this cycle, validate against it + if uint32(len(wildcardValues)) != patternDefineParamCount { + log.Errorf("CRITICAL: StructuredLog count mismatch! pattern_id=%d StructuredLogCount=%d PatternDefineParamCount=%d (sent in this cycle) | This will cause intake error!", + pattern.PatternID, len(wildcardValues), patternDefineParamCount) + } + } else { + // PatternDefine was NOT sent in this cycle - validate against current pattern's wildcard count + expectedParamCount := uint32(currentWildcardCount) + if uint32(len(wildcardValues)) != expectedParamCount { + log.Errorf("CRITICAL: StructuredLog count mismatch! 
pattern_id=%d StructuredLogCount=%d ExpectedParamCount=%d (no PatternDefine sent this cycle) | This will cause intake error!", + pattern.PatternID, len(wildcardValues), expectedParamCount) + } + } +} + +// sendRawLog creates and sends a raw log datum +func (mt *MessageTranslator) sendRawLog(outputChan chan *message.StatefulMessage, msg *message.Message, contentStr string, ts time.Time) { + logDatum := buildRawLog(contentStr, ts) + outputChan <- &message.StatefulMessage{ + Datum: logDatum, + Metadata: &msg.MessageMetadata, + } +} + +// sendStructuredLog creates and sends a StructuredLog datum +func (mt *MessageTranslator) sendStructuredLog(outputChan chan *message.StatefulMessage, msg *message.Message, pattern *clustering.Pattern, wildcardValues []string, ts time.Time, patternDefineSent bool, patternDefineParamCount uint32) { + logDatum := buildStructuredLog(pattern.PatternID, wildcardValues, ts) + + // Log StructuredLog details for debugging + if logs, ok := logDatum.Data.(*statefulpb.Datum_Logs); ok { + if sl := logs.Logs.GetStructured(); sl != nil { + dynamicValuesStr := make([]string, len(sl.DynamicValues)) + for i, dv := range sl.DynamicValues { + if sv := dv.GetStringValue(); sv != "" { + dynamicValuesStr[i] = sv + } else { + dynamicValuesStr[i] = "" } } + log.Infof("[MSG_TRANSLATOR] Sending StructuredLog: pattern_id=%d dynamicValuesCount=%d patternDefineSent=%v patternDefineParamCount=%d dynamicValues=%v", + pattern.PatternID, len(sl.DynamicValues), patternDefineSent, patternDefineParamCount, dynamicValuesStr) - // Create the Log message - either structured (with pattern) - logDatum := buildStructuredLog(msg.Pattern.PatternID, msg.WildcardValues, ts) + // CRITICAL VALIDATION: Check if we're sending the right count + currentPatternParamCount := uint32(pattern.GetWildcardCount()) - // Create StatefulMessage with the log Datum - statefulMsg := &message.StatefulMessage{ - Datum: logDatum, - Metadata: &msg.MessageMetadata, + // Validate count matches what we're 
claiming to send + if patternDefineSent { + if uint32(len(sl.DynamicValues)) != patternDefineParamCount { + log.Errorf("CRITICAL: StructuredLog mismatch with PatternDefine! pattern_id=%d dynamicValuesCount=%d patternDefineParamCount=%d | This WILL cause intake error!", + pattern.PatternID, len(sl.DynamicValues), patternDefineParamCount) + } + } else { + // No PatternDefine sent - intake will use previously defined pattern + // Validate against current pattern state + if uint32(len(sl.DynamicValues)) != currentPatternParamCount { + log.Errorf("CRITICAL: StructuredLog mismatch with current pattern! pattern_id=%d dynamicValuesCount=%d currentPatternParamCount=%d (no PatternDefine sent) | This WILL cause intake error!", + pattern.PatternID, len(sl.DynamicValues), currentPatternParamCount) + } } - outputChan <- statefulMsg + // Log current pattern state for debugging + if pattern.Template != nil { + templateStr := pattern.GetPatternString() + starCount := strings.Count(templateStr, "*") + log.Infof("[MSG_TRANSLATOR] Current pattern state: pattern_id=%d templateStarCount=%d currentParamCount=%d template=%q", + pattern.PatternID, starCount, currentPatternParamCount, templateStr) + } } - }() + } + + outputChan <- &message.StatefulMessage{ + Datum: logDatum, + Metadata: &msg.MessageMetadata, + } } // buildPatternDefine creates a PatternDefine Datum from a Pattern func buildPatternDefine(pattern *clustering.Pattern) *statefulpb.Datum { - // Get character positions where wildcards appear in the template string - // This allows the backend to know where to insert dynamic values + // indice of wildcards in the pattern string charPositions := pattern.GetWildcardCharPositions() + // is the indice that get converted to uint32 posList := make([]uint32, len(charPositions)) for i, pos := range charPositions { posList[i] = uint32(pos) } + // count of wildcards in the pattern template + paramCount := uint32(pattern.GetWildcardCount()) + // count of wildcards in the posList + 
posListCount := uint32(len(posList)) + templateStr := pattern.GetPatternString() + + // Validate that the count of wildcards matches - they should always match + // If they don't, it indicates pattern.Positions and Template.Tokens are out of sync + if paramCount != posListCount { + log.Errorf("CRITICAL: PatternDefine count mismatch! pattern_id=%d paramCount=%d (from pattern.Positions) posListCount=%d (from GetWildcardCharPositions) template=%q | This will cause intake error!", + pattern.PatternID, paramCount, posListCount, templateStr) + // Use posListCount as the authoritative source since it's what we're actually sending + paramCount = posListCount + } + + // Additional validation: count '*' in template string should match paramCount + starCount := uint32(strings.Count(templateStr, "*")) + if starCount != paramCount { + log.Errorf("CRITICAL: PatternDefine template star count mismatch! pattern_id=%d template=%q starCount=%d paramCount=%d | This will cause intake error!", + pattern.PatternID, templateStr, starCount, paramCount) + } return &statefulpb.Datum{ Data: &statefulpb.Datum_PatternDefine{ PatternDefine: &statefulpb.PatternDefine{ PatternId: pattern.PatternID, Template: pattern.GetPatternString(), - ParamCount: uint32(len(pattern.Positions)), + ParamCount: paramCount, PosList: posList, }, }, @@ -133,6 +468,20 @@ func buildStructuredLog(patternID uint64, wildcardValues []string, ts time.Time) } } +// buildRawLog creates a Datum containing a raw log (no pattern) +func buildRawLog(content string, ts time.Time) *statefulpb.Datum { + return &statefulpb.Datum{ + Data: &statefulpb.Datum_Logs{ + Logs: &statefulpb.Log{ + Timestamp: uint64(ts.UnixNano() / nanoToMillis), + Content: &statefulpb.Log_Raw{ + Raw: content, + }, + }, + }, + } +} + // toValidUtf8 ensures all characters are UTF-8 func toValidUtf8(data []byte) string { if utf8.Valid(data) { From 946b51c524de34a24bfaa3fb2a964c7a11e2ae34 Mon Sep 17 00:00:00 2001 From: yoon nguyen Date: Thu, 20 Nov 2025 14:50:17 
-0500 Subject: [PATCH 15/16] + Remoctor some code + Update pattern template to match intake + Update tests to reflectnew pattern templates - Remove noisy logging --- pkg/logs/patterns/clustering/cluster.go | 2 +- .../patterns/clustering/cluster_manager.go | 89 ++---- .../clustering/cluster_manager_test.go | 12 +- pkg/logs/patterns/clustering/pattern.go | 56 ++-- pkg/logs/patterns/clustering/pattern_test.go | 19 +- pkg/logs/pipeline/pipeline.go | 5 +- pkg/logs/sender/grpc/mock_state.go | 257 +----------------- 7 files changed, 74 insertions(+), 366 deletions(-) diff --git a/pkg/logs/patterns/clustering/cluster.go b/pkg/logs/patterns/clustering/cluster.go index 7cb36795a741..5c00ddde9222 100644 --- a/pkg/logs/patterns/clustering/cluster.go +++ b/pkg/logs/patterns/clustering/cluster.go @@ -15,7 +15,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" ) -// Cluster represents a group of TokenLists with identical signatures. +// Cluster represents a cluster with a group of TokenLists that have identical signatures. // A cluster may contain multiple patterns if token lists with the same signature cannot be merged since structural Fidelity is Valuable. 
// Examples: // "Status: OK" → HTTP response format diff --git a/pkg/logs/patterns/clustering/cluster_manager.go b/pkg/logs/patterns/clustering/cluster_manager.go index f3c54f741e95..61e4bda83254 100644 --- a/pkg/logs/patterns/clustering/cluster_manager.go +++ b/pkg/logs/patterns/clustering/cluster_manager.go @@ -15,7 +15,7 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering/merging" "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" - "github.com/DataDog/datadog-agent/pkg/util/log" + "github.com/DataDog/datadog-agent/pkg/trace/log" ) // PatternChangeType indicates what changed when adding a TokenList to the cluster manager @@ -47,6 +47,7 @@ func NewClusterManager() *ClusterManager { // Returns the pattern that was created/updated and a PatternChangeType indicating what changed. func (cm *ClusterManager) Add(tokenList *token.TokenList) (*Pattern, PatternChangeType) { if tokenList == nil || tokenList.IsEmpty() { + log.Errorf("Cluster Manager failed to add log: %v for patterning. 
Token list is empty or nil.", tokenList.String()) return nil, PatternNoChange } @@ -62,70 +63,38 @@ func (cm *ClusterManager) Add(tokenList *token.TokenList) (*Pattern, PatternChan // Look for existing cluster with matching signature for _, cluster := range clusters { - if cluster.Signature.Equals(signature) { - // Track the state before adding - hadPatterns := len(cluster.Patterns) > 0 - oldPatternCount := len(cluster.Patterns) - - // Track if patterns had wildcards before - hadWildcards := false - var matchedPattern *Pattern - oldWildcardCount := 0 - - if hadPatterns { - // Find which pattern this tokenList will match (before adding it) - for _, p := range cluster.Patterns { - if p.Sample != nil && merging.CanMergeTokenLists(tokenList, p.Sample) { - matchedPattern = p - oldWildcardCount = p.GetWildcardCount() - if p.hasWildcards() { - hadWildcards = true - } - break - } - } - } + if !cluster.Signature.Equals(signature) { + continue + } - // Add to appropriate pattern within the cluster - pattern := cluster.AddTokenListToPatterns(tokenList) - - // Determine if this created a new pattern or updated an existing one - if pattern != nil { - newPatternCount := len(cluster.Patterns) - if newPatternCount > oldPatternCount { - // New pattern was created within the cluster (multi-pattern scenario) - log.Debugf("[PATTERN_CHANGE] PatternNew: pattern_id=%d (new pattern in existing cluster)", pattern.PatternID) - return pattern, PatternNew - } - - // Check if wildcards were added to an existing pattern (0 → N) - if hadPatterns && pattern.hasWildcards() && !hadWildcards { - // Pattern gained its first wildcards - newWildcardCount := pattern.GetWildcardCount() - log.Infof("[PATTERN_CHANGE] PatternUpdated: pattern_id=%d gained first wildcards (0 → %d)", pattern.PatternID, newWildcardCount) - return pattern, PatternUpdated - } - - // Check if wildcard count changed for existing pattern (N → M where N != M) - if matchedPattern != nil && matchedPattern.PatternID == pattern.PatternID 
{ - newWildcardCount := pattern.GetWildcardCount() - if newWildcardCount != oldWildcardCount { - // Pattern wildcard count changed - log.Infof("[PATTERN_CHANGE] PatternUpdated: pattern_id=%d wildcard count changed (%d → %d)", pattern.PatternID, oldWildcardCount, newWildcardCount) - return pattern, PatternUpdated - } - // Wildcard count unchanged - this is the normal case for stable patterns - log.Debugf("[PATTERN_CHANGE] PatternNoChange: pattern_id=%d wildcard count unchanged (%d)", pattern.PatternID, oldWildcardCount) - } else { - // No matched pattern or different pattern ID (shouldn't happen, but log it) - log.Debugf("[PATTERN_CHANGE] PatternNoChange: pattern_id=%d (no matched pattern or ID mismatch)", pattern.PatternID) - } + // Find which pattern within the cluster the tokenList will match + var matchedPattern *Pattern + var oldWildcardCount int + for _, p := range cluster.Patterns { + if p.Sample != nil && merging.CanMergeTokenLists(tokenList, p.Sample) { + matchedPattern = p + oldWildcardCount = p.GetWildcardCount() + break } - return pattern, PatternNoChange } + + // Add the tokenList to the cluster (merges or creates new pattern) + pattern := cluster.AddTokenListToPatterns(tokenList) + + // Check if a new pattern was created (no match found or merge failed) + if matchedPattern == nil || matchedPattern.PatternID != pattern.PatternID { + return pattern, PatternNew + } + + // Check if wildcard count changed (pattern evolved) + if pattern.GetWildcardCount() != oldWildcardCount { + return pattern, PatternUpdated + } + + return pattern, PatternNoChange } - // Creating a new cluster means a new pattern + // If no matching pattern was found, create a new cluster and pattern. 
newCluster := NewCluster(signature, tokenList) // Add the token list to create the first pattern pattern := newCluster.AddTokenListToPatterns(tokenList) diff --git a/pkg/logs/patterns/clustering/cluster_manager_test.go b/pkg/logs/patterns/clustering/cluster_manager_test.go index 76be758f7276..39c7a31953c1 100644 --- a/pkg/logs/patterns/clustering/cluster_manager_test.go +++ b/pkg/logs/patterns/clustering/cluster_manager_test.go @@ -280,17 +280,17 @@ func TestClusterManager_PatternChangeType(t *testing.T) { t.Logf("āœ… Add #2: PatternUpdated (wildcards created, logCount=%d)", pattern2.LogCount) t.Logf(" Pattern after 2 logs: '%s'", pattern2.GetPatternString()) - // Third add - pattern exists and will gain more wildcards + // Third add - pattern exists but wildcard count unchanged (still 2 wildcards) pattern3, changeType3 := cm.Add(tokenList3) - assert.Equal(t, PatternUpdated, changeType3, "Expected PatternUpdated for third add") + assert.Equal(t, PatternNoChange, changeType3, "Expected PatternNoChange for third add (wildcard count unchanged)") assert.Equal(t, pattern1.PatternID, pattern3.PatternID, "Should return same pattern for same signature") - t.Logf("āœ… Add #3: PatternUpdated (pattern updated, logCount=%d)", pattern3.LogCount) + t.Logf("āœ… Add #3: PatternNoChange (wildcard count unchanged, logCount=%d)", pattern3.LogCount) t.Logf(" Pattern after 3 logs: '%s'", pattern3.GetPatternString()) - // Fourth add - pattern exists, so updated again + // Fourth add - pattern exists, wildcard count still unchanged pattern4, changeType4 := cm.Add(tokenList4) - assert.Equal(t, PatternUpdated, changeType4, "Expected PatternUpdated for fourth add (pattern exists)") - t.Logf("āœ… Add #4: PatternUpdated (pattern will change, logCount=%d)", pattern4.LogCount) + assert.Equal(t, PatternNoChange, changeType4, "Expected PatternNoChange for fourth add (wildcard count unchanged)") + t.Logf("āœ… Add #4: PatternNoChange (wildcard count unchanged, logCount=%d)", pattern4.LogCount) // 
Final pattern (eagerly generated by Add) t.Logf(" Final pattern after 4 logs: '%s'", pattern4.GetPatternString()) diff --git a/pkg/logs/patterns/clustering/pattern.go b/pkg/logs/patterns/clustering/pattern.go index 9e1552871cb4..788582678d8c 100644 --- a/pkg/logs/patterns/clustering/pattern.go +++ b/pkg/logs/patterns/clustering/pattern.go @@ -49,7 +49,8 @@ func (p *Pattern) size() int { return p.LogCount } -// GetPatternString returns the pattern template as a string with wildcards marked as "*" +// GetPatternString returns the pattern template. +// Pattern template has no wildcard placeholders and wildcard tokens are completely omitted func (p *Pattern) GetPatternString() string { if p.Template == nil { return "" @@ -57,15 +58,14 @@ func (p *Pattern) GetPatternString() string { var parts []string for _, tok := range p.Template.Tokens { - // Use "*" for wildcard positions, actual value otherwise + // Skip wildcard tokens entirely if tok.Wildcard == token.IsWildcard { - parts = append(parts, "*") - } else { - // Only use printable ASCII/UTF-8 characters in the template - cleaned := sanitizeForTemplate(tok.Value) - if cleaned != "" { - parts = append(parts, cleaned) - } + continue + } + // Only use printable ASCII/UTF-8 characters in the template + cleaned := sanitizeForTemplate(tok.Value) + if cleaned != "" { + parts = append(parts, cleaned) } } return strings.Join(parts, "") @@ -82,9 +82,10 @@ func (p *Pattern) GetWildcardCount() int { return len(p.Positions) } -// GetWildcardCharPositions returns character indices where wildcards appear in the pattern string. -// This matches the PosList that will be sent in PatternDefine. -// Example: "User * logged in from *" returns [7, 12] +// GetWildcardCharPositions returns character indices where dynamic values should be injected. +// The template does NOT contain wildcard placeholders - wildcards are omitted entirely. +// Positions mark the injection points in the template string. 
+// Example: Template "User logged" (wildcard omitted) returns [5] (inject after "User ") func (p *Pattern) GetWildcardCharPositions() []int { if p.Template == nil { return nil @@ -94,14 +95,12 @@ func (p *Pattern) GetWildcardCharPositions() []int { currentPos := 0 for _, tok := range p.Template.Tokens { - // Clean the token value for proper length calculation cleaned := sanitizeForTemplate(tok.Value) if tok.Wildcard == token.IsWildcard { - // Record the current character position for this wildcard + // Mark the injection point (current position in template which excludes wildcards) charPositions = append(charPositions, currentPos) - // Wildcard is represented as "*" (1 character) - currentPos += 1 + // Wildcard tokens are NOT in the template, so don't advance currentPos } else if cleaned != "" { // Add the length of the cleaned token value currentPos += len(cleaned) @@ -112,46 +111,23 @@ func (p *Pattern) GetWildcardCharPositions() []int { } // GetWildcardValues extracts the wildcard values from a specific TokenList. -// This is called per-log to get that log's specific wildcard parameter values. -// -// NOTE: AddTokenListToPatterns now verifies that tokenList matches p.Template before -// assigning it to a pattern, so this function should only be called when structures match. -// However, we keep the defensive check below as a safety measure. func (p *Pattern) GetWildcardValues(tokenList *token.TokenList) []string { if p.Template == nil || len(p.Positions) == 0 { return []string{} } - // CRITICAL CHECK: Verify tokenList matches p.Template structure - // Note: CanMergeTokenLists is not symmetric - template (with IsWildcard) vs tokenList (with PotentialWildcard) - // works one way but not the other. Since AddTokenListToPatterns already verified compatibility, - // we check both directions here as a safety measure. 
+ // Check if tokenList matches p.Template structure templateMatches := merging.CanMergeTokenLists(p.Template, tokenList) || merging.CanMergeTokenLists(tokenList, p.Template) if !templateMatches { - // tokenList doesn't match p.Template structure in either direction - // This shouldn't happen if AddTokenListToPatterns worked correctly, but handle gracefully - // Return nil slice (not empty slice) to signal mismatch - caller should send raw log - return nil - } - - // Ensure lengths match (CanMergeTokenLists already checks this, but be safe) - if tokenList.Length() != p.Template.Length() { - // Length mismatch - return nil to signal error return nil } - // Preallocate slice with exact size to ensure count matches ParamCount wildcardValues := make([]string, len(p.Positions)) - // p.Positions are token indices in p.Template where wildcards are - // Since tokenList matches p.Template structure (verified above), - // we can use the same indices to extract values from tokenList for i, templatePos := range p.Positions { if templatePos < tokenList.Length() { wildcardValues[i] = tokenList.Tokens[templatePos].Value } else { - // Position out of bounds - use empty string to maintain count - // This shouldn't happen if structure matches correctly wildcardValues[i] = "" } } diff --git a/pkg/logs/patterns/clustering/pattern_test.go b/pkg/logs/patterns/clustering/pattern_test.go index eeddca77c624..474c72aa82b4 100644 --- a/pkg/logs/patterns/clustering/pattern_test.go +++ b/pkg/logs/patterns/clustering/pattern_test.go @@ -92,7 +92,8 @@ func TestGetPatternString_WithWildcards(t *testing.T) { pattern.Positions = []int{2} result := pattern.GetPatternString() - assert.Equal(t, "Service *", result) + // Wildcard tokens are omitted from the template + assert.Equal(t, "Service ", result) } func TestGetPatternString_NilTemplate(t *testing.T) { @@ -148,7 +149,7 @@ func TestGetParamCount(t *testing.T) { } func TestGetWildcardCharPositions(t *testing.T) { - // Create pattern: "Service *" + 
// Create pattern: "Service " (wildcard omitted from template) tl := token.NewTokenList() tl.Add(token.NewToken(token.TokenWord, "Service", token.NotWildcard)) tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) @@ -158,12 +159,12 @@ func TestGetWildcardCharPositions(t *testing.T) { pattern.Positions = []int{2} charPositions := pattern.GetWildcardCharPositions() - // "Service" (7 chars) + " " (1 char) = 8, wildcard at position 8 + // "Service " = 8 chars, wildcard injection point is at position 8 assert.Equal(t, []int{8}, charPositions) } func TestGetWildcardCharPositions_MultipleWildcards(t *testing.T) { - // Create pattern: "Error * in *" + // Create pattern: "Error in " (both wildcards omitted from template) tl := token.NewTokenList() tl.Add(token.NewToken(token.TokenWord, "Error", token.NotWildcard)) tl.Add(token.NewToken(token.TokenWord, " ", token.NotWildcard)) @@ -177,9 +178,10 @@ func TestGetWildcardCharPositions_MultipleWildcards(t *testing.T) { pattern.Positions = []int{2, 6} charPositions := pattern.GetWildcardCharPositions() - // "Error " = 6 chars, wildcard at position 6 - // "Error * in " = 6 + 1 (wildcard) + 4 (" in ") = 11, wildcard at position 11 - assert.Equal(t, []int{6, 11}, charPositions) + // Template is "Error in " (wildcards omitted): "Error " (6 chars) + " in " (4 chars) = 10 chars + // First wildcard injection at position 6 (after "Error ") + // Second wildcard injection at position 10 (after "Error in ") + assert.Equal(t, []int{6, 10}, charPositions) } func TestGetWildcardCharPositions_NilTemplate(t *testing.T) { @@ -411,7 +413,8 @@ func TestPattern_IntegrationScenario(t *testing.T) { assert.Equal(t, 2, pattern.LogCount) assert.True(t, pattern.hasWildcards()) assert.Equal(t, 3, getParamCount(pattern)) - assert.Equal(t, "ERROR: * * *", pattern.GetPatternString()) + // Wildcard tokens are omitted from template, leaving: "ERROR: " + " " + " " = "ERROR: " + assert.Equal(t, "ERROR: ", pattern.GetPatternString()) // 3. 
Extract wildcard values from new log log2 := token.NewTokenList() diff --git a/pkg/logs/pipeline/pipeline.go b/pkg/logs/pipeline/pipeline.go index 8b83929c76c3..9f36a896cc28 100644 --- a/pkg/logs/pipeline/pipeline.go +++ b/pkg/logs/pipeline/pipeline.go @@ -118,10 +118,9 @@ func getStrategy( encoder = compressor.NewCompressor(endpoints.Main.CompressionKind, endpoints.Main.CompressionLevel) } if endpoints.UseGRPC { - translator := grpcsender.NewMessageTranslator(nil) // nil creates a new cluster manager per pipeline + translator := grpcsender.NewMessageTranslator() // TODO: Consider sharing cluster manager across pipelines for better pattern clustering: - // sharedClusterManager := getSharedClusterManager() // would need to be passed in or singleton - // translator := grpcsender.NewMessageTranslator(sharedClusterManager) + // translator := grpcsender.NewMessageTranslator(getSharedClusterManager()) statefulInputChan := translator.Start(inputChan, pkgconfigsetup.Datadog().GetInt("logs_config.message_channel_size")) return grpcsender.NewBatchStrategy(statefulInputChan, outputChan, flushChan, endpoints.BatchWait, endpoints.BatchMaxSize, endpoints.BatchMaxContentSize, "logs", encoder, pipelineMonitor, instanceID) diff --git a/pkg/logs/sender/grpc/mock_state.go b/pkg/logs/sender/grpc/mock_state.go index 219fdd6dcfd3..c5b88e0938de 100644 --- a/pkg/logs/sender/grpc/mock_state.go +++ b/pkg/logs/sender/grpc/mock_state.go @@ -15,7 +15,6 @@ import ( "github.com/DataDog/datadog-agent/pkg/logs/patterns/clustering" "github.com/DataDog/datadog-agent/pkg/logs/patterns/token" "github.com/DataDog/datadog-agent/pkg/proto/pbgo/statefulpb" - "github.com/DataDog/datadog-agent/pkg/util/log" ) const nanoToMillis = 1000000 @@ -28,7 +27,7 @@ type MessageTranslator struct { // NewMessageTranslator creates a new MessageTranslator instance // If clusterManager is nil, a new one will be created -func NewMessageTranslator(clusterManager *clustering.ClusterManager) *MessageTranslator { +func 
NewMessageTranslator() *MessageTranslator { return &MessageTranslator{ clusterManager: clustering.NewClusterManager(), } @@ -65,7 +64,7 @@ func (mt *MessageTranslator) Start(inputChan chan *message.Message, bufferSize i // Returns the output channel for StatefulMessages func StartMessageTranslator(inputChan chan *message.Message, bufferSize int) chan *message.StatefulMessage { // Use a shared cluster manager for all pipelines (patterns shared across pipelines) - translator := NewMessageTranslator(nil) + translator := NewMessageTranslator() return translator.Start(inputChan, bufferSize) } @@ -79,7 +78,6 @@ func (mt *MessageTranslator) processMessage(msg *message.Message, outputChan cha // Get message content content := msg.GetContent() if len(content) == 0 { - log.Debugf("[MSG_TRANSLATOR] Skipping empty message") return } @@ -87,121 +85,16 @@ func (mt *MessageTranslator) processMessage(msg *message.Message, outputChan cha contentStr := string(content) tokenList := tokenizeMessage(contentStr) - // Nil check but shouldn't happen at all - if tokenList == nil || tokenList.IsEmpty() { - log.Debugf("[MSG_TRANSLATOR] Skipping message with empty token list") - mt.sendRawLog(outputChan, msg, contentStr, ts) - return - } - // Process tokenized log through cluster manager to get/create pattern pattern, changeType := mt.clusterManager.Add(tokenList) - if pattern == nil { - log.Debugf("[MSG_TRANSLATOR] No pattern created, sending as raw log") - mt.sendRawLog(outputChan, msg, contentStr, ts) - return - } - - // CRITICAL RACE CONDITION DETECTION: Capture pattern state IMMEDIATELY after Add() returns - // The pattern pointer is shared and can be modified by other goroutines after ClusterManager's lock is released. 
- // Log the state at multiple points to detect if it changes (proving race condition) - patternID := pattern.PatternID - capturedParamCount := uint32(pattern.GetWildcardCount()) - capturedTemplateLen := 0 - capturedPositionsLen := 0 - if pattern.Template != nil { - capturedTemplateLen = pattern.Template.Length() - capturedPositionsLen = len(pattern.Positions) - } - log.Infof("[RACE_DETECT] Step 1 - After Add(): pattern_id=%d paramCount=%d templateLen=%d positionsLen=%d", - patternID, capturedParamCount, capturedTemplateLen, capturedPositionsLen) - - // Extract wildcard values NOW, using the current pattern state - // This must happen before handlePatternChange() which might send PatternDefine - // If pattern is modified by another goroutine between now and then, we'll have inconsistent state - wildcardValues := mt.extractAndValidateWildcardValues(pattern, tokenList, capturedParamCount) - - log.Infof("[RACE_DETECT] Step 2 - After extract: pattern_id=%d wildcardValuesCount=%d", - patternID, len(wildcardValues)) - - // If wildcardValues is nil, it means tokenList doesn't match template structure - // Send as raw log instead of StructuredLog to avoid intake errors - if wildcardValues == nil { - log.Warnf("[MSG_TRANSLATOR] Pattern mismatch detected for pattern_id=%d - tokenList doesn't match template structure. Sending as raw log instead.", - patternID) - mt.sendRawLog(outputChan, msg, contentStr, ts) - return - } - - // Always use pattern-based encoding (PatternDefine + StructuredLog) - // - Patterns without wildcards: param_count=0, dynamic_values=[] - // - Patterns with wildcards: param_count>0, dynamic_values=[...] - // This ensures consistent behavior and proper pattern evolution tracking + // Extract wildcard values from the pattern + wildcardValues := pattern.GetWildcardValues(tokenList) // Handle pattern state changes (send PatternDefine/PatternDelete as needed) - // WARNING: Pattern may have been modified by another goroutine by now! 
- // But we've already captured wildcardValues and paramCount, so we're consistent - - // Read pattern state BEFORE handlePatternChange to detect races - beforeHandleParamCount := uint32(pattern.GetWildcardCount()) - beforeHandleTemplateLen := 0 - if pattern.Template != nil { - beforeHandleTemplateLen = pattern.Template.Length() - } - - log.Infof("[RACE_DETECT] Step 3 - Before handlePatternChange: pattern_id=%d paramCount=%d templateLen=%d", - patternID, beforeHandleParamCount, beforeHandleTemplateLen) - - // Detect if pattern changed between capture and now - if beforeHandleParamCount != capturedParamCount { - log.Errorf("[RACE_DETECTED!!!] Pattern modified between Add() and handlePatternChange! pattern_id=%d captured=%d now=%d", - patternID, capturedParamCount, beforeHandleParamCount) - } - if beforeHandleTemplateLen != capturedTemplateLen { - log.Errorf("[RACE_DETECTED!!!] Template length changed! pattern_id=%d captured=%d now=%d", - patternID, capturedTemplateLen, beforeHandleTemplateLen) - } - mt.handlePatternChange(pattern, changeType, msg, outputChan, &patternDefineSent, &patternDefineParamCount) - // Read pattern state AFTER handlePatternChange to detect races - afterHandleParamCount := uint32(pattern.GetWildcardCount()) - afterHandleTemplateLen := 0 - if pattern.Template != nil { - afterHandleTemplateLen = pattern.Template.Length() - } - - log.Infof("[RACE_DETECT] Step 4 - After handlePatternChange: pattern_id=%d paramCount=%d templateLen=%d patternDefineSent=%v patternDefineParamCount=%d", - patternID, afterHandleParamCount, afterHandleTemplateLen, patternDefineSent, patternDefineParamCount) - - // Detect if pattern changed during handlePatternChange - if afterHandleParamCount != beforeHandleParamCount { - log.Errorf("[RACE_DETECTED!!!] Pattern modified DURING handlePatternChange! 
pattern_id=%d before=%d after=%d", - patternID, beforeHandleParamCount, afterHandleParamCount) - } - - // Use the captured paramCount to ensure consistency with wildcardValues we extracted - // If PatternDefine was sent, it might have used updated pattern state, so validate - if patternDefineSent { - // PatternDefine was sent - validate that its paramCount matches what we captured - if patternDefineParamCount != capturedParamCount { - log.Warnf("[MSG_TRANSLATOR] Pattern paramCount changed during processing! pattern_id=%d captured=%d PatternDefine=%d. Using PatternDefine value.", - patternID, capturedParamCount, patternDefineParamCount) - // Use PatternDefine's paramCount, but validate wildcardValues matches - if uint32(len(wildcardValues)) != patternDefineParamCount { - log.Errorf("CRITICAL: Race condition detected! pattern_id=%d wildcardValuesCount=%d PatternDefineParamCount=%d | This WILL cause intake error!", - patternID, len(wildcardValues), patternDefineParamCount) - // Adjust wildcardValues to match PatternDefine - wildcardValues = adjustWildcardValuesCount(wildcardValues, int(patternDefineParamCount)) - } - } - } else { - // No PatternDefine sent - use captured paramCount - patternDefineParamCount = capturedParamCount - } - - // Always send StructuredLog with pattern_id + dynamic values + // Send StructuredLog with pattern_id + dynamic values mt.sendStructuredLog(outputChan, msg, pattern, wildcardValues, ts, patternDefineSent, patternDefineParamCount) } @@ -227,28 +120,22 @@ func (mt *MessageTranslator) handlePatternChange(pattern *clustering.Pattern, ch switch changeType { case clustering.PatternNew: // New pattern - send PatternDefine (may have 0 wildcards initially) - mt.sendPatternDefine(pattern, msg, outputChan, "PatternNew", patternDefineSent, patternDefineParamCount) + mt.sendPatternDefine(pattern, msg, outputChan, patternDefineSent, patternDefineParamCount) case clustering.PatternUpdated: // Pattern structure changed (e.g., 0→N wildcards, or N→M 
wildcards) - // Since PatternDefine was sent before, we need to delete and redefine mt.sendPatternDelete(pattern.PatternID, msg, outputChan) - mt.sendPatternDefine(pattern, msg, outputChan, "PatternUpdated", patternDefineSent, patternDefineParamCount) + mt.sendPatternDefine(pattern, msg, outputChan, patternDefineSent, patternDefineParamCount) case clustering.PatternNoChange: - // Pattern unchanged - no need to send PatternDefine - // The snapshot already has the current pattern state - log.Debugf("[MSG_TRANSLATOR] Pattern unchanged for pattern_id=%d, skipping PatternDefine", pattern.PatternID) } } // sendPatternDefine creates and sends a PatternDefine datum -func (mt *MessageTranslator) sendPatternDefine(pattern *clustering.Pattern, msg *message.Message, outputChan chan *message.StatefulMessage, reason string, patternDefineSent *bool, patternDefineParamCount *uint32) { +func (mt *MessageTranslator) sendPatternDefine(pattern *clustering.Pattern, msg *message.Message, outputChan chan *message.StatefulMessage, patternDefineSent *bool, patternDefineParamCount *uint32) { patternDatum := buildPatternDefine(pattern) if pd := patternDatum.GetPatternDefine(); pd != nil { *patternDefineParamCount = pd.ParamCount - log.Infof("[MSG_TRANSLATOR] Sending PatternDefine: pattern_id=%d paramCount=%d template=%q (%s)", - pattern.PatternID, *patternDefineParamCount, pd.Template, reason) } outputChan <- &message.StatefulMessage{ Datum: patternDatum, @@ -260,72 +147,12 @@ func (mt *MessageTranslator) sendPatternDefine(pattern *clustering.Pattern, msg // sendPatternDelete creates and sends a PatternDelete datum func (mt *MessageTranslator) sendPatternDelete(patternID uint64, msg *message.Message, outputChan chan *message.StatefulMessage) { deleteDatum := buildPatternDelete(patternID) - log.Infof("[MSG_TRANSLATOR] Sending PatternDelete: pattern_id=%d", patternID) outputChan <- &message.StatefulMessage{ Datum: deleteDatum, Metadata: &msg.MessageMetadata, } } -// 
extractAndValidateWildcardValues extracts wildcard values and validates the count -// Returns nil if tokenList doesn't match template structure (caller should send raw log) -func (mt *MessageTranslator) extractAndValidateWildcardValues(pattern *clustering.Pattern, tokenList *token.TokenList, patternDefineParamCount uint32) []string { - wildcardValues := pattern.GetWildcardValues(tokenList) - - // nil indicates structure mismatch - return early - if wildcardValues == nil { - log.Warnf("[MSG_TRANSLATOR] GetWildcardValues returned nil for pattern_id=%d - structure mismatch detected", - pattern.PatternID) - return nil - } - - currentWildcardCount := pattern.GetWildcardCount() - - // Adjust count if mismatch - if len(wildcardValues) != currentWildcardCount { - log.Warnf("[MSG_TRANSLATOR] Wildcard values count mismatch for pattern_id=%d: got %d, expected %d", - pattern.PatternID, len(wildcardValues), currentWildcardCount) - wildcardValues = adjustWildcardValuesCount(wildcardValues, currentWildcardCount) - } - - // Validate against PatternDefine paramCount if it was sent - mt.validateWildcardValuesCount(pattern, wildcardValues, currentWildcardCount, patternDefineParamCount) - - return wildcardValues -} - -// adjustWildcardValuesCount adjusts the wildcard values slice to match expected count -func adjustWildcardValuesCount(wildcardValues []string, expectedCount int) []string { - if len(wildcardValues) < expectedCount { - // Pad with empty strings - for len(wildcardValues) < expectedCount { - wildcardValues = append(wildcardValues, "") - } - } else if len(wildcardValues) > expectedCount { - // Truncate (shouldn't happen, but be safe) - wildcardValues = wildcardValues[:expectedCount] - } - return wildcardValues -} - -// validateWildcardValuesCount validates that wildcard values count matches PatternDefine paramCount -func (mt *MessageTranslator) validateWildcardValuesCount(pattern *clustering.Pattern, wildcardValues []string, currentWildcardCount int, patternDefineParamCount 
uint32) { - if patternDefineParamCount > 0 { - // If PatternDefine was sent in this cycle, validate against it - if uint32(len(wildcardValues)) != patternDefineParamCount { - log.Errorf("CRITICAL: StructuredLog count mismatch! pattern_id=%d StructuredLogCount=%d PatternDefineParamCount=%d (sent in this cycle) | This will cause intake error!", - pattern.PatternID, len(wildcardValues), patternDefineParamCount) - } - } else { - // PatternDefine was NOT sent in this cycle - validate against current pattern's wildcard count - expectedParamCount := uint32(currentWildcardCount) - if uint32(len(wildcardValues)) != expectedParamCount { - log.Errorf("CRITICAL: StructuredLog count mismatch! pattern_id=%d StructuredLogCount=%d ExpectedParamCount=%d (no PatternDefine sent this cycle) | This will cause intake error!", - pattern.PatternID, len(wildcardValues), expectedParamCount) - } - } -} - // sendRawLog creates and sends a raw log datum func (mt *MessageTranslator) sendRawLog(outputChan chan *message.StatefulMessage, msg *message.Message, contentStr string, ts time.Time) { logDatum := buildRawLog(contentStr, ts) @@ -338,49 +165,6 @@ func (mt *MessageTranslator) sendRawLog(outputChan chan *message.StatefulMessage // sendStructuredLog creates and sends a StructuredLog datum func (mt *MessageTranslator) sendStructuredLog(outputChan chan *message.StatefulMessage, msg *message.Message, pattern *clustering.Pattern, wildcardValues []string, ts time.Time, patternDefineSent bool, patternDefineParamCount uint32) { logDatum := buildStructuredLog(pattern.PatternID, wildcardValues, ts) - - // Log StructuredLog details for debugging - if logs, ok := logDatum.Data.(*statefulpb.Datum_Logs); ok { - if sl := logs.Logs.GetStructured(); sl != nil { - dynamicValuesStr := make([]string, len(sl.DynamicValues)) - for i, dv := range sl.DynamicValues { - if sv := dv.GetStringValue(); sv != "" { - dynamicValuesStr[i] = sv - } else { - dynamicValuesStr[i] = "" - } - } - log.Infof("[MSG_TRANSLATOR] 
Sending StructuredLog: pattern_id=%d dynamicValuesCount=%d patternDefineSent=%v patternDefineParamCount=%d dynamicValues=%v", - pattern.PatternID, len(sl.DynamicValues), patternDefineSent, patternDefineParamCount, dynamicValuesStr) - - // CRITICAL VALIDATION: Check if we're sending the right count - currentPatternParamCount := uint32(pattern.GetWildcardCount()) - - // Validate count matches what we're claiming to send - if patternDefineSent { - if uint32(len(sl.DynamicValues)) != patternDefineParamCount { - log.Errorf("CRITICAL: StructuredLog mismatch with PatternDefine! pattern_id=%d dynamicValuesCount=%d patternDefineParamCount=%d | This WILL cause intake error!", - pattern.PatternID, len(sl.DynamicValues), patternDefineParamCount) - } - } else { - // No PatternDefine sent - intake will use previously defined pattern - // Validate against current pattern state - if uint32(len(sl.DynamicValues)) != currentPatternParamCount { - log.Errorf("CRITICAL: StructuredLog mismatch with current pattern! 
pattern_id=%d dynamicValuesCount=%d currentPatternParamCount=%d (no PatternDefine sent) | This WILL cause intake error!", - pattern.PatternID, len(sl.DynamicValues), currentPatternParamCount) - } - } - - // Log current pattern state for debugging - if pattern.Template != nil { - templateStr := pattern.GetPatternString() - starCount := strings.Count(templateStr, "*") - log.Infof("[MSG_TRANSLATOR] Current pattern state: pattern_id=%d templateStarCount=%d currentParamCount=%d template=%q", - pattern.PatternID, starCount, currentPatternParamCount, templateStr) - } - } - } - outputChan <- &message.StatefulMessage{ Datum: logDatum, Metadata: &msg.MessageMetadata, @@ -389,41 +173,18 @@ func (mt *MessageTranslator) sendStructuredLog(outputChan chan *message.Stateful // buildPatternDefine creates a PatternDefine Datum from a Pattern func buildPatternDefine(pattern *clustering.Pattern) *statefulpb.Datum { - // indice of wildcards in the pattern string charPositions := pattern.GetWildcardCharPositions() - // is the indice that get converted to uint32 posList := make([]uint32, len(charPositions)) for i, pos := range charPositions { posList[i] = uint32(pos) } - // count of wildcards in the pattern template - paramCount := uint32(pattern.GetWildcardCount()) - // count of wildcards in the posList - posListCount := uint32(len(posList)) - templateStr := pattern.GetPatternString() - - // Validate that the count of wildcards matches - they should always match - // If they don't, it indicates pattern.Positions and Template.Tokens are out of sync - if paramCount != posListCount { - log.Errorf("CRITICAL: PatternDefine count mismatch! 
pattern_id=%d paramCount=%d (from pattern.Positions) posListCount=%d (from GetWildcardCharPositions) template=%q | This will cause intake error!", - pattern.PatternID, paramCount, posListCount, templateStr) - // Use posListCount as the authoritative source since it's what we're actually sending - paramCount = posListCount - } - - // Additional validation: count '*' in template string should match paramCount - starCount := uint32(strings.Count(templateStr, "*")) - if starCount != paramCount { - log.Errorf("CRITICAL: PatternDefine template star count mismatch! pattern_id=%d template=%q starCount=%d paramCount=%d | This will cause intake error!", - pattern.PatternID, templateStr, starCount, paramCount) - } return &statefulpb.Datum{ Data: &statefulpb.Datum_PatternDefine{ PatternDefine: &statefulpb.PatternDefine{ PatternId: pattern.PatternID, Template: pattern.GetPatternString(), - ParamCount: paramCount, + ParamCount: uint32(pattern.GetWildcardCount()), PosList: posList, }, }, From 37b6bd3000879c2aecb66cf0fdc1bc879654151f Mon Sep 17 00:00:00 2001 From: Bits Date: Fri, 21 Nov 2025 10:31:10 +0000 Subject: [PATCH 16/16] update proto package --- pkg/logs/sender/grpc/stateful_encoding.proto | 109 -------- pkg/logs/sender/grpc/stream_worker.go | 3 +- .../datadog/stateful/stateful_encoding.proto | 22 +- .../pbgo/statefulpb/stateful_encoding.pb.go | 243 ++++++++++++------ 4 files changed, 177 insertions(+), 200 deletions(-) delete mode 100644 pkg/logs/sender/grpc/stateful_encoding.proto diff --git a/pkg/logs/sender/grpc/stateful_encoding.proto b/pkg/logs/sender/grpc/stateful_encoding.proto deleted file mode 100644 index e555c152ed95..000000000000 --- a/pkg/logs/sender/grpc/stateful_encoding.proto +++ /dev/null @@ -1,109 +0,0 @@ -syntax = "proto3"; - -package intake; -option go_package = "github.com/DataDog/datadog-agent/pkg/logs/sender/grpc"; - -// --------------------------------------------------------------------------- -// Dictionary-encoded -// 
--------------------------------------------------------------------------- - -message DictEntryDefine { - uint64 id = 1; - string value = 2; -} - -message DictEntryDelete { - uint64 id = 1; -} - -// --------------------------------------------------------------------------- -// Pattern dictionary -// --------------------------------------------------------------------------- - -// pos_list is used to indicate where dynamic values should be inserted -// it's more accurate than a marker -message PatternDefine { - uint64 pattern_id = 1; - string template = 2; - uint32 param_count = 3; - repeated uint32 pos_list = 4; -} - -message PatternUpdate { - uint64 pattern_id = 1; - string new_template = 2; - uint32 param_count = 3; - repeated uint32 pos_list = 4; -} - -message PatternDelete { - uint64 pattern_id = 1; -} - -// --------------------------------------------------------------------------- -// Log payload -// --------------------------------------------------------------------------- - -message Log { - uint64 timestamp = 1; - oneof content { - StructuredLog structured = 2; - string raw = 3; - } -} - -message StructuredLog { - uint64 pattern_id = 1; - repeated DynamicValue dynamic_values = 2; -} - -// TODO not sure we need numeric type -message DynamicValue { - oneof value { - int64 int_value = 1; - double float_value = 2; - string string_value = 3; - uint64 dict_index = 4; - } -} - -// --------------------------------------------------------------------------- -// Streaming envelope -// --------------------------------------------------------------------------- - -message Datum { - oneof data { - PatternDefine pattern_define = 1; - PatternUpdate pattern_update = 2; - PatternDelete pattern_delete = 3; - DictEntryDefine dict_entry_define = 4; - DictEntryDelete dict_entry_delete = 5; - Log logs = 6; - } -} - -// data is sequence of pattern/dictionary changes + logs -// the ordering is significant, must be processed in order -message StatefulBatch { - uint32 batch_id = 
1; - repeated Datum data = 2; -} - -message BatchStatus { - int32 batch_id = 1; - - // See Status Code Mappings section below for more details - enum Status { - UNKNOWN=0; - OK=1; - } - Status status = 2; -} - -// --------------------------------------------------------------------------- -// gRPC service definition (bi-directional streaming) -// --------------------------------------------------------------------------- - -service StatefulLogsService { - rpc LogsStream(stream StatefulBatch) returns (stream BatchStatus); -} diff --git a/pkg/logs/sender/grpc/stream_worker.go b/pkg/logs/sender/grpc/stream_worker.go index e7cd3b571f22..f2cee3ad3592 100644 --- a/pkg/logs/sender/grpc/stream_worker.go +++ b/pkg/logs/sender/grpc/stream_worker.go @@ -641,6 +641,8 @@ func (s *streamWorker) receiverLoop(streamInfo *streamInfo) { // Stream-level gRPC status (non-OK): RPC is over → signal receiver failure or block terminal if st, ok := status.FromError(err); ok { + log.Warnf("Worker %s: recv: gRPC error (code %v): %v", s.workerID, st.Code(), err) + switch st.Code() { case codes.Unauthenticated, codes.PermissionDenied: // Terminal until fixed; do not signal receiver failure here @@ -652,7 +654,6 @@ func (s *streamWorker) receiverLoop(streamInfo *streamInfo) { return default: // All other non-OK statuses: signal receiver failure - log.Warnf("Worker %s: gRPC error (code %v): %v", s.workerID, st.Code(), err) s.signalRecvFailure(streamInfo) return } diff --git a/pkg/proto/datadog/stateful/stateful_encoding.proto b/pkg/proto/datadog/stateful/stateful_encoding.proto index 4d92b4cae9f4..6696e971b163 100644 --- a/pkg/proto/datadog/stateful/stateful_encoding.proto +++ b/pkg/proto/datadog/stateful/stateful_encoding.proto @@ -1,6 +1,7 @@ syntax = "proto3"; -package intake; +package datadog.intake.stateful; + option go_package = "pkg/proto/pbgo/statefulpb"; // --------------------------------------------------------------------------- @@ -37,12 +38,23 @@ message PatternDelete { // Log 
payload // --------------------------------------------------------------------------- +message Tag { + DynamicValue key = 1; + DynamicValue value = 2; +} + message Log { uint64 timestamp = 1; oneof content { StructuredLog structured = 2; string raw = 3; } + // TODO: right now we are assuming logs are attached per tag - in the future we may have common tags in the stream + // state and auto-populate them downstream. + // Required tags: `service`, `hostname`, + // Other tags on agent payload: `status`, `source` + // All other tags are sent as `ddtags` + repeated Tag tags = 4; } message StructuredLog { @@ -91,12 +103,12 @@ message StatefulBatch { } message BatchStatus { - int32 batch_id = 1; + uint32 batch_id = 1; - // See Status Code Mappings section below for more details + // TODO: only OK is used right now - should we just remove this enum? enum Status { - UNKNOWN=0; - OK=1; + UNKNOWN = 0; + OK = 1; } Status status = 2; } diff --git a/pkg/proto/pbgo/statefulpb/stateful_encoding.pb.go b/pkg/proto/pbgo/statefulpb/stateful_encoding.pb.go index 810308ca7fa3..c70bb84bea12 100644 --- a/pkg/proto/pbgo/statefulpb/stateful_encoding.pb.go +++ b/pkg/proto/pbgo/statefulpb/stateful_encoding.pb.go @@ -25,7 +25,7 @@ const ( _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) ) -// See Status Code Mappings section below for more details +// TODO: only OK is used right now - should we just remove this enum? type BatchStatus_Status int32 const ( @@ -69,7 +69,7 @@ func (x BatchStatus_Status) Number() protoreflect.EnumNumber { // Deprecated: Use BatchStatus_Status.Descriptor instead. 
func (BatchStatus_Status) EnumDescriptor() ([]byte, []int) { - return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{10, 0} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{11, 0} } type DictEntryDefine struct { @@ -282,6 +282,58 @@ func (x *PatternDelete) GetPatternId() uint64 { return 0 } +type Tag struct { + state protoimpl.MessageState `protogen:"open.v1"` + Key *DynamicValue `protobuf:"bytes,1,opt,name=key,proto3" json:"key,omitempty"` + Value *DynamicValue `protobuf:"bytes,2,opt,name=value,proto3" json:"value,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *Tag) Reset() { + *x = Tag{} + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *Tag) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Tag) ProtoMessage() {} + +func (x *Tag) ProtoReflect() protoreflect.Message { + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[4] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Tag.ProtoReflect.Descriptor instead. 
+func (*Tag) Descriptor() ([]byte, []int) { + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{4} +} + +func (x *Tag) GetKey() *DynamicValue { + if x != nil { + return x.Key + } + return nil +} + +func (x *Tag) GetValue() *DynamicValue { + if x != nil { + return x.Value + } + return nil +} + type Log struct { state protoimpl.MessageState `protogen:"open.v1"` Timestamp uint64 `protobuf:"varint,1,opt,name=timestamp,proto3" json:"timestamp,omitempty"` @@ -289,14 +341,20 @@ type Log struct { // // *Log_Structured // *Log_Raw - Content isLog_Content `protobuf_oneof:"content"` + Content isLog_Content `protobuf_oneof:"content"` + // TODO: right now we are assuming logs are attached per tag - in the future we may have common tags in the stream + // state and auto-populate them downstream. + // Required tags: `service`, `hostname`, + // Other tags on agent payload: `status`, `source` + // All other tags are sent as `ddtags` + Tags []*Tag `protobuf:"bytes,4,rep,name=tags,proto3" json:"tags,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } func (x *Log) Reset() { *x = Log{} - mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[4] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[5] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -308,7 +366,7 @@ func (x *Log) String() string { func (*Log) ProtoMessage() {} func (x *Log) ProtoReflect() protoreflect.Message { - mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[4] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[5] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -321,7 +379,7 @@ func (x *Log) ProtoReflect() protoreflect.Message { // Deprecated: Use Log.ProtoReflect.Descriptor instead. 
func (*Log) Descriptor() ([]byte, []int) { - return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{4} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{5} } func (x *Log) GetTimestamp() uint64 { @@ -356,6 +414,13 @@ func (x *Log) GetRaw() string { return "" } +func (x *Log) GetTags() []*Tag { + if x != nil { + return x.Tags + } + return nil +} + type isLog_Content interface { isLog_Content() } @@ -382,7 +447,7 @@ type StructuredLog struct { func (x *StructuredLog) Reset() { *x = StructuredLog{} - mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[5] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[6] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -394,7 +459,7 @@ func (x *StructuredLog) String() string { func (*StructuredLog) ProtoMessage() {} func (x *StructuredLog) ProtoReflect() protoreflect.Message { - mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[5] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[6] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -407,7 +472,7 @@ func (x *StructuredLog) ProtoReflect() protoreflect.Message { // Deprecated: Use StructuredLog.ProtoReflect.Descriptor instead. 
func (*StructuredLog) Descriptor() ([]byte, []int) { - return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{5} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{6} } func (x *StructuredLog) GetPatternId() uint64 { @@ -440,7 +505,7 @@ type DynamicValue struct { func (x *DynamicValue) Reset() { *x = DynamicValue{} - mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[6] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[7] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -452,7 +517,7 @@ func (x *DynamicValue) String() string { func (*DynamicValue) ProtoMessage() {} func (x *DynamicValue) ProtoReflect() protoreflect.Message { - mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[6] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[7] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -465,7 +530,7 @@ func (x *DynamicValue) ProtoReflect() protoreflect.Message { // Deprecated: Use DynamicValue.ProtoReflect.Descriptor instead. 
func (*DynamicValue) Descriptor() ([]byte, []int) { - return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{6} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{7} } func (x *DynamicValue) GetValue() isDynamicValue_Value { @@ -555,7 +620,7 @@ type Datum struct { func (x *Datum) Reset() { *x = Datum{} - mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[7] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[8] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -567,7 +632,7 @@ func (x *Datum) String() string { func (*Datum) ProtoMessage() {} func (x *Datum) ProtoReflect() protoreflect.Message { - mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[7] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[8] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -580,7 +645,7 @@ func (x *Datum) ProtoReflect() protoreflect.Message { // Deprecated: Use Datum.ProtoReflect.Descriptor instead. 
func (*Datum) Descriptor() ([]byte, []int) { - return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{7} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{8} } func (x *Datum) GetData() isDatum_Data { @@ -680,7 +745,7 @@ type DatumSequence struct { func (x *DatumSequence) Reset() { *x = DatumSequence{} - mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[8] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[9] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -692,7 +757,7 @@ func (x *DatumSequence) String() string { func (*DatumSequence) ProtoMessage() {} func (x *DatumSequence) ProtoReflect() protoreflect.Message { - mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[8] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[9] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -705,7 +770,7 @@ func (x *DatumSequence) ProtoReflect() protoreflect.Message { // Deprecated: Use DatumSequence.ProtoReflect.Descriptor instead. 
func (*DatumSequence) Descriptor() ([]byte, []int) { - return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{8} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{9} } func (x *DatumSequence) GetData() []*Datum { @@ -729,7 +794,7 @@ type StatefulBatch struct { func (x *StatefulBatch) Reset() { *x = StatefulBatch{} - mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[9] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[10] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -741,7 +806,7 @@ func (x *StatefulBatch) String() string { func (*StatefulBatch) ProtoMessage() {} func (x *StatefulBatch) ProtoReflect() protoreflect.Message { - mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[9] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[10] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -754,7 +819,7 @@ func (x *StatefulBatch) ProtoReflect() protoreflect.Message { // Deprecated: Use StatefulBatch.ProtoReflect.Descriptor instead. 
func (*StatefulBatch) Descriptor() ([]byte, []int) { - return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{9} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{10} } func (x *StatefulBatch) GetBatchId() uint32 { @@ -773,15 +838,15 @@ func (x *StatefulBatch) GetData() []byte { type BatchStatus struct { state protoimpl.MessageState `protogen:"open.v1"` - BatchId int32 `protobuf:"varint,1,opt,name=batch_id,json=batchId,proto3" json:"batch_id,omitempty"` - Status BatchStatus_Status `protobuf:"varint,2,opt,name=status,proto3,enum=intake.BatchStatus_Status" json:"status,omitempty"` + BatchId uint32 `protobuf:"varint,1,opt,name=batch_id,json=batchId,proto3" json:"batch_id,omitempty"` + Status BatchStatus_Status `protobuf:"varint,2,opt,name=status,proto3,enum=datadog.intake.stateful.BatchStatus_Status" json:"status,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } func (x *BatchStatus) Reset() { *x = BatchStatus{} - mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[10] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[11] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -793,7 +858,7 @@ func (x *BatchStatus) String() string { func (*BatchStatus) ProtoMessage() {} func (x *BatchStatus) ProtoReflect() protoreflect.Message { - mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[10] + mi := &file_datadog_stateful_stateful_encoding_proto_msgTypes[11] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -806,10 +871,10 @@ func (x *BatchStatus) ProtoReflect() protoreflect.Message { // Deprecated: Use BatchStatus.ProtoReflect.Descriptor instead. 
func (*BatchStatus) Descriptor() ([]byte, []int) { - return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{10} + return file_datadog_stateful_stateful_encoding_proto_rawDescGZIP(), []int{11} } -func (x *BatchStatus) GetBatchId() int32 { +func (x *BatchStatus) GetBatchId() uint32 { if x != nil { return x.BatchId } @@ -827,7 +892,7 @@ var File_datadog_stateful_stateful_encoding_proto protoreflect.FileDescriptor const file_datadog_stateful_stateful_encoding_proto_rawDesc = "" + "\n" + - "(datadog/stateful/stateful_encoding.proto\x12\x06intake\"7\n" + + "(datadog/stateful/stateful_encoding.proto\x12\x17datadog.intake.stateful\"7\n" + "\x0fDictEntryDefine\x12\x0e\n" + "\x02id\x18\x01 \x01(\x04R\x02id\x12\x14\n" + "\x05value\x18\x02 \x01(\tR\x05value\"!\n" + @@ -843,17 +908,21 @@ const file_datadog_stateful_stateful_encoding_proto_rawDesc = "" + "\rPatternDelete\x12\x1d\n" + "\n" + "pattern_id\x18\x01 \x01(\x04R\tpatternId\"{\n" + + "\x03Tag\x127\n" + + "\x03key\x18\x01 \x01(\v2%.datadog.intake.stateful.DynamicValueR\x03key\x12;\n" + + "\x05value\x18\x02 \x01(\v2%.datadog.intake.stateful.DynamicValueR\x05value\"\xbe\x01\n" + "\x03Log\x12\x1c\n" + - "\ttimestamp\x18\x01 \x01(\x04R\ttimestamp\x127\n" + + "\ttimestamp\x18\x01 \x01(\x04R\ttimestamp\x12H\n" + "\n" + - "structured\x18\x02 \x01(\v2\x15.intake.StructuredLogH\x00R\n" + + "structured\x18\x02 \x01(\v2&.datadog.intake.stateful.StructuredLogH\x00R\n" + "structured\x12\x12\n" + - "\x03raw\x18\x03 \x01(\tH\x00R\x03rawB\t\n" + - "\acontent\"k\n" + + "\x03raw\x18\x03 \x01(\tH\x00R\x03raw\x120\n" + + "\x04tags\x18\x04 \x03(\v2\x1c.datadog.intake.stateful.TagR\x04tagsB\t\n" + + "\acontent\"|\n" + "\rStructuredLog\x12\x1d\n" + "\n" + - "pattern_id\x18\x01 \x01(\x04R\tpatternId\x12;\n" + - "\x0edynamic_values\x18\x02 \x03(\v2\x14.intake.DynamicValueR\rdynamicValues\"\x9f\x01\n" + + "pattern_id\x18\x01 \x01(\x04R\tpatternId\x12L\n" + + "\x0edynamic_values\x18\x02 
\x03(\v2%.datadog.intake.stateful.DynamicValueR\rdynamicValues\"\x9f\x01\n" + "\fDynamicValue\x12\x1d\n" + "\tint_value\x18\x01 \x01(\x03H\x00R\bintValue\x12!\n" + "\vfloat_value\x18\x02 \x01(\x01H\x00R\n" + @@ -861,28 +930,28 @@ const file_datadog_stateful_stateful_encoding_proto_rawDesc = "" + "\fstring_value\x18\x03 \x01(\tH\x00R\vstringValue\x12\x1f\n" + "\n" + "dict_index\x18\x04 \x01(\x04H\x00R\tdictIndexB\a\n" + - "\x05value\"\xc0\x02\n" + - "\x05Datum\x12>\n" + - "\x0epattern_define\x18\x01 \x01(\v2\x15.intake.PatternDefineH\x00R\rpatternDefine\x12>\n" + - "\x0epattern_delete\x18\x02 \x01(\v2\x15.intake.PatternDeleteH\x00R\rpatternDelete\x12E\n" + - "\x11dict_entry_define\x18\x03 \x01(\v2\x17.intake.DictEntryDefineH\x00R\x0fdictEntryDefine\x12E\n" + - "\x11dict_entry_delete\x18\x04 \x01(\v2\x17.intake.DictEntryDeleteH\x00R\x0fdictEntryDelete\x12!\n" + - "\x04logs\x18\x05 \x01(\v2\v.intake.LogH\x00R\x04logsB\x06\n" + - "\x04data\"2\n" + - "\rDatumSequence\x12!\n" + - "\x04data\x18\x01 \x03(\v2\r.intake.DatumR\x04data\">\n" + + "\x05value\"\x95\x03\n" + + "\x05Datum\x12O\n" + + "\x0epattern_define\x18\x01 \x01(\v2&.datadog.intake.stateful.PatternDefineH\x00R\rpatternDefine\x12O\n" + + "\x0epattern_delete\x18\x02 \x01(\v2&.datadog.intake.stateful.PatternDeleteH\x00R\rpatternDelete\x12V\n" + + "\x11dict_entry_define\x18\x03 \x01(\v2(.datadog.intake.stateful.DictEntryDefineH\x00R\x0fdictEntryDefine\x12V\n" + + "\x11dict_entry_delete\x18\x04 \x01(\v2(.datadog.intake.stateful.DictEntryDeleteH\x00R\x0fdictEntryDelete\x122\n" + + "\x04logs\x18\x05 \x01(\v2\x1c.datadog.intake.stateful.LogH\x00R\x04logsB\x06\n" + + "\x04data\"C\n" + + "\rDatumSequence\x122\n" + + "\x04data\x18\x01 \x03(\v2\x1e.datadog.intake.stateful.DatumR\x04data\">\n" + "\rStatefulBatch\x12\x19\n" + "\bbatch_id\x18\x01 \x01(\rR\abatchId\x12\x12\n" + - "\x04data\x18\x02 \x01(\fR\x04data\"{\n" + + "\x04data\x18\x02 \x01(\fR\x04data\"\x8c\x01\n" + "\vBatchStatus\x12\x19\n" + - "\bbatch_id\x18\x01 
\x01(\x05R\abatchId\x122\n" + - "\x06status\x18\x02 \x01(\x0e2\x1a.intake.BatchStatus.StatusR\x06status\"\x1d\n" + + "\bbatch_id\x18\x01 \x01(\rR\abatchId\x12C\n" + + "\x06status\x18\x02 \x01(\x0e2+.datadog.intake.stateful.BatchStatus.StatusR\x06status\"\x1d\n" + "\x06Status\x12\v\n" + "\aUNKNOWN\x10\x00\x12\x06\n" + - "\x02OK\x10\x012S\n" + - "\x13StatefulLogsService\x12<\n" + + "\x02OK\x10\x012u\n" + + "\x13StatefulLogsService\x12^\n" + "\n" + - "LogsStream\x12\x15.intake.StatefulBatch\x1a\x13.intake.BatchStatus(\x010\x01B\x1bZ\x19pkg/proto/pbgo/statefulpbb\x06proto3" + "LogsStream\x12&.datadog.intake.stateful.StatefulBatch\x1a$.datadog.intake.stateful.BatchStatus(\x010\x01B\x1bZ\x19pkg/proto/pbgo/statefulpbb\x06proto3" var ( file_datadog_stateful_stateful_encoding_proto_rawDescOnce sync.Once @@ -897,38 +966,42 @@ func file_datadog_stateful_stateful_encoding_proto_rawDescGZIP() []byte { } var file_datadog_stateful_stateful_encoding_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_datadog_stateful_stateful_encoding_proto_msgTypes = make([]protoimpl.MessageInfo, 11) +var file_datadog_stateful_stateful_encoding_proto_msgTypes = make([]protoimpl.MessageInfo, 12) var file_datadog_stateful_stateful_encoding_proto_goTypes = []any{ - (BatchStatus_Status)(0), // 0: intake.BatchStatus.Status - (*DictEntryDefine)(nil), // 1: intake.DictEntryDefine - (*DictEntryDelete)(nil), // 2: intake.DictEntryDelete - (*PatternDefine)(nil), // 3: intake.PatternDefine - (*PatternDelete)(nil), // 4: intake.PatternDelete - (*Log)(nil), // 5: intake.Log - (*StructuredLog)(nil), // 6: intake.StructuredLog - (*DynamicValue)(nil), // 7: intake.DynamicValue - (*Datum)(nil), // 8: intake.Datum - (*DatumSequence)(nil), // 9: intake.DatumSequence - (*StatefulBatch)(nil), // 10: intake.StatefulBatch - (*BatchStatus)(nil), // 11: intake.BatchStatus + (BatchStatus_Status)(0), // 0: datadog.intake.stateful.BatchStatus.Status + (*DictEntryDefine)(nil), // 1: 
datadog.intake.stateful.DictEntryDefine + (*DictEntryDelete)(nil), // 2: datadog.intake.stateful.DictEntryDelete + (*PatternDefine)(nil), // 3: datadog.intake.stateful.PatternDefine + (*PatternDelete)(nil), // 4: datadog.intake.stateful.PatternDelete + (*Tag)(nil), // 5: datadog.intake.stateful.Tag + (*Log)(nil), // 6: datadog.intake.stateful.Log + (*StructuredLog)(nil), // 7: datadog.intake.stateful.StructuredLog + (*DynamicValue)(nil), // 8: datadog.intake.stateful.DynamicValue + (*Datum)(nil), // 9: datadog.intake.stateful.Datum + (*DatumSequence)(nil), // 10: datadog.intake.stateful.DatumSequence + (*StatefulBatch)(nil), // 11: datadog.intake.stateful.StatefulBatch + (*BatchStatus)(nil), // 12: datadog.intake.stateful.BatchStatus } var file_datadog_stateful_stateful_encoding_proto_depIdxs = []int32{ - 6, // 0: intake.Log.structured:type_name -> intake.StructuredLog - 7, // 1: intake.StructuredLog.dynamic_values:type_name -> intake.DynamicValue - 3, // 2: intake.Datum.pattern_define:type_name -> intake.PatternDefine - 4, // 3: intake.Datum.pattern_delete:type_name -> intake.PatternDelete - 1, // 4: intake.Datum.dict_entry_define:type_name -> intake.DictEntryDefine - 2, // 5: intake.Datum.dict_entry_delete:type_name -> intake.DictEntryDelete - 5, // 6: intake.Datum.logs:type_name -> intake.Log - 8, // 7: intake.DatumSequence.data:type_name -> intake.Datum - 0, // 8: intake.BatchStatus.status:type_name -> intake.BatchStatus.Status - 10, // 9: intake.StatefulLogsService.LogsStream:input_type -> intake.StatefulBatch - 11, // 10: intake.StatefulLogsService.LogsStream:output_type -> intake.BatchStatus - 10, // [10:11] is the sub-list for method output_type - 9, // [9:10] is the sub-list for method input_type - 9, // [9:9] is the sub-list for extension type_name - 9, // [9:9] is the sub-list for extension extendee - 0, // [0:9] is the sub-list for field type_name + 8, // 0: datadog.intake.stateful.Tag.key:type_name -> datadog.intake.stateful.DynamicValue + 8, // 1: 
datadog.intake.stateful.Tag.value:type_name -> datadog.intake.stateful.DynamicValue + 7, // 2: datadog.intake.stateful.Log.structured:type_name -> datadog.intake.stateful.StructuredLog + 5, // 3: datadog.intake.stateful.Log.tags:type_name -> datadog.intake.stateful.Tag + 8, // 4: datadog.intake.stateful.StructuredLog.dynamic_values:type_name -> datadog.intake.stateful.DynamicValue + 3, // 5: datadog.intake.stateful.Datum.pattern_define:type_name -> datadog.intake.stateful.PatternDefine + 4, // 6: datadog.intake.stateful.Datum.pattern_delete:type_name -> datadog.intake.stateful.PatternDelete + 1, // 7: datadog.intake.stateful.Datum.dict_entry_define:type_name -> datadog.intake.stateful.DictEntryDefine + 2, // 8: datadog.intake.stateful.Datum.dict_entry_delete:type_name -> datadog.intake.stateful.DictEntryDelete + 6, // 9: datadog.intake.stateful.Datum.logs:type_name -> datadog.intake.stateful.Log + 9, // 10: datadog.intake.stateful.DatumSequence.data:type_name -> datadog.intake.stateful.Datum + 0, // 11: datadog.intake.stateful.BatchStatus.status:type_name -> datadog.intake.stateful.BatchStatus.Status + 11, // 12: datadog.intake.stateful.StatefulLogsService.LogsStream:input_type -> datadog.intake.stateful.StatefulBatch + 12, // 13: datadog.intake.stateful.StatefulLogsService.LogsStream:output_type -> datadog.intake.stateful.BatchStatus + 13, // [13:14] is the sub-list for method output_type + 12, // [12:13] is the sub-list for method input_type + 12, // [12:12] is the sub-list for extension type_name + 12, // [12:12] is the sub-list for extension extendee + 0, // [0:12] is the sub-list for field type_name } func init() { file_datadog_stateful_stateful_encoding_proto_init() } @@ -936,17 +1009,17 @@ func file_datadog_stateful_stateful_encoding_proto_init() { if File_datadog_stateful_stateful_encoding_proto != nil { return } - file_datadog_stateful_stateful_encoding_proto_msgTypes[4].OneofWrappers = []any{ + 
file_datadog_stateful_stateful_encoding_proto_msgTypes[5].OneofWrappers = []any{ (*Log_Structured)(nil), (*Log_Raw)(nil), } - file_datadog_stateful_stateful_encoding_proto_msgTypes[6].OneofWrappers = []any{ + file_datadog_stateful_stateful_encoding_proto_msgTypes[7].OneofWrappers = []any{ (*DynamicValue_IntValue)(nil), (*DynamicValue_FloatValue)(nil), (*DynamicValue_StringValue)(nil), (*DynamicValue_DictIndex)(nil), } - file_datadog_stateful_stateful_encoding_proto_msgTypes[7].OneofWrappers = []any{ + file_datadog_stateful_stateful_encoding_proto_msgTypes[8].OneofWrappers = []any{ (*Datum_PatternDefine)(nil), (*Datum_PatternDelete)(nil), (*Datum_DictEntryDefine)(nil), @@ -959,7 +1032,7 @@ func file_datadog_stateful_stateful_encoding_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: unsafe.Slice(unsafe.StringData(file_datadog_stateful_stateful_encoding_proto_rawDesc), len(file_datadog_stateful_stateful_encoding_proto_rawDesc)), NumEnums: 1, - NumMessages: 11, + NumMessages: 12, NumExtensions: 0, NumServices: 1, }, @@ -997,7 +1070,7 @@ func NewStatefulLogsServiceClient(cc grpc.ClientConnInterface) StatefulLogsServi } func (c *statefulLogsServiceClient) LogsStream(ctx context.Context, opts ...grpc.CallOption) (StatefulLogsService_LogsStreamClient, error) { - stream, err := c.cc.NewStream(ctx, &_StatefulLogsService_serviceDesc.Streams[0], "/intake.StatefulLogsService/LogsStream", opts...) + stream, err := c.cc.NewStream(ctx, &_StatefulLogsService_serviceDesc.Streams[0], "/datadog.intake.stateful.StatefulLogsService/LogsStream", opts...) if err != nil { return nil, err } @@ -1071,7 +1144,7 @@ func (x *statefulLogsServiceLogsStreamServer) Recv() (*StatefulBatch, error) { } var _StatefulLogsService_serviceDesc = grpc.ServiceDesc{ - ServiceName: "intake.StatefulLogsService", + ServiceName: "datadog.intake.stateful.StatefulLogsService", HandlerType: (*StatefulLogsServiceServer)(nil), Methods: []grpc.MethodDesc{}, Streams: []grpc.StreamDesc{