From c1f33cc5d2ece10e87f202cf04b194be1eede25f Mon Sep 17 00:00:00 2001 From: Patrick Date: Wed, 11 Feb 2026 15:37:58 +0700 Subject: [PATCH] feat: add keyword and limit filters to discover-source-metadata Add -k/--keyword flag to filter discovery results by source name or category (case-insensitive substring match), and -l/--limit to cap the number of returned sources. The keyword filter is applied client-side after the aggregation query completes, so it does not reduce API usage but makes the output much easier to work with. Closes #10. --- lib/sumologic/cli.rb | 7 +- .../discover_source_metadata_command.rb | 5 +- lib/sumologic/client.rb | 6 +- .../metadata/source_metadata_discovery.rb | 19 ++++- .../source_metadata_discovery_spec.rb | 76 +++++++++++++++++++ 5 files changed, 105 insertions(+), 8 deletions(-) create mode 100644 spec/sumologic/metadata/source_metadata_discovery_spec.rb diff --git a/lib/sumologic/cli.rb b/lib/sumologic/cli.rb index 4fb358b..b2a9411 100644 --- a/lib/sumologic/cli.rb +++ b/lib/sumologic/cli.rb @@ -145,8 +145,9 @@ def list_sources # Filter by source category (ECS only) sumo-query discover-source-metadata --filter '_sourceCategory=*ecs*' - # Discover CloudWatch sources - sumo-query discover-source-metadata --filter '_sourceCategory=*cloudwatch*' + # Filter results by keyword (matches name or category) + sumo-query discover-source-metadata --keyword nginx + sumo-query discover-source-metadata --keyword nginx -l 10 # Save to file sumo-query discover-source-metadata --output discovered-sources.json @@ -158,6 +159,8 @@ def list_sources option :time_zone, type: :string, default: 'UTC', aliases: '-z', desc: 'Time zone (UTC, EST, AEST, +00:00, America/New_York, Australia/Sydney)' option :filter, type: :string, desc: 'Optional filter query (e.g., _sourceCategory=*ecs*)' + option :keyword, type: :string, aliases: '-k', desc: 'Filter results by keyword (matches name or category)' + option :limit, type: :numeric, aliases: '-l', desc: 'Maximum number of sources to return' def discover_source_metadata Commands::DiscoverSourceMetadataCommand.new(options, create_client).execute end diff --git a/lib/sumologic/cli/commands/discover_source_metadata_command.rb b/lib/sumologic/cli/commands/discover_source_metadata_command.rb index c85dc8e..b96d792 100644 --- a/lib/sumologic/cli/commands/discover_source_metadata_command.rb +++ b/lib/sumologic/cli/commands/discover_source_metadata_command.rb @@ -40,6 +40,7 @@ def log_discovery_info end warn "Time Zone: #{@parsed_timezone}" warn "Filter: #{options[:filter] || 'none (all sources)'}" + warn "Keyword: #{options[:keyword]}" if options[:keyword] warn '-' * 60 warn 'Running aggregation query to discover sources...' $stderr.puts @@ -50,7 +51,9 @@ def perform_discovery from_time: @parsed_from, to_time: @parsed_to, time_zone: @parsed_timezone, - filter: options[:filter] + filter: options[:filter], + keyword: options[:keyword], + limit: options[:limit] ) end end diff --git a/lib/sumologic/client.rb b/lib/sumologic/client.rb index 60f1dcc..498b15d 100644 --- a/lib/sumologic/client.rb +++ b/lib/sumologic/client.rb @@ -111,14 +111,14 @@ def list_all_sources # @param from_time [String] Start time (ISO 8601, unix timestamp, or relative) # @param to_time [String] End time # @param time_zone [String] Time zone (default: UTC) - # @param filter [String, nil] Optional filter query to scope results + # @param options [Hash] Optional filters — :filter, :keyword, :limit # @return [Hash] Discovery results with source metadata - def discover_source_metadata(from_time:, to_time:, time_zone: 'UTC', filter: nil) + def discover_source_metadata(from_time:, to_time:, time_zone: 'UTC', **options) @source_metadata_discovery.discover( from_time: from_time, to_time: to_time, time_zone: time_zone, - filter: filter + **options ) end diff --git a/lib/sumologic/metadata/source_metadata_discovery.rb b/lib/sumologic/metadata/source_metadata_discovery.rb index 01b1be5..d17716f 100644 --- a/lib/sumologic/metadata/source_metadata_discovery.rb +++ b/lib/sumologic/metadata/source_metadata_discovery.rb @@ -22,8 +22,12 @@ def initialize(http_client:, search_job:, config: nil) # @param from_time [String] Start time (ISO 8601, unix timestamp, or relative) # @param to_time [String] End time # @param time_zone [String] Time zone (default: UTC) - # @param filter [String, nil] Optional filter query to scope results - def discover(from_time:, to_time:, time_zone: 'UTC', filter: nil) + # @param options [Hash] Optional filters — :filter, :keyword, :limit + def discover(from_time:, to_time:, time_zone: 'UTC', **options) + filter = options[:filter] + keyword = options[:keyword] + limit = options[:limit] + query = build_query(filter) log_info "Discovering source metadata with query: #{query}" log_info "Time range: #{from_time} to #{to_time} (#{time_zone})" @@ -39,6 +43,8 @@ def discover(from_time:, to_time:, time_zone: 'UTC', filter: nil) ) source_models = parse_aggregation_results(records) + source_models = filter_by_keyword(source_models, keyword) if keyword + source_models = source_models.take(limit) if limit { 'time_range' => { @@ -47,6 +53,7 @@ def discover(from_time:, to_time:, time_zone: 'UTC', filter: nil) 'time_zone' => time_zone }, 'filter' => filter, + 'keyword' => keyword, 'total_sources' => source_models.size, 'sources' => source_models.map(&:to_h) } @@ -56,6 +63,14 @@ def discover(from_time:, to_time:, time_zone: 'UTC', filter: nil) private + def filter_by_keyword(source_models, keyword) + pattern = keyword.downcase + source_models.select do |s| + (s.name || '').downcase.include?(pattern) || + (s.category || '').downcase.include?(pattern) + end + end + # Build aggregation query to discover sources def build_query(filter) base = filter || '*' diff --git a/spec/sumologic/metadata/source_metadata_discovery_spec.rb b/spec/sumologic/metadata/source_metadata_discovery_spec.rb new file mode 100644 index 0000000..146d199 --- /dev/null +++ b/spec/sumologic/metadata/source_metadata_discovery_spec.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +RSpec.describe Sumologic::Metadata::SourceMetadataDiscovery do + let(:http_client) { instance_double('Sumologic::Http::Client') } + let(:search_job) { instance_double(Sumologic::Search::Job) } + let(:config) { instance_double(Sumologic::Configuration) } + let(:discovery) { described_class.new(http_client: http_client, search_job: search_job, config: config) } + + let(:aggregation_records) do + [ + { 'map' => { '_sourcename' => 'nginx_access', '_sourcecategory' => 'production/web', '_count' => '5000' } }, + { 'map' => { '_sourcename' => 'nginx_error', '_sourcecategory' => 'production/web', '_count' => '200' } }, + { 'map' => { '_sourcename' => 'app_logs', '_sourcecategory' => 'production/api', '_count' => '3000' } }, + { 'map' => { '_sourcename' => 'cloudwatch_lambda', '_sourcecategory' => 'aws/lambda', '_count' => '1500' } }, + { 'map' => { '_sourcename' => 'ecs_task', '_sourcecategory' => 'aws/ecs/nginx', '_count' => '800' } } + ] + end + + before do + allow(search_job).to receive(:execute_aggregation).and_return(aggregation_records) + end + + describe '#discover' do + let(:base_params) { { from_time: '-24h', to_time: 'now', time_zone: 'UTC' } } + + it 'returns all sources when no keyword given' do + result = discovery.discover(**base_params) + expect(result['total_sources']).to eq(5) + expect(result['keyword']).to be_nil + end + + it 'filters by keyword matching source name' do + result = discovery.discover(**base_params, keyword: 'nginx') + names = result['sources'].map { |s| s['name'] } + expect(names).to eq(%w[nginx_access ecs_task nginx_error]) + end + + it 'filters by keyword matching source category' do + result = discovery.discover(**base_params, keyword: 'aws') + names = result['sources'].map { |s| s['name'] } + expect(names).to eq(%w[cloudwatch_lambda ecs_task]) + end + + it 'is case-insensitive' do + result = discovery.discover(**base_params, keyword: 'NGINX') + expect(result['total_sources']).to eq(3) + end + + it 'limits results' do + result = discovery.discover(**base_params, limit: 2) + expect(result['total_sources']).to eq(2) + end + + it 'applies keyword filter before limit' do + result = discovery.discover(**base_params, keyword: 'nginx', limit: 1) + expect(result['total_sources']).to eq(1) + expect(result['sources'].first['name']).to eq('nginx_access') + end + + it 'includes keyword in response' do + result = discovery.discover(**base_params, keyword: 'nginx') + expect(result['keyword']).to eq('nginx') + end + + it 'returns empty when keyword matches nothing' do + result = discovery.discover(**base_params, keyword: 'nonexistent') + expect(result['total_sources']).to eq(0) + expect(result['sources']).to eq([]) + end + + it 'raises Error on failure' do + allow(search_job).to receive(:execute_aggregation).and_raise(StandardError, 'timeout') + expect { discovery.discover(**base_params) }.to raise_error(Sumologic::Error, /Failed to discover/) + end + end +end