diff --git a/cycode/cli/apps/scan/commit_range_scanner.py b/cycode/cli/apps/scan/commit_range_scanner.py index eb0296c8..85497d5f 100644 --- a/cycode/cli/apps/scan/commit_range_scanner.py +++ b/cycode/cli/apps/scan/commit_range_scanner.py @@ -186,7 +186,7 @@ def _scan_commit_range_documents( def _scan_sca_commit_range(ctx: typer.Context, repo_path: str, commit_range: str, **_) -> None: scan_parameters = get_scan_parameters(ctx, (repo_path,)) - from_commit_rev, to_commit_rev = parse_commit_range(commit_range, repo_path) + from_commit_rev, to_commit_rev, _ = parse_commit_range(commit_range, repo_path) from_commit_documents, to_commit_documents, _ = get_commit_range_modified_documents( ctx.obj['progress_bar'], ScanProgressBarSection.PREPARE_LOCAL_FILES, repo_path, from_commit_rev, to_commit_rev ) @@ -227,7 +227,7 @@ def _scan_secret_commit_range( def _scan_sast_commit_range(ctx: typer.Context, repo_path: str, commit_range: str, **_) -> None: scan_parameters = get_scan_parameters(ctx, (repo_path,)) - from_commit_rev, to_commit_rev = parse_commit_range(commit_range, repo_path) + from_commit_rev, to_commit_rev, _ = parse_commit_range(commit_range, repo_path) _, commit_documents, diff_documents = get_commit_range_modified_documents( ctx.obj['progress_bar'], ScanProgressBarSection.PREPARE_LOCAL_FILES, diff --git a/cycode/cli/files_collector/commit_range_documents.py b/cycode/cli/files_collector/commit_range_documents.py index 2ca54dd5..d92aea81 100644 --- a/cycode/cli/files_collector/commit_range_documents.py +++ b/cycode/cli/files_collector/commit_range_documents.py @@ -67,12 +67,17 @@ def collect_commit_range_diff_documents( commit_documents_to_scan = [] repo = git_proxy.get_repo(path) - total_commits_count = int(repo.git.rev_list('--count', commit_range)) - logger.debug('Calculating diffs for %s commits in the commit range %s', total_commits_count, commit_range) + + normalized_commit_range = normalize_commit_range(commit_range, path) + + total_commits_count = int(repo.git.rev_list('--count', normalized_commit_range)) + logger.debug( + 'Calculating diffs for %s commits in the commit range %s', total_commits_count, normalized_commit_range + ) progress_bar.set_section_length(ScanProgressBarSection.PREPARE_LOCAL_FILES, total_commits_count) - for scanned_commits_count, commit in enumerate(repo.iter_commits(rev=commit_range)): + for scanned_commits_count, commit in enumerate(repo.iter_commits(rev=normalized_commit_range)): if _does_reach_to_max_commits_to_scan_limit(commit_ids_to_scan, max_commits_count): logger.debug('Reached to max commits to scan count. Going to scan only %s last commits', max_commits_count) progress_bar.update(ScanProgressBarSection.PREPARE_LOCAL_FILES, total_commits_count - scanned_commits_count) @@ -96,7 +101,12 @@ def collect_commit_range_diff_documents( logger.debug( 'Found all relevant files in commit %s', - {'path': path, 'commit_range': commit_range, 'commit_id': commit_id}, + { + 'path': path, + 'commit_range': commit_range, + 'normalized_commit_range': normalized_commit_range, + 'commit_id': commit_id, + }, ) logger.debug('List of commit ids to scan, %s', {'commit_ids': commit_ids_to_scan}) @@ -428,8 +438,9 @@ def get_pre_commit_modified_documents( return git_head_documents, pre_committed_documents, diff_documents -def parse_commit_range(commit_range: str, path: str) -> tuple[Optional[str], Optional[str]]: +def parse_commit_range(commit_range: str, path: str) -> tuple[Optional[str], Optional[str], Optional[str]]: """Parses a git commit range string and returns the full SHAs for the 'from' and 'to' commits. + Also, it returns the separator in the commit range. Supports: - 'from..to' @@ -440,8 +451,10 @@ def parse_commit_range(commit_range: str, path: str) -> tuple[Optional[str], Opt """ repo = git_proxy.get_repo(path) + separator = '..' if '...' in commit_range: from_spec, to_spec = commit_range.split('...', 1) + separator = '...' elif '..' in commit_range: from_spec, to_spec = commit_range.split('..', 1) else: @@ -459,7 +472,28 @@ def parse_commit_range(commit_range: str, path: str) -> tuple[Optional[str], Opt # Use rev_parse to resolve each specifier to its full commit SHA from_commit_rev = repo.rev_parse(from_spec).hexsha to_commit_rev = repo.rev_parse(to_spec).hexsha - return from_commit_rev, to_commit_rev + return from_commit_rev, to_commit_rev, separator except git_proxy.get_git_command_error() as e: logger.warning("Failed to parse commit range '%s'", commit_range, exc_info=e) - return None, None + return None, None, None + + +def normalize_commit_range(commit_range: str, path: str) -> str: + """Normalize a commit range string to handle various formats consistently with all scan types. + + Returns: + A normalized commit range string suitable for Git operations (e.g., 'full_sha1..full_sha2') + """ + from_commit_rev, to_commit_rev, separator = parse_commit_range(commit_range, path) + if from_commit_rev is None or to_commit_rev is None: + logger.warning('Failed to parse commit range "%s", falling back to raw string.', commit_range) + return commit_range + + # Construct a normalized range string using the original separator for iter_commits + normalized_commit_range = f'{from_commit_rev}{separator}{to_commit_rev}' + logger.debug( + 'Normalized commit range "%s" to "%s"', + commit_range, + normalized_commit_range, + ) + return normalized_commit_range diff --git a/tests/cli/files_collector/test_commit_range_documents.py b/tests/cli/files_collector/test_commit_range_documents.py index d27d24a3..0779f678 100644 --- a/tests/cli/files_collector/test_commit_range_documents.py +++ b/tests/cli/files_collector/test_commit_range_documents.py @@ -13,6 +13,7 @@ _get_default_branches_for_merge_base, calculate_pre_push_commit_range, calculate_pre_receive_commit_range, + collect_commit_range_diff_documents, get_diff_file_path, get_safe_head_reference_for_diff, parse_commit_range, @@ -846,40 +847,40 @@ def test_two_dot_linear_history(self) -> None: with temporary_git_repository() as (temp_dir, repo): a, b, c = self._make_linear_history(repo, temp_dir) - parsed_from, parsed_to = parse_commit_range(f'{a}..{c}', temp_dir) - assert (parsed_from, parsed_to) == (a, c) + parsed_from, parsed_to, separator = parse_commit_range(f'{a}..{c}', temp_dir) + assert (parsed_from, parsed_to, separator) == (a, c, '..') def test_three_dot_linear_history(self) -> None: """For 'A...C' in linear history, expect (A,C).""" with temporary_git_repository() as (temp_dir, repo): a, b, c = self._make_linear_history(repo, temp_dir) - parsed_from, parsed_to = parse_commit_range(f'{a}...{c}', temp_dir) - assert (parsed_from, parsed_to) == (a, c) + parsed_from, parsed_to, separator = parse_commit_range(f'{a}...{c}', temp_dir) + assert (parsed_from, parsed_to, separator) == (a, c, '...') def test_open_right_linear_history(self) -> None: """For 'A..', expect (A,HEAD=C).""" with temporary_git_repository() as (temp_dir, repo): a, b, c = self._make_linear_history(repo, temp_dir) - parsed_from, parsed_to = parse_commit_range(f'{a}..', temp_dir) - assert (parsed_from, parsed_to) == (a, c) + parsed_from, parsed_to, separator = parse_commit_range(f'{a}..', temp_dir) + assert (parsed_from, parsed_to, separator) == (a, c, '..') def test_open_left_linear_history(self) -> None: """For '..C' where HEAD==C, expect (HEAD=C,C).""" with temporary_git_repository() as (temp_dir, repo): a, b, c = self._make_linear_history(repo, temp_dir) - parsed_from, parsed_to = parse_commit_range(f'..{c}', temp_dir) - assert (parsed_from, parsed_to) == (c, c) + parsed_from, parsed_to, separator = parse_commit_range(f'..{c}', temp_dir) + assert (parsed_from, parsed_to, separator) == (c, c, '..') def test_single_commit_spec(self) -> None: """For 'A', expect (A,HEAD=C).""" with temporary_git_repository() as (temp_dir, repo): a, b, c = self._make_linear_history(repo, temp_dir) - parsed_from, parsed_to = parse_commit_range(a, temp_dir) - assert (parsed_from, parsed_to) == (a, c) + parsed_from, parsed_to, separator = parse_commit_range(a, temp_dir) + assert (parsed_from, parsed_to, separator) == (a, c, '..') class TestParsePreReceiveInput: @@ -1047,3 +1048,58 @@ def test_initial_oldest_commit_without_parent_with_two_commits_returns_single_co work_repo.close() finally: server_repo.close() + + +class TestCollectCommitRangeDiffDocuments: + """Test the collect_commit_range_diff_documents function with various commit range formats.""" + + def test_collect_with_various_commit_range_formats(self) -> None: + """Test that different commit range formats are normalized and work correctly.""" + with temporary_git_repository() as (temp_dir, repo): + # Create three commits + a_file = os.path.join(temp_dir, 'a.txt') + with open(a_file, 'w') as f: + f.write('A') + repo.index.add(['a.txt']) + a_commit = repo.index.commit('A') + + with open(a_file, 'a') as f: + f.write('B') + repo.index.add(['a.txt']) + b_commit = repo.index.commit('B') + + with open(a_file, 'a') as f: + f.write('C') + repo.index.add(['a.txt']) + c_commit = repo.index.commit('C') + + # Create mock context + mock_ctx = Mock() + mock_progress_bar = Mock() + mock_progress_bar.set_section_length = Mock() + mock_progress_bar.update = Mock() + mock_ctx.obj = {'progress_bar': mock_progress_bar} + + # Test two-dot range - should collect documents from commits B and C (2 commits, 2 documents) + commit_range = f'{a_commit.hexsha}..{c_commit.hexsha}' + documents = collect_commit_range_diff_documents(mock_ctx, temp_dir, commit_range) + assert len(documents) == 2, f'Expected 2 documents from range A..C, got {len(documents)}' + commit_ids_in_documents = {doc.unique_id for doc in documents if doc.unique_id} + assert b_commit.hexsha in commit_ids_in_documents + assert c_commit.hexsha in commit_ids_in_documents + + # Test three-dot range - should collect documents from commits B and C (2 commits, 2 documents) + commit_range = f'{a_commit.hexsha}...{c_commit.hexsha}' + documents = collect_commit_range_diff_documents(mock_ctx, temp_dir, commit_range) + assert len(documents) == 2, f'Expected 2 documents from range A...C, got {len(documents)}' + + # Test parent notation with three-dot - should collect document from commit C (1 commit, 1 document) + commit_range = f'{c_commit.hexsha}~1...{c_commit.hexsha}' + documents = collect_commit_range_diff_documents(mock_ctx, temp_dir, commit_range) + assert len(documents) == 1, f'Expected 1 document from range C~1...C, got {len(documents)}' + assert documents[0].unique_id == c_commit.hexsha + + # Test single commit spec - should be interpreted as A..HEAD (commits B and C, 2 documents) + commit_range = a_commit.hexsha + documents = collect_commit_range_diff_documents(mock_ctx, temp_dir, commit_range) + assert len(documents) == 2, f'Expected 2 documents from single commit A, got {len(documents)}'