Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cycode/cli/apps/scan/commit_range_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def _scan_commit_range_documents(
def _scan_sca_commit_range(ctx: typer.Context, repo_path: str, commit_range: str, **_) -> None:
scan_parameters = get_scan_parameters(ctx, (repo_path,))

from_commit_rev, to_commit_rev = parse_commit_range(commit_range, repo_path)
from_commit_rev, to_commit_rev, _ = parse_commit_range(commit_range, repo_path)
from_commit_documents, to_commit_documents, _ = get_commit_range_modified_documents(
ctx.obj['progress_bar'], ScanProgressBarSection.PREPARE_LOCAL_FILES, repo_path, from_commit_rev, to_commit_rev
)
Expand Down Expand Up @@ -227,7 +227,7 @@ def _scan_secret_commit_range(
def _scan_sast_commit_range(ctx: typer.Context, repo_path: str, commit_range: str, **_) -> None:
scan_parameters = get_scan_parameters(ctx, (repo_path,))

from_commit_rev, to_commit_rev = parse_commit_range(commit_range, repo_path)
from_commit_rev, to_commit_rev, _ = parse_commit_range(commit_range, repo_path)
_, commit_documents, diff_documents = get_commit_range_modified_documents(
ctx.obj['progress_bar'],
ScanProgressBarSection.PREPARE_LOCAL_FILES,
Expand Down
48 changes: 41 additions & 7 deletions cycode/cli/files_collector/commit_range_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,17 @@ def collect_commit_range_diff_documents(
commit_documents_to_scan = []

repo = git_proxy.get_repo(path)
total_commits_count = int(repo.git.rev_list('--count', commit_range))
logger.debug('Calculating diffs for %s commits in the commit range %s', total_commits_count, commit_range)

normalized_commit_range = normalize_commit_range(commit_range, path)

total_commits_count = int(repo.git.rev_list('--count', normalized_commit_range))
logger.debug(
'Calculating diffs for %s commits in the commit range %s', total_commits_count, normalized_commit_range
)

progress_bar.set_section_length(ScanProgressBarSection.PREPARE_LOCAL_FILES, total_commits_count)

for scanned_commits_count, commit in enumerate(repo.iter_commits(rev=commit_range)):
for scanned_commits_count, commit in enumerate(repo.iter_commits(rev=normalized_commit_range)):
if _does_reach_to_max_commits_to_scan_limit(commit_ids_to_scan, max_commits_count):
logger.debug('Reached to max commits to scan count. Going to scan only %s last commits', max_commits_count)
progress_bar.update(ScanProgressBarSection.PREPARE_LOCAL_FILES, total_commits_count - scanned_commits_count)
Expand All @@ -96,7 +101,12 @@ def collect_commit_range_diff_documents(

logger.debug(
'Found all relevant files in commit %s',
{'path': path, 'commit_range': commit_range, 'commit_id': commit_id},
{
'path': path,
'commit_range': commit_range,
'normalized_commit_range': normalized_commit_range,
'commit_id': commit_id,
},
)

logger.debug('List of commit ids to scan, %s', {'commit_ids': commit_ids_to_scan})
Expand Down Expand Up @@ -428,8 +438,9 @@ def get_pre_commit_modified_documents(
return git_head_documents, pre_committed_documents, diff_documents


def parse_commit_range(commit_range: str, path: str) -> tuple[Optional[str], Optional[str]]:
def parse_commit_range(commit_range: str, path: str) -> tuple[Optional[str], Optional[str], Optional[str]]:
"""Parses a git commit range string and returns the full SHAs for the 'from' and 'to' commits.
Also, it returns the separator in the commit range.

Supports:
- 'from..to'
Expand All @@ -440,8 +451,10 @@ def parse_commit_range(commit_range: str, path: str) -> tuple[Optional[str], Opt
"""
repo = git_proxy.get_repo(path)

separator = '..'
if '...' in commit_range:
from_spec, to_spec = commit_range.split('...', 1)
separator = '...'
elif '..' in commit_range:
from_spec, to_spec = commit_range.split('..', 1)
else:
Expand All @@ -459,7 +472,28 @@ def parse_commit_range(commit_range: str, path: str) -> tuple[Optional[str], Opt
# Use rev_parse to resolve each specifier to its full commit SHA
from_commit_rev = repo.rev_parse(from_spec).hexsha
to_commit_rev = repo.rev_parse(to_spec).hexsha
return from_commit_rev, to_commit_rev
return from_commit_rev, to_commit_rev, separator
except git_proxy.get_git_command_error() as e:
logger.warning("Failed to parse commit range '%s'", commit_range, exc_info=e)
return None, None
return None, None, None


def normalize_commit_range(commit_range: str, path: str) -> str:
"""Normalize a commit range string to handle various formats consistently with all scan types.

Returns:
A normalized commit range string suitable for Git operations (e.g., 'full_sha1..full_sha2')
"""
from_commit_rev, to_commit_rev, separator = parse_commit_range(commit_range, path)
if from_commit_rev is None or to_commit_rev is None:
logger.warning('Failed to parse commit range "%s", falling back to raw string.', commit_range)
return commit_range

# Construct a normalized range string using the original separator for iter_commits
normalized_commit_range = f'{from_commit_rev}{separator}{to_commit_rev}'
logger.debug(
'Normalized commit range "%s" to "%s"',
commit_range,
normalized_commit_range,
)
return normalized_commit_range
76 changes: 66 additions & 10 deletions tests/cli/files_collector/test_commit_range_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
_get_default_branches_for_merge_base,
calculate_pre_push_commit_range,
calculate_pre_receive_commit_range,
collect_commit_range_diff_documents,
get_diff_file_path,
get_safe_head_reference_for_diff,
parse_commit_range,
Expand Down Expand Up @@ -846,40 +847,40 @@ def test_two_dot_linear_history(self) -> None:
with temporary_git_repository() as (temp_dir, repo):
a, b, c = self._make_linear_history(repo, temp_dir)

parsed_from, parsed_to = parse_commit_range(f'{a}..{c}', temp_dir)
assert (parsed_from, parsed_to) == (a, c)
parsed_from, parsed_to, separator = parse_commit_range(f'{a}..{c}', temp_dir)
assert (parsed_from, parsed_to, separator) == (a, c, '..')

def test_three_dot_linear_history(self) -> None:
"""For 'A...C' in linear history, expect (A,C)."""
with temporary_git_repository() as (temp_dir, repo):
a, b, c = self._make_linear_history(repo, temp_dir)

parsed_from, parsed_to = parse_commit_range(f'{a}...{c}', temp_dir)
assert (parsed_from, parsed_to) == (a, c)
parsed_from, parsed_to, separator = parse_commit_range(f'{a}...{c}', temp_dir)
assert (parsed_from, parsed_to, separator) == (a, c, '...')

def test_open_right_linear_history(self) -> None:
"""For 'A..', expect (A,HEAD=C)."""
with temporary_git_repository() as (temp_dir, repo):
a, b, c = self._make_linear_history(repo, temp_dir)

parsed_from, parsed_to = parse_commit_range(f'{a}..', temp_dir)
assert (parsed_from, parsed_to) == (a, c)
parsed_from, parsed_to, separator = parse_commit_range(f'{a}..', temp_dir)
assert (parsed_from, parsed_to, separator) == (a, c, '..')

def test_open_left_linear_history(self) -> None:
"""For '..C' where HEAD==C, expect (HEAD=C,C)."""
with temporary_git_repository() as (temp_dir, repo):
a, b, c = self._make_linear_history(repo, temp_dir)

parsed_from, parsed_to = parse_commit_range(f'..{c}', temp_dir)
assert (parsed_from, parsed_to) == (c, c)
parsed_from, parsed_to, separator = parse_commit_range(f'..{c}', temp_dir)
assert (parsed_from, parsed_to, separator) == (c, c, '..')

def test_single_commit_spec(self) -> None:
"""For 'A', expect (A,HEAD=C)."""
with temporary_git_repository() as (temp_dir, repo):
a, b, c = self._make_linear_history(repo, temp_dir)

parsed_from, parsed_to = parse_commit_range(a, temp_dir)
assert (parsed_from, parsed_to) == (a, c)
parsed_from, parsed_to, separator = parse_commit_range(a, temp_dir)
assert (parsed_from, parsed_to, separator) == (a, c, '..')


class TestParsePreReceiveInput:
Expand Down Expand Up @@ -1047,3 +1048,58 @@ def test_initial_oldest_commit_without_parent_with_two_commits_returns_single_co
work_repo.close()
finally:
server_repo.close()


class TestCollectCommitRangeDiffDocuments:
"""Test the collect_commit_range_diff_documents function with various commit range formats."""

def test_collect_with_various_commit_range_formats(self) -> None:
"""Test that different commit range formats are normalized and work correctly."""
with temporary_git_repository() as (temp_dir, repo):
# Create three commits
a_file = os.path.join(temp_dir, 'a.txt')
with open(a_file, 'w') as f:
f.write('A')
repo.index.add(['a.txt'])
a_commit = repo.index.commit('A')

with open(a_file, 'a') as f:
f.write('B')
repo.index.add(['a.txt'])
b_commit = repo.index.commit('B')

with open(a_file, 'a') as f:
f.write('C')
repo.index.add(['a.txt'])
c_commit = repo.index.commit('C')

# Create mock context
mock_ctx = Mock()
mock_progress_bar = Mock()
mock_progress_bar.set_section_length = Mock()
mock_progress_bar.update = Mock()
mock_ctx.obj = {'progress_bar': mock_progress_bar}

# Test two-dot range - should collect documents from commits B and C (2 commits, 2 documents)
commit_range = f'{a_commit.hexsha}..{c_commit.hexsha}'
documents = collect_commit_range_diff_documents(mock_ctx, temp_dir, commit_range)
assert len(documents) == 2, f'Expected 2 documents from range A..C, got {len(documents)}'
commit_ids_in_documents = {doc.unique_id for doc in documents if doc.unique_id}
assert b_commit.hexsha in commit_ids_in_documents
assert c_commit.hexsha in commit_ids_in_documents

# Test three-dot range - should collect documents from commits B and C (2 commits, 2 documents)
commit_range = f'{a_commit.hexsha}...{c_commit.hexsha}'
documents = collect_commit_range_diff_documents(mock_ctx, temp_dir, commit_range)
assert len(documents) == 2, f'Expected 2 documents from range A...C, got {len(documents)}'

# Test parent notation with three-dot - should collect document from commit C (1 commit, 1 document)
commit_range = f'{c_commit.hexsha}~1...{c_commit.hexsha}'
documents = collect_commit_range_diff_documents(mock_ctx, temp_dir, commit_range)
assert len(documents) == 1, f'Expected 1 document from range C~1...C, got {len(documents)}'
assert documents[0].unique_id == c_commit.hexsha

# Test single commit spec - should be interpreted as A..HEAD (commits B and C, 2 documents)
commit_range = a_commit.hexsha
documents = collect_commit_range_diff_documents(mock_ctx, temp_dir, commit_range)
assert len(documents) == 2, f'Expected 2 documents from single commit A, got {len(documents)}'