diff --git a/purl2vcs/src/purl2vcs/find_source_repo.py b/purl2vcs/src/purl2vcs/find_source_repo.py
--- a/purl2vcs/src/purl2vcs/find_source_repo.py
+++ b/purl2vcs/src/purl2vcs/find_source_repo.py
@@ -201,6 +201,7 @@ def get_source_repo(package: Package) -> PackageURL:
     source_purls = list(convert_repo_urls_to_purls(repo_urls))
     if not source_purls:
         return
+    source_purls = filter_related_source_purls(package.name, source_purls)
     source_purls = list(set(source_purls))
     source_purl_with_tag = find_package_version_tag_and_commit(
         version=package.version, source_purls=source_purls
@@ -208,7 +209,27 @@ def get_source_repo(package: Package) -> PackageURL:
     if source_purl_with_tag:
         return source_purl_with_tag
 
 
+def filter_related_source_purls(package_name, source_purls):
+    """
+    Return the ``source_purls`` whose repository name contains, or is contained
+    in, ``package_name`` (case-insensitive). Return all ``source_purls``
+    unchanged when ``package_name`` is empty or when no candidate matches.
+    """
+    package_name = (package_name or "").lower()
+    if not package_name:
+        return source_purls
+
+    related_purls = []
+    for purl in source_purls:
+        repo_name = (purl.name or "").lower()
+        # An empty repo name is a substring of any string: never treat it as a match.
+        if repo_name and (repo_name in package_name or package_name in repo_name):
+            related_purls.append(purl)
+
+    return related_purls or source_purls
+
+
 def get_repo_urls(package: Package) -> Generator[str, None, None]:
     """
     Return the URL of the source repository of a package
diff --git a/purl2vcs/tests/test_find_source_repo.py b/purl2vcs/tests/test_find_source_repo.py
--- a/purl2vcs/tests/test_find_source_repo.py
+++ b/purl2vcs/tests/test_find_source_repo.py
@@ -311,3 +311,21 @@ def test_from_purl_to_git(self):
         )
         expected = "pkg:bitbucket/connect2id/oauth-2.0-sdk-with-openid-connect-extensions@9.36?commit=e86fb3431972d302fcb615aca0baed4d8ab89791"
         self.assertEqual(expected, response.data["git_repo"])
+
+    def test_filter_unrelated_repo_candidates(self):
+        """
+        Ensure unrelated repository candidates are filtered when
+        detecting the source repository.
+        """
+        from purl2vcs.find_source_repo import filter_related_source_purls
+
+        unrelated = PackageURL(type="github", namespace="substack", name="node-browserify")
+        related = PackageURL(type="github", namespace="isaacs", name="inherits")
+
+        # Only the candidate whose name matches the package name is kept.
+        filtered = filter_related_source_purls("inherits", [unrelated, related])
+        self.assertEqual([related], filtered)
+
+        # When nothing matches, all candidates are kept as a fallback.
+        filtered = filter_related_source_purls("inherits", [unrelated])
+        self.assertEqual([unrelated], filtered)