NOTE(review): the line structure of this patch was reconstructed from a
whitespace-collapsed copy. Blank context lines and leading indentation were
inferred from the hunk line counts; confirm the indentation of context lines
against the target files (or apply with --ignore-whitespace).

diff --git a/minecode_pipelines/pipes/alpine.py b/minecode_pipelines/pipes/alpine.py
index 009cf637..c819b9e4 100644
--- a/minecode_pipelines/pipes/alpine.py
+++ b/minecode_pipelines/pipes/alpine.py
@@ -364,6 +364,18 @@
     "https://dl-cdn.alpinelinux.org/alpine/v3.9/main/x86_64/APKINDEX.tar.gz",
 ]
 
+EDGE_APKINDEX_URLS = [url for url in ALPINE_LINUX_APKINDEX_URLS if "/edge/" in url]
+
+LATEST_STABLE_APKINDEX_URLS = [
+    url for url in ALPINE_LINUX_APKINDEX_URLS if "/latest-stable/" in url
+]
+
+VERSIONED_APKINDEX_URLS = [
+    url
+    for url in ALPINE_LINUX_APKINDEX_URLS
+    if "/edge/" not in url and "/latest-stable/" not in url
+]
+
 
 def parse_email(text):
     """
@@ -545,16 +557,17 @@ def get_package_from_index(self, apkindex_url, logger=None):
             yield current_purl, [pd.purl]
 
 
-def mine_and_publish_alpine_packageurls(
+def mine_alpine_indexes(
+    index_urls,
     data_cluster,
     checked_out_repos,
     working_path,
     commit_msg_func,
     logger,
 ):
-    """Yield PackageURLs from Alpine index."""
+    """Mine and publish Alpine package URLs from a list of index URLs."""
 
-    index_count = len(ALPINE_LINUX_APKINDEX_URLS)
+    index_count = len(index_urls)
     progress = LoopProgress(
         total_iterations=index_count,
         logger=logger,
@@ -563,7 +576,8 @@ def mine_and_publish_alpine_packageurls(
     logger(f"Mine PackageURL from {index_count:,d} alpine index.")
 
     alpine_collector = AlpineCollector()
-    for index in progress.iter(ALPINE_LINUX_APKINDEX_URLS):
+
+    for index in progress.iter(index_urls):
         logger(f"Mine PackageURL from {index} index.")
         _mine_and_publish_packageurls(
             packageurls=alpine_collector.get_package_from_index(index),
@@ -575,3 +589,40 @@ def mine_and_publish_alpine_packageurls(
             commit_msg_func=commit_msg_func,
             logger=logger,
         )
+
+
+def mine_and_publish_alpine_packageurls(
+    data_cluster,
+    checked_out_repos,
+    working_path,
+    commit_msg_func,
+    logger,
+):
+    """Mine PackageURLs from Alpine indexes."""
+
+    mine_alpine_indexes(
+        EDGE_APKINDEX_URLS,
+        data_cluster,
+        checked_out_repos,
+        working_path,
+        commit_msg_func,
+        logger,
+    )
+
+    mine_alpine_indexes(
+        LATEST_STABLE_APKINDEX_URLS,
+        data_cluster,
+        checked_out_repos,
+        working_path,
+        commit_msg_func,
+        logger,
+    )
+
+    mine_alpine_indexes(
+        VERSIONED_APKINDEX_URLS,
+        data_cluster,
+        checked_out_repos,
+        working_path,
+        commit_msg_func,
+        logger,
+    )
diff --git a/minecode_pipelines/tests/pipes/test_alpine.py b/minecode_pipelines/tests/pipes/test_alpine.py
index ff0b91b3..2f33dd3b 100644
--- a/minecode_pipelines/tests/pipes/test_alpine.py
+++ b/minecode_pipelines/tests/pipes/test_alpine.py
@@ -27,3 +27,21 @@ def test_parse_apkindex_and_build_package(self):
             packages.append(pd.to_dict())
         expected_loc = self.get_test_loc("alpine/expected_packages.json")
         check_against_expected_json_file(packages, expected_loc, regen=False)
+
+    def test_alpine_url_grouping(self):
+        from minecode_pipelines.pipes.alpine import (
+            ALPINE_LINUX_APKINDEX_URLS,
+            EDGE_APKINDEX_URLS,
+            LATEST_STABLE_APKINDEX_URLS,
+            VERSIONED_APKINDEX_URLS,
+        )
+
+        combined = EDGE_APKINDEX_URLS + LATEST_STABLE_APKINDEX_URLS + VERSIONED_APKINDEX_URLS
+
+        # Ensure all URLs are included
+        assert sorted(combined) == sorted(ALPINE_LINUX_APKINDEX_URLS)
+
+        # Ensure no overlap
+        assert not set(EDGE_APKINDEX_URLS) & set(LATEST_STABLE_APKINDEX_URLS)
+        assert not set(EDGE_APKINDEX_URLS) & set(VERSIONED_APKINDEX_URLS)
+        assert not set(LATEST_STABLE_APKINDEX_URLS) & set(VERSIONED_APKINDEX_URLS)