From 11b7c00bc4978b82a01d36599d54fbdcdab82f7c Mon Sep 17 00:00:00 2001
From: Monal-Reddy
Date: Mon, 16 Mar 2026 16:03:33 +0530
Subject: [PATCH 1/2] Split Alpine PURL mining pipeline into separate optional steps

Signed-off-by: Monal-Reddy
---
NOTE(review): this series was recovered from a copy whose newlines and
indentation had been collapsed. Hunk line counts were re-checked against
the hunk headers. The indentation of unchanged context lines is a best
guess (notably the "yield current_purl" and "packages.append" lines) --
verify against the target tree, or apply with
"git am --ignore-whitespace". The author e-mail address appears to be
missing from the From:/Signed-off-by: lines and presumably needs to be
restored before "git am" will accept the series.

 minecode_pipelines/pipes/alpine.py | 59 ++++++++++++++++++++++++++++--
 1 file changed, 55 insertions(+), 4 deletions(-)

diff --git a/minecode_pipelines/pipes/alpine.py b/minecode_pipelines/pipes/alpine.py
index 009cf637..c819b9e4 100644
--- a/minecode_pipelines/pipes/alpine.py
+++ b/minecode_pipelines/pipes/alpine.py
@@ -364,6 +364,18 @@
     "https://dl-cdn.alpinelinux.org/alpine/v3.9/main/x86_64/APKINDEX.tar.gz",
 ]
 
+EDGE_APKINDEX_URLS = [url for url in ALPINE_LINUX_APKINDEX_URLS if "/edge/" in url]
+
+LATEST_STABLE_APKINDEX_URLS = [
+    url for url in ALPINE_LINUX_APKINDEX_URLS if "/latest-stable/" in url
+]
+
+VERSIONED_APKINDEX_URLS = [
+    url
+    for url in ALPINE_LINUX_APKINDEX_URLS
+    if "/edge/" not in url and "/latest-stable/" not in url
+]
+
 
 def parse_email(text):
     """
@@ -545,16 +557,17 @@ def get_package_from_index(self, apkindex_url, logger=None):
             yield current_purl, [pd.purl]
 
 
-def mine_and_publish_alpine_packageurls(
+def mine_alpine_indexes(
+    index_urls,
     data_cluster,
     checked_out_repos,
     working_path,
     commit_msg_func,
     logger,
 ):
-    """Yield PackageURLs from Alpine index."""
+    """Mine and publish Alpine package URLs from a list of index URLs."""
 
-    index_count = len(ALPINE_LINUX_APKINDEX_URLS)
+    index_count = len(index_urls)
     progress = LoopProgress(
         total_iterations=index_count,
         logger=logger,
@@ -563,7 +576,8 @@ def mine_and_publish_alpine_packageurls(
     logger(f"Mine PackageURL from {index_count:,d} alpine index.")
 
     alpine_collector = AlpineCollector()
-    for index in progress.iter(ALPINE_LINUX_APKINDEX_URLS):
+
+    for index in progress.iter(index_urls):
         logger(f"Mine PackageURL from {index} index.")
         _mine_and_publish_packageurls(
             packageurls=alpine_collector.get_package_from_index(index),
@@ -575,3 +589,40 @@ def mine_and_publish_alpine_packageurls(
             commit_msg_func=commit_msg_func,
             logger=logger,
         )
+
+
+def mine_and_publish_alpine_packageurls(
+    data_cluster,
+    checked_out_repos,
+    working_path,
+    commit_msg_func,
+    logger,
+):
+    """Mine PackageURLs from Alpine indexes."""
+
+    mine_alpine_indexes(
+        EDGE_APKINDEX_URLS,
+        data_cluster,
+        checked_out_repos,
+        working_path,
+        commit_msg_func,
+        logger,
+    )
+
+    mine_alpine_indexes(
+        LATEST_STABLE_APKINDEX_URLS,
+        data_cluster,
+        checked_out_repos,
+        working_path,
+        commit_msg_func,
+        logger,
+    )
+
+    mine_alpine_indexes(
+        VERSIONED_APKINDEX_URLS,
+        data_cluster,
+        checked_out_repos,
+        working_path,
+        commit_msg_func,
+        logger,
+    )

From fd40474cc4161e6bcc7baba8fe10c863809c97f2 Mon Sep 17 00:00:00 2001
From: Monal-Reddy
Date: Sat, 21 Mar 2026 12:15:46 +0530
Subject: [PATCH 2/2] adding tests

Signed-off-by: Monal-Reddy
---
 minecode_pipelines/tests/pipes/test_alpine.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/minecode_pipelines/tests/pipes/test_alpine.py b/minecode_pipelines/tests/pipes/test_alpine.py
index ff0b91b3..2f33dd3b 100644
--- a/minecode_pipelines/tests/pipes/test_alpine.py
+++ b/minecode_pipelines/tests/pipes/test_alpine.py
@@ -27,3 +27,21 @@ def test_parse_apkindex_and_build_package(self):
             packages.append(pd.to_dict())
         expected_loc = self.get_test_loc("alpine/expected_packages.json")
         check_against_expected_json_file(packages, expected_loc, regen=False)
+
+    def test_alpine_url_grouping(self):
+        from minecode_pipelines.pipes.alpine import (
+            ALPINE_LINUX_APKINDEX_URLS,
+            EDGE_APKINDEX_URLS,
+            LATEST_STABLE_APKINDEX_URLS,
+            VERSIONED_APKINDEX_URLS,
+        )
+
+        combined = EDGE_APKINDEX_URLS + LATEST_STABLE_APKINDEX_URLS + VERSIONED_APKINDEX_URLS
+
+        # Ensure all URLs are included
+        assert sorted(combined) == sorted(ALPINE_LINUX_APKINDEX_URLS)
+
+        # Ensure no overlap
+        assert not set(EDGE_APKINDEX_URLS) & set(LATEST_STABLE_APKINDEX_URLS)
+        assert not set(EDGE_APKINDEX_URLS) & set(VERSIONED_APKINDEX_URLS)
+        assert not set(LATEST_STABLE_APKINDEX_URLS) & set(VERSIONED_APKINDEX_URLS)