Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 44 additions & 2 deletions minecode/model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from packagedb.models import PackageSet
from packagedb.models import Party
from packagedb.models import Resource

from packagedb.serializers import DependentPackageSerializer
from packagedb.serializers import PartySerializer

Expand Down Expand Up @@ -67,6 +68,23 @@ def add_package_to_scan_queue(package, pipelines=DEFAULT_PIPELINES, priority=0,
logger.debug(f" + Inserted ScannableURI\t: {uri}")


def _create_vcs_aliases(old_url, new_url):
    """
    Record VcsAlias rows that link the purls derived from `old_url` to the
    purls derived from `new_url`.

    Both URLs are passed through `convert_repo_urls_to_purls` and one VcsAlias
    is stored (via `get_or_create`, so repeated calls do not duplicate rows)
    for every (old purl, new purl) combination.

    Pairs whose two sides are the same purl string are skipped: two distinct
    URLs may map to the same purl, and a purl aliased to itself carries no
    information and can stop an alias chain lookup before it reaches a real
    alias for that purl.

    This is best-effort: any failure is logged with its traceback and is not
    propagated to the caller. Returns None.
    """
    try:
        # Imported inside the function, as the caller module does not import
        # these at the top level.
        from purl2vcs.find_source_repo import convert_repo_urls_to_purls
        from packagedb.models import VcsAlias

        # Lists (not sets) keep the order in which the purls were produced,
        # so rows are created in a stable order.
        old_purls = [str(purl) for purl in convert_repo_urls_to_purls([old_url])]
        new_purls = [str(purl) for purl in convert_repo_urls_to_purls([new_url])]

        for old_purl in old_purls:
            for new_purl in new_purls:
                if old_purl == new_purl:
                    # Never store an alias from a purl to itself.
                    continue
                VcsAlias.objects.get_or_create(
                    old_vcs_purl=old_purl,
                    new_vcs_purl=new_purl,
                )
    except Exception as e:
        # Deliberately broad: alias bookkeeping must not break package
        # merging or creation. `exception` keeps the traceback in the log.
        logger.exception(f"Failed to create VcsAlias: {e}")


def merge_packages(existing_package, new_package_data, replace=False):
"""
Merge the data from the `new_package_data` mapping into the
Expand All @@ -82,7 +100,6 @@ def merge_packages(existing_package, new_package_data, replace=False):
field value is left unchanged in this case.
"""
existing_mapping = existing_package.to_dict()

# We remove `purl` from `existing_mapping` because we use the other purl
# fields (type, namespace, name, version, etc.) to generate the purl.
existing_mapping.pop("purl")
Expand Down Expand Up @@ -209,6 +226,10 @@ def merge_packages(existing_package, new_package_data, replace=False):
new_value = new_mapping.extra_data.get("package_content")
if not new_value:
continue
elif existing_field == "vcs_url" or existing_field == "homepage_url":
if existing_value and new_value and existing_value != new_value:
_create_vcs_aliases(existing_value, new_value)
# Continue normally to update the field
elif existing_field in fields_to_skip:
# Continue to next field
continue
Expand Down Expand Up @@ -243,7 +264,6 @@ def merge_or_create_package(scanned_package, visit_level, override=False, filena
merged = False
package = None
map_error = ""

mining_level = visit_level
if override:
# this will force the data override
Expand Down Expand Up @@ -396,6 +416,28 @@ def merge_or_create_package(scanned_package, visit_level, override=False, filena
if created:
created_package.append_to_history(f"New Package created from URI: {package_uri}")

older_packages = Package.objects.filter(
type=scanned_package.type or "",
namespace=scanned_package.namespace or "",
name=scanned_package.name or "",
).exclude(version=scanned_package.version)

if older_packages.exists():
older_package = older_packages.order_by("-pk").first()
if (
older_package.vcs_url
and created_package.vcs_url
and older_package.vcs_url != created_package.vcs_url
):
_create_vcs_aliases(older_package.vcs_url, created_package.vcs_url)
if (
older_package.homepage_url
and created_package.homepage_url
and older_package.homepage_url != created_package.homepage_url
):
# Some packages have their homepage url set to their vcs url, so we should create an alias for that too
_create_vcs_aliases(older_package.homepage_url, created_package.homepage_url)

# This is used in the case of Maven packages created from the priority queue
for h in history:
created_package.append_to_history(h)
Expand Down
26 changes: 26 additions & 0 deletions packagedb/migrations/0095_vcsalias.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Generated by Django 5.1.13 on 2026-02-23 17:03

from django.db import migrations, models


class Migration(migrations.Migration):
    """Create the ``VcsAlias`` model with its indexes and uniqueness constraint."""

    dependencies = [
        ("packagedb", "0094_package_packagedb_p_package_d39839_idx"),
    ]

    operations = [
        migrations.CreateModel(
            name="VcsAlias",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "old_vcs_purl",
                    models.CharField(db_index=True, max_length=2048),
                ),
                (
                    "new_vcs_purl",
                    models.CharField(db_index=True, max_length=2048),
                ),
                (
                    "created_date",
                    models.DateTimeField(auto_now_add=True),
                ),
            ],
            options={
                "indexes": [
                    models.Index(
                        fields=["old_vcs_purl"],
                        name="packagedb_v_old_vcs_88807e_idx",
                    ),
                    models.Index(
                        fields=["new_vcs_purl"],
                        name="packagedb_v_new_vcs_0f8a3b_idx",
                    ),
                ],
                # Each (old, new) purl pair may be stored only once.
                "unique_together": {("old_vcs_purl", "new_vcs_purl")},
            },
        ),
    ]
33 changes: 33 additions & 0 deletions packagedb/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1429,6 +1429,39 @@ def create_auth_token(sender, instance=None, created=False, **kwargs):
Token.objects.get_or_create(user_id=instance.pk)


class VcsAlias(models.Model):
    """
    A directed link from one VCS purl string to the purl string that
    replaces it.
    """

    # Purl string being replaced.
    old_vcs_purl = models.CharField(max_length=2048, db_index=True)
    # Purl string that `old_vcs_purl` points to.
    new_vcs_purl = models.CharField(max_length=2048, db_index=True)
    # Set automatically when the row is first saved.
    created_date = models.DateTimeField(auto_now_add=True)

    class Meta:
        # A given (old, new) pair can only be recorded once.
        unique_together = ["old_vcs_purl", "new_vcs_purl"]
        indexes = [
            models.Index(fields=["old_vcs_purl"]),
            models.Index(fields=["new_vcs_purl"]),
        ]

    @classmethod
    def resolve_purl(cls, vcs_purl_str):
        """
        Return the purl string reached by following VcsAlias links that start
        at `vcs_purl_str`.

        Links are followed one at a time, from `old_vcs_purl` to
        `new_vcs_purl`, until a purl has no outgoing alias or a purl that was
        already seen comes up again (which guards against alias cycles).
        `vcs_purl_str` itself is returned when no alias starts from it.
        """
        seen = set()
        resolved = vcs_purl_str

        while True:
            if resolved in seen:
                # Cycle detected: stop on the purl we have come back to.
                return resolved
            seen.add(resolved)

            next_alias = cls.objects.filter(old_vcs_purl=resolved).first()
            if not next_alias:
                # End of the chain: nothing replaces this purl.
                return resolved

            resolved = next_alias.new_vcs_purl


class PackageActivity(FederatedCodePackageActivityMixin):
"""Record of package activity from a FederatedCode."""

Expand Down