Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion apps/commons/mixins.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Self, Tuple

from django.contrib.auth.models import Group, Permission
from django.contrib.contenttypes.models import ContentType
Expand Down Expand Up @@ -408,3 +408,23 @@ def get_slug(self) -> str:
if self.get_id_field_name(slug) != "slug":
slug = f"{self.slug_prefix}-{slug}"
return slug


class HasEmbending:
    """Mixin for models exposing an ``embedding`` relation to an embedding model.

    The embedding model is resolved from the ``embedding`` class attribute
    (``type(self).embedding.related.related_model``); it is instantiated with
    ``item=<instance>`` and is expected to provide ``save()``, ``vectorize()``,
    an ``embedding`` vector attribute and a ``vector_search()`` callable.
    """

    def vectorize(self):
        """Create the related embedding when it is missing, then (re)vectorize it."""
        current = getattr(self, "embedding", None)
        if not current:
            embedding_cls = type(self).embedding.related.related_model
            self.embedding = embedding_cls(item=self)
            self.embedding.save()
        self.embedding.vectorize()

    def similars(self, threshold: float = 0.15) -> QuerySet[Self]:
        """Return the objects of the same model that are similar to this one.

        ``threshold`` is forwarded unchanged to the embedding model's
        ``vector_search``. The instance itself is excluded from the result.
        An empty queryset is returned when the instance has no embedding.
        """
        model = type(self)
        # Guard clause: nothing to compare against without an embedding.
        if not getattr(self, "embedding", None):
            return model.objects.none()

        vector = self.embedding.embedding
        embedding_cls = model.embedding.related.related_model
        candidates = model.objects.all()
        matches = embedding_cls.vector_search(vector, candidates, threshold)
        return matches.exclude(pk=self.pk)
111 changes: 90 additions & 21 deletions services/crisalid/admin.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
from contextlib import suppress
from typing import Any

from django.contrib import admin, messages
from django.db.models import Count
from django.db.models.query import QuerySet
from django.http.request import HttpRequest

from apps.accounts.models import ProjectUser
from apps.accounts.models import PeopleGroup, ProjectUser
from apps.commons.admin import TranslateObjectAdminMixin
from services.crisalid.manager import CrisalidQuerySet
from services.crisalid.tasks import vectorize_documents

from .models import (
Expand All @@ -13,9 +17,21 @@
DocumentContributor,
Identifier,
Researcher,
Structure,
)


class IdentifierAminMixin:
    """Admin mixin adding an "identifiers" column for models with ``identifiers``."""

    @admin.display(description="identifiers count", ordering="identifiers_count")
    def get_identifiers(self, instance):
        """Return the harvester names of the instance's identifiers and their count.

        The value is formatted as ``"<harvester>, <harvester> (<count>)"``;
        ``None`` is returned when the instance has no identifier.
        """
        harvesters = [identifier.harvester for identifier in instance.identifiers.all()]
        if harvesters:
            return f"{', '.join(harvesters)} ({len(harvesters)})"
        return None


@admin.register(Identifier)
class IdentifierAdmin(admin.ModelAdmin):
list_display = ("harvester", "value", "get_researcher", "get_documents")
Expand Down Expand Up @@ -45,7 +61,7 @@ class DocumentContributorAdminInline(admin.StackedInline):


@admin.register(Document)
class DocumentAdmin(TranslateObjectAdminMixin, admin.ModelAdmin):
class DocumentAdmin(TranslateObjectAdminMixin, IdentifierAminMixin, admin.ModelAdmin):
list_display = (
"title",
"publication_date",
Expand Down Expand Up @@ -89,22 +105,16 @@ def get_queryset(self, request):
def get_contributors(self, instance):
return instance.contributors.count()

@admin.display(description="identifiers count", ordering="identifiers_count")
def get_identifiers(self, instance):
# list all harvester name from this profile
result = [o.harvester for o in instance.identifiers.all()]
if not result:
return None
return f"{', '.join(result)} ({len(result)})"


@admin.register(Researcher)
class ResearcherAdmin(admin.ModelAdmin):
class ResearcherAdmin(IdentifierAminMixin, admin.ModelAdmin):
list_display = (
"given_name",
"family_name",
"user",
"get_documents",
"get_memberships",
"get_employments",
"get_identifiers",
)
search_fields = (
Expand All @@ -124,6 +134,8 @@ def get_queryset(self, request):
.prefetch_related("identifiers", "documents")
.annotate(identifiers_count=Count("identifiers__id"))
.annotate(documents_count=Count("documents__id", distinct=True))
.annotate(memberships_count=Count("memberships__id", distinct=True))
.annotate(employments_count=Count("employments__id", distinct=True))
)

@admin.action(description="assign researcher on projects")
Expand All @@ -138,17 +150,18 @@ def assign_user(self, request, queryset):
continue

for identifier in research.identifiers.all():
if identifier.harvester != Identifier.Harvester.EPPN.value:
if identifier.harvester != Identifier.Harvester.LOCAL.value:
continue

user = None
email = identifier.value
with suppress(ProjectUser.DoesNotExist):
user = ProjectUser.objects.get(email=identifier.value)
user = ProjectUser.objects.get(email=email)

if not user:
created += 1
user = ProjectUser(
email=identifier.value,
email=email,
given_name=research.given_name,
family_name=research.family_name,
)
Expand Down Expand Up @@ -177,14 +190,70 @@ def assign_user(self, request, queryset):
def get_documents(self, instance):
return instance.documents_count

@admin.display(description="identifiers count", ordering="identifiers_count")
def get_identifiers(self, instance):
# list all harvester name from this profile
result = [o.harvester for o in instance.identifiers.all()]
if not result:
return None
@admin.display(description="number of memberships", ordering="-memberships_count")
def get_memberships(self, instance):
return instance.memberships_count

return f"{', '.join(result)} ({len(result)})"
@admin.display(description="number of employments", ordering="-employments_count")
def get_employments(self, instance):
return instance.employments_count


@admin.register(Structure)
class StructureAdmin(IdentifierAminMixin, admin.ModelAdmin):
    """Admin for Crisalid structures.

    Lists each structure with its organization, the number of researchers
    linked through ``memberships`` / ``employments`` and its identifiers, and
    provides an action creating the matching people groups.
    """

    list_display = (
        "acronym",
        "name",
        "organization",
        "get_memberships",
        "get_employments",
        "get_identifiers",
    )
    search_fields = ("acronym", "name", "organization__code")
    autocomplete_fields = ("organization",)
    actions = ("assign_group",)

    def get_queryset(self, request: HttpRequest) -> QuerySet[Any]:
        """Return the changelist queryset with the data needed by ``list_display``.

        - ``organization`` is joined because it is displayed on every row.
        - ``identifiers`` are prefetched because ``get_identifiers`` iterates
          ``instance.identifiers.all()`` for each row.
        - ``*_count`` annotations back the ``ordering`` declared on the
          corresponding display methods; ``identifiers_count`` is required by
          ``IdentifierAminMixin.get_identifiers`` (``ordering="identifiers_count"``).
          ``distinct=True`` keeps each count correct despite the combined joins.
        """
        return (
            super()
            .get_queryset(request)
            .select_related("organization")
            .prefetch_related("identifiers")
            .annotate(
                memberships_count=Count("memberships__pk", distinct=True),
                employments_count=Count("employments__pk", distinct=True),
                identifiers_count=Count("identifiers__pk", distinct=True),
            )
        )

    @admin.action(description="create/update groups")
    def assign_group(self, request, queryset: CrisalidQuerySet):
        """Create (or reuse) a people group per selected structure and fill it.

        The group is looked up by name under the organization's root group;
        structures with neither a name nor an acronym are skipped. Every
        researcher of the structure's ``memberships`` that is linked to a user
        gets that user added to the group's members.
        """
        for structure in queryset:
            name = structure.name or structure.acronym
            if not name:
                continue

            parent = PeopleGroup.update_or_create_root(structure.organization)
            group = PeopleGroup.objects.filter(
                parent=parent, name=name, organization=structure.organization
            ).first()
            if not group:
                group = PeopleGroup(
                    name=name, parent=parent, organization=structure.organization
                )

            # Saved even when the group already existed (kept from the original flow).
            group.save()
            # NOTE(review): presumably the auth group holding the members of
            # the people group -- confirm against PeopleGroup.get_members().
            member_group = group.get_members()
            # structure.memberships yields researchers; only those attached to
            # a user can be added to the group.
            researchers = structure.memberships.select_related("user").filter(
                user__isnull=False
            )
            for researcher in researchers:
                researcher.user.groups.add(member_group)

    @admin.display(description="number of memberships", ordering="-memberships_count")
    def get_memberships(self, instance):
        """Return the ``memberships_count`` annotation set in ``get_queryset``."""
        return instance.memberships_count

    @admin.display(description="number of employments", ordering="-employments_count")
    def get_employments(self, instance):
        """Return the ``employments_count`` annotation set in ``get_queryset``."""
        return instance.employments_count


@admin.register(CrisalidConfig)
Expand Down
2 changes: 2 additions & 0 deletions services/crisalid/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ def value(self):
Identifier.Harvester.EPPN: faker.unique.email(),
Identifier.Harvester.DOI: faker.unique.doi(),
Identifier.Harvester.PMID: faker.unique.url(),
Identifier.Harvester.NNS: faker.unique.uuid4(),
Identifier.Harvester.RNSR: faker.unique.uuid4(),
}[self.harvester]


Expand Down
19 changes: 16 additions & 3 deletions services/crisalid/management/commands/populate_crisalid.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
Identifier,
Researcher,
)
from services.crisalid.populates import PopulateDocument, PopulateResearcher
from services.crisalid.populates import (
PopulateDocument,
PopulateResearcher,
PopulateStructure,
)
from services.crisalid.populates.base import AbstractPopulate
from services.crisalid.utils.timer import timeit
from services.mistral.models import DocumentEmbedding
Expand All @@ -23,13 +27,13 @@ def add_arguments(self, parser):
parser.add_argument(
"organization",
choices=CrisalidConfig.objects.filter(
organization__code__isnull=False
organization__code__isnull=False, active=True
).values_list("organization__code", flat=True),
help="organization code",
)
parser.add_argument(
"command",
choices=("document", "researcher", "all"),
choices=("document", "researcher", "structure", "all"),
help="elements to populate",
)
parser.add_argument(
Expand Down Expand Up @@ -111,3 +115,12 @@ def handle(self, **options):
where={"external_EQ": False},
**options,
)

if command in ("all", "structure"):
populate = PopulateStructure(config)
self.populate_crisalid(
service,
populate,
query="organisations",
**options,
)
23 changes: 23 additions & 0 deletions services/crisalid/migrations/0003_alter_document_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 5.2.10 on 2026-02-10 14:11

from django.db import migrations, models


class Migration(migrations.Migration):
    """Set the default ordering of ``Document``.

    Documents are ordered by ``publication_date`` descending, with rows
    lacking a publication date placed last.
    """

    dependencies = [("crisalid", "0002_crisalidconfig_and_more")]

    operations = [
        migrations.AlterModelOptions(
            name="document",
            options={
                # F(...).desc(...) builds the same OrderBy expression as
                # OrderBy(F(...), descending=True, nulls_last=True).
                "ordering": (models.F("publication_date").desc(nulls_last=True),)
            },
        ),
    ]
60 changes: 39 additions & 21 deletions services/crisalid/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@
from django.db import models
from django.db.models.functions import Lower

from apps.commons.mixins import OrganizationRelated
from apps.commons.mixins import HasEmbending, OrganizationRelated
from apps.organizations.models import Organization
from services.crisalid import relators
from services.mistral.models import DocumentEmbedding
from services.translator.mixins import HasAutoTranslatedFields

from .manager import CrisalidQuerySet, DocumentQuerySet
Expand Down Expand Up @@ -58,6 +57,8 @@ class Harvester(models.TextChoices):
EPPN = "eppn"
DOI = "doi"
PMID = "pmid"
NNS = "nns"
RNSR = "rnsr"

harvester = models.CharField(max_length=50, choices=Harvester.choices)
value = models.CharField(max_length=255)
Expand Down Expand Up @@ -94,6 +95,12 @@ class Researcher(CrisalidDataModel):
)

objects = CrisalidQuerySet.as_manager()
memberships = models.ManyToManyField(
"crisalid.Structure", related_name="memberships"
)
employments = models.ManyToManyField(
"crisalid.Structure", related_name="employments"
)

def __str__(self):
if hasattr(self, "user") and self.user is not None:
Expand Down Expand Up @@ -121,7 +128,9 @@ class Meta:
]


class Document(OrganizationRelated, HasAutoTranslatedFields, CrisalidDataModel):
class Document(
HasEmbending, OrganizationRelated, HasAutoTranslatedFields, CrisalidDataModel
):
"""
Represents a research publication (or 'document') in the Crisalid system.
"""
Expand Down Expand Up @@ -199,6 +208,10 @@ class DocumentType(models.TextChoices):

organization_query_string = "contributors__user__groups__organizations"

class Meta:
# order by publication date (most recent first), putting null dates last
ordering = (models.F("publication_date").desc(nulls_last=True),)

def get_related_organizations(self):
"""organizations from user"""
return list(
Expand All @@ -217,24 +230,6 @@ def document_type_centralized(self) -> list[str]:
return vals
return [self.document_type]

def vectorize(self):
if not getattr(self, "embedding", None):
self.embedding = DocumentEmbedding(item=self)
self.embedding.save()
self.embedding.vectorize()

def similars(self, threshold: float = 0.15) -> DocumentQuerySet:
"""return similars documents"""
if getattr(self, "embedding", None):
vector = self.embedding.embedding
queryset = Document.objects.all()
return (
DocumentEmbedding.vector_search(vector, queryset, threshold)
.filter(document_type__in=self.document_type_centralized)
.exclude(pk=self.pk)
)
return Document.objects.none()

def save(self, *ar, **kw):
md = super().save(*ar, **kw)
# when we update models , re-calculate vectorize
Expand Down Expand Up @@ -285,6 +280,29 @@ def values(cls) -> Generator[tuple[str]]:
yield v


class Structure(OrganizationRelated, CrisalidDataModel):
    """A structure attached to an organization in the Crisalid data.

    A structure has a name, an optional acronym, a set of identifiers, and may
    be linked to a people group. Researchers reference structures through
    their ``memberships`` and ``employments`` many-to-many fields, which are
    reachable from a structure under the same names.
    """

    # Short name of the structure; optional.
    acronym = models.TextField(null=True, blank=True)
    # Full name of the structure; also used as its string representation.
    name = models.TextField()
    # Identifiers attached to this structure (reverse accessor: ``structures``).
    identifiers = models.ManyToManyField(
        "crisalid.Identifier", related_name="structures"
    )
    # Owning organization; deleting it deletes its structures.
    organization = models.ForeignKey(
        "organizations.Organization",
        on_delete=models.CASCADE,
        related_name="structures",
    )
    objects = CrisalidQuerySet.as_manager()
    # People group associated with the structure; the link is cleared (set to
    # NULL) when the group is deleted.
    group = models.ForeignKey(
        "accounts.PeopleGroup",
        on_delete=models.SET_NULL,
        null=True,
        related_name="structure",
    )

    def __str__(self):
        return self.name


class CrisalidConfig(OrganizationRelated, models.Model):
"""Model holding the Crisalid configuration (host/password used to connect to Crisalid);
it is linked to a single organization
Expand Down
Loading
Loading