From ff443beb8fb3eb6e3be52fe720749fe352fc6a7a Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Thu, 26 Feb 2026 10:16:04 +0100 Subject: [PATCH 01/17] Added support for multiple labels in keywords --- tripper/datadoc/keywords.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tripper/datadoc/keywords.py b/tripper/datadoc/keywords.py index d0969e3d..56764024 100644 --- a/tripper/datadoc/keywords.py +++ b/tripper/datadoc/keywords.py @@ -1060,9 +1060,11 @@ def isproperty(v): else: r = self.data.resources[domainname].copy() resources[domainname] = r - r.keywords[label] = d + for lbl in [label] if isinstance(label, str) else label: + r.keywords[lbl] = d else: - resources[domainname].keywords[label] = d + for lbl in [label] if isinstance(label, str) else label: + resources[domainname].keywords[lbl] = d if "range" in value: _types = asseq(d.get("type", OWL.AnnotationProperty)) types = [expand_iri(t, p) for t in _types] From dcb1e08ddcc31e3168f36ce36f8d2cc631a328d8 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Thu, 26 Feb 2026 10:32:05 +0100 Subject: [PATCH 02/17] Remove duplicated labels in keyworkds --- tripper/datadoc/keywords.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tripper/datadoc/keywords.py b/tripper/datadoc/keywords.py index 56764024..d427b38b 100644 --- a/tripper/datadoc/keywords.py +++ b/tripper/datadoc/keywords.py @@ -1060,11 +1060,11 @@ def isproperty(v): else: r = self.data.resources[domainname].copy() resources[domainname] = r - for lbl in [label] if isinstance(label, str) else label: - r.keywords[lbl] = d + for l in [label] if isinstance(label, str) else set(label): + r.keywords[l] = d else: - for lbl in [label] if isinstance(label, str) else label: - resources[domainname].keywords[lbl] = d + for l in [label] if isinstance(label, str) else set(label): + resources[domainname].keywords[l] = d if "range" in value: _types = asseq(d.get("type", OWL.AnnotationProperty)) types = [expand_iri(t, p) for t 
in _types] From be46b4d68d9b7148b5c6981afa0c476deff3ed54 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Thu, 26 Feb 2026 10:49:49 +0100 Subject: [PATCH 03/17] Allow duplicated labels --- tripper/datadoc/keywords.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tripper/datadoc/keywords.py b/tripper/datadoc/keywords.py index d427b38b..f8638159 100644 --- a/tripper/datadoc/keywords.py +++ b/tripper/datadoc/keywords.py @@ -1031,7 +1031,8 @@ def isproperty(v): break else: label = iriname(k) - resources[label] = d + for lbl in [label] if isinstance(label, str) else set(label): + resources[lbl] = d clslabels[d.iri] = label # Add properties @@ -1149,7 +1150,7 @@ def _load_rdf( SELECT DISTINCT ?s WHERE { VALUES ?o { owl:DatatypeProperty owl:ObjectProperty owl:AnnotationProperty - rdf:Property + rdf:Property owl:Class } ?s a ?o . } From 2f1db7c14d629d95730468c7e4209db96441730b Mon Sep 17 00:00:00 2001 From: "Francesca.L.Bleken@sintef.no" Date: Mon, 2 Mar 2026 12:19:44 +0100 Subject: [PATCH 04/17] Do not include blank nodes when testing for klasses in keywords --- tests/datadoc/test_keywords.py | 11 +++++++++++ tripper/datadoc/keywords.py | 1 + 2 files changed, 12 insertions(+) diff --git a/tests/datadoc/test_keywords.py b/tests/datadoc/test_keywords.py index 25a2c0e1..e957d6cc 100644 --- a/tests/datadoc/test_keywords.py +++ b/tests/datadoc/test_keywords.py @@ -502,12 +502,23 @@ def test_load2(): "hasChild", "hasName", } + print(kw.classnames()) assert set(kw.classnames()) == { "Person", "Parent", "Child", "Skill", "Resource", + "Age", + "Dauther", + "Father", + "Female", + "Male", + "Mother", + "Name", + "Property", + "Son", + "Weight", } d = kw["hasAge"] assert d.iri == "fam:hasAge" diff --git a/tripper/datadoc/keywords.py b/tripper/datadoc/keywords.py index f8638159..fbe43851 100644 --- a/tripper/datadoc/keywords.py +++ b/tripper/datadoc/keywords.py @@ -1153,6 +1153,7 @@ def _load_rdf( rdf:Property owl:Class } ?s a ?o . 
+ FILTER(isIRI(?s)) } """ iris = [iri[0] for iri in ts.query(query)] From 2d2a930400dd41b8e43dff8aeff78f6408021c02 Mon Sep 17 00:00:00 2001 From: "Francesca L. Bleken" <48128015+francescalb@users.noreply.github.com> Date: Mon, 2 Mar 2026 17:13:52 +0100 Subject: [PATCH 05/17] Apply suggestion from @francescalb --- tests/datadoc/test_keywords.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/datadoc/test_keywords.py b/tests/datadoc/test_keywords.py index e957d6cc..444cb601 100644 --- a/tests/datadoc/test_keywords.py +++ b/tests/datadoc/test_keywords.py @@ -502,7 +502,6 @@ def test_load2(): "hasChild", "hasName", } - print(kw.classnames()) assert set(kw.classnames()) == { "Person", "Parent", From 79b7ca814ddc6744b531f90cd181acf71f243fe4 Mon Sep 17 00:00:00 2001 From: "Francesca.L.Bleken@sintef.no" Date: Tue, 3 Mar 2026 12:39:45 +0100 Subject: [PATCH 06/17] Allow epansion on individuals (not in context) when expanding restrictions --- tripper/datadoc/dataset.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tripper/datadoc/dataset.py b/tripper/datadoc/dataset.py index 34fee424..77b3c3be 100644 --- a/tripper/datadoc/dataset.py +++ b/tripper/datadoc/dataset.py @@ -708,6 +708,7 @@ def addrestriction(source, prop, value): update_restrictions(value, context, restrictions) # Local context + context = get_context(context=context) if "@context" in source: context = context.copy() @@ -717,8 +718,8 @@ def addrestriction(source, prop, value): restrictions = infer_restriction_types(source, context) else: restrictions = { - "*" if ckey == "*" else context.expand(ckey, strict=True): { - context.expand(pkey, strict=True): pval + "*" if ckey == "*" else context.expand(ckey, strict=False): { + context.expand(pkey, strict=False): pval for pkey, pval in cval.items() } for ckey, cval in restrictions.items() @@ -736,6 +737,7 @@ def addrestriction(source, prop, value): # Ensure that source is only of type owl:Class # Move all other types to subClassOf types 
= {context.expand(t): t for t in get(source, "@type")} + if OWL.Class in types: for e, t in types.items(): if e == OWL.Class: From dc1264f0a6d73434ad0b55830ebe527be5ac126e Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Tue, 3 Mar 2026 13:11:08 +0100 Subject: [PATCH 07/17] Added new function: update_context() --- tripper/datadoc/dataset.py | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/tripper/datadoc/dataset.py b/tripper/datadoc/dataset.py index 77b3c3be..da5aaf07 100644 --- a/tripper/datadoc/dataset.py +++ b/tripper/datadoc/dataset.py @@ -309,6 +309,10 @@ def addsuperclasses(d, cls): if not k.startswith("@") and k not in keywords: # pylint: disable=logging-fstring-interpolation logging.info(f"Property not in keywords: {k}") + + if k == "subClassOf": + add(d, "@type", OWL.Class) + if k in ("@context", "@id", "@type"): pass elif k == "@graph": @@ -514,6 +518,35 @@ def _isclass(d, context): ) +def update_context( + source: "Union[dict, list]", + context: "Context", +) -> None: + """Update `context` with information from `source`. + + Currently this only adds classes defined in `source` to `context`. 
+ """ + sources = ( + source + if isinstance(source, list) + else source["@graph"] if "@graph" in source else [source] + ) + prefixes = context.get_prefixes() + for d in sources: + for k, v in d.items(): + if k == "@graph" or isinstance(v, dict): + update_context(v, context) + elif k == "subClassOf": + context.add_context( + { + k: { + "@id": expand_iri(k, prefixes, strict=True), + "@type": OWL.Class, + } + } + ) + + def infer_restriction_types( source: "Union[dict, list]", context: "Optional[Context]" = None, @@ -708,7 +741,6 @@ def addrestriction(source, prop, value): update_restrictions(value, context, restrictions) # Local context - context = get_context(context=context) if "@context" in source: context = context.copy() @@ -737,7 +769,6 @@ def addrestriction(source, prop, value): # Ensure that source is only of type owl:Class # Move all other types to subClassOf types = {context.expand(t): t for t in get(source, "@type")} - if OWL.Class in types: for e, t in types.items(): if e == OWL.Class: From 6138d7b1fe20b2f2b5a06b16a0f5a09724349d56 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Tue, 3 Mar 2026 22:23:15 +0100 Subject: [PATCH 08/17] Updated tests --- tests/datadoc/test_dataset.py | 96 ++++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 36 deletions(-) diff --git a/tests/datadoc/test_dataset.py b/tests/datadoc/test_dataset.py index 5534f5db..19c9b0a3 100644 --- a/tests/datadoc/test_dataset.py +++ b/tests/datadoc/test_dataset.py @@ -1,6 +1,7 @@ """Test the dataset module.""" # pylint: disable=invalid-name,too-many-locals,duplicate-code +# pylint: disable=too-many-lines import pytest @@ -440,7 +441,7 @@ def test_infer_restriction_types(): "@id": "ex:MyDevice", # "@type": "owl:Class", "subClassOf": HUME.Device, - "hasPart": HUME.MeasuringInstrument, + "hasPart": [HUME.MeasuringInstrument, "ex:MyDevice"], }, ], } @@ -565,6 +566,13 @@ def test_update_restrictions(): "@type": HUME.Device, "isDefinedBy": HUME.MeasuringInstrument, }, + { + # An 
individial relating to two classes and an individual. + # Should be converted to an existential restriction. + "@id": "ex:instr3", + "@type": HUME.Device, + "hasPart": [HUME.MeasuringInstrument, "MyDevice", "ex:instr"], + }, { # A class relating to a class. # Should be converted to an existential restriction. @@ -574,54 +582,70 @@ def test_update_restrictions(): "@id": "ex:MyDevice", # "@type": "owl:Class", "subClassOf": HUME.Device, + "label": "MyDevice", "hasPart": HUME.MeasuringInstrument, }, + { + # A class relating to two classes + "@id": "ex:MyDevice2", + "@type": "owl:Class", + "subClassOf": HUME.Device, + "label": "MyDevice2", + "hasPart": [HUME.MeasuringInstrument, "MyDevice"], + }, ], } r6 = deepcopy(d6) update_restrictions(r6, ctx) - assert r6 == { - "@context": { - "MeasuringInstrument": { - "@id": "https://w3id.org/emmo/hume#MeasuringInstrument", - "@type": "owl:Class", - } - }, - "@graph": [ + res6 = {d["@id"]: d for d in r6["@graph"]} + assert res6["ex:instr"] == { + "@id": "ex:instr", + "@type": "https://w3id.org/emmo/hume#Device", + "isDefinedBy": "https://w3id.org/emmo/hume#MeasuringSystem", + } + assert res6["ex:instr2"] == { + "@id": "ex:instr2", + "@type": [ + "https://w3id.org/emmo/hume#Device", { - "@id": "ex:instr", - "@type": "https://w3id.org/emmo/hume#Device", - "isDefinedBy": "https://w3id.org/emmo/hume#MeasuringSystem", + "rdf:type": "owl:Restriction", + "owl:onProperty": ( + "http://www.w3.org/2000/01/rdf-schema#isDefinedBy" + ), + "owl:someValuesFrom": ( + "https://w3id.org/emmo/hume#MeasuringInstrument" + ), }, + ], + } + assert res6["ex:instr3"] == { + "@id": "ex:instr3", + "@type": [ + "https://w3id.org/emmo/hume#Device", { - "@id": "ex:instr2", - "@type": [ - "https://w3id.org/emmo/hume#Device", - { - "rdf:type": "owl:Restriction", - "owl:onProperty": ( - "http://www.w3.org/2000/01/rdf-schema#isDefinedBy" - ), - "owl:someValuesFrom": ( - "https://w3id.org/emmo/hume#MeasuringInstrument" - ), - }, - ], + "rdf:type": 
"owl:Restriction", + "owl:onProperty": ( + "http://www.w3.org/2000/01/rdf-schema#isDefinedBy" + ), + "owl:someValuesFrom": ( + "https://w3id.org/emmo/hume#MeasuringInstrument" + ), }, + ], + } + assert res6["ex:MyDevice"] == { + "@id": "ex:MyDevice", + "subClassOf": [ + "https://w3id.org/emmo/hume#Device", { - "@id": "ex:MyDevice", - "subClassOf": [ - "https://w3id.org/emmo/hume#Device", - { - "rdf:type": "owl:Restriction", - "owl:onProperty": "http://purl.org/dc/terms/hasPart", - "owl:someValuesFrom": ( - "https://w3id.org/emmo/hume#MeasuringInstrument" - ), - }, - ], + "rdf:type": "owl:Restriction", + "owl:onProperty": "http://purl.org/dc/terms/hasPart", + "owl:someValuesFrom": ( + "https://w3id.org/emmo/hume#MeasuringInstrument" + ), }, ], + "label": "MyDevice", } From 89981df30888eb4782f05efea79e253f1343f4d0 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 9 Mar 2026 09:14:48 +0100 Subject: [PATCH 09/17] latest state --- tests/datadoc/test_context.py | 16 ++++++++++ tripper/datadoc/context.py | 24 +++++++++++++++ tripper/datadoc/dataset.py | 8 +++++ tripper/datadoc/errors.py | 4 +++ tripper/datadoc/keywords.py | 58 ++++++++++++++++++++++++++++++++--- tripper/datadoc/utils.py | 2 +- 6 files changed, 107 insertions(+), 5 deletions(-) diff --git a/tests/datadoc/test_context.py b/tests/datadoc/test_context.py index 98dd359e..761b309b 100644 --- a/tests/datadoc/test_context.py +++ b/tests/datadoc/test_context.py @@ -94,6 +94,22 @@ def test_get_prefixes(): assert "mediaType" not in prefixes +def test_get_properties(): + """Test get_prefixes() method.""" + properties = ctx.get_properties() + assert "adms" not in properties + assert properties["mediaType"] == "http://www.w3.org/ns/dcat#mediaType" + assert "Document" not in properties + + +def test_get_classes(): + """Test get_prefixes() method.""" + classes = ctx.get_classes() + assert "adms" not in classes + assert "mediaType" not in classes + assert classes["Document"] == "http://xmlns.com/foaf/0.1/Document" + 
+ def test_sync_prefixes(): """Test sync_prefixes() method.""" from tripper import Triplestore diff --git a/tripper/datadoc/context.py b/tripper/datadoc/context.py index 2411f16d..381b4c75 100644 --- a/tripper/datadoc/context.py +++ b/tripper/datadoc/context.py @@ -12,6 +12,7 @@ from tripper import OWL, RDF, RDFS, Triplestore from tripper.datadoc.errors import InvalidContextError, PrefixMismatchError +from tripper.datadoc.utils import asseq from tripper.errors import NamespaceError, NamespaceWarning from tripper.utils import MATCH_IRI, MATCH_PREFIXED_IRI, openfile, prefix_iri @@ -75,6 +76,7 @@ def get_context( ) if prefixes: context.add_context({k: str(v) for k, v in prefixes.items()}) + return context @@ -205,6 +207,8 @@ def rec(dct): d[k] = rec(v) return d + print("***", rec(context)) + self.ctx = self.ld.process_context(self.ctx, rec(context), options={}) # Clear caches @@ -255,6 +259,26 @@ def get_prefixes(self) -> dict: prefixes[k] = v["@id"] return prefixes + def get_properties(self) -> dict: + """Return a dict mapping classes to IRIs.""" + return { + k: v["@id"] + for k, v in self.ctx["mappings"].items() + if "@id" in v + and v.get("_prefix") is False + and OWL.Class not in asseq(v.get("@type")) + } + + def get_classes(self) -> dict: + """Return a dict mapping classes to IRIs.""" + return { + k: v["@id"] + for k, v in self.ctx["mappings"].items() + if "@id" in v + and v.get("_prefix") is False + and OWL.Class in asseq(v.get("@type")) + } + def sync_prefixes( self, ts: Triplestore, update: "Optional[bool]" = None ) -> None: diff --git a/tripper/datadoc/dataset.py b/tripper/datadoc/dataset.py index da5aaf07..c3a1306c 100644 --- a/tripper/datadoc/dataset.py +++ b/tripper/datadoc/dataset.py @@ -158,6 +158,8 @@ def told( Dict with an updated copy of `descr` as valid JSON-LD. 
""" + # pylint: disable=too-many-statements + single = "@id", "@type", "@graph" multi = "keywordfile", "prefixes", "base" singlerepr = isinstance(descr, list) or any(s in descr for s in single) @@ -174,6 +176,10 @@ def told( ) else: keywords = get_keywords(keywords=keywords) + + if prefixes: + keywords.add(prefixes, redefine="allow") + resources = keywords.data.resources # Whether the context has been copied. Used within addcontext() @@ -421,6 +427,7 @@ def store( context=context, prefixes=prefixes, default_theme=None, + copy=True, # we are calling update_context() below ) doc = told( @@ -430,6 +437,7 @@ def store( context=context, prefixes=prefixes, ) + update_context(doc, context) docs = doc if isinstance(doc, list) else doc.get("@graph", [doc]) for d in docs: diff --git a/tripper/datadoc/errors.py b/tripper/datadoc/errors.py index e14344df..7db0441f 100644 --- a/tripper/datadoc/errors.py +++ b/tripper/datadoc/errors.py @@ -50,6 +50,10 @@ class ParseError(TripperError): """Error when parsing a file.""" +class InconsistentKeywordError(DatadocValueError): + """Inconsistent keyword.""" + + # ========== # Warnings # ========== diff --git a/tripper/datadoc/keywords.py b/tripper/datadoc/keywords.py index fbe43851..6611db78 100644 --- a/tripper/datadoc/keywords.py +++ b/tripper/datadoc/keywords.py @@ -23,6 +23,7 @@ from tripper import DDOC, OWL, RDF, RDFS, XSD, Triplestore from tripper.datadoc.errors import ( DatadocValueError, + InconsistentKeywordError, InvalidDatadocError, InvalidKeywordError, MissingKeyError, @@ -48,6 +49,8 @@ if TYPE_CHECKING: # pragma: no cover from typing import IO, Any, Iterable, List, Optional, Set, Tuple, Union + from tripper.datadoc.context import ContextType + FileLoc = Union[Path, str] KeywordsType = Union["Keywords", dict, IO, Path, str, Sequence] @@ -77,7 +80,7 @@ def get_keywords( keywords: "Optional[KeywordsType]" = None, format: "Optional[str]" = None, theme: "Optional[Union[str, Sequence[str]]]" = "ddoc:datadoc", - yamlfile: 
"Optional[FileLoc]" = None, + context: "Optional[ContextType]" = None, timeout: float = 3, strict: bool = False, redefine: str = "raise", @@ -89,9 +92,7 @@ def get_keywords( format: Format of input if `keywords` refer to a file that can be loaded. theme: IRI of one of more themes to load keywords for. - yamlfile: YAML file with keyword definitions to parse. May also - be an URI in which case it will be accessed via HTTP GET. - Deprecated. Use the `load_yaml()` or `add()` methods instead. + context: Initialise from this Context instance. timeout: Timeout in case `yamlfile` is a URI. strict: Whether to raise an `InvalidKeywordError` exception if `d` contains an unknown key. @@ -106,6 +107,25 @@ def get_keywords( Returns: Keywords instance. """ + # pylint: disable=import-outside-toplevel + from tripper.datadoc.context import get_context + + if keywords: + if isinstance(keywords, Keywords): + kw = keywords + else: + kw = Keywords(theme=theme) + if keywords: + kw.add( + keywords, + format=format, + timeout=timeout, + strict=strict, + redefine=redefine, + ) + elif context: + pass # work-in-progress... + if isinstance(keywords, Keywords): kw = keywords else: @@ -129,6 +149,36 @@ def get_keywords( yamlfile, timeout=timeout, strict=strict, redefine=redefine ) + if context: + # Context is provided + context = get_context(context) + if keywords or yamlfile: + # If keywords or yamlfile are also provided, then we only + # add missing prefixes from context. + for prefix, ns in context.get_prefixes(): + kw.add_prefix(prefix, ns, replace=False) + else: + # If keywords or yamlfile are not provided, overwrite + # prefixes. + # For now we raise an exception for inconsistent terms. 
+ for prefix, ns in context.get_prefixes(): + kw.add_prefix(prefix, ns, replace=True) + d = kw.get_context() + for name, iri in context.get_mappings().items(): + if name in d and ( + (isinstance(d[name], str) and iri != d[name]) + or iri != d[name].get("@id") + ): + # TODO: if this exception become a problem, we + # could remove the inconsistent terms from + # keywords. + # However, this requires implementation of a + # method(s) for removing properties and classes from + # a Keywords object. + raise InconsistentKeywordError( + f"Inconsistent IRI for keyword '{name}' in context " + f"({iri}) and keywords object ({d[name]})." + ) return kw diff --git a/tripper/datadoc/utils.py b/tripper/datadoc/utils.py index fd5933e1..ebc9c896 100644 --- a/tripper/datadoc/utils.py +++ b/tripper/datadoc/utils.py @@ -184,7 +184,7 @@ def get( def asseq(value: "Union[str, Sequence]") -> "Sequence": """Returns a string or sequence as an iterable.""" - return [value] if isinstance(value, str) else value + return [value] if isinstance(value, str) else value if value else [] def iriname(value: str) -> str: From d6615fc7252cf431c2b627710d997b11ed6761ac Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 9 Mar 2026 12:25:08 +0100 Subject: [PATCH 10/17] cleanup --- tests/datadoc/test_dataset.py | 34 ++++++++++++ tripper/datadoc/context.py | 16 ++++-- tripper/datadoc/keywords.py | 102 ++++++++++++++++------------------ 3 files changed, 94 insertions(+), 58 deletions(-) diff --git a/tests/datadoc/test_dataset.py b/tests/datadoc/test_dataset.py index 19c9b0a3..a3d3bbae 100644 --- a/tests/datadoc/test_dataset.py +++ b/tests/datadoc/test_dataset.py @@ -391,6 +391,40 @@ def test_store(): } +def test_update_context(): + """Test update_context().""" + from tripper import HUME + from tripper.datadoc import get_context + from tripper.datadoc.dataset import told, update_context + + sources = { + "@context": { + "MeasuringInstrument": { + "@id": HUME.MeasuringInstrument, + "@type": "owl:Class", + }, + 
}, + "@graph": [ + { + # Not inferred, since hume:MeasuringSystem is not in context + "@id": "ex:instr", + "@type": HUME.Device, + "isDefinedBy": HUME.MeasuringSystem, + }, + { + "@id": "ex:instr2", + "isDefinedBy": HUME.MeasuringInstrument, + }, + { + "@id": "ex:MyDevice", + # "@type": "owl:Class", + "subClassOf": HUME.Device, + "hasPart": [HUME.MeasuringInstrument, "ex:MyDevice"], + }, + ], + } + + def test_infer_restriction_types(): """Test infer_restriction_types().""" from tripper import DCTERMS, HUME, RDFS, Namespace diff --git a/tripper/datadoc/context.py b/tripper/datadoc/context.py index 381b4c75..0e2dd971 100644 --- a/tripper/datadoc/context.py +++ b/tripper/datadoc/context.py @@ -207,8 +207,6 @@ def rec(dct): d[k] = rec(v) return d - print("***", rec(context)) - self.ctx = self.ld.process_context(self.ctx, rec(context), options={}) # Clear caches @@ -260,7 +258,7 @@ def get_prefixes(self) -> dict: return prefixes def get_properties(self) -> dict: - """Return a dict mapping classes to IRIs.""" + """Return a dict mapping property names to IRIs.""" return { k: v["@id"] for k, v in self.ctx["mappings"].items() @@ -269,8 +267,18 @@ def get_properties(self) -> dict: and OWL.Class not in asseq(v.get("@type")) } + def get_object_properties(self) -> dict: + """Return a dict mapping object property names to IRIs.""" + return { + k: v["@id"] + for k, v in self.ctx["mappings"].items() + if "@id" in v + and v.get("_prefix") is False + and v.get("@type") == "@id" + } + def get_classes(self) -> dict: - """Return a dict mapping classes to IRIs.""" + """Return a dict mapping class names to IRIs.""" return { k: v["@id"] for k, v in self.ctx["mappings"].items() diff --git a/tripper/datadoc/keywords.py b/tripper/datadoc/keywords.py index 6611db78..2339d661 100644 --- a/tripper/datadoc/keywords.py +++ b/tripper/datadoc/keywords.py @@ -23,7 +23,6 @@ from tripper import DDOC, OWL, RDF, RDFS, XSD, Triplestore from tripper.datadoc.errors import ( DatadocValueError, - 
InconsistentKeywordError, InvalidDatadocError, InvalidKeywordError, MissingKeyError, @@ -110,6 +109,50 @@ def get_keywords( # pylint: disable=import-outside-toplevel from tripper.datadoc.context import get_context + def from_context(): + """Return a keywords dict from context.""" + warnings.warn( + "Adding keywords from context - information may be lost. " + "Classes are added to the root and properties to 'Resource'." + ) + prefixes = context.get_prefixes() + classes = context.get_classes() + properties = context.get_properties() + d = {} + if prefixes: + d["prefixes"] = prefixes + if classes: + d["resources"] = { + name: {"iri": iri} for name, iri in classes.items() + } + if properties: + props = {} + ctx = context.get_context_dict() + for name, iri in properties.items(): + if ctx[name]["@type"] == "@id": + props[name] = {"iri": iri} + else: + props[name] = { + "iri": iri, + "range": "rdf:Literal", + "datatype": ctx[name]["@type"], + } + if "resources" not in d: + d["resources"] = {} + d["resources"]["Resource"] = { + "iri": "dcat:Resource", + "keywords": props, + } + return d + + if context: + context = get_context(context) + + # If keywords AND context is given, the "redefine" argument + # determine whether the context can overwrite the keywords. + # + # If only context is given, we create a default keywords (from + # theme) and overwrite it with the context. if keywords: if isinstance(keywords, Keywords): kw = keywords @@ -123,62 +166,13 @@ def get_keywords( strict=strict, redefine=redefine, ) - elif context: - pass # work-in-progress... - - if isinstance(keywords, Keywords): - kw = keywords + if context: + kw.add(from_context(), redefine=redefine) else: kw = Keywords(theme=theme) - if keywords: - kw.add( - keywords, - format=format, - timeout=timeout, - strict=strict, - redefine=redefine, - ) - - if yamlfile: - warnings.warn( - "The `yamlfile` argument is deprecated. 
Use the `load_yaml()` or " - "`add()` methods instead.", - DeprecationWarning, - ) - kw.load_yaml( - yamlfile, timeout=timeout, strict=strict, redefine=redefine - ) + if context: + kw.add(from_context(), redefine="allow") - if context: - # Context is provided - context = get_context(context) - if keywords or yamlfile: - # If keywords or yamlfile are also provided, then we only - # add missing prefixes from context. - for prefix, ns in context.get_prefixes(): - kw.add_prefix(prefix, ns, replace=False) - else: - # If keywords or yamlfile are not provided, overwrite - # prefixes. - # For now we raise an exception for inconsistent terms. - for prefix, ns in context.get_prefixes(): - kw.add_prefix(prefix, ns, replace=True) - d = kw.get_context() - for name, iri in context.get_mappings().items(): - if name in d and ( - (isinstance(d[name], str) and iri != d[name]) - or iri != d[name].get("@id") - ): - # TODO: if this exception become a problem, we - # could remove the inconsistent terms from - # keywords. - # However, this requires implementation of a - # method(s) for removing properties and classes from - # a Keywords object. - raise InconsistentKeywordError( - f"Inconsistent IRI for keyword '{name}' in context " - f"({iri}) and keywords object ({d[name]})." - ) return kw From 328861f4ca12324b6d5d58fd816ee2f348b54c12 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 9 Mar 2026 12:49:38 +0100 Subject: [PATCH 11/17] Updated test --- tests/datadoc/test_dataset.py | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/tests/datadoc/test_dataset.py b/tests/datadoc/test_dataset.py index 9ac23da3..26682e7b 100644 --- a/tests/datadoc/test_dataset.py +++ b/tests/datadoc/test_dataset.py @@ -665,29 +665,27 @@ def test_update_restrictions(): } assert res6["ex:instr3"] == { # WRONG! 
Should be converted to restrictions - '@id': 'ex:instr3', - '@type': 'https://w3id.org/emmo/hume#Device', - 'hasPart': [ - 'https://w3id.org/emmo/hume#MeasuringInstrument', - 'MyDevice', - 'ex:instr' - ] + "@id": "ex:instr3", + "@type": "https://w3id.org/emmo/hume#Device", + "hasPart": [ + "https://w3id.org/emmo/hume#MeasuringInstrument", + "MyDevice", + "ex:instr", + ], } assert res6["ex:MyDevice"] == { - '@id': 'ex:MyDevice', - 'subClassOf': [ - 'https://w3id.org/emmo/hume#Device', + "@id": "ex:MyDevice", + "subClassOf": [ + "https://w3id.org/emmo/hume#Device", { - '@type': 'owl:Restriction', - 'owl:onProperty': { - '@id': 'http://purl.org/dc/terms/hasPart' + "@type": "owl:Restriction", + "owl:onProperty": {"@id": "http://purl.org/dc/terms/hasPart"}, + "owl:someValuesFrom": { + "@id": "https://w3id.org/emmo/hume#MeasuringInstrument" }, - 'owl:someValuesFrom': { - '@id': 'https://w3id.org/emmo/hume#MeasuringInstrument' - } - } + }, ], - 'label': 'MyDevice' + "label": "MyDevice", } From 636c4bd65bdb5065881292a64fd1ffc039469a24 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 9 Mar 2026 14:54:26 +0100 Subject: [PATCH 12/17] Fixed expansion of subPropertyOf --- LICENSE | 2 +- tripper/backends/ontopy.py | 2 ++ tripper/datadoc/dataset.py | 3 +-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/LICENSE b/LICENSE index 62826d5b..ed208512 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022-2025 SINTEF +Copyright (c) 2022-2026 SINTEF Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/tripper/backends/ontopy.py b/tripper/backends/ontopy.py index 5cf8d393..351f467e 100644 --- a/tripper/backends/ontopy.py +++ b/tripper/backends/ontopy.py @@ -11,6 +11,8 @@ from tripper.literal import Literal +## + try: from ontopy.ontology import Ontology, _unabbreviate, get_ontology except ImportError as exc: diff --git 
a/tripper/datadoc/dataset.py b/tripper/datadoc/dataset.py index 70e79c73..8042fd55 100644 --- a/tripper/datadoc/dataset.py +++ b/tripper/datadoc/dataset.py @@ -563,7 +563,6 @@ def update_context( if isinstance(source, list) else source["@graph"] if "@graph" in source else [source] ) - prefixes = context.get_prefixes() for d in sources: for k, v in d.items(): if k == "@graph" or isinstance(v, dict): @@ -572,7 +571,7 @@ def update_context( context.add_context( { k: { - "@id": expand_iri(k, prefixes, strict=True), + "@id": context.expand(k, strict=True), "@type": OWL.Class, } } From f2e53becc48e645a35b56c60f2a524aed1f703e7 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 9 Mar 2026 16:40:16 +0100 Subject: [PATCH 13/17] Fixed test_infer_restriction_types() --- tests/datadoc/test_dataset.py | 5 ++++- tripper/datadoc/dataset.py | 9 +++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/datadoc/test_dataset.py b/tests/datadoc/test_dataset.py index 26682e7b..db8c5983 100644 --- a/tests/datadoc/test_dataset.py +++ b/tests/datadoc/test_dataset.py @@ -404,6 +404,9 @@ def test_store(): def test_update_context(): """Test update_context().""" + # WORK-IN-PROGRESS + # pylint: disable=unused-variable,unused-import + from tripper import HUME from tripper.datadoc import get_context from tripper.datadoc.dataset import told, update_context @@ -460,7 +463,7 @@ def test_infer_restriction_types(): "http://example.org#A": { DCTERMS.creator: "some", DCTERMS.hasPart: "value", - DCTERMS.issued: "value", + # DCTERMS.issued: "value", } } diff --git a/tripper/datadoc/dataset.py b/tripper/datadoc/dataset.py index 8042fd55..e7211d25 100644 --- a/tripper/datadoc/dataset.py +++ b/tripper/datadoc/dataset.py @@ -675,12 +675,9 @@ def infer_restriction_types( ): vexp = context.expand(v, strict=False) d[kexp] = "some" if _isclass(vexp, context) else "value" - elif not isinstance(v, str) or ( - context - and kexp in context - and not context.is_annotation_property(kexp) - ): 
- d[kexp] = "value" + elif isinstance(v, list): + if any(_isclass(e, context) for e in v): + d[kexp] = "some" elif _isclass(v, context): d[kexp] = "some" if d: From a4556e211c663951b0f443048629899260c59638 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Tue, 10 Mar 2026 19:04:03 +0100 Subject: [PATCH 14/17] Added missing tests --- tests/datadoc/test_context.py | 17 ++++++++++++++--- tests/datadoc/test_keywords.py | 19 ++++++++++++++++++- tripper/datadoc/keywords.py | 6 +++--- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/tests/datadoc/test_context.py b/tests/datadoc/test_context.py index 761b309b..f19c75cf 100644 --- a/tests/datadoc/test_context.py +++ b/tests/datadoc/test_context.py @@ -95,11 +95,22 @@ def test_get_prefixes(): def test_get_properties(): - """Test get_prefixes() method.""" + """Test get_properties() method.""" properties = ctx.get_properties() - assert "adms" not in properties + assert "adms" not in properties # prefix is not a property + assert "Document" not in properties # class is not a property assert properties["mediaType"] == "http://www.w3.org/ns/dcat#mediaType" - assert "Document" not in properties + + +def test_get_object_properties(): + """Test get_object_properties() method.""" + from tripper import DCTERMS + + objprop = ctx.get_object_properties() + assert "adms" not in objprop # prefix is not an object property + assert "Document" not in objprop # class is not an object property + assert "title" not in objprop # annotation is not an object property + assert objprop["hasPart"] == DCTERMS.hasPart def test_get_classes(): diff --git a/tests/datadoc/test_keywords.py b/tests/datadoc/test_keywords.py index 444cb601..661f396d 100644 --- a/tests/datadoc/test_keywords.py +++ b/tests/datadoc/test_keywords.py @@ -17,7 +17,7 @@ def test_get_keywords(): from dataset_paths import testdir # pylint: disable=import-error from tripper import DDOC - from tripper.datadoc import get_keywords + from tripper.datadoc import get_context, 
get_keywords kw1 = get_keywords() assert kw1.data == keywords.data @@ -65,6 +65,23 @@ def test_get_keywords(): assert kw6.data.theme == ["ddoc:datadoc", "ddoc:prefixes", "ddoc:process"] assert "batchNumber" in kw6 + kw7 = get_keywords(theme=None) + assert len(kw7) == 0 + kw7.add({"resources": {"MyClass": {"iri": "http://example.com/MyClass"}}}) + assert len(kw7) == 0 # no keywords (properties) + + ctx = get_context(default_theme=None) + ctx.add_context({"ex": "http://example.com/"}) + kw8 = get_keywords(kw7, context=ctx, theme=None) + assert len(kw8) == 0 # no keywords (properties) + assert kw8.get_prefixes()["ex"] == "http://example.com/" + assert kw8.classnames() == ["MyClass"] + + kw9 = get_keywords(context=ctx, theme=None) + assert len(kw9) == 0 # no keywords (properties) + assert kw9.get_prefixes()["ex"] == "http://example.com/" + assert kw9.classnames() == [] + def test_iter(): """Test __iter__() method.""" diff --git a/tripper/datadoc/keywords.py b/tripper/datadoc/keywords.py index 2339d661..bdebe2af 100644 --- a/tripper/datadoc/keywords.py +++ b/tripper/datadoc/keywords.py @@ -153,7 +153,7 @@ def from_context(): # # If only context is given, we create a default keywords (from # theme) and overwrite it with the context. 
- if keywords: + if keywords is not None: if isinstance(keywords, Keywords): kw = keywords else: @@ -166,11 +166,11 @@ def from_context(): strict=strict, redefine=redefine, ) - if context: + if context is not None: kw.add(from_context(), redefine=redefine) else: kw = Keywords(theme=theme) - if context: + if context is not None: kw.add(from_context(), redefine="allow") return kw From 5b51fb5db9c6d1f13c872d79da38cea2d595c416 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Tue, 10 Mar 2026 23:23:44 +0100 Subject: [PATCH 15/17] Added more tests and ignored some warnings --- tests/datadoc/test_datadoc_utils.py | 14 ++++++++++ tests/datadoc/test_dataset.py | 31 ++++++++++----------- tests/datadoc/test_keywords.py | 43 ++++++++++++++++++++--------- tests/datadoc/test_tabledoc.py | 4 +++ tests/test_markdown_doctest.py | 3 ++ tripper/datadoc/dataset.py | 41 +++++++++++++++++++++------ tripper/datadoc/keywords.py | 8 +++++- tripper/datadoc/utils.py | 41 ++++++++++++++++++++++++++- 8 files changed, 145 insertions(+), 40 deletions(-) diff --git a/tests/datadoc/test_datadoc_utils.py b/tests/datadoc/test_datadoc_utils.py index 12861320..0d0288c3 100644 --- a/tests/datadoc/test_datadoc_utils.py +++ b/tests/datadoc/test_datadoc_utils.py @@ -112,3 +112,17 @@ def test_iriname(): assert iriname("abc") == "abc" assert iriname("rdf:JSON") == "JSON" assert iriname("https://w3id.org/emmo#Ampere") == "Ampere" + + +def test_getlabel(): + """Test utility function getlabel().""" + from tripper.datadoc.errors import InvalidDatadocError + from tripper.datadoc.utils import getlabel + + assert getlabel({"@id": "ex:A", "prefLabel": "a"}) == "a" + assert getlabel({"@id": "ex:A", "label": "a"}) == "a" + assert getlabel({"@id": "ex:A", "rdfs:label": "a"}) == "a" + assert getlabel({"@id": "ex:A"}, default="a") == "a" + assert getlabel({"@id": "ex:A"}) == "A" + with pytest.raises(InvalidDatadocError): + getlabel({"x": "ex:A"}) diff --git a/tests/datadoc/test_dataset.py 
b/tests/datadoc/test_dataset.py index db8c5983..9290fc4d 100644 --- a/tests/datadoc/test_dataset.py +++ b/tests/datadoc/test_dataset.py @@ -404,39 +404,38 @@ def test_store(): def test_update_context(): """Test update_context().""" - # WORK-IN-PROGRESS - # pylint: disable=unused-variable,unused-import - - from tripper import HUME + from tripper import HUME, OWL, Namespace from tripper.datadoc import get_context - from tripper.datadoc.dataset import told, update_context + from tripper.datadoc.dataset import update_context + EX = Namespace("http://example.com/") sources = { "@context": { - "MeasuringInstrument": { - "@id": HUME.MeasuringInstrument, - "@type": "owl:Class", - }, + "ex": str(EX), + "hume": str(HUME), }, "@graph": [ { - # Not inferred, since hume:MeasuringSystem is not in context "@id": "ex:instr", - "@type": HUME.Device, - "isDefinedBy": HUME.MeasuringSystem, + "@type": "hume:Device", }, { + # Not added to context, since there is no @type "@id": "ex:instr2", - "isDefinedBy": HUME.MeasuringInstrument, }, { "@id": "ex:MyDevice", - # "@type": "owl:Class", - "subClassOf": HUME.Device, - "hasPart": [HUME.MeasuringInstrument, "ex:MyDevice"], + "skos:prefLabel": "MyDevice", + "subClassOf": "hume:Device", }, ], } + context = get_context(default_theme=None) + update_context(sources, context) + c = context.get_context_dict() + assert c["instr"] == {"@id": EX.instr, "@type": HUME.Device} + assert c["MyDevice"] == {"@id": EX.MyDevice, "@type": OWL.Class} + assert c["Device"] == {"@id": HUME.Device, "@type": OWL.Class} def test_infer_restriction_types(): diff --git a/tests/datadoc/test_keywords.py b/tests/datadoc/test_keywords.py index 661f396d..502add7c 100644 --- a/tests/datadoc/test_keywords.py +++ b/tests/datadoc/test_keywords.py @@ -1,11 +1,12 @@ """Test the Keywords class.""" +# pylint: disable=too-many-statements,wrong-import-position + import pytest pytest.importorskip("yaml") pytest.importorskip("pyld") -# pylint: disable=wrong-import-position from 
tripper.datadoc import Keywords # A fixture used by all the tests @@ -14,10 +15,13 @@ def test_get_keywords(): """Test get_keywords() function.""" + import warnings + from dataset_paths import testdir # pylint: disable=import-error - from tripper import DDOC + from tripper import DDOC, OWL, XSD from tripper.datadoc import get_context, get_keywords + from tripper.errors import TripperWarning kw1 = get_keywords() assert kw1.data == keywords.data @@ -68,19 +72,32 @@ def test_get_keywords(): kw7 = get_keywords(theme=None) assert len(kw7) == 0 kw7.add({"resources": {"MyClass": {"iri": "http://example.com/MyClass"}}}) - assert len(kw7) == 0 # no keywords (properties) + assert len(kw7) == 0 # no properties in keywords ctx = get_context(default_theme=None) - ctx.add_context({"ex": "http://example.com/"}) - kw8 = get_keywords(kw7, context=ctx, theme=None) - assert len(kw8) == 0 # no keywords (properties) - assert kw8.get_prefixes()["ex"] == "http://example.com/" - assert kw8.classnames() == ["MyClass"] - - kw9 = get_keywords(context=ctx, theme=None) - assert len(kw9) == 0 # no keywords (properties) - assert kw9.get_prefixes()["ex"] == "http://example.com/" - assert kw9.classnames() == [] + ctx.add_context( + { + "ex": "http://example.com/", + "owl": str(OWL), + "xsd": str(XSD), + "objprop": {"@id": "ex:objprop", "@type": "@id"}, + "dataprop": {"@id": "ex:dataprop", "@type": "xsd:string"}, + "cls": {"@id": "ex:cls", "@type": "owl:Class"}, + } + ) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=TripperWarning) + + kw8 = get_keywords(kw7, context=ctx, theme=None) + assert len(kw8) == 2 # 2 properties in keywords + assert kw8.get_prefixes()["ex"] == "http://example.com/" + assert set(kw8.classnames()) == {"Resource", "MyClass", "cls"} + + kw9 = get_keywords(context=ctx, theme=None) + assert len(kw9) == 2 + assert kw9.get_prefixes()["ex"] == "http://example.com/" + assert set(kw9.classnames()) == {"Resource", "cls"} def test_iter(): diff --git 
a/tests/datadoc/test_tabledoc.py b/tests/datadoc/test_tabledoc.py index 8a494ff7..e2be9fba 100644 --- a/tests/datadoc/test_tabledoc.py +++ b/tests/datadoc/test_tabledoc.py @@ -5,6 +5,9 @@ pytest.importorskip("pyld") +@pytest.mark.filterwarnings( + "ignore:ConjunctiveGraph.*|builtin type SwigPy.*:DeprecationWarning" +) def test_asdicts(): """Test the asdicts() method.""" @@ -113,6 +116,7 @@ def test_fromdicts(): ] +@pytest.mark.filterwarnings("ignore:ConjunctiveGraph.*:DeprecationWarning") def test_csv(): """Test parsing a csv file.""" import io diff --git a/tests/test_markdown_doctest.py b/tests/test_markdown_doctest.py index 9446aeaa..5bac8e06 100644 --- a/tests/test_markdown_doctest.py +++ b/tests/test_markdown_doctest.py @@ -11,6 +11,9 @@ pytest.importorskip("pyld") +@pytest.mark.filterwarnings( + "ignore:ConjunctiveGraph.*|builtin type SwigPy.*:DeprecationWarning" +) @pytest.mark.skipif(sys.version_info < (3, 9), reason="pint needs Python 3.9") def test_markdown_doctest(): """Runs doctest on all markdown files in the docs/ folder.""" diff --git a/tripper/datadoc/dataset.py b/tripper/datadoc/dataset.py index e7211d25..54a0fd71 100644 --- a/tripper/datadoc/dataset.py +++ b/tripper/datadoc/dataset.py @@ -41,6 +41,8 @@ from pathlib import Path from typing import TYPE_CHECKING +from pyld import jsonld + from tripper import ( OWL, RDF, @@ -58,7 +60,8 @@ ValidateError, ) from tripper.datadoc.keywords import Keywords, get_keywords -from tripper.datadoc.utils import add, asseq, get, iriname +from tripper.datadoc.utils import add, asseq, get, getlabel, iriname +from tripper.errors import NamespaceError from tripper.utils import ( AttrDict, as_python, @@ -558,22 +561,42 @@ def update_context( Currently this only adds classes defined in `source` to `context`. 
""" + subclassof = (RDFS.subClassOf, "rdfs:subClassOf", "subClassOf") + + if isinstance(source, dict) and "@context" in source: + context.add_context(source["@context"]) + sources = ( source if isinstance(source, list) else source["@graph"] if "@graph" in source else [source] ) + for d in sources: - for k, v in d.items(): - if k == "@graph" or isinstance(v, dict): - update_context(v, context) - elif k == "subClassOf": + if not isinstance(d, dict): + continue + if "@id" in d: + try: + iri = context.expand(d["@id"], strict=True) + except NamespaceError: + continue + label = getlabel(d) + if "/" in label: + continue # do not add IDs with slash to context + superclasses = [d[s] for s in subclassof if s in d] + if "@type" in d: + try: + context.add_context( + {label: {"@id": iri, "@type": d["@type"]}} + ) + except jsonld.JsonLdError: + continue + elif superclasses: + supercl = context.expand(superclasses[0], strict=True) context.add_context( { - k: { - "@id": context.expand(k, strict=True), - "@type": OWL.Class, - } + label: {"@id": iri, "@type": OWL.Class}, + iriname(supercl): {"@id": supercl, "@type": OWL.Class}, } ) diff --git a/tripper/datadoc/keywords.py b/tripper/datadoc/keywords.py index bdebe2af..1b79fc6d 100644 --- a/tripper/datadoc/keywords.py +++ b/tripper/datadoc/keywords.py @@ -34,6 +34,7 @@ SkipRedefineKeywordWarning, ) from tripper.datadoc.utils import add, asseq, iriname, merge +from tripper.errors import TripperWarning from tripper.utils import ( AttrDict, expand_iri, @@ -113,7 +114,9 @@ def from_context(): """Return a keywords dict from context.""" warnings.warn( "Adding keywords from context - information may be lost. " - "Classes are added to the root and properties to 'Resource'." 
+ "Classes are added to the root and properties to 'Resource'.", + category=TripperWarning, + stacklevel=3, ) prefixes = context.get_prefixes() classes = context.get_classes() @@ -616,6 +619,9 @@ def to_prefixed(x): key = prefix_iri(val["iri"], prefixes) if len(val) > 1 or key not in iridefs: iridefs[key] = val + expkey = expand_iri(val["iri"], prefixes) + if len(val) > 1 or expkey not in iridefs: + iridefs[expkey] = val # Resources for cls, defs in d.get("resources", AttrDict()).items(): diff --git a/tripper/datadoc/utils.py b/tripper/datadoc/utils.py index ebc9c896..01ccefdb 100644 --- a/tripper/datadoc/utils.py +++ b/tripper/datadoc/utils.py @@ -3,6 +3,9 @@ import re from typing import TYPE_CHECKING, Mapping, Sequence +from tripper.datadoc.errors import InvalidDatadocError +from tripper.namespace import RDFS, SKOS + if TYPE_CHECKING: # pragma: no cover from typing import Any, Iterable, Optional, Union @@ -193,7 +196,43 @@ def iriname(value: str) -> str: """ if ":" not in value: return value - m = re.search("[:/#]([a-zA-Z_][a-zA-Z0-9_.+-]*)$", value) + m = re.search("[:/#]([a-zA-Z_][a-zA-Z0-9_./+-]*)$", value) if not m or not m.groups(): raise ValueError(f"Cannot infer name of IRI: {value}") return m.groups()[0] + + +def getlabel(d: dict, default: "Optional[str]" = None) -> str: + """Return label from a JSON-LD dict `d`. + + Any of the following keys in `d` will be interpreted as a label: + - skos:prefLabel + - rdfs:label + - prefLabel + - label + + If `d` has none of the above keys and `default` is not None, + `default` is returned. Otherwise `iriname(d["@id"])` is returned. 
+ + Example: + + >>> getlabel({"@id": "ex:A", "label": "a"}) + 'a' + + """ + labels = ( + SKOS.prefLabel, + "skos:prefLabel", + RDFS.label, + "rdfs:label", + "prefLabel", + "label", + ) + for label in labels: + if label in d: + return d[label] + if default: + return default + if "@id" in d: + return iriname(d["@id"]) + raise InvalidDatadocError(f"Cannot infer label from JSON-LD dict: {d}") From 38c17fd8df6c9b38481f98bcca13d3b6e3bacf9e Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Wed, 11 Mar 2026 19:36:47 +0100 Subject: [PATCH 16/17] Moved configurations for ignoring warnings to pyproject.toml --- pyproject.toml | 3 +++ tests/datadoc/test_datadoc_utils.py | 15 +++++++++++++++ tests/datadoc/test_dataset.py | 11 ++++++++++- tests/datadoc/test_keywords.py | 2 ++ tests/datadoc/test_tabledoc.py | 4 ---- tests/test_markdown_doctest.py | 3 --- tripper/backends/ontopy.py | 2 -- tripper/datadoc/context.py | 1 - tripper/datadoc/dataset.py | 11 ++--------- tripper/datadoc/utils.py | 5 ++++- 10 files changed, 36 insertions(+), 21 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5bbd6fd7..e6812d9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -144,9 +144,12 @@ addopts = """-rs --cov=tripper --cov-report=term \ """ filterwarnings = [ "ignore:.*imp module.*:DeprecationWarning", + "ignore:ConjunctiveGraph.*:DeprecationWarning", # in pyld + "ignore:builtin type SwigPy.*:DeprecationWarning", # in pyld "ignore:::tripper.literal:243", # Ignore warning in doctest ] + [tool.setuptools.package-data] "tripper.context" = ["*.json", "*.yaml"] diff --git a/tests/datadoc/test_datadoc_utils.py b/tests/datadoc/test_datadoc_utils.py index 0d0288c3..edaef562 100644 --- a/tests/datadoc/test_datadoc_utils.py +++ b/tests/datadoc/test_datadoc_utils.py @@ -116,6 +116,7 @@ def test_iriname(): def test_getlabel(): """Test utility function getlabel().""" + from tripper import SKOS from tripper.datadoc.errors import InvalidDatadocError from tripper.datadoc.utils import getlabel @@ 
-124,5 +125,19 @@ def test_getlabel(): assert getlabel({"@id": "ex:A", "rdfs:label": "a"}) == "a" assert getlabel({"@id": "ex:A"}, default="a") == "a" assert getlabel({"@id": "ex:A"}) == "A" + + # Check for precedence of labels + assert ( + getlabel({"@id": "ex:A", "rdfs:label": "a", "prefLabel": "b"}) == "a" + ) + assert ( + getlabel({"@id": "ex:A", "rdfs:label": "a", "skos:prefLabel": "b"}) + == "b" + ) + assert ( + getlabel({"@id": "ex:A", "rdfs:label": "a", SKOS.prefLabel: "b"}) + == "b" + ) + with pytest.raises(InvalidDatadocError): getlabel({"x": "ex:A"}) diff --git a/tests/datadoc/test_dataset.py b/tests/datadoc/test_dataset.py index 9290fc4d..2e7f79da 100644 --- a/tests/datadoc/test_dataset.py +++ b/tests/datadoc/test_dataset.py @@ -416,6 +416,7 @@ def test_update_context(): }, "@graph": [ { + # Instances are not added to context "@id": "ex:instr", "@type": "hume:Device", }, @@ -433,10 +434,15 @@ def test_update_context(): context = get_context(default_theme=None) update_context(sources, context) c = context.get_context_dict() - assert c["instr"] == {"@id": EX.instr, "@type": HUME.Device} + assert "instr" not in c + assert "instr2" not in c + assert "MyDevice" in c assert c["MyDevice"] == {"@id": EX.MyDevice, "@type": OWL.Class} assert c["Device"] == {"@id": HUME.Device, "@type": OWL.Class} + # TODO: add tests for what happens if there is mismatch between + # previously added context and updated_context... 
+ def test_infer_restriction_types(): """Test infer_restriction_types().""" @@ -640,6 +646,9 @@ def test_update_restrictions(): "label": "MyDevice2", "hasPart": [HUME.MeasuringInstrument, "MyDevice"], }, + # TODO: for completeness, add tests for individual + # relating to one individual and individual related to a + # list of individuals ], } r6 = deepcopy(d6) diff --git a/tests/datadoc/test_keywords.py b/tests/datadoc/test_keywords.py index 502add7c..0a276757 100644 --- a/tests/datadoc/test_keywords.py +++ b/tests/datadoc/test_keywords.py @@ -86,6 +86,8 @@ def test_get_keywords(): } ) + # Test `context` argument to get_keywords(). Ignore expected + # warnings about loss of information with warnings.catch_warnings(): warnings.simplefilter("ignore", category=TripperWarning) diff --git a/tests/datadoc/test_tabledoc.py b/tests/datadoc/test_tabledoc.py index e2be9fba..8a494ff7 100644 --- a/tests/datadoc/test_tabledoc.py +++ b/tests/datadoc/test_tabledoc.py @@ -5,9 +5,6 @@ pytest.importorskip("pyld") -@pytest.mark.filterwarnings( - "ignore:ConjunctiveGraph.*|builtin type SwigPy.*:DeprecationWarning" -) def test_asdicts(): """Test the asdicts() method.""" @@ -116,7 +113,6 @@ def test_fromdicts(): ] -@pytest.mark.filterwarnings("ignore:ConjunctiveGraph.*:DeprecationWarning") def test_csv(): """Test parsing a csv file.""" import io diff --git a/tests/test_markdown_doctest.py b/tests/test_markdown_doctest.py index 5bac8e06..9446aeaa 100644 --- a/tests/test_markdown_doctest.py +++ b/tests/test_markdown_doctest.py @@ -11,9 +11,6 @@ pytest.importorskip("pyld") -@pytest.mark.filterwarnings( - "ignore:ConjunctiveGraph.*|builtin type SwigPy.*:DeprecationWarning" -) @pytest.mark.skipif(sys.version_info < (3, 9), reason="pint needs Python 3.9") def test_markdown_doctest(): """Runs doctest on all markdown files in the docs/ folder.""" diff --git a/tripper/backends/ontopy.py b/tripper/backends/ontopy.py index 351f467e..5cf8d393 100644 --- a/tripper/backends/ontopy.py +++ 
b/tripper/backends/ontopy.py @@ -11,8 +11,6 @@ from tripper.literal import Literal -## - try: from ontopy.ontology import Ontology, _unabbreviate, get_ontology except ImportError as exc: diff --git a/tripper/datadoc/context.py b/tripper/datadoc/context.py index 0e2dd971..dfee2a0f 100644 --- a/tripper/datadoc/context.py +++ b/tripper/datadoc/context.py @@ -76,7 +76,6 @@ def get_context( ) if prefixes: context.add_context({k: str(v) for k, v in prefixes.items()}) - return context diff --git a/tripper/datadoc/dataset.py b/tripper/datadoc/dataset.py index 54a0fd71..8fc25d0a 100644 --- a/tripper/datadoc/dataset.py +++ b/tripper/datadoc/dataset.py @@ -41,8 +41,6 @@ from pathlib import Path from typing import TYPE_CHECKING -from pyld import jsonld - from tripper import ( OWL, RDF, @@ -584,13 +582,8 @@ def update_context( if "/" in label: continue  # do not add IDs with slash to context superclasses = [d[s] for s in subclassof if s in d] - if "@type" in d: - try: - context.add_context( - {label: {"@id": iri, "@type": d["@type"]}} - ) - except jsonld.JsonLdError: - continue + if d.get("@type") in (OWL.Class, "owl:Class"): + context.add_context({label: {"@id": iri, "@type": d["@type"]}}) elif superclasses: supercl = context.expand(superclasses[0], strict=True) context.add_context( diff --git a/tripper/datadoc/utils.py b/tripper/datadoc/utils.py index 01ccefdb..f93637af 100644 --- a/tripper/datadoc/utils.py +++ b/tripper/datadoc/utils.py @@ -205,7 +205,8 @@ def iriname(value: str) -> str: def getlabel(d: dict, default: "Optional[str]" = None) -> str: """Return label from a JSON-LD dict `d`. - Any of the following keys in `d` will be interpreted as a label: + Any of the following keys in `d` (listed in the order of + precedence, from high to low) will be interpreted as a label: - skos:prefLabel - rdfs:label - prefLabel @@ -221,6 +222,8 @@ def getlabel(d: dict, default: "Optional[str]" = None) -> str: """ labels = ( + # The order is deliberate.
prefLabel has precedence over label. + # But qualified IRIs have precedence over keywords. SKOS.prefLabel, "skos:prefLabel", RDFS.label, From e307f5c3013e5d2b0776766dd59f83131a1d1ebe Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Wed, 11 Mar 2026 19:59:00 +0100 Subject: [PATCH 17/17] Cleaned up some additional warnings --- tests/datadoc/test_keywords.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/datadoc/test_keywords.py b/tests/datadoc/test_keywords.py index 0a276757..ad554ab8 100644 --- a/tests/datadoc/test_keywords.py +++ b/tests/datadoc/test_keywords.py @@ -191,7 +191,11 @@ def test_load_yaml(): """ from dataset_paths import indir  # pylint: disable=import-error - from tripper.datadoc.errors import ParseError + from tripper.datadoc.errors import ( + ParseError, + RedefineKeywordWarning, + SkipRedefineKeywordWarning, + ) kw = keywords.copy() @@ -231,10 +235,12 @@ # keywords are unchanged by failures # assert kw == keywords - kw.load_yaml(indir / "invalid_keywords9.yaml", redefine="skip") + with pytest.warns(SkipRedefineKeywordWarning): + kw.load_yaml(indir / "invalid_keywords9.yaml", redefine="skip") assert kw["title"].iri == "dcterms:title" - kw.load_yaml(indir / "invalid_keywords9.yaml", redefine="allow") + with pytest.warns(RedefineKeywordWarning): + kw.load_yaml(indir / "invalid_keywords9.yaml", redefine="allow") assert kw["title"].iri == "myonto:a" kw.load_yaml(indir / "valid_keywords.yaml") @@ -521,6 +527,7 @@ def test_load2(): from tripper import Triplestore from tripper.datadoc import get_keywords + from tripper.datadoc.errors import RedefineKeywordWarning from tripper.utils import AttrDict ts = Triplestore("rdflib") @@ -575,7 +582,8 @@ # Create a new Keywords object with # default keywords and load from the triplestore kw2 = get_keywords() - kw2.load_rdf(ts, redefine="allow") + with pytest.warns(RedefineKeywordWarning): + kw2.load_rdf(ts, redefine="allow") # Ensure
that the specified keywords are in kw2 assert not {