Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 39 additions & 3 deletions tests/datadoc/test_tabledoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

pytest.importorskip("pyld")


def test_asdicts():
if 1:
# def test_asdicts():
"""Test the asdicts() method."""

pytest.importorskip("rdflib")

from tripper import IANA, Triplestore
from tripper import IANA, OWL, RDF, RDFS, XSD, Literal, Triplestore
from tripper.datadoc import TableDoc

td = TableDoc(
Expand Down Expand Up @@ -87,6 +87,34 @@ def test_asdicts():
td.save(ts)
print(ts.serialize())

# Test optional arguments `ts` and `strict`
DS = ts.namespaces["ds"]
ONTO = ts.namespaces["onto"]
ts.add_triples(
[
(ONTO.rel, RDF.type, OWL.ObjectProperty),
(ONTO.rel, RDFS.domain, ONTO.T1),
(ONTO.rel, RDFS.range, ONTO.T2),
(ONTO.val, RDF.type, OWL.DatatypeProperty),
(ONTO.val, RDFS.domain, ONTO.T1),
(ONTO.val, RDFS.range, XSD.double),
]
)
td = TableDoc(
header=[
"@id",
"@type",
"title",
"onto:rel",
"onto:val",
],
data=[
("ds:a", "onto:A", "a", "ds:d1", "1.1"),
("ds:b", "onto:B", "b", DS.d2, "2.2"),
],
)
td.save(ts)


def test_fromdicts():
"""Test the fromdicts() method."""
Expand Down Expand Up @@ -268,6 +296,14 @@ def test_csvsniff():
assert dialect.lineterminator == "\n"
assert dialect.quotechar == "'"

lines = [
"1;1.1;2.2",
"3.3;4.4;5.5",
]
dialect = csvsniff("\r\n".join(lines))
assert dialect.delimiter == ";"
assert dialect.lineterminator == "\r\n"


def test_csv_keywords():
"""Test load CSV with custom keywords file."""
Expand Down
8 changes: 7 additions & 1 deletion tripper/datadoc/clitool.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def subcommand_add(ts, args):
redefine=args.redefine,
**kw,
)
td.save(ts)
td.save(ts, strict=args.strict)
else:
raise ValueError(f"Unknown input format: {fmt}")

Expand Down Expand Up @@ -210,6 +210,12 @@ def maincommand(argv=None):
choices=["raise", "allow", "skip"],
help="How to handle redifinition of existing keywords.",
)
# The value of this flag is passed on as `strict` when the table is saved.
# BooleanOptionalAction also provides the negated form `--no-strict`.
parser_add.add_argument(
    "--strict",
    action=argparse.BooleanOptionalAction,
    default=True,
    help=(
        "Whether to raise an error if a cell value cannot be converted "
        "to the expected datatype. With --no-strict, such cells are "
        "skipped with a warning."
    ),
)

# Subcommand: delete
parser_delete = subparsers.add_parser(
Expand Down
4 changes: 4 additions & 0 deletions tripper/datadoc/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,7 @@ class RedefineKeywordWarning(TripperWarning):
class SkipRedefineKeywordWarning(TripperWarning):
    """Warning that redefinition of an existing keyword is skipped.

    Concerns a user-defined keyword definition that tries to redefine
    an existing keyword (by mapping it to a new IRI)."""


class TypeConversionWarning(TripperWarning):
    """A value cannot be converted to the given datatype.

    Used, for example, when a table cell is skipped in non-strict mode
    because its content does not match the expected datatype."""
2 changes: 2 additions & 0 deletions tripper/datadoc/keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -1148,6 +1148,8 @@ def _load_rdf(
for ref in ("domain", "range"):
if ref in d:
for domain in asseq(d[ref]):
if isinstance(domain, dict): # skip blank nodes
continue
expanded = expand_iri(domain, prefixes)
if expanded.startswith(str(XSD)):
continue
Expand Down
74 changes: 61 additions & 13 deletions tripper/datadoc/tabledoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@

import csv
import re
import warnings
from pathlib import Path
from typing import TYPE_CHECKING

from tripper import Triplestore
from tripper.datadoc.context import get_context
from tripper.datadoc.dataset import store, told
from tripper.datadoc.dataset import acquire, store, told
from tripper.datadoc.errors import TypeConversionWarning
from tripper.datadoc.keywords import get_keywords
from tripper.datadoc.utils import addnested, stripnested
from tripper.literal import Literal
from tripper.utils import AttrDict, openfile
from tripper.utils import AttrDict, is_curie, is_uri, openfile

if TYPE_CHECKING: # pragma: no cover
from typing import Iterable, List, Optional, Protocol, Sequence, Union
Expand Down Expand Up @@ -96,33 +98,79 @@ def __init__(
)
self.strip = strip

def save(self, ts: Triplestore, strict: bool = True) -> None:
    """Save tabular datadocumentation to triplestore.

    Arguments:
        ts: Triplestore to save the documentation to.  Its namespaces
            are added to the context of this table and the prefixes
            of the context are bound in `ts`.
        strict: If false, do not raise an exception if a cell
            value cannot be converted to the expected datatype.
    """
    # Make the namespaces known to the triplestore available in the
    # context used for this table
    self.context.add_context(
        {prefix: str(ns) for prefix, ns in ts.namespaces.items()}
    )

    # Bind all prefixes of the (now extended) context in the triplestore
    for prefix, ns in self.context.get_prefixes().items():
        ts.bind(prefix, ns)

    # Store the table exactly once; `ts` and `strict` are forwarded so
    # that asdicts() can look up header names in the triplestore and
    # handle cells that cannot be converted
    store(
        ts,
        self.asdicts(ts=ts, strict=strict),
        type=self.type,
        context=self.context,
    )

def asdicts(
self, ts: "Optional[Triplestore]" = None, strict: bool = True
) -> "List[dict]":
"""Return the table as a list of dicts.

def asdicts(self) -> "List[dict]":
"""Return the table as a list of dicts."""
Arguments:
ts: Optional triplestore to look up header names in if they
are not found in the json-ld context.
strict: Whether to raise an error if a cell contain data that
cannot be converted to expected datatype.

"""
# pylint: disable=too-many-nested-blocks
results = []
for row in self.data:
d = AttrDict()
print()
for i, colname in enumerate(self.header):
cell = row[i].strip() if row[i] and self.strip else row[i]
print("* colname:", colname)
print(" cell:", repr(cell))
if cell:

# Convert cell value to correct Python type
if not colname.startswith("@"):
leafname = colname.split(".")[-1]
df = self.context.getdef(leafname.split("[")[0])
if "@type" in df and df["@type"] != "@id":
cell = Literal(cell, datatype=df["@type"]).value

leafname = colname.split(".")[-1].split("[")[0]
dt = None
if leafname in self.context:
df = self.context.getdef(leafname)
t = df.get("@type")
dt = t if t != "@id" else None
elif is_curie(leafname) or is_uri(leafname):
if ts:
df = acquire(ts, leafname)
dt = df.get("range")
# TODO: if acquire() fails, check if `leafname`
# is a resolvable URI that returns a turtle file.
# If so, extract the definition of `leafname` from
# this file.
print(" dt: ", dt)
if dt:
try:
cell = Literal(cell, datatype=dt).value
except ValueError:
if strict:
raise
warnings.warn(
f"Skipping '{cell}' in column '{colname}' "
"since it cannot be converted to datatype "
f"'{dt}'",
TypeConversionWarning,
)
continue
print(" val: ", repr(cell))
addnested(
d, colname.strip() if self.strip else colname, cell
)
Expand Down
Loading